claude-flow-novice 2.5.2 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/adaptive-context/cfn-v3-reflection.json +21 -0
- package/.claude/agents/AGENT_LIFECYCLE.md +495 -0
- package/.claude/agents/CLAUDE.md +1002 -1233
- package/.claude/agents/accessibility-advocate.md +457 -0
- package/.claude/agents/agent-principles/README.md +226 -226
- package/.claude/agents/agent-principles/agent-type-guidelines.md +10 -0
- package/.claude/agents/agent-principles/format-selection.md +10 -0
- package/.claude/agents/agent-principles/phase4-template-optimization.md +502 -494
- package/.claude/agents/agent-principles/prompt-engineering.md +8 -0
- package/.claude/agents/agent-principles/quality-metrics.md +8 -0
- package/.claude/agents/analysis/code-analyzer.md +7 -17
- package/.claude/agents/analysis/code-review/analyze-code-quality.md +2 -104
- package/.claude/agents/analysis/perf-analyzer.md +2 -196
- package/.claude/agents/context/context-curator.md +78 -84
- package/.claude/agents/context/context-reflector.md +27 -81
- package/.claude/agents/coordinators/README.md +42 -0
- package/.claude/agents/coordinators/cfn-v3-coordinator.md +440 -0
- package/.claude/agents/{product-owner-team → coordinators}/cto-agent.md +154 -187
- package/.claude/agents/coordinators/multi-sprint-coordinator.md +50 -0
- package/.claude/agents/{product-owner-team → coordinators}/product-owner-agent.md +6 -39
- package/.claude/agents/{cfn-loop → coordinators}/product-owner.md +72 -17
- package/.claude/agents/core-agents/reviewer.md +114 -135
- package/.claude/agents/custom/agent-builder.md +637 -0
- package/.claude/agents/developers/README.md +69 -0
- package/.claude/agents/developers/backend-dev.md +77 -0
- package/.claude/agents/{core-agents → developers}/coder.md +131 -26
- package/.claude/agents/developers/react-frontend-engineer.md +121 -0
- package/.claude/agents/{frontend → developers}/state-architect.md +1 -0
- package/.claude/agents/{frontend → developers}/ui-designer.md +1 -0
- package/.claude/agents/development/backend/dev-backend-api.md +0 -29
- package/.claude/agents/development/npm-package-specialist.md +355 -347
- package/.claude/agents/documentation/api-docs/docs-api-openapi.md +8 -0
- package/.claude/agents/documentation/api-docs.md +8 -0
- package/.claude/agents/github/github-commit-agent.md +125 -117
- package/.claude/agents/goal/goal-planner.md +8 -0
- package/.claude/agents/infrastructure/README.md +100 -0
- package/.claude/agents/{specialized → infrastructure}/devops-engineer.md +131 -150
- package/.claude/agents/planners/README.md +94 -0
- package/.claude/agents/{core-agents → planners}/analyst.md +1 -22
- package/.claude/agents/{planning-team → planners}/api-designer-persona.md +8 -0
- package/.claude/agents/{core-agents → planners}/architect.md +7 -20
- package/.claude/agents/{core-agents → planners}/planner.md +0 -21
- package/.claude/agents/{planning-team → planners}/security-architect-persona.md +8 -28
- package/.claude/agents/{planning-team → planners}/system-architect-persona.md +6 -38
- package/.claude/agents/{architecture → planners}/system-architect.md +12 -17
- package/.claude/agents/product-owner-team/accessibility-advocate-persona.md +132 -161
- package/.claude/agents/product-owner-team/power-user-persona.md +149 -182
- package/.claude/agents/retrospective-analyst.md +84 -0
- package/.claude/agents/reviewers/README.md +58 -0
- package/.claude/agents/{analysis → reviewers}/code-quality-validator.md +8 -17
- package/.claude/agents/reviewers/reviewer.md +181 -0
- package/.claude/agents/sparc/architecture.md +6 -25
- package/.claude/agents/sparc/pseudocode.md +6 -0
- package/.claude/agents/sparc/refinement.md +6 -0
- package/.claude/agents/sparc/specification.md +1 -0
- package/.claude/agents/specialists/README.md +60 -0
- package/.claude/agents/{core-agents → specialists}/base-template-generator.md +8 -21
- package/.claude/agents/{specialized → specialists}/cli-agent-optimizer.md +1 -1
- package/.claude/agents/{specialized → specialists}/code-booster.md +1 -0
- package/.claude/agents/{consensus → specialists}/consensus-builder.md +1 -17
- package/.claude/agents/{specialized/mobile → specialists}/mobile-dev.md +0 -20
- package/.claude/agents/{core-agents → specialists}/performance-benchmarker.md +134 -148
- package/.claude/agents/{specialized → specialists}/rust-developer.md +1 -20
- package/.claude/agents/{specialized → specialists}/rust-enterprise-developer.md +1 -20
- package/.claude/agents/{specialized → specialists}/rust-mvp-developer.md +1 -20
- package/.claude/agents/{core-agents → specialists}/security-manager.md +68 -88
- package/.claude/agents/{security → specialists}/security-specialist-existing.md +6 -57
- package/.claude/agents/{security → specialists}/security-specialist.md +6 -30
- package/.claude/agents/{specialized/mobile → specialists}/spec-mobile-react-native.md +2 -21
- package/.claude/agents/testers/README.md +94 -0
- package/.claude/agents/{testing → testers}/e2e/playwright-agent.md +1 -20
- package/.claude/agents/{testing → testers}/interaction-tester.md +1 -20
- package/.claude/agents/{testing → testers}/playwright-tester.md +1 -1
- package/.claude/agents/testers/tester.md +139 -0
- package/.claude/agents/testers/unit/tdd-london-swarm.md +49 -0
- package/.claude/agents/testers/validation/production-validator.md +33 -0
- package/.claude/agents-ignore/cfn-loop-coordinator.md +157 -0
- package/.claude/agents-ignore/cfn-loop-coordinator.md.backup +156 -0
- package/.claude/agents-ignore/coordinator.md.backup +182 -0
- package/.claude/agents-ignore/cost-savings-cfn-loop-coordinator.md +760 -0
- package/.claude/agents-ignore/cost-savings-coordinator.md +173 -0
- package/.claude/artifacts/ace-reflections/REFLECT-001-summary.json +39 -0
- package/.claude/artifacts/ace-reflections/sprint-7_$(date -u +/"%Y%m%d_%H%M%S/").json" +47 -0
- package/.claude/artifacts/reflection-merge-logs/cli-agent-spawning-v2.5.2-merge-report.md +61 -0
- package/.claude/commands/CFN_COORDINATOR_PARAMETERS.md +10 -10
- package/.claude/commands/cfn-loop-epic.md +43 -19
- package/.claude/commands/cfn-loop-single.md +3 -3
- package/.claude/commands/cfn-loop-sprints.md +1 -1
- package/.claude/commands/cfn-loop.md +45 -32
- package/.claude/commands/cfn-mode.md +20 -0
- package/.claude/commands/custom-routing-activate.md +37 -123
- package/.claude/commands/custom-routing-deactivate.md +27 -124
- package/.claude/commands/switch-api.md +41 -16
- package/.claude/commands/write-plan.md +104 -0
- package/.claude/data/cfn-loop.db +0 -0
- package/.claude/data/cfn_loop_logs.db +0 -0
- package/.claude/hooks/BACKUP_USAGE.md +243 -0
- package/.claude/hooks/post-edit-cfn-retrospective.sh +79 -0
- package/.claude/hooks/post-edit.sh +21 -0
- package/.claude/hooks/pre-edit-backup.sh +71 -0
- package/.claude/hooks/restore-from-backup.sh +37 -0
- package/.claude/prompts/cfn-loop-context.md +115 -0
- package/.claude/prompts/loop-specific/loop2.md +50 -0
- package/.claude/prompts/loop-specific/loop3.md +43 -0
- package/.claude/prompts/loop-specific/loop4.md +54 -0
- package/.claude/root-claude-distribute/CLAUDE.md +76 -2
- package/.claude/skills/ace-system/sprint-7-lessons.json +46 -0
- package/.claude/skills/ace-system/store-reflection.sh +33 -136
- package/.claude/skills/agent-discovery/SKILL.md +40 -0
- package/.claude/skills/agent-discovery/agents-registry-clean.json +0 -0
- package/.claude/skills/agent-discovery/agents-registry-fixed.json +19 -0
- package/.claude/skills/agent-discovery/agents-registry.json +718 -0
- package/.claude/skills/agent-discovery/discover-agents.py +175 -0
- package/.claude/skills/agent-discovery/discover-agents.sh +87 -0
- package/.claude/skills/agent-discovery/invoke-registry.sh +11 -0
- package/.claude/skills/agent-discovery/temp_script.py +0 -0
- package/.claude/skills/agent-execution/execute-agent.sh +126 -0
- package/.claude/skills/agent-output-processing/SKILL.md +359 -0
- package/.claude/skills/agent-selector/SKILL.md +90 -0
- package/.claude/skills/agent-selector/select-agents.sh +96 -0
- package/.claude/skills/agent-spawning/agent-selection-guide.md +1 -1
- package/.claude/skills/agent-swap/SKILL.md +36 -0
- package/.claude/skills/agent-swap/recommend-swap.sh +60 -0
- package/.claude/skills/api-validation/test-endpoints.sh +54 -0
- package/.claude/skills/automatic-memory-persistence/SKILL.md +73 -0
- package/.claude/skills/automatic-memory-persistence/persist-agent-output.sh +49 -0
- package/.claude/skills/automatic-memory-persistence/query-agent-history.sh +35 -0
- package/.claude/skills/automatic-memory-persistence/test-memory-persistence.sh +235 -0
- package/.claude/skills/cfn-loop-orchestration/README.md +41 -0
- package/.claude/skills/cfn-loop-orchestration/SKILL.md +299 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/auto-tune-timeouts.sh +228 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/consensus.sh +84 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/deliverable-verifier.sh +71 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/gate-check.sh +90 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/iteration-manager.sh +87 -0
- package/.claude/skills/cfn-loop-orchestration/helpers/timeout-calculator.sh +51 -0
- package/.claude/skills/cfn-loop-orchestration/inject-loop-context.sh +41 -0
- package/.claude/skills/cfn-loop-orchestration/monitor-execution.sh +156 -0
- package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +840 -0
- package/.claude/skills/cfn-loop-orchestration/security_utils.sh +99 -0
- package/.claude/skills/cfn-loop-orchestration/test-cfn-orchestration.sh +281 -0
- package/.claude/skills/cfn-loop-orchestration/test-edge-cases.sh +188 -0
- package/.claude/skills/cfn-loop-validation/SKILL.md +307 -217
- package/.claude/skills/complexity-estimator/SKILL.md +96 -0
- package/.claude/skills/complexity-estimator/estimate-complexity.sh +144 -0
- package/.claude/skills/context-pruner/SKILL.md +75 -0
- package/.claude/skills/context-pruner/prune-context.sh +73 -0
- package/.claude/skills/defense-in-depth/SKILL.md +133 -0
- package/.claude/skills/dependency-extractor/SKILL.md +35 -0
- package/.claude/skills/dependency-extractor/extract-dependencies.sh +66 -0
- package/.claude/skills/epic-decomposer/SKILL.md +44 -0
- package/.claude/skills/epic-decomposer/decompose-epic.sh +104 -0
- package/.claude/skills/improvement-recommender/SKILL.md +33 -0
- package/.claude/skills/improvement-recommender/recommend-improvements.sh +92 -0
- package/.claude/skills/intervention-detector/SKILL.md +39 -0
- package/.claude/skills/intervention-detector/detect-intervention.sh +111 -0
- package/.claude/skills/intervention-orchestrator/SKILL.md +43 -0
- package/.claude/skills/intervention-orchestrator/execute-intervention.sh +59 -0
- package/.claude/skills/loop2-output-processing/SKILL.md +163 -0
- package/.claude/skills/loop2-output-processing/execute-and-extract.sh +77 -0
- package/.claude/skills/loop2-output-processing/execute-and-extract.sh.backup +36 -0
- package/.claude/skills/loop2-output-processing/parse-feedback.sh +147 -0
- package/.claude/skills/loop2-output-processing/process-validator-output.sh +275 -0
- package/.claude/skills/loop2-output-processing/test-bug27-fix.sh +200 -0
- package/.claude/skills/loop2-output-processing/test-loop2-processing.sh +113 -0
- package/.claude/skills/loop3-output-processing/AGENT_COMPLETION_PROTOCOL.md +206 -0
- package/.claude/skills/loop3-output-processing/SKILL.md +421 -0
- package/.claude/skills/loop3-output-processing/calculate-confidence.sh +28 -0
- package/.claude/skills/loop3-output-processing/execute-and-extract.sh +85 -0
- package/.claude/skills/loop3-output-processing/parse-confidence.sh +31 -0
- package/.claude/skills/loop3-output-processing/test-agent-timeout.sh +327 -0
- package/.claude/skills/loop3-output-processing/test-loop3-processing.sh +155 -0
- package/.claude/skills/loop3-output-processing/verify-deliverables.sh +42 -0
- package/.claude/skills/pattern-extraction/SKILL.md +30 -0
- package/.claude/skills/pattern-extraction/extract-patterns.sh +80 -0
- package/.claude/skills/playbook/SKILL.md +113 -0
- package/.claude/skills/playbook/init-playbook.sh +54 -0
- package/.claude/skills/playbook/playbook.db +0 -0
- package/.claude/skills/playbook/query-playbook.sh +79 -0
- package/.claude/skills/playbook/update-playbook.sh +69 -0
- package/.claude/skills/playbook-auto-update/SKILL.md +29 -0
- package/.claude/skills/playbook-auto-update/auto-update-playbook.sh +86 -0
- package/.claude/skills/product-owner-decision/SKILL.md +332 -0
- package/.claude/skills/product-owner-decision/execute-decision.sh +176 -0
- package/.claude/skills/product-owner-decision/parse-decision.sh +66 -0
- package/.claude/skills/product-owner-decision/validate-deliverables.sh +82 -0
- package/.claude/skills/redis-coordination/AGENT_LOGGING.md +280 -0
- package/.claude/skills/redis-coordination/LOGGING.md +260 -0
- package/.claude/skills/redis-coordination/README.md +30 -29
- package/.claude/skills/redis-coordination/SKILL.md +685 -83
- package/.claude/skills/redis-coordination/agent-log.sh +124 -0
- package/.claude/skills/redis-coordination/analyze-task-complexity.sh +277 -0
- package/.claude/skills/redis-coordination/cfn-loop-exec.sh +468 -0
- package/.claude/skills/redis-coordination/collect-confidence-scores.sh +179 -0
- package/.claude/skills/redis-coordination/collect-results.sh +75 -0
- package/.claude/skills/redis-coordination/data/cfn-loop.db +0 -0
- package/.claude/skills/redis-coordination/demos/test-iteration-feedback.sh +320 -0
- package/.claude/skills/redis-coordination/{test-orchestrator.sh → demos/test-orchestrator.sh} +25 -0
- package/.claude/skills/redis-coordination/execute-product-owner-decision.sh +258 -0
- package/.claude/skills/redis-coordination/get-agent-timeout.sh +176 -176
- package/.claude/skills/redis-coordination/init-swarm.sh +6 -1
- package/.claude/skills/redis-coordination/invoke-waiting-mode.sh +106 -183
- package/.claude/skills/redis-coordination/invoke-waiting-mode.sh.backup-p7 +423 -0
- package/.claude/skills/redis-coordination/log-event.sh +109 -0
- package/.claude/skills/redis-coordination/monitor-cfn-violations.sh +391 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop-v3.sh +141 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh +31 -933
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup +38 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup-1761167675 +1672 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup-p5 +1604 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup-phase1 +1550 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup-phase2 +1621 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.backup-phase3 +1621 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.broken +1627 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.corrupted +80 -0
- package/.claude/skills/redis-coordination/orchestrate-cfn-loop.sh.deprecated +1864 -0
- package/.claude/skills/redis-coordination/query-logs.sh +103 -0
- package/.claude/skills/redis-coordination/retrieve-context.sh +58 -0
- package/.claude/skills/redis-coordination/select-specialist-agent.sh +371 -0
- package/.claude/skills/redis-coordination/semantic-match-tfidf.py +252 -0
- package/.claude/skills/redis-coordination/send-heartbeat.sh +164 -72
- package/.claude/skills/redis-coordination/signal.sh +38 -0
- package/.claude/skills/redis-coordination/store-context.sh +86 -0
- package/.claude/skills/redis-coordination/store-epic-context.sh +123 -0
- package/.claude/skills/redis-coordination/test-context-injection.sh +354 -0
- package/.claude/skills/redis-coordination/test-timeout-enforcement.sh +513 -0
- package/.claude/skills/redis-coordination/tests/convert-line-endings.sh +15 -0
- package/.claude/skills/redis-coordination/tests/dlq-functionality-test.sh +101 -101
- package/.claude/skills/redis-coordination/tests/edge-cases-test.sh +98 -98
- package/.claude/skills/redis-coordination/tests/integration-test.sh +169 -169
- package/.claude/skills/redis-coordination/tests/retry-mechanism-test.sh +81 -81
- package/.claude/skills/redis-coordination/tests/run-test-suite.sh +91 -91
- package/.claude/skills/redis-coordination/tests/run-tests.sh +4 -0
- package/.claude/skills/redis-coordination/tests/test-primitives.sh +166 -0
- package/.claude/skills/redis-coordination/tests/test-utils.sh +53 -121
- package/.claude/skills/redis-coordination/tests/test_coordination_primitives.sh.deprecated +20 -0
- package/.claude/skills/redis-coordination/tests/test_utils.sh +49 -0
- package/.claude/skills/redis-coordination/v2_modularization/core_orchestration.sh +76 -0
- package/.claude/skills/redis-coordination/validate-parameters.sh +492 -0
- package/.claude/skills/retrospective-report/SKILL.md +31 -0
- package/.claude/skills/retrospective-report/generate-report.sh +101 -0
- package/.claude/skills/run-all-skill-tests.sh +124 -0
- package/.claude/skills/scope-simplifier/SKILL.md +37 -0
- package/.claude/skills/scope-simplifier/simplify-scope.sh +68 -0
- package/.claude/skills/simplified-agent-lifecycle/COST_ANALYSIS.md +49 -0
- package/.claude/skills/simplified-agent-lifecycle/DESIGN.md +98 -0
- package/.claude/skills/simplified-agent-lifecycle/MIGRATION_PLAN.md +74 -0
- package/.claude/skills/skill-builder/SKILL.md +910 -0
- package/.claude/skills/specialist-injection/SKILL.md +41 -0
- package/.claude/skills/specialist-injection/recommend-specialist.sh +57 -0
- package/.claude/skills/sprint-execution/SKILL.md +27 -0
- package/.claude/skills/sprint-execution/execute-sprint-task.sh +59 -0
- package/.claude/skills/sprint-execution/execute-sprint.sh +65 -0
- package/.claude/skills/sprint-planner/SKILL.md +37 -0
- package/.claude/skills/sprint-planner/plan-sprint.sh +85 -0
- package/.claude/skills/standardized-error-handling/SKILL.md +56 -0
- package/.claude/skills/standardized-error-handling/capture-agent-error.sh +87 -0
- package/.claude/skills/standardized-error-handling/test-error-handling.sh +166 -0
- package/.claude/skills/task-classifier/SKILL.md +94 -0
- package/.claude/skills/task-classifier/classify-task.sh +115 -0
- package/.claude/skills/validation-templates/SKILL.md +47 -0
- package/.claude/skills/validation-templates/content.json +38 -0
- package/.claude/skills/validation-templates/data.json +38 -0
- package/.claude/skills/validation-templates/design.json +38 -0
- package/.claude/skills/validation-templates/infrastructure.json +38 -0
- package/.claude/skills/validation-templates/research.json +38 -0
- package/.claude/skills/validation-templates/software.json +38 -0
- package/.claude/skills/webapp-testing/README.md +142 -0
- package/.claude/skills/webapp-testing/SCREENSHOT_NAMING_CONVENTION.md +547 -0
- package/.claude/skills/webapp-testing/SKILL.md +877 -0
- package/.claude/skills/webapp-testing/capture-screenshot.sh +238 -0
- package/.claude/skills/webapp-testing/cfn-loop-integration.sh +265 -0
- package/.claude/skills/webapp-testing/compare-screenshots.sh +199 -0
- package/.claude/skills/webapp-testing/init-storage.sh +150 -0
- package/.claude/skills/webapp-testing/set-baseline.sh +196 -0
- package/.claude/skills/webapp-testing/test-webapp-testing.sh +233 -0
- package/.claude/spawn-pattern-examples.md +3 -3
- package/CLAUDE.md +364 -16
- package/README.md +598 -251
- package/dist/agents/agent-loader.js +146 -165
- package/dist/agents/agent-loader.js.map +1 -1
- package/dist/cli/agent-command.js +153 -0
- package/dist/cli/agent-command.js.map +1 -0
- package/dist/cli/agent-definition-parser.js +183 -0
- package/dist/cli/agent-definition-parser.js.map +1 -0
- package/dist/cli/agent-executor.js +310 -0
- package/dist/cli/agent-executor.js.map +1 -0
- package/dist/cli/agent-prompt-builder.js +268 -0
- package/dist/cli/agent-prompt-builder.js.map +1 -0
- package/dist/cli/agent-spawn.js +56 -2
- package/dist/cli/agent-spawn.js.map +1 -1
- package/dist/cli/anthropic-client.js +421 -0
- package/dist/cli/anthropic-client.js.map +1 -0
- package/dist/cli/cfn-context.js +150 -0
- package/dist/cli/cfn-context.js.map +1 -1
- package/dist/cli/cfn-fork.js +159 -0
- package/dist/cli/cfn-fork.js.map +1 -0
- package/dist/cli/cli-agent-context.js +358 -0
- package/dist/cli/cli-agent-context.js.map +1 -0
- package/dist/cli/config-manager.js +109 -91
- package/dist/cli/config-manager.js.map +1 -1
- package/dist/cli/conversation-fork.js +201 -0
- package/dist/cli/conversation-fork.js.map +1 -0
- package/dist/cli/index.js +119 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/iteration-history.js +188 -0
- package/dist/cli/iteration-history.js.map +1 -0
- package/dist/cli/tool-definitions.js +263 -0
- package/dist/cli/tool-definitions.js.map +1 -0
- package/dist/cli/tool-executor.js +247 -0
- package/dist/cli/tool-executor.js.map +1 -0
- package/dist/hello.js +8 -0
- package/dist/hello.js.map +1 -0
- package/package.json +16 -6
- package/scripts/README.md +68 -0
- package/scripts/cfn-intervention-example.sh +21 -0
- package/scripts/migrate-test-infrastructure.sh +40 -0
- package/scripts/switch-api.sh +233 -0
- package/scripts/validate-test-migration.sh +49 -0
- package/scripts/verify-no-secrets.sh +55 -0
- package/.claude/agents/architecture/system-architect.md.backup +0 -603
- package/.claude/agents/code-booster.md +0 -131
- package/.claude/agents/consensus/performance-benchmarker.md +0 -101
- package/.claude/agents/consensus/security-manager.md +0 -107
- package/.claude/agents/context-curator.md +0 -167
- package/.claude/agents/context-reflector.md +0 -65
- package/.claude/agents/core-agents/cfn-loop-coordinator.md +0 -134
- package/.claude/agents/core-agents/code-quality-validator.md +0 -149
- package/.claude/agents/core-agents/context-curator.md +0 -452
- package/.claude/agents/core-agents/context-reflector.md +0 -273
- package/.claude/agents/core-agents/cost-savings-cfn-loop-coordinator.md +0 -190
- package/.claude/agents/core-agents/tester.md +0 -170
- package/.claude/agents/development/backend-dev.md +0 -165
- package/.claude/agents/devops/devops-engineer.md +0 -148
- package/.claude/agents/frontend/interaction-tester.md +0 -139
- package/.claude/agents/frontend/react-frontend-engineer.md +0 -9
- package/.claude/agents/personas/accessibility-advocate-persona.md +0 -107
- package/.claude/agents/testing/production-validator.md +0 -179
- package/.claude/agents/testing/tdd-london-swarm.md +0 -209
- package/.claude/agents/testing/unit/tdd-london-swarm.md +0 -43
- package/.claude/agents/testing/validation/production-validator.md +0 -43
- package/dist/coordination/fleet-manager.test.js +0 -141
- package/dist/coordination/fleet-manager.test.js.map +0 -1
- package/dist/middleware/transparency-middleware.test.js +0 -184
- package/dist/middleware/transparency-middleware.test.js.map +0 -1
- /package/.claude/agents/{core-agents → developers}/researcher.md +0 -0
- /package/.claude/agents/{consensus → specialists}/crdt-synchronizer.md +0 -0
- /package/.claude/agents/{consensus → specialists}/quorum-manager.md +0 -0
- /package/.claude/agents/{consensus → specialists}/raft-manager.md +0 -0
- /package/.claude/{agents/core-agents → agents-ignore}/coordinator.md +0 -0
- /package/.claude/{agents/core-agents/cost-savings-coordinator.md → agents-ignore/cost-savings-coordinator.md.backup} +0 -0
- /package/.claude/skills/redis-coordination/{phase4-wake-queue-test-report.md → demos/phase4-wake-queue-test-report.md} +0 -0
- /package/.claude/skills/redis-coordination/{test-bzpopmin-fix.sh → demos/test-bzpopmin-fix.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-cancel-swarm.sh → demos/test-cancel-swarm.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-dlq.sh → demos/test-dlq.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-priority-wake-phase4-unix.sh → demos/test-priority-wake-phase4-unix.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-priority-wake-phase4.sh → demos/test-priority-wake-phase4.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-priority-wake.sh → demos/test-priority-wake.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quick-fix.sh → demos/test-quick-fix.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quorum-absolute.sh → demos/test-quorum-absolute.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quorum-fallback.sh → demos/test-quorum-fallback.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quorum-percentage.sh → demos/test-quorum-percentage.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quorum-with-retry.sh → demos/test-quorum-with-retry.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-quorum.sh → demos/test-quorum.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-shutdown-handling.sh → demos/test-shutdown-handling.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-shutdown.sh → demos/test-shutdown.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-utils-unix.sh → demos/test-utils-unix.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-utils.sh → demos/test-utils.sh} +0 -0
- /package/.claude/skills/redis-coordination/{test-waiting-mode.sh → demos/test-waiting-mode.sh} +0 -0
|
@@ -0,0 +1,1672 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
##############################################################################
|
|
4
|
+
# CFN Loop Orchestration v2.0.0
|
|
5
|
+
# Manages multi-loop CFN execution with dependency tracking and consensus
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
# ./orchestrate-cfn-loop.sh --task-id <id> \
|
|
9
|
+
# --mode <mvp|standard|enterprise> \
|
|
10
|
+
# --loop3-agents <agent1,agent2,...> \
|
|
11
|
+
# --loop2-agents <agent1,agent2,...> \
|
|
12
|
+
# --product-owner <agent-id> \
|
|
13
|
+
# [--max-iterations <n>] \
|
|
14
|
+
# [--min-quorum-loop3 <n|n%|0.n>] \
|
|
15
|
+
# [--min-quorum-loop2 <n|n%|0.n>] \
|
|
16
|
+
# [--epic-context <json>] \
|
|
17
|
+
# [--phase-context <json>] \
|
|
18
|
+
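# [--success-criteria <json>] \
# [--expected-files <files>] \
# [--phase-id <id>] \
# [--timeout <seconds>] \
# [--retry-count <n>] \
# [--retry-delay <ms>]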
|
|
19
|
+
#
|
|
20
|
+
# CFN Loop Structure (CORRECTED):
|
|
21
|
+
# Loop 3 (Primary Swarm - Self Validation)
|
|
22
|
+
# ↓
|
|
23
|
+
# IF Loop 3 self-validation gate FAILS → RELAUNCH Loop 3 (skip Loop 2)
|
|
24
|
+
# IF Loop 3 self-validation gate PASSES → Proceed to Loop 2
|
|
25
|
+
# ↓
|
|
26
|
+
# Loop 2 (Consensus Validators)
|
|
27
|
+
# ↓
|
|
28
|
+
# Product Owner Decision
|
|
29
|
+
#
|
|
30
|
+
# Dependency Enforcement:
|
|
31
|
+
# - Loop 3 agents self-validate via confidence scores
|
|
32
|
+
# - Gate check determines if Loop 2 validators should be engaged
|
|
33
|
+
# - Loop 2 agents WAIT for gate pass signal before starting work
|
|
34
|
+
# - Product Owner BLOCKS until all Loop 2 agents signal completion
|
|
35
|
+
# - Uses Redis BLPOP for zero-token waiting
|
|
36
|
+
#
|
|
37
|
+
# Quorum Configuration:
|
|
38
|
+
# - Absolute: --min-quorum-loop3 3 (requires at least 3 agents)
|
|
39
|
+
# - Percentage: --min-quorum-loop3 85% (requires 85% of agents)
|
|
40
|
+
# - Decimal: --min-quorum-loop3 0.66 (requires 66% of agents)
|
|
41
|
+
# - Default: 0.66 (2/3 majority) if not specified
|
|
42
|
+
#
|
|
43
|
+
# Agent Requirements:
|
|
44
|
+
# Loop 3 (Implementers):
|
|
45
|
+
# 1. Complete work
|
|
46
|
+
# 2. Signal done: redis-cli lpush "swarm:${TASK_ID}:${AGENT_ID}:done" "complete"
|
|
47
|
+
# 3. Report confidence: invoke-waiting-mode.sh report --confidence <0.0-1.0>
|
|
48
|
+
# 4. Enter waiting: invoke-waiting-mode.sh enter (for potential iteration)
|
|
49
|
+
#
|
|
50
|
+
# Loop 2 (Validators):
|
|
51
|
+
# 1. WAIT for gate pass: redis-cli blpop "swarm:${TASK_ID}:gate-passed" 0
|
|
52
|
+
# 2. Retrieve Loop 3 results for review
|
|
53
|
+
# 3. Perform validation
|
|
54
|
+
# 4. Signal done: redis-cli lpush "swarm:${TASK_ID}:${AGENT_ID}:done" "complete"
|
|
55
|
+
# 5. Report consensus: invoke-waiting-mode.sh report --confidence <0.0-1.0>
|
|
56
|
+
# 6. Enter waiting: invoke-waiting-mode.sh enter (for potential iteration)
|
|
57
|
+
##############################################################################
|
|
58
|
+
|
|
59
|
+
set -euo pipefail
|
|
60
|
+
|
|
61
|
+
# Configuration
|
|
62
|
+
TASK_ID=""
|
|
63
|
+
MODE="standard"
|
|
64
|
+
LOOP3_AGENTS=""
|
|
65
|
+
LOOP2_AGENTS=""
|
|
66
|
+
PRODUCT_OWNER=""
|
|
67
|
+
MAX_ITERATIONS=10
|
|
68
|
+
TIMEOUT=3600 # 60 minute default timeout for agent completion
|
|
69
|
+
RETRY_COUNT=3
|
|
70
|
+
RETRY_DELAY=5000 # Base delay in milliseconds
|
|
71
|
+
MIN_QUORUM_LOOP3="" # Minimum agents required for Loop 3 (absolute or percentage)
|
|
72
|
+
MIN_QUORUM_LOOP2="" # Minimum agents required for Loop 2 (absolute or percentage)
|
|
73
|
+
ORCHESTRATOR_PID=$$
|
|
74
|
+
SHUTDOWN_MONITOR_PID=""
|
|
75
|
+
SHUTDOWN_REQUESTED=0
|
|
76
|
+
LOOP3_HEARTBEAT_MONITOR_PID=""
|
|
77
|
+
LOOP2_HEARTBEAT_MONITOR_PID=""
|
|
78
|
+
|
|
79
|
+
# Epic Context (optional - for agent system prompts)
|
|
80
|
+
EPIC_CONTEXT=""
|
|
81
|
+
PHASE_CONTEXT=""
|
|
82
|
+
SUCCESS_CRITERIA=""
|
|
83
|
+
EXPECTED_FILES="" # BUG #12 FIX: Explicit file verification
|
|
84
|
+
PHASE_ID="" # BUG #16 FIX: Phase identifier for timeout configuration
|
|
85
|
+
|
|
86
|
+
# Thresholds by mode
|
|
87
|
+
declare -A GATE_THRESHOLD=(
|
|
88
|
+
[mvp]=0.70
|
|
89
|
+
[standard]=0.75
|
|
90
|
+
[enterprise]=0.75
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
declare -A CONSENSUS_THRESHOLD=(
|
|
94
|
+
[mvp]=0.80
|
|
95
|
+
[standard]=0.90
|
|
96
|
+
[enterprise]=0.95
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Parse arguments
|
|
100
|
+
while [[ $# -gt 0 ]]; do
|
|
101
|
+
case $1 in
|
|
102
|
+
--task-id)
|
|
103
|
+
TASK_ID="$2"
|
|
104
|
+
shift 2
|
|
105
|
+
;;
|
|
106
|
+
--mode)
|
|
107
|
+
MODE="$2"
|
|
108
|
+
shift 2
|
|
109
|
+
;;
|
|
110
|
+
--loop3-agents)
|
|
111
|
+
LOOP3_AGENTS="$2"
|
|
112
|
+
shift 2
|
|
113
|
+
;;
|
|
114
|
+
--loop2-agents)
|
|
115
|
+
LOOP2_AGENTS="$2"
|
|
116
|
+
shift 2
|
|
117
|
+
;;
|
|
118
|
+
--product-owner)
|
|
119
|
+
PRODUCT_OWNER="$2"
|
|
120
|
+
shift 2
|
|
121
|
+
;;
|
|
122
|
+
--max-iterations)
|
|
123
|
+
MAX_ITERATIONS="$2"
|
|
124
|
+
shift 2
|
|
125
|
+
;;
|
|
126
|
+
--retry-count)
|
|
127
|
+
RETRY_COUNT="$2"
|
|
128
|
+
shift 2
|
|
129
|
+
;;
|
|
130
|
+
--retry-delay)
|
|
131
|
+
RETRY_DELAY="$2"
|
|
132
|
+
shift 2
|
|
133
|
+
;;
|
|
134
|
+
--timeout)
|
|
135
|
+
TIMEOUT="$2"
|
|
136
|
+
shift 2
|
|
137
|
+
;;
|
|
138
|
+
--min-quorum-loop3)
|
|
139
|
+
MIN_QUORUM_LOOP3="$2"
|
|
140
|
+
shift 2
|
|
141
|
+
;;
|
|
142
|
+
--min-quorum-loop2)
|
|
143
|
+
MIN_QUORUM_LOOP2="$2"
|
|
144
|
+
shift 2
|
|
145
|
+
;;
|
|
146
|
+
--epic-context)
|
|
147
|
+
EPIC_CONTEXT="$2"
|
|
148
|
+
shift 2
|
|
149
|
+
;;
|
|
150
|
+
--phase-context)
|
|
151
|
+
PHASE_CONTEXT="$2"
|
|
152
|
+
shift 2
|
|
153
|
+
;;
|
|
154
|
+
--success-criteria)
|
|
155
|
+
SUCCESS_CRITERIA="$2"
|
|
156
|
+
shift 2
|
|
157
|
+
;;
|
|
158
|
+
--expected-files)
|
|
159
|
+
EXPECTED_FILES="$2"
|
|
160
|
+
shift 2
|
|
161
|
+
;;
|
|
162
|
+
--phase-id)
|
|
163
|
+
PHASE_ID="$2"
|
|
164
|
+
shift 2
|
|
165
|
+
;;
|
|
166
|
+
*)
|
|
167
|
+
echo "Unknown option: $1" >&2 # diagnostics go to stderr so stdout stays clean for callers
|
|
168
|
+
exit 1
|
|
169
|
+
;;
|
|
170
|
+
esac
|
|
171
|
+
done
|
|
172
|
+
|
|
173
|
+
# Validation
|
|
174
|
+
if [ -z "$TASK_ID" ] || [ -z "$LOOP3_AGENTS" ] || [ -z "$LOOP2_AGENTS" ] || [ -z "$PRODUCT_OWNER" ]; then
|
|
175
|
+
echo "Error: Required parameters missing" >&2 # error text belongs on stderr, not stdout
|
|
176
|
+
echo "Usage: $0 --task-id <id> --mode <mode> --loop3-agents <agents> --loop2-agents <agents> --product-owner <agent>" >&2 # usage shown on failure goes to stderr
|
|
177
|
+
exit 1
|
|
178
|
+
fi
|
|
179
|
+
|
|
180
|
+
# Resolve the Loop 3 gate threshold for the selected mode.
# Reject an empty or unknown mode explicitly: with `set -u` active, looking up
# a missing key in the threshold table would otherwise abort the script with an
# opaque "unbound variable" / "bad array subscript" message.
if [[ -z "$MODE" || -z "${GATE_THRESHOLD[$MODE]+x}" ]]; then
  echo "Error: Invalid mode '$MODE' (expected one of: ${!GATE_THRESHOLD[*]})" >&2
  exit 1
fi
GATE=${GATE_THRESHOLD[$MODE]}
|
|
181
|
+
CONSENSUS=${CONSENSUS_THRESHOLD[$MODE]}
|
|
182
|
+
|
|
183
|
+
# Set default quorum values if not specified (66% = 2/3 majority)
|
|
184
|
+
MIN_QUORUM_LOOP3=${MIN_QUORUM_LOOP3:-0.66}
|
|
185
|
+
MIN_QUORUM_LOOP2=${MIN_QUORUM_LOOP2:-0.66}
|
|
186
|
+
|
|
187
|
+
##############################################################################
|
|
188
|
+
# Shutdown Handling Functions
|
|
189
|
+
##############################################################################
|
|
190
|
+
# Shut the orchestrator down gracefully and terminate the script.
#
# Stops the shutdown monitor and both heartbeat monitors, marks the swarm as
# cancelled, deletes this task's Redis keys and heartbeat marker files, then
# exits.
#
# Globals:
#   SHUTDOWN_REQUESTED (written); SHUTDOWN_MONITOR_PID,
#   LOOP3_HEARTBEAT_MONITOR_PID, LOOP2_HEARTBEAT_MONITOR_PID, TASK_ID,
#   SWARM_ID (read)
# Arguments:
#   $1 - exit code to terminate with (default: 130)
#   $2 - shutdown reason; printed and recorded as a swarm metric
#        (default: user_interrupt)
# Returns:
#   Never returns; exits with $1.
function cleanup_and_exit() {
    local exit_code="${1:-130}"
    local reason="${2:-user_interrupt}"

    # Set shutdown flag to stop any ongoing operations
    SHUTDOWN_REQUESTED=1

    echo ""
    echo "=============================================="
    echo "🛑 Orchestrator shutting down gracefully..."
    echo "=============================================="
    echo "Reason: $reason"
    echo "Exit Code: $exit_code"

    # Kill shutdown monitor if running
    if [ -n "${SHUTDOWN_MONITOR_PID:-}" ] && kill -0 "$SHUTDOWN_MONITOR_PID" 2>/dev/null; then
        kill "$SHUTDOWN_MONITOR_PID" 2>/dev/null || true
        wait "$SHUTDOWN_MONITOR_PID" 2>/dev/null || true
    fi

    # Stop heartbeat monitors if running
    if [ -n "${LOOP3_HEARTBEAT_MONITOR_PID:-}" ]; then
        echo "Stopping Loop 3 heartbeat monitor..."
        stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
    fi
    if [ -n "${LOOP2_HEARTBEAT_MONITOR_PID:-}" ]; then
        echo "Stopping Loop 2 heartbeat monitor..."
        stop_heartbeat_monitor "$TASK_ID" "loop2" "$LOOP2_HEARTBEAT_MONITOR_PID"
    fi

    # Mark swarm as cancelled if initialized
    if [ -n "$TASK_ID" ] && [ -n "${SWARM_ID:-}" ]; then
        echo "Marking swarm as cancelled..."
        ./.claude/skills/redis-coordination/complete-swarm.sh \
            --swarm-id "$SWARM_ID" \
            --final-metric "status=cancelled" \
            --final-metric "shutdown_reason=$reason" 2>/dev/null || echo " ⚠️ Failed to mark swarm as cancelled"
    fi

    # Clean up Redis keys
    if [ -n "$TASK_ID" ]; then
        echo "Cleaning up Redis keys..."
        # Declared separately so the assignment does not hide the pipeline status.
        local keys_deleted
        keys_deleted=$(redis-cli --scan --pattern "swarm:${TASK_ID}:*" | xargs -r redis-cli DEL 2>/dev/null || echo "0")
        # xargs -r runs nothing (and prints nothing) when no key matched, so
        # default the count to 0 instead of printing an empty value.
        echo " Deleted ${keys_deleted:-0} Redis keys"
    fi

    # Clean up heartbeat monitor marker files. TASK_ID is quoted so it cannot
    # be word-split; only the trailing "*" is left to glob.
    rm -f /tmp/heartbeat-monitor-"${TASK_ID}"-*.active 2>/dev/null || true

    echo "=============================================="
    echo "Shutdown complete"
    echo "=============================================="

    exit "$exit_code"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'echo "[TRAP] Caught SIGINT" >&2; cleanup_and_exit 130 "SIGINT_received"' SIGINT
trap 'echo "[TRAP] Caught SIGTERM" >&2; cleanup_and_exit 143 "SIGTERM_received"' SIGTERM
|
|
249
|
+
|
|
250
|
+
##############################################################################
|
|
251
|
+
# Start Shutdown Monitor (Background Process)
|
|
252
|
+
##############################################################################
|
|
253
|
+
# Launch a background watcher that waits for an external shutdown request.
#
# The watcher blocks on the Redis list "swarm:<task_id>:shutdown". When an
# entry arrives it reads the optional ".reason" field from the payload and
# sends SIGTERM to the process identified by ORCHESTRATOR_PID.
#
# Globals:
#   ORCHESTRATOR_PID (read), SHUTDOWN_MONITOR_PID (written)
# Arguments:
#   $1 - task id used to build the shutdown key
# Outputs:
#   Prints the PID of the background watcher.
function start_shutdown_monitor() {
    local task_id="$1"

    (
        # Block on shutdown channel (zero-token waiting); timeout 0 = forever.
        channel="swarm:${task_id}:shutdown"
        popped=$(redis-cli BLPOP "$channel" 0 2>/dev/null || echo "")

        # Nothing received (e.g. redis-cli failed): the watcher simply ends.
        [ -n "$popped" ] || exit 0

        # BLPOP prints the key followed by the value; the payload is the last line.
        payload=$(echo "$popped" | tail -1)
        why=$(echo "$payload" | jq -r '.reason // "external_shutdown"' 2>/dev/null || echo "external_shutdown")

        echo ""
        echo "🛑 Shutdown signal received from Redis channel: $why"
        echo " Sending SIGTERM to orchestrator PID: $ORCHESTRATOR_PID"

        # Send SIGTERM to main orchestrator process
        if ! kill -TERM "$ORCHESTRATOR_PID" 2>/dev/null; then
            echo " ❌ Failed to send SIGTERM (process may have already exited)"
            exit 0
        fi
        echo " ✅ SIGTERM sent successfully"
    ) &

    SHUTDOWN_MONITOR_PID=$!
    echo "Shutdown monitor started (PID: $SHUTDOWN_MONITOR_PID)"
}
|
|
283
|
+
|
|
284
|
+
##############################################################################
|
|
285
|
+
# Feedback Accumulation Function (PHASE 1 - BUG #23 FIX)
|
|
286
|
+
##############################################################################
|
|
287
|
+
# Accumulates feedback across iterations to enable learning
|
|
288
|
+
# Usage: accumulate_feedback <task_id> <iteration> <source> <feedback_message>
|
|
289
|
+
# Append one feedback entry to the task's feedback history in Redis.
#
# The history lives under "swarm:<task_id>:feedback:history" as a JSON array
# of {iteration, source, feedback, timestamp} objects and is rewritten with a
# 24 hour TTL on every call.
#
# Arguments:
#   $1 - task id
#   $2 - iteration number (must be numeric; stored as a JSON number)
#   $3 - feedback source label
#   $4 - feedback message
# Outputs:
#   Confirmation line on stdout; warnings/errors on stderr.
# Returns:
#   0 on success, 1 if the entry could not be built or stored.
function accumulate_feedback() {
    local task_id="$1"
    local iteration="$2"
    local source="$3"
    local feedback_message="$4"

    local feedback_key="swarm:${task_id}:feedback:history"

    # Retrieve existing feedback history
    local feedback_history
    feedback_history=$(redis-cli GET "$feedback_key" 2>/dev/null)
    # Normalize empty/nil to valid JSON array
    if [ -z "$feedback_history" ] || [ "$feedback_history" = "(nil)" ]; then
        feedback_history="[]"
    fi

    # A stored value that is not a JSON array would make every later append
    # fail, so start over from an empty history in that case.
    if ! jq -e 'type == "array"' <<< "$feedback_history" >/dev/null 2>&1; then
        echo "[Feedback] ⚠️ Stored feedback history is not a JSON array - resetting" >&2
        feedback_history="[]"
    fi

    # Append new feedback with metadata
    local new_feedback
    if ! new_feedback=$(jq -nc \
        --argjson history "$feedback_history" \
        --arg iteration "$iteration" \
        --arg source "$source" \
        --arg feedback "$feedback_message" \
        --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        '$history + [{
            iteration: ($iteration | tonumber),
            source: $source,
            feedback: $feedback,
            timestamp: $timestamp
        }]' 2>/dev/null) || [ -z "$new_feedback" ]; then
        echo "[Feedback] ❌ Could not build feedback entry for iteration $iteration (source: $source)" >&2
        return 1
    fi

    # Store accumulated history.
    # redis-cli -x appends the stdin payload as the LAST argument, so the TTL
    # has to come before the value: SETEX key seconds value. The previous
    # "SET key EX 86400" form sent the JSON after the EX option and was
    # rejected by Redis. printf avoids storing a trailing newline.
    if ! printf '%s' "$new_feedback" | redis-cli -x SETEX "$feedback_key" 86400 >/dev/null; then
        echo "[Feedback] ❌ Failed to store feedback history for iteration $iteration" >&2
        return 1
    fi

    echo "[Feedback] ✅ Accumulated feedback for iteration $iteration (source: $source)"
}
|
|
325
|
+
|
|
326
|
+
##############################################################################
|
|
327
|
+
# Quorum Calculation Function
|
|
328
|
+
##############################################################################
|
|
329
|
+
# Convert a quorum specification into the number of agents required.
#
# Accepted forms of the specification:
#   ""            - no quorum configured: every agent is required
#   "85%", "66.5%"- percentage of total_agents
#   "0.66", ".5", "1.0" - fraction of total_agents
#   "2"           - absolute agent count (a bare "1" means one agent)
# Percentages and fractions are rounded half up to the nearest whole agent
# (e.g. 0.66 of 5 agents -> 3), using exact integer arithmetic.
# NOTE(review): very small fractions can round down to 0 required agents;
# this matches the previous behaviour - confirm callers expect it.
#
# Arguments:
#   $1 - quorum specification (see above)
#   $2 - total number of agents (non-negative integer)
# Outputs:
#   Required agent count on stdout; error text on stderr.
# Returns:
#   0 on success, 1 if the specification is malformed or asks for more
#   agents than exist.
function calculate_quorum() {
    local quorum_spec="$1"
    local total_agents="$2"

    # If no quorum specified, require all agents
    if [ -z "$quorum_spec" ]; then
        echo "$total_agents"
        return 0
    fi

    # Patterns are kept in variables so [[ =~ ]] treats them as regexes.
    local re_integer='^[0-9]+$'
    local re_percent='^([0-9]+([.][0-9]+)?|[.][0-9]+)%$'
    local re_fraction='^(0?[.][0-9]+|1[.]0+)$'

    if ! [[ "$total_agents" =~ $re_integer ]]; then
        echo "Error: Total agents ($total_agents) must be a non-negative integer" >&2
        return 1
    fi
    # 10# forces base 10 so a leading zero is not read as octal.
    local total=$((10#$total_agents))

    local value divisor
    if [[ "$quorum_spec" =~ $re_percent ]]; then
        # Percentage format (e.g., "85%"): strip the % and divide by 100.
        value="${BASH_REMATCH[1]}"
        divisor=100
    elif [[ "$quorum_spec" =~ $re_fraction ]]; then
        # Decimal in 0.0-1.0, treated as a fraction of the total.
        value="$quorum_spec"
        divisor=1
    elif [[ "$quorum_spec" =~ $re_integer ]]; then
        # Absolute number - validate it doesn't exceed total
        if [ "$((10#$quorum_spec))" -gt "$total" ]; then
            echo "Error: Quorum ($quorum_spec) exceeds total agents ($total_agents)" >&2
            return 1
        fi
        echo "$quorum_spec"
        return 0
    else
        echo "Error: Invalid quorum specification ($quorum_spec)" >&2
        return 1
    fi

    # Split the decimal into integer and fractional digits for fixed-point math.
    local int_part="${value%%.*}"
    local frac_part=""
    if [[ "$value" == *.* ]]; then
        frac_part="${value#*.}"
    fi
    # Bound the digit counts so the 64-bit arithmetic below cannot overflow.
    if [ "${#int_part}" -gt 4 ] || [ "${#frac_part}" -gt 9 ]; then
        echo "Error: Quorum ($quorum_spec) has more digits than supported" >&2
        return 1
    fi

    local scale=$((10 ** ${#frac_part}))
    local numerator=$((10#${int_part:-0} * scale + 10#${frac_part:-0}))
    local denominator=$((scale * divisor))
    # floor(x + 1/2) with x = numerator * total / denominator, kept in integers.
    local quorum=$(( (2 * numerator * total + denominator) / (2 * denominator) ))

    if [ "$quorum" -gt "$total" ]; then
        echo "Error: Quorum ($quorum_spec) exceeds total agents ($total_agents)" >&2
        return 1
    fi

    echo "$quorum"
}
|
|
358
|
+
|
|
359
|
+
##############################################################################
|
|
360
|
+
# Dead Letter Queue (DLQ) Functions
|
|
361
|
+
##############################################################################
|
|
362
|
+
# Record a permanently failed agent in the task's dead letter queue.
#
# Pushes a JSON entry {reason, retry_count, timestamp} onto the Redis list
# "swarm:<TASK_ID>:dlq:<agent>" and gives the list a 7 day TTL.
#
# Globals:
#   TASK_ID (read)
# Arguments:
#   $1 - agent name
#   $2 - failure reason
#   $3 - number of retries attempted (stored as a number when numeric,
#        otherwise kept as the original string)
# Outputs:
#   One status line on stdout; errors on stderr.
# Returns:
#   0 when the entry was built (Redis results are not checked), 1 when the
#   entry could not be built.
function write_to_dlq() {
    local agent="$1"
    local reason="$2"
    local retry_count="$3"

    local dlq_key="swarm:${TASK_ID}:dlq:${agent}"
    local dlq_entry

    # "tonumber? // $retries" keeps the entry well-formed even when the retry
    # count is not numeric, instead of letting jq fail and pushing an empty
    # value onto the queue.
    dlq_entry=$(jq -n \
        --arg reason "$reason" \
        --arg retries "$retry_count" \
        --arg ts "$(date +%s)" \
        '{reason: $reason, retry_count: ($retries | (tonumber? // $retries)), timestamp: ($ts | tonumber)}' 2>/dev/null) || dlq_entry=""

    if [ -z "$dlq_entry" ]; then
        echo " ❌ $agent → DLQ entry could not be built (reason: $reason, retries: $retry_count)" >&2
        return 1
    fi

    # redis-cli -x reads the list value (the last argument) from stdin.
    echo "$dlq_entry" | redis-cli -x LPUSH "$dlq_key" >/dev/null
    redis-cli EXPIRE "$dlq_key" 604800 >/dev/null # 7 days TTL

    echo " ❌ $agent → DLQ (reason: $reason, retries: $retry_count)"
}
|
|
379
|
+
|
|
380
|
+
##############################################################################
|
|
381
|
+
# Exponential Backoff Retry Function
|
|
382
|
+
##############################################################################
|
|
383
|
+
# Pause before the next retry using exponential backoff.
#
# The pause is base_delay * 2^attempt milliseconds. It is slept in slices of
# at most 0.5s so that a shutdown request (SHUTDOWN_REQUESTED=1) or an
# interrupted wait ends the pause early.
#
# Globals:
#   SHUTDOWN_REQUESTED (read)
# Arguments:
#   $1 - agent name (used in messages only)
#   $2 - current attempt number
#   $3 - maximum number of retries (used in messages only)
#   $4 - base delay in milliseconds
# Returns:
#   0, including when the pause is skipped or cut short.
function retry_with_backoff() {
    local agent="$1"
    local attempt="$2"
    local max_retries="$3"
    local base_delay="$4"

    # Check for shutdown before sleeping
    if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
        echo " [SHUTDOWN] Skipping backoff delay for $agent" >&2
        return 0
    fi

    # Exponential backoff: delay = base_delay * (2 ^ attempt)
    local delay_ms=$(bc <<< "$base_delay * (2 ^ $attempt)")
    local now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    echo " [$now] [Retry $attempt/$max_retries] Waiting ${delay_ms}ms before retry for $agent..."

    # Interruptible sleep: work in seconds and nap in short slices.
    local total_sec=$(bc <<< "scale=3; $delay_ms / 1000")
    local slept=0
    while (( $(bc -l <<< "$slept < $total_sec") )); do
        # Nap for 0.5s, or for whatever is left when that is shorter.
        local left=$(bc <<< "$total_sec - $slept")
        local nap=$(bc <<< "if ($left < 0.5) $left else 0.5")

        sleep "$nap" &
        local napper=$!
        # If wait is interrupted (SIGTERM), return immediately
        wait "$napper" 2>/dev/null || return 0

        slept=$(bc <<< "$slept + $nap")

        # Check for shutdown after each sleep increment
        if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
            echo " [SHUTDOWN] Interrupted backoff delay for $agent" >&2
            return 0
        fi
    done
}
|
|
421
|
+
|
|
422
|
+
##############################################################################
|
|
423
|
+
# Heartbeat Monitoring Functions
|
|
424
|
+
##############################################################################
|
|
425
|
+
declare -A MISSED_HEARTBEATS # Track missed heartbeats per agent

# Report whether an agent currently has a heartbeat recorded in Redis.
#
# Reads the "heartbeat" field of the hash "swarm:<task_id>:agent:<agent>-<iteration>"
# (agents register under an id that carries the iteration as a suffix, e.g.
# react-frontend-engineer-1).
#
# Globals:
#   HB_KEY, HB_DATA (written)
# Arguments:
#   $1 - agent name
#   $2 - task id
#   $3 - iteration number appended to the agent name
# Returns:
#   0 if a heartbeat value is present (alive), 1 otherwise (dead).
function check_agent_heartbeat() {
    local agent="$1"
    local task_id="$2"
    local iteration="$3"

    HB_KEY="swarm:${task_id}:agent:${agent}-${iteration}"
    HB_DATA=$(redis-cli HGET "$HB_KEY" heartbeat 2>/dev/null || echo "")

    # Alive only when a value came back and it is not the "(nil)" placeholder.
    [ -n "$HB_DATA" ] && [ "$HB_DATA" != "(nil)" ]
}
|
|
443
|
+
|
|
444
|
+
# Run one heartbeat sweep over a set of agents and report hung ones.
#
# Each agent without a heartbeat has its miss counter incremented; a present
# heartbeat resets it. Once an agent has missed two sweeps in a row it is
# reported as hung, together with whether the loop it belongs to already has
# enough completed agents to satisfy its quorum.
#
# Globals:
#   MISSED_HEARTBEATS (read/written)
#   LOOP3_AGENTS, LOOP2_AGENTS (read; comma- or space-separated lists)
#   LOOP3_FAILED_AGENTS, LOOP2_FAILED_AGENTS, LOOP3_COMPLETED_AGENTS,
#   LOOP2_COMPLETED_AGENTS, LOOP3_TOTAL, LOOP2_TOTAL, MIN_QUORUM_LOOP3,
#   MIN_QUORUM_LOOP2 (read)
# Arguments:
#   $1 - task id
#   $2 - loop name used in log lines
#   $3 - iteration number
#   $4.. - agent names to check
# Outputs:
#   Diagnostics on stderr only.
function check_heartbeats_loop() {
    local task_id="$1"
    local loop_name="$2"
    local iteration="$3"
    shift 3
    local agents=("$@")

    # Space-padded membership strings. The agent lists are split on "," when
    # agents are spawned, so commas are turned into spaces here; matching the
    # raw comma-separated string against " agent " never succeeded for lists
    # with more than one entry.
    local failed_list=" ${LOOP3_FAILED_AGENTS[*]} ${LOOP2_FAILED_AGENTS[*]} "
    local loop3_list=" ${LOOP3_AGENTS//,/ } "
    local loop2_list=" ${LOOP2_AGENTS//,/ } "

    local AGENT timestamp completed required
    for AGENT in "${agents[@]}"; do
        # Skip agents already marked as failed
        if [[ "$failed_list" == *" ${AGENT} "* ]]; then
            continue
        fi

        if ! check_agent_heartbeat "$AGENT" "$task_id" "$iteration"; then
            MISSED_HEARTBEATS["$AGENT"]=$((${MISSED_HEARTBEATS["$AGENT"]:-0} + 1))

            if [ "${MISSED_HEARTBEATS["$AGENT"]}" -ge 2 ]; then
                timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
                echo " [$timestamp] [$loop_name] ⚠️ $AGENT appears hung (no heartbeat for 60s)" >&2

                # Determine which loop this agent belongs to and check quorum
                if [[ "$loop3_list" == *" ${AGENT} "* ]]; then
                    completed=${#LOOP3_COMPLETED_AGENTS[@]}
                    required=$(calculate_quorum "$MIN_QUORUM_LOOP3" "$LOOP3_TOTAL")
                elif [[ "$loop2_list" == *" ${AGENT} "* ]]; then
                    # Safety check: Skip if Loop 2 hasn't been initialized yet
                    if [ -z "${LOOP2_COMPLETED_AGENTS+x}" ]; then
                        continue
                    fi
                    completed=${#LOOP2_COMPLETED_AGENTS[@]}
                    required=$(calculate_quorum "$MIN_QUORUM_LOOP2" "$LOOP2_TOTAL")
                else
                    continue
                fi

                # calculate_quorum prints nothing usable when it fails; skip
                # the comparison rather than feed an empty operand to [ ].
                if ! [[ "$required" =~ ^[0-9]+$ ]]; then
                    echo " [$timestamp] [$loop_name] ⚠️ Unable to determine quorum for $AGENT" >&2
                    continue
                fi

                if [ "$completed" -ge "$required" ]; then
                    echo " [$timestamp] [$loop_name] ℹ️ Continuing with quorum (${completed}/${required} agents)" >&2
                else
                    echo " [$timestamp] [$loop_name] ⚠️ Cannot meet quorum without $AGENT (${completed}/${required})" >&2
                fi
            fi
        else
            MISSED_HEARTBEATS["$AGENT"]=0 # Reset counter
        fi
    done
}
|
|
490
|
+
|
|
491
|
+
# Start a background heartbeat sweep for one loop.
#
# Creates the marker file /tmp/heartbeat-monitor-<task_id>-<loop_name>.active
# and spawns a background subshell that calls check_heartbeats_loop every 30
# seconds for as long as the marker file exists.
#
# Globals:
#   SHUTDOWN_REQUESTED (read inside the background subshell)
# Arguments:
#   $1 - task id
#   $2 - loop name
#   $3 - iteration number
#   $4.. - agent names to watch
# Outputs:
#   Nothing; the caller reads the monitor PID from $! right after the call.
function start_heartbeat_monitor() {
    local task_id="$1"
    local loop_name="$2"
    local iteration="$3"
    shift 3
    local agents=("$@")

    # The marker file doubles as the "keep running" switch for the subshell.
    local marker_file="/tmp/heartbeat-monitor-${task_id}-${loop_name}.active"
    touch "$marker_file"

    # [BUG #7 FIX] Spawn background process and let caller capture $!
    (
        while [ -f "$marker_file" ]; do
            # Stop sweeping once a shutdown has been requested.
            if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
                break
            fi

            check_heartbeats_loop "$task_id" "$loop_name" "$iteration" "${agents[@]}"
            sleep 30
        done
    ) &

    # Deliberately silent: printing here would not change $! but the caller
    # expects no output from this function.
}
|
|
517
|
+
|
|
518
|
+
# Stop a heartbeat monitor started for the given task and loop.
#
# Removes the monitor's marker file (which ends its sweep loop) and, when a
# PID is given and still alive, kills that process and waits for it.
#
# Arguments:
#   $1 - task id
#   $2 - loop name
#   $3 - PID of the monitor process (may be empty)
# Returns:
#   0 in all cases.
function stop_heartbeat_monitor() {
    local task_id="$1"
    local loop_name="$2"
    local monitor_pid="$3"

    # Remove marker file to stop the monitor loop
    rm -f "/tmp/heartbeat-monitor-${task_id}-${loop_name}.active"

    # Nothing more to do without a PID or when the process is already gone.
    [ -n "$monitor_pid" ] || return 0
    kill -0 "$monitor_pid" 2>/dev/null || return 0

    # Kill monitor process if still running, then wait for it; failures of
    # either step are ignored.
    kill "$monitor_pid" 2>/dev/null || true
    wait "$monitor_pid" 2>/dev/null || true
}
|
|
532
|
+
|
|
533
|
+
##############################################################################
|
|
534
|
+
# Get Agent-Specific Timeout
|
|
535
|
+
##############################################################################
|
|
536
|
+
# Print the timeout (in seconds) to use for an agent.
#
# Asks the get-agent-timeout.sh helper located next to this script. The
# global TIMEOUT is used instead when the helper fails, is missing, or prints
# something that is not a plain non-negative number, so that an empty or
# malformed value never reaches BLPOP or the JSON log details.
#
# Globals:
#   TIMEOUT (read)
# Arguments:
#   $1 - agent name
#   $2 - task id
# Outputs:
#   The timeout value on stdout.
function get_agent_timeout() {
    local agent="$1"
    local task_id="$2"

    local script_dir agent_timeout
    local numeric_re='^[0-9]+([.][0-9]+)?$'

    # Use get-agent-timeout.sh helper script
    script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    agent_timeout=$("$script_dir/get-agent-timeout.sh" --task-id "$task_id" --agent-id "$agent" 2>/dev/null) || agent_timeout=""

    if ! [[ "$agent_timeout" =~ $numeric_re ]]; then
        agent_timeout="$TIMEOUT"
    fi

    echo "$agent_timeout"
}
|
|
546
|
+
|
|
547
|
+
##############################################################################
|
|
548
|
+
# Process-Based Completion Monitoring
|
|
549
|
+
##############################################################################
|
|
550
|
+
# Watch an agent process and signal completion on its behalf if it exits
# without pushing to its "done" list.
#
# Runs in a background subshell. Once the agent process is gone and the done
# list is still empty, it pushes "auto-completed-success" (exit code 0) or
# "auto-completed-error:<code>" and increments the task's agent_errors metric.
#
# NOTE: the agent is started by the orchestrator, not by this subshell, so
# `wait` cannot block on it here: bash returns 127 immediately for a PID that
# is not a child of the calling (sub)shell. The subshell therefore polls the
# PID until it disappears. In that case the real exit status is not
# observable and the reported code stays 127.
#
# Arguments:
#   $1 - agent id (used in messages)
#   $2 - PID of the agent process
#   $3 - task id (used for the metrics key)
#   $4 - Redis list key the agent signals completion on
function monitor_agent_process() {
    local agent_id="$1"
    local agent_pid="$2"
    local task_id="$3"
    local done_key="$4"

    # Monitor agent process in background
    (
        # Wait for process to exit (only effective if the PID is our child).
        wait "$agent_pid" 2>/dev/null
        EXIT_CODE=$?

        # Poll until the process is really gone; without this a failure was
        # signalled while the agent was still running.
        while kill -0 "$agent_pid" 2>/dev/null; do
            sleep 0.5
        done

        # Check if done signal already sent (agent may have signaled normally)
        DONE_COUNT=$(redis-cli LLEN "$done_key" 2>/dev/null || echo "0")
        if [ "${DONE_COUNT:-0}" -gt 0 ]; then
            # Agent signaled normally - nothing to do
            exit 0
        fi

        # Process exited without signaling - auto-complete
        if [ "$EXIT_CODE" -eq 0 ]; then
            echo " [Process Monitor] $agent_id exited successfully (code 0) - auto-signaling completion" >&2
            redis-cli LPUSH "$done_key" "auto-completed-success" >/dev/null
        else
            echo " [Process Monitor] $agent_id exited with error (code $EXIT_CODE) - auto-signaling failure" >&2
            redis-cli LPUSH "$done_key" "auto-completed-error:$EXIT_CODE" >/dev/null

            # METRICS: Increment error counter
            redis-cli INCR "swarm:${task_id}:metrics:agent_errors" >/dev/null
        fi
    ) &
}
|
|
582
|
+
|
|
583
|
+
##############################################################################
|
|
584
|
+
# BLPOP with Retry Logic + Process Monitoring
|
|
585
|
+
##############################################################################
|
|
586
|
+
# Wait for an agent's completion signal, retrying with backoff.
#
# Each attempt blocks on the agent's done list using Redis' own BLPOP timeout.
# When an attempt times out the function checks whether the agent process
# (if a PID was given) has exited, looks for a heartbeat, kills a process that
# is alive but has no heartbeat, and then either backs off and retries or -
# after the last attempt - writes the agent to the dead letter queue.
#
# Only the BLPOP/LPOP result is written to stdout, because callers capture it
# with command substitution; every diagnostic (including the dead letter
# queue status line) goes to stderr.
#
# Globals:
#   SHUTDOWN_REQUESTED, TASK_ID (read)
# Arguments:
#   $1 - agent name
#   $2 - Redis list key the agent signals completion on
#   $3 - BLPOP timeout per attempt
#   $4 - maximum number of attempts
#   $5 - base retry delay passed to retry_with_backoff
#   $6 - optional PID of the agent process
# Returns:
#   0 with the result on stdout when a signal was received; 1 on shutdown or
#   after the final failed attempt.
function blpop_with_retry() {
    local agent="$1"
    local done_key="$2"
    local timeout="$3"
    local retry_count="$4"
    local retry_delay="$5"
    local agent_pid="${6:-}" # Optional: PID for process monitoring

    local attempt result heartbeat_key heartbeat_exists timestamp

    for ((attempt = 1; attempt <= retry_count; attempt++)); do
        # Check for shutdown before attempting BLPOP
        if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
            echo " [SHUTDOWN] Aborting BLPOP for $agent" >&2
            return 1
        fi

        # Use Redis's native BLPOP timeout instead of shell timeout command
        # This allows SIGTERM to properly interrupt the process
        result=$(redis-cli blpop "$done_key" "$timeout" 2>/dev/null || echo "")

        if [ -n "$result" ]; then
            echo "$result"
            return 0 # Success
        fi

        # BLPOP timeout - check if process is still alive
        if [ -n "$agent_pid" ]; then
            if ! kill -0 "$agent_pid" 2>/dev/null; then
                echo " [Process Check] Agent process $agent_pid no longer running" >&2

                # Process exited - check if done signal was auto-generated
                result=$(redis-cli LPOP "$done_key" 2>/dev/null || echo "")
                if [ -n "$result" ]; then
                    echo " [Auto-Complete] Retrieved: $result" >&2
                    echo "$result"
                    return 0
                fi
            fi
        fi

        # Check for shutdown after BLPOP timeout
        if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
            echo " [SHUTDOWN] Aborting retry for $agent" >&2
            return 1
        fi

        # Check heartbeat status
        # NOTE(review): this key differs from the "swarm:<task>:agent:<id>"
        # hash that check_agent_heartbeat reads - confirm which key agents
        # actually write before relying on this check.
        heartbeat_key="swarm:${TASK_ID}:${agent}:heartbeat"
        heartbeat_exists=$(redis-cli EXISTS "$heartbeat_key" 2>/dev/null || echo "0")

        if [ "${heartbeat_exists:-0}" -eq 0 ]; then
            echo " ⚠️ No heartbeat from $agent - agent may be stuck or crashed" >&2

            # If we have PID and process is stuck, kill it
            if [ -n "$agent_pid" ] && kill -0 "$agent_pid" 2>/dev/null; then
                echo " [Timeout Kill] Terminating stuck process $agent_pid" >&2
                kill "$agent_pid" 2>/dev/null || true
                sleep 2

                # Force kill if still alive
                if kill -0 "$agent_pid" 2>/dev/null; then
                    kill -9 "$agent_pid" 2>/dev/null || true
                fi

                # METRICS: Increment timeout counter
                redis-cli INCR "swarm:${TASK_ID}:metrics:agent_killed" >/dev/null
            fi
        fi

        # Log retry attempt (to stderr so it's visible during command substitution)
        timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        echo " [$timestamp] ⚠️ BLPOP attempt $attempt/$retry_count failed for $agent" >&2

        if ((attempt < retry_count)); then
            # METRICS: Increment retry counter
            redis-cli INCR "swarm:${TASK_ID}:metrics:retry_count" >/dev/null

            retry_with_backoff "$agent" "$attempt" "$retry_count" "$retry_delay" >&2
        else
            # Final failure - write to DLQ. The status line is redirected to
            # stderr so it cannot end up in the caller's captured result.
            echo " [$timestamp] ❌ FINAL FAILURE: $agent after $retry_count attempts" >&2
            write_to_dlq "$agent" "timeout_after_retries" "$retry_count" >&2
            return 1
        fi
    done

    return 1
}
|
|
673
|
+
|
|
674
|
+
# Print the run banner, register the swarm, start the shutdown monitor and
# store the optional epic/phase/success-criteria context in Redis so that each
# iteration can read it back.
echo "=== CFN Loop Orchestration ==="
echo "Task ID: $TASK_ID"
echo "Mode: $MODE (Gate: $GATE, Consensus: $CONSENSUS)"
echo "Max Iterations: $MAX_ITERATIONS"
echo ""

# Initialize swarm using general Redis coordination primitive
SWARM_ID="swarm-${TASK_ID}"
ALL_AGENTS="${LOOP3_AGENTS},${LOOP2_AGENTS},${PRODUCT_OWNER}"

# LOG: Swarm initialization (best effort: failures are ignored)
./.claude/skills/redis-coordination/log-event.sh \
    --task-id "$TASK_ID" \
    --event-type "swarm_init" \
    --details "{\"mode\": \"$MODE\", \"loop3_agents\": \"$LOOP3_AGENTS\", \"loop2_agents\": \"$LOOP2_AGENTS\", \"product_owner\": \"$PRODUCT_OWNER\", \"max_iterations\": $MAX_ITERATIONS, \"gate_threshold\": $GATE, \"consensus_threshold\": $CONSENSUS}" \
    --level "INFO" 2>/dev/null || true

# Build CFN-specific metadata
CFN_METADATA=$(cat <<EOF
{
  "mode": "$MODE",
  "loop3_agents": "$LOOP3_AGENTS",
  "loop2_agents": "$LOOP2_AGENTS",
  "product_owner": "$PRODUCT_OWNER",
  "workflow_type": "cfn_loop"
}
EOF
)

# Use general init-swarm primitive
./.claude/skills/redis-coordination/init-swarm.sh \
    --swarm-id "$SWARM_ID" \
    --agents "$ALL_AGENTS" \
    --task-id "$TASK_ID" \
    --topology "hierarchical" \
    --metadata "$CFN_METADATA" > /dev/null

# Start shutdown monitor in background
start_shutdown_monitor "$TASK_ID"

# Store epic context in Redis (if provided)
#
# The context values are handed to redis-cli as a single quoted argument, so
# no shell re-parsing takes place and no quote escaping is needed. They are
# stored verbatim: rewriting each single quote into a shell escape sequence
# put backslashes into the stored JSON, which the jq parsing in the iteration
# loop rejects. The *_ESCAPED variables are still assigned in case code
# outside this section reads them.
# NOTE(review): drop the *_ESCAPED assignments if nothing else uses them.
if [ -n "$EPIC_CONTEXT" ]; then
    echo "📋 Storing epic context in Redis..."
    EPIC_ESCAPED="${EPIC_CONTEXT//\'/\'\\\'\'}"
    redis-cli setex "swarm:${TASK_ID}:epic-context" 604800 "$EPIC_CONTEXT" >/dev/null
    echo " ✅ Epic context stored (TTL: 7 days)"
fi

if [ -n "$PHASE_CONTEXT" ]; then
    echo "📋 Storing phase context in Redis..."
    PHASE_ESCAPED="${PHASE_CONTEXT//\'/\'\\\'\'}"
    redis-cli setex "swarm:${TASK_ID}:phase-context" 604800 "$PHASE_CONTEXT" >/dev/null
    echo " ✅ Phase context stored (TTL: 7 days)"
fi

if [ -n "$SUCCESS_CRITERIA" ]; then
    echo "📋 Storing success criteria in Redis..."
    CRITERIA_ESCAPED="${SUCCESS_CRITERIA//\'/\'\\\'\'}"
    redis-cli setex "swarm:${TASK_ID}:success-criteria" 604800 "$SUCCESS_CRITERIA" >/dev/null
    echo " ✅ Success criteria stored (TTL: 7 days)"
fi

echo ""

# [BUG #15 FIX] REMOVED: Early Product Owner spawn at iteration 0
# Product Owner now only spawned after Loop 2 completes (see line 1283)
# This prevents timeout issues with waiting mode initialization
echo "[Product Owner] Will spawn after Loop 2 consensus (just-in-time pattern)"
echo ""
|
|
744
|
+
|
|
745
|
+
# Iteration loop
|
|
746
|
+
for ITERATION in $(seq 1 $MAX_ITERATIONS); do
|
|
747
|
+
echo "=== Iteration $ITERATION/$MAX_ITERATIONS ==="
|
|
748
|
+
|
|
749
|
+
# METRICS: Iteration start timestamp
|
|
750
|
+
ITERATION_START=$(date +%s%N | cut -b1-13) # milliseconds
|
|
751
|
+
redis-cli LPUSH "swarm:${TASK_ID}:metrics:iteration_start" "$ITERATION_START" >/dev/null
|
|
752
|
+
|
|
753
|
+
# Step 1: Build detailed agent context from Redis (BUG #20 FIX - Option 2)
|
|
754
|
+
echo "[Loop 3] Building agent context from Redis..."
|
|
755
|
+
|
|
756
|
+
# Retrieve stored context
|
|
757
|
+
EPIC_CTX=$(redis-cli get "swarm:${TASK_ID}:epic-context" 2>/dev/null || echo "{}")
|
|
758
|
+
PHASE_CTX=$(redis-cli get "swarm:${TASK_ID}:phase-context" 2>/dev/null || echo "{}")
|
|
759
|
+
SUCCESS_CTX=$(redis-cli get "swarm:${TASK_ID}:success-criteria" 2>/dev/null || echo "{}")
|
|
760
|
+
|
|
761
|
+
# Extract key fields with jq (safe parsing)
|
|
762
|
+
EPIC_GOAL=$(echo "$EPIC_CTX" | jq -r '.epicGoal // "No epic goal specified"')
|
|
763
|
+
IN_SCOPE=$(echo "$EPIC_CTX" | jq -r '.inScope[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
|
|
764
|
+
OUT_SCOPE=$(echo "$EPIC_CTX" | jq -r '.outOfScope[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
|
|
765
|
+
DELIVERABLES=$(echo "$PHASE_CTX" | jq -r '.deliverables[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
|
|
766
|
+
DIRECTORY=$(echo "$PHASE_CTX" | jq -r '.directory // ""')
|
|
767
|
+
ACCEPTANCE=$(echo "$SUCCESS_CTX" | jq -r '.acceptanceCriteria[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
|
|
768
|
+
|
|
769
|
+
# Build structured agent context
|
|
770
|
+
LOOP3_AGENT_CONTEXT="Loop 3 implementation for iteration $ITERATION
|
|
771
|
+
|
|
772
|
+
Epic Goal: $EPIC_GOAL
|
|
773
|
+
|
|
774
|
+
In Scope:
|
|
775
|
+
$IN_SCOPE
|
|
776
|
+
|
|
777
|
+
Out of Scope:
|
|
778
|
+
$OUT_SCOPE
|
|
779
|
+
|
|
780
|
+
Deliverables (CRITICAL - you MUST create these files):
|
|
781
|
+
$DELIVERABLES
|
|
782
|
+
$([ -n "$DIRECTORY" ] && echo "
|
|
783
|
+
Target Directory: $DIRECTORY")
|
|
784
|
+
|
|
785
|
+
Acceptance Criteria:
|
|
786
|
+
$ACCEPTANCE
|
|
787
|
+
|
|
788
|
+
IMPORTANT:
|
|
789
|
+
- Use Write tool to create each deliverable file
|
|
790
|
+
- Verify files created with 'ls -la \$DIRECTORY' after each Write
|
|
791
|
+
- All deliverables must exist for validation to pass
|
|
792
|
+
- Report confidence score based on actual file creation
|
|
793
|
+
"
|
|
794
|
+
|
|
795
|
+
# NEW: Add real-time file checklist (STRAT-025: Explicit Deliverable Tracking)
|
|
796
|
+
# Extract deliverable file paths from phase context
|
|
797
|
+
DELIVERABLE_FILES=$(echo "$PHASE_CTX" | jq -r '.deliverables[]? // empty' 2>/dev/null)
|
|
798
|
+
|
|
799
|
+
if [ -n "$DELIVERABLE_FILES" ]; then
|
|
800
|
+
DELIVERABLE_CHECKLIST="\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
801
|
+
DELIVERABLE CHECKLIST (verify BEFORE reporting confidence)
|
|
802
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
|
|
803
|
+
|
|
804
|
+
MISSING_COUNT=0
|
|
805
|
+
COMPLETE_COUNT=0
|
|
806
|
+
|
|
807
|
+
# Check each deliverable file
|
|
808
|
+
while IFS= read -r file; do
|
|
809
|
+
# Skip empty lines
|
|
810
|
+
[ -z "$file" ] && continue
|
|
811
|
+
|
|
812
|
+
# Convert relative paths to absolute if needed
|
|
813
|
+
if [[ "$file" != /* ]]; then
|
|
814
|
+
# If DIRECTORY is set, prepend it
|
|
815
|
+
if [ -n "$DIRECTORY" ]; then
|
|
816
|
+
file="${DIRECTORY}/${file}"
|
|
817
|
+
fi
|
|
818
|
+
fi
|
|
819
|
+
|
|
820
|
+
if [ -f "$file" ]; then
|
|
821
|
+
DELIVERABLE_CHECKLIST="${DELIVERABLE_CHECKLIST}✅ COMPLETE: $file\n"
|
|
822
|
+
COMPLETE_COUNT=$((COMPLETE_COUNT + 1))
|
|
823
|
+
else
|
|
824
|
+
DELIVERABLE_CHECKLIST="${DELIVERABLE_CHECKLIST}❌ MISSING: $file (YOU MUST CREATE THIS)\n"
|
|
825
|
+
MISSING_COUNT=$((MISSING_COUNT + 1))
|
|
826
|
+
fi
|
|
827
|
+
done <<< "$DELIVERABLE_FILES"
|
|
828
|
+
|
|
829
|
+
# Add status summary
|
|
830
|
+
DELIVERABLE_CHECKLIST="${DELIVERABLE_CHECKLIST}\nStatus: ${COMPLETE_COUNT} complete, ${MISSING_COUNT} missing\n"
|
|
831
|
+
|
|
832
|
+
if [ "$MISSING_COUNT" -gt 0 ]; then
|
|
833
|
+
DELIVERABLE_CHECKLIST="${DELIVERABLE_CHECKLIST}\n⚠️ CRITICAL: ${MISSING_COUNT} file(s) marked ❌ MISSING above.
|
|
834
|
+
Your confidence should be LOW (<0.50) until ALL files are created.
|
|
835
|
+
Create ALL missing files before reporting high confidence.\n"
|
|
836
|
+
else
|
|
837
|
+
DELIVERABLE_CHECKLIST="${DELIVERABLE_CHECKLIST}\n✅ All deliverables complete! You may report high confidence if quality requirements met.\n"
|
|
838
|
+
fi
|
|
839
|
+
|
|
840
|
+
# Append checklist to agent context
|
|
841
|
+
LOOP3_AGENT_CONTEXT="${LOOP3_AGENT_CONTEXT}${DELIVERABLE_CHECKLIST}"
|
|
842
|
+
|
|
843
|
+
echo " 📋 Deliverable checklist: ${COMPLETE_COUNT} complete, ${MISSING_COUNT} missing"
|
|
844
|
+
fi
|
|
845
|
+
|
|
846
|
+
# PHASE 1 (BUG #23): Inject feedback history for iterative learning
|
|
847
|
+
if [ "$ITERATION" -gt 1 ]; then
|
|
848
|
+
FEEDBACK_HISTORY=$(redis-cli GET "swarm:${TASK_ID}:feedback:history" 2>/dev/null)
|
|
849
|
+
# Normalize empty/nil to valid JSON array
|
|
850
|
+
if [ -z "$FEEDBACK_HISTORY" ] || [ "$FEEDBACK_HISTORY" = "(nil)" ]; then
|
|
851
|
+
FEEDBACK_HISTORY="[]"
|
|
852
|
+
fi
|
|
853
|
+
|
|
854
|
+
if [ "$FEEDBACK_HISTORY" != "[]" ]; then
|
|
855
|
+
# Format feedback for human readability
|
|
856
|
+
FEEDBACK_SUMMARY=$(echo "$FEEDBACK_HISTORY" | jq -r '.[] | "- Iteration \(.iteration) (\(.source)): \(.feedback)"' 2>/dev/null || echo "")
|
|
857
|
+
|
|
858
|
+
if [ -n "$FEEDBACK_SUMMARY" ]; then
|
|
859
|
+
# Prepend feedback to agent context
|
|
860
|
+
LOOP3_AGENT_CONTEXT="Loop 3 implementation for iteration $ITERATION
|
|
861
|
+
|
|
862
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
863
|
+
PREVIOUS ITERATION FEEDBACK (LEARN FROM THIS)
|
|
864
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
865
|
+
|
|
866
|
+
$FEEDBACK_SUMMARY
|
|
867
|
+
|
|
868
|
+
CRITICAL: Address the feedback above. Do NOT repeat previous mistakes.
|
|
869
|
+
|
|
870
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
871
|
+
|
|
872
|
+
$LOOP3_AGENT_CONTEXT"
|
|
873
|
+
echo " 📝 Injected feedback history ($(echo "$FEEDBACK_HISTORY" | jq '. | length') items)"
|
|
874
|
+
fi
|
|
875
|
+
fi
|
|
876
|
+
fi
|
|
877
|
+
|
|
878
|
+
echo " ✅ Agent context built ($(echo "$LOOP3_AGENT_CONTEXT" | wc -c) characters)"
|
|
879
|
+
echo ""
|
|
880
|
+
|
|
881
|
+
# Step 2: Spawn Loop 3 agents via CLI
|
|
882
|
+
echo "[Loop 3] Spawning implementers via CLI..."
|
|
883
|
+
IFS=',' read -ra AGENTS <<< "$LOOP3_AGENTS"
|
|
884
|
+
|
|
885
|
+
# Track instance counts to generate unique agent IDs for duplicate agent types
|
|
886
|
+
declare -A AGENT_INSTANCE_COUNTS
|
|
887
|
+
declare -A AGENT_IDS # Map from array index to unique agent ID
|
|
888
|
+
|
|
889
|
+
# Pre-calculate unique agent IDs
|
|
890
|
+
for i in "${!AGENTS[@]}"; do
|
|
891
|
+
AGENT="${AGENTS[$i]}"
|
|
892
|
+
|
|
893
|
+
# Increment instance counter for this agent type
|
|
894
|
+
AGENT_INSTANCE_COUNTS["$AGENT"]=$((${AGENT_INSTANCE_COUNTS["$AGENT"]:-0} + 1))
|
|
895
|
+
INSTANCE_NUM="${AGENT_INSTANCE_COUNTS["$AGENT"]}"
|
|
896
|
+
|
|
897
|
+
# Generate unique agent ID: agent-type-iteration-instance
|
|
898
|
+
UNIQUE_AGENT_ID="${AGENT}-${ITERATION}-${INSTANCE_NUM}"
|
|
899
|
+
AGENT_IDS["$i"]="$UNIQUE_AGENT_ID"
|
|
900
|
+
|
|
901
|
+
echo " [Instance Tracking] ${AGENT} #${INSTANCE_NUM} → ${UNIQUE_AGENT_ID}"
|
|
902
|
+
done
|
|
903
|
+
|
|
904
|
+
echo ""
|
|
905
|
+
|
|
906
|
+
# [PHASE 1 INTEGRATION] Loop 3 Skill-Based Output Processing (Parallel)
# Uses .claude/skills/loop3-output-processing/ for guaranteed confidence extraction
echo "[Loop 3] Using skill-based output processing (parallel execution)"

LOOP3_TOTAL=${#AGENTS[@]}
LOOP3_REQUIRED=$(calculate_quorum "$MIN_QUORUM_LOOP3" "$LOOP3_TOTAL")
LOOP3_COMPLETED_AGENTS=()
LOOP3_FAILED_AGENTS=()

echo "[Loop 3] Quorum: $LOOP3_REQUIRED/$LOOP3_TOTAL agents required"
echo ""

# Step 2a: Spawn all agents in parallel (background processes)
# AGENT_PIDS and AGENT_OUTPUT_FILES are both keyed by unique agent ID.
declare -A AGENT_PIDS
declare -A AGENT_OUTPUT_FILES

for i in "${!AGENTS[@]}"; do
  AGENT="${AGENTS[$i]}"
  UNIQUE_AGENT_ID="${AGENT_IDS[$i]}"

  # Get agent-specific timeout
  AGENT_TIMEOUT=$(get_agent_timeout "$AGENT" "$TASK_ID")

  # Temp file through which the background subshell hands its result back
  OUTPUT_FILE="/tmp/loop3-${TASK_ID}-${UNIQUE_AGENT_ID}.json"
  AGENT_OUTPUT_FILES["$UNIQUE_AGENT_ID"]="$OUTPUT_FILE"

  echo " Spawning $AGENT (ID: $UNIQUE_AGENT_ID, timeout: ${AGENT_TIMEOUT}s)"

  # LOG: Loop 3 agent spawn (best effort: failures are deliberately ignored)
  ./.claude/skills/redis-coordination/log-event.sh \
    --task-id "$TASK_ID" \
    --event-type "agent_spawn" \
    --loop "loop3" \
    --agent-id "$UNIQUE_AGENT_ID" \
    --iteration "$ITERATION" \
    --details "{\"agent_type\": \"$AGENT\", \"timeout\": $AGENT_TIMEOUT}" \
    --level "INFO" 2>/dev/null || true

  # Execute agent via Loop 3 skill in background.
  # Exit status of the subshell: 0 = result written, 1 = error record written.
  (
    # First 13 digits of the seconds+nanoseconds stamp, i.e. milliseconds
    # (relies on `date` supporting %N).
    START_TIME=$(date +%s%N | cut -b1-13)

    # Execute skill (BUG #20 FIX - inject detailed context)
    if SKILL_RESULT=$(./.claude/skills/loop3-output-processing/execute-and-extract.sh \
      --agent-type "$AGENT" \
      --task-id "$TASK_ID" \
      --agent-id "$UNIQUE_AGENT_ID" \
      --context "$LOOP3_AGENT_CONTEXT" \
      --iteration "$ITERATION" \
      --timeout "$AGENT_TIMEOUT" 2>&1); then

      END_TIME=$(date +%s%N | cut -b1-13)
      LATENCY=$((END_TIME - START_TIME))

      # Add latency to result. If the skill output cannot be parsed (stderr is
      # merged into it above) jq fails or prints nothing; that case falls
      # through to the error record instead of storing an empty "result".
      if RESULT_WITH_LATENCY=$(echo "$SKILL_RESULT" \
        | jq --arg latency "$LATENCY" '. + {latency_ms: ($latency | tonumber)}') \
        && [ -n "$RESULT_WITH_LATENCY" ]; then

        # Save to temp file
        echo "$RESULT_WITH_LATENCY" > "$OUTPUT_FILE"

        # Store result in Redis and signal completion
        echo "$RESULT_WITH_LATENCY" | redis-cli -x LPUSH "swarm:${TASK_ID}:${UNIQUE_AGENT_ID}:result" >/dev/null
        redis-cli LPUSH "swarm:${TASK_ID}:${UNIQUE_AGENT_ID}:done" "complete" >/dev/null

        exit 0
      fi
    fi

    # Skill failed (or returned unparseable output) - save error.
    # jq does the escaping so quotes/newlines in the output cannot corrupt
    # the JSON record.
    jq -n --arg output "$SKILL_RESULT" '{error: true, output: $output}' > "$OUTPUT_FILE"
    exit 1
  ) &

  AGENT_PIDS["$UNIQUE_AGENT_ID"]=$!
  echo " ✅ Spawned $UNIQUE_AGENT_ID (PID: ${AGENT_PIDS[$UNIQUE_AGENT_ID]})"
done

echo ""
echo "[Loop 3] All agents spawned, waiting for completion..."
echo ""
|
|
988
|
+
|
|
989
|
+
# Step 2b: Wait for all agents to complete
#
# For each spawned agent: reap its background subshell, read the JSON left in
# its temp file, then either record the confidence (success) or record a
# failure. Completed agents are appended to LOOP3_COMPLETED_AGENTS by unique
# ID; failed ones to LOOP3_FAILED_AGENTS by agent type.
for i in "${!AGENTS[@]}"; do
  AGENT="${AGENTS[$i]}"
  UNIQUE_AGENT_ID="${AGENT_IDS[$i]}"
  AGENT_PID="${AGENT_PIDS[$UNIQUE_AGENT_ID]}"
  OUTPUT_FILE="${AGENT_OUTPUT_FILES[$UNIQUE_AGENT_ID]}"

  echo " Waiting for $UNIQUE_AGENT_ID (PID: $AGENT_PID)..."

  FAILURE_REASON="" # text shown to the operator; empty means success
  FAILURE_CODE=""   # machine-readable code stored in the failure event
  ERROR_OUTPUT=""

  # Wait for specific agent process
  if wait "$AGENT_PID" 2>/dev/null; then
    if [ -f "$OUTPUT_FILE" ]; then
      SKILL_RESULT=$(cat "$OUTPUT_FILE")

      # Anything other than an explicit/defaulted "false" counts as an error,
      # including an empty or unparseable file.
      HAS_ERROR=$(echo "$SKILL_RESULT" | jq -r '.error // false')
      if [ "$HAS_ERROR" != "false" ]; then
        FAILURE_REASON="skill execution error"
        FAILURE_CODE="skill_execution_error"
        ERROR_OUTPUT=$(echo "$SKILL_RESULT" | jq -r '.output')
      fi
    else
      FAILURE_REASON="no output file"
      FAILURE_CODE="no_output_file"
    fi
  else
    FAILURE_REASON="process error"
    FAILURE_CODE="process_error"
    # A failing agent subshell may still have left an error record behind.
    # Read it before the file is removed so the cause is not lost; fall back
    # to the raw file content when it is not JSON.
    if [ -s "$OUTPUT_FILE" ]; then
      ERROR_OUTPUT=$(jq -r '.output // empty' "$OUTPUT_FILE" 2>/dev/null) \
        || ERROR_OUTPUT=$(cat "$OUTPUT_FILE")
    fi
  fi

  # Cleanup temp file (its content, if any, has been captured above)
  rm -f "$OUTPUT_FILE"

  if [ -z "$FAILURE_REASON" ]; then
    # Extract metrics
    CONFIDENCE=$(echo "$SKILL_RESULT" | jq -r '.confidence')
    FILES_CHANGED=$(echo "$SKILL_RESULT" | jq -r '.files_changed')
    CONFIDENCE_SOURCE=$(echo "$SKILL_RESULT" | jq -r '.confidence_source')
    LATENCY=$(echo "$SKILL_RESULT" | jq -r '.latency_ms')

    echo " ✅ $UNIQUE_AGENT_ID complete (${LATENCY}ms, confidence: $CONFIDENCE [$CONFIDENCE_SOURCE], files: $FILES_CHANGED)"

    # BUGFIX #21: Store confidence in Redis for consensus collection
    # The skill script extracts confidence but doesn't store it where invoke-waiting-mode.sh collect expects
    ./.claude/skills/redis-coordination/invoke-waiting-mode.sh report \
      --task-id "$TASK_ID" \
      --agent-id "$UNIQUE_AGENT_ID" \
      --confidence "$CONFIDENCE" \
      --iteration "$ITERATION" >/dev/null

    # LOG: Loop 3 agent completion. The details object is projected straight
    # from the result JSON so it stays valid whatever the field types are.
    ./.claude/skills/redis-coordination/log-event.sh \
      --task-id "$TASK_ID" \
      --event-type "agent_complete" \
      --loop "loop3" \
      --agent-id "$UNIQUE_AGENT_ID" \
      --iteration "$ITERATION" \
      --details "$(echo "$SKILL_RESULT" | jq -c '{confidence, confidence_source, files_changed, latency_ms}')" \
      --level "INFO" 2>/dev/null || true

    # Store latency metric
    METRIC=$(jq -nc \
      --arg agent "$UNIQUE_AGENT_ID" \
      --arg latency "$LATENCY" \
      --arg loop "loop3" \
      --arg iteration "$ITERATION" \
      '{agent: $agent, latency_ms: ($latency | tonumber), loop: $loop, iteration: ($iteration | tonumber)}')
    echo "$METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:agent_latency" >/dev/null

    LOOP3_COMPLETED_AGENTS+=("$UNIQUE_AGENT_ID")
  else
    echo " ❌ $UNIQUE_AGENT_ID failed ($FAILURE_REASON)"
    if [ -n "$ERROR_OUTPUT" ]; then
      echo " Error: $ERROR_OUTPUT"
    fi

    # LOG: Loop 3 agent failure (jq escapes the captured output)
    ./.claude/skills/redis-coordination/log-event.sh \
      --task-id "$TASK_ID" \
      --event-type "agent_failure" \
      --loop "loop3" \
      --agent-id "$UNIQUE_AGENT_ID" \
      --iteration "$ITERATION" \
      --details "$(jq -nc --arg error "$FAILURE_CODE" --arg output "$ERROR_OUTPUT" '{error: $error, output: $output}')" \
      --level "ERROR" 2>/dev/null || true

    LOOP3_FAILED_AGENTS+=("$AGENT")
    redis-cli INCR "swarm:${TASK_ID}:metrics:agent_failure_count" >/dev/null
  fi

  echo ""
done
|
|
1079
|
+
|
|
1080
|
+
# Validate quorum
# Stop the run (exit 1) when fewer agents completed than LOOP3_REQUIRED.
# Otherwise continue, reporting any failed agents and bumping the
# quorum_fallback counter in Redis.
if ! [ ${#LOOP3_COMPLETED_AGENTS[@]} -ge "$LOOP3_REQUIRED" ]; then
  echo "[Loop 3] ❌ Quorum FAILED: ${#LOOP3_COMPLETED_AGENTS[@]} < $LOOP3_REQUIRED"
  echo "[Loop 3] Failed agents: ${LOOP3_FAILED_AGENTS[*]}"
  exit 1
fi

echo "[Loop 3] ✅ Quorum met: ${#LOOP3_COMPLETED_AGENTS[@]}/$LOOP3_REQUIRED agents completed"
if [ ${#LOOP3_FAILED_AGENTS[@]} -ne 0 ]; then
  echo "[Loop 3] ⚠️ Failed agents (continuing with quorum): ${LOOP3_FAILED_AGENTS[*]}"

  # METRICS: Increment quorum fallback counter
  redis-cli INCR "swarm:${TASK_ID}:metrics:quorum_fallback" >/dev/null
fi
echo ""
|
|
1095
|
+
|
|
1096
|
+
# Step 2: Collect Loop 3 confidence scores (only from completed agents)
echo "[Loop 3] Collecting confidence scores from ${#LOOP3_COMPLETED_AGENTS[@]} agents..."

# Comma-join the completed agent IDs; IFS is changed only inside the subshell.
LOOP3_COMPLETED_IDS=$(
  IFS=','
  printf '%s\n' "${LOOP3_COMPLETED_AGENTS[*]}"
)

# The consensus value is whatever the collect command prints on its last line.
LOOP3_CONSENSUS=$(
  ./.claude/skills/redis-coordination/invoke-waiting-mode.sh collect \
    --task-id "$TASK_ID" \
    --agent-ids "$LOOP3_COMPLETED_IDS" | tail -n 1
)

echo "[Loop 3] Average confidence: $LOOP3_CONSENSUS (from ${#LOOP3_COMPLETED_AGENTS[@]}/${LOOP3_TOTAL} agents)"

# METRICS: Store Loop 3 consensus score
LOOP3_METRIC=$(
  jq -nc \
    --arg consensus "$LOOP3_CONSENSUS" \
    --arg iteration "$ITERATION" \
    '{consensus: ($consensus | tonumber), iteration: ($iteration | tonumber)}'
)
redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:loop3_consensus" >/dev/null <<< "$LOOP3_METRIC"
|
|
1111
|
+
|
|
1112
|
+
# BUG #12 FIX: Deliverable Verification with explicit file checking
#
# Runs validate-deliverables.sh. When it prints FAILED, the Loop 3 results are
# overridden, feedback is accumulated for the next attempt, and the enclosing
# loop is restarted with `continue` (gate check and Loop 2 are skipped).
echo "[Deliverable Check] Verifying implementation artifacts..."

# Use enhanced validate-deliverables.sh skill.
# Arguments are kept in an array so EXPECTED_FILES is passed as exactly one
# argument: no word splitting on spaces and no glob expansion.
# NOTE(review): assumes --expected-files takes a single value; confirm against
# validate-deliverables.sh.
DELIVERABLE_ARGS=(--task-id "$TASK_ID")
if [ -n "$EXPECTED_FILES" ]; then
  DELIVERABLE_ARGS+=(--expected-files "$EXPECTED_FILES")
  echo " Expected files: $EXPECTED_FILES"
fi

DELIVERABLE_STATUS=$(./.claude/skills/product-owner-decision/validate-deliverables.sh "${DELIVERABLE_ARGS[@]}")

if [ "$DELIVERABLE_STATUS" = "FAILED" ]; then
  # Retrieve missing files from Redis (if available)
  MISSING_FILES_JSON=$(redis-cli get "swarm:${TASK_ID}:missing-files" 2>/dev/null || echo "[]")
  # Render the JSON array as "a, b, c". Anything that is not a JSON array
  # (including an empty reply) yields an empty list.
  MISSING_FILES_LIST=$(echo "$MISSING_FILES_JSON" \
    | jq -r 'if type == "array" then join(", ") else empty end' 2>/dev/null)

  if [ -n "$MISSING_FILES_LIST" ]; then
    echo "❌ DELIVERABLE VERIFICATION FAILED: Missing files"
    echo " Expected but not found: $MISSING_FILES_LIST"
  else
    echo "❌ DELIVERABLE VERIFICATION FAILED: No files created or modified"
  fi

  echo " This prevents 'consensus on vapor' - validators approving nothing"
  echo ""
  echo "Decision: RELAUNCH iteration $((ITERATION + 1)) (skip Loop 2 validation)"
  echo ""

  # METRICS: Increment deliverable failure counter
  redis-cli INCR "swarm:${TASK_ID}:metrics:deliverable_failures" >/dev/null

  # Override all Loop 3 confidence scores to 0.0 (prevent gate pass)
  for AGENT in "${LOOP3_COMPLETED_AGENTS[@]}"; do
    redis-cli DEL "swarm:${TASK_ID}:${AGENT}:result" >/dev/null
    redis-cli LPUSH "swarm:${TASK_ID}:${AGENT}:result" "0.0" >/dev/null
    echo " [Override] ${AGENT} confidence: 1.0 → 0.0 (no deliverables)"
  done

  # Recalculate consensus (should be 0.0 now)
  LOOP3_CONSENSUS=$(./.claude/skills/redis-coordination/invoke-waiting-mode.sh collect \
    --task-id "$TASK_ID" \
    --agent-ids "$LOOP3_COMPLETED_IDS" | tail -1)

  echo ""
  echo "[Loop 3] Recalculated confidence after override: $LOOP3_CONSENSUS"
  echo ""

  # Build specific feedback with missing files
  if [ -n "$MISSING_FILES_LIST" ]; then
    FEEDBACK="CRITICAL: Create these missing files: $MISSING_FILES_LIST

Use the Write tool for each file. Verify with 'ls -la' after each Write operation."
  else
    FEEDBACK="CRITICAL: You must create or modify files. No deliverables were produced in iteration $ITERATION."
  fi

  # PHASE 1 (BUG #23): Accumulate feedback across iterations for learning
  # BUGFIX #22: Store feedback in Redis for next iteration (agents will be re-spawned, not woken)
  # Per P3 agent lifecycle: agents exit cleanly, orchestrator spawns fresh agents
  accumulate_feedback "$TASK_ID" "$ITERATION" "deliverable_check" "$FEEDBACK"
  echo " Reason: no_deliverables"
  echo " Priority: 40 (HIGH)"

  continue # Next iteration (skip gate check and Loop 2)
fi

echo "[Deliverable Check] ✅ Deliverables verified - proceeding to gate check"
echo ""
|
|
1181
|
+
|
|
1182
|
+
# Gate check
#
# Compares the Loop 3 consensus with GATE. Below the gate: log, accumulate
# feedback and restart the enclosing loop. Otherwise: log the pass and push a
# gate-passed signal to Redis for the Loop 2 validators.
#
# Fail closed: if the consensus is empty or not a plain decimal, bc prints
# nothing and `(( ))` evaluates to false, which would be reported as a gate
# PASS. Treat such a value as 0.0 instead.
if ! [[ "$LOOP3_CONSENSUS" =~ ^[0-9]*\.?[0-9]+$ ]]; then
  echo "⚠️ Loop 3 consensus is not numeric ('$LOOP3_CONSENSUS'), treating as 0.0"
  LOOP3_CONSENSUS=0.0
fi

if (( $(echo "$LOOP3_CONSENSUS < $GATE" | bc -l) )); then
  echo "❌ Gate FAILED ($LOOP3_CONSENSUS < $GATE)"
  echo "Decision: RELAUNCH iteration $((ITERATION + 1))"

  # LOG: Gate check failure
  ./.claude/skills/redis-coordination/log-event.sh \
    --task-id "$TASK_ID" \
    --event-type "gate_check" \
    --iteration "$ITERATION" \
    --details "{\"consensus\": $LOOP3_CONSENSUS, \"threshold\": $GATE, \"result\": \"FAIL\", \"decision\": \"RELAUNCH\"}" \
    --level "WARN" 2>/dev/null || true

  # METRICS: Increment gate failure counter
  redis-cli INCR "swarm:${TASK_ID}:metrics:gate_failures" >/dev/null

  # PHASE 1 (BUG #23): Accumulate feedback across iterations for learning
  # BUGFIX #22: Store feedback in Redis for next iteration (agents will be re-spawned, not woken)
  # Per P3 agent lifecycle: agents exit cleanly, orchestrator spawns fresh agents
  FEEDBACK_MSG="Improve confidence from $LOOP3_CONSENSUS to >$GATE"
  accumulate_feedback "$TASK_ID" "$ITERATION" "gate_check" "$FEEDBACK_MSG"
  echo " Reason: gate_failed"
  echo " Priority: 30 (MEDIUM)"

  continue # Next iteration
fi

echo "✅ Gate PASSED ($LOOP3_CONSENSUS >= $GATE)"

# LOG: Gate check success
./.claude/skills/redis-coordination/log-event.sh \
  --task-id "$TASK_ID" \
  --event-type "gate_check" \
  --iteration "$ITERATION" \
  --details "{\"consensus\": $LOOP3_CONSENSUS, \"threshold\": $GATE, \"result\": \"PASS\"}" \
  --level "INFO" 2>/dev/null || true

echo ""

# Signal Loop 2 validators that gate has passed (they can start work)
GATE_PASS_KEY="swarm:${TASK_ID}:gate-passed"
redis-cli lpush "$GATE_PASS_KEY" "{\"iteration\": $ITERATION, \"loop3_confidence\": $LOOP3_CONSENSUS}" > /dev/null
echo "[Loop 3] Gate pass signal sent to Loop 2 validators"
echo ""
|
|
1226
|
+
|
|
1227
|
+
# Step 3: Build Loop 2 validator context (BUG #20 FIX - inject same deliverables)
#
# Assembles the prompt passed to every validator via --context. The
# "Target Directory" paragraph is included only when DIRECTORY is non-empty.
# It is written as a ${VAR:+...} expansion rather than
# $([ -n "$DIRECTORY" ] && echo ...): with an empty DIRECTORY the command
# substitution returns non-zero, that becomes the status of the assignment,
# and the script would abort if errexit is enabled. The produced text is the
# same in both cases.
LOOP2_VALIDATOR_CONTEXT="Loop 2 validation for iteration $ITERATION

Review Loop 3 implementation against these requirements:

Epic Goal: $EPIC_GOAL

Expected Deliverables:
$DELIVERABLES
${DIRECTORY:+
Target Directory: $DIRECTORY}

Acceptance Criteria:
$ACCEPTANCE

Your Validation Tasks:
- Verify all deliverable files exist in correct directory
- Check files contain actual implementation (not placeholders)
- Validate against acceptance criteria
- Provide structured feedback (critical/warnings/suggestions)
- Report confidence score based on deliverable completeness
"

echo "[Loop 2] Validator context built"
echo ""
|
|
1252
|
+
|
|
1253
|
+
# Step 4: Spawn Loop 2 validators using skill-based output processing (parallel execution)
echo "[Loop 2] Using skill-based output processing (parallel execution)"
# LOOP2_AGENTS is split on commas; the same validator type may appear more than once.
IFS=',' read -ra VALIDATORS <<< "$LOOP2_AGENTS"

# Track instance counts to generate unique validator IDs for duplicate validator types.
# A bare `declare -A` does not empty an array that already exists, and this
# code is re-entered on every retry (`continue` is used in the Loop 3 stage),
# so the maps are reset explicitly. Without the reset, the per-type counter
# keeps climbing across iterations.
declare -A VALIDATOR_INSTANCE_COUNTS=()
declare -A VALIDATOR_IDS=() # Map from array index to unique validator ID

# Pre-calculate unique validator IDs: <validator-type>-<iteration>-<instance>
for i in "${!VALIDATORS[@]}"; do
  VALIDATOR="${VALIDATORS[$i]}"

  # Increment instance counter for this validator type
  INSTANCE_NUM=$(( ${VALIDATOR_INSTANCE_COUNTS["$VALIDATOR"]:-0} + 1 ))
  VALIDATOR_INSTANCE_COUNTS["$VALIDATOR"]=$INSTANCE_NUM

  UNIQUE_VALIDATOR_ID="${VALIDATOR}-${ITERATION}-${INSTANCE_NUM}"
  VALIDATOR_IDS["$i"]="$UNIQUE_VALIDATOR_ID"

  echo " [Instance Tracking] ${VALIDATOR} #${INSTANCE_NUM} → ${UNIQUE_VALIDATOR_ID}"
done

echo ""
|
|
1277
|
+
|
|
1278
|
+
# Step 3a: Spawn all validators in parallel using skill
#
# One background subshell per validator. Each subshell runs the Loop 2 skill,
# adds a latency_ms field to its output, writes the result to a per-validator
# temp file, and mirrors it to Redis together with a "done" marker.
echo "[Loop 2] Spawning validators in parallel..."
declare -A VALIDATOR_PIDS         # Map from validator ID to background PID
declare -A VALIDATOR_OUTPUT_FILES # Map from validator ID to temp output file

LOOP2_TOTAL=${#VALIDATORS[@]}
LOOP2_REQUIRED=$(calculate_quorum "$MIN_QUORUM_LOOP2" "$LOOP2_TOTAL")

echo "[Loop 2] Quorum: $LOOP2_REQUIRED/$LOOP2_TOTAL validators required"
echo ""

for i in "${!VALIDATORS[@]}"; do
  VALIDATOR="${VALIDATORS[$i]}"
  UNIQUE_VALIDATOR_ID="${VALIDATOR_IDS[$i]}"

  # Timeout lookup uses the base validator type, not the unique ID
  AGENT_TIMEOUT=$(get_agent_timeout "$VALIDATOR" "$TASK_ID")

  # Temp output file for this validator
  OUTPUT_FILE="/tmp/loop2-${TASK_ID}-${UNIQUE_VALIDATOR_ID}.json"
  VALIDATOR_OUTPUT_FILES["$UNIQUE_VALIDATOR_ID"]="$OUTPUT_FILE"

  echo " Spawning: $VALIDATOR (ID: $UNIQUE_VALIDATOR_ID, timeout: ${AGENT_TIMEOUT}s)"

  # Execute skill in background - captures agent output and extracts structured data
  (
    # METRICS: Agent latency start (first 13 digits of seconds+nanoseconds)
    AGENT_START=$(date +%s%N | cut -b1-13)

    # Run the validator through the skill (BUG #20 FIX - inject detailed context);
    # stderr is folded into the captured output.
    SKILL_RESULT=$(
      ./.claude/skills/loop2-output-processing/execute-and-extract.sh \
        --agent-type "$VALIDATOR" \
        --task-id "$TASK_ID" \
        --agent-id "$UNIQUE_VALIDATOR_ID" \
        --context "$LOOP2_VALIDATOR_CONTEXT" \
        --iteration "$ITERATION" \
        --timeout "$AGENT_TIMEOUT" 2>&1
    )

    # METRICS: Agent latency end
    AGENT_END=$(date +%s%N | cut -b1-13)
    LATENCY=$((AGENT_END - AGENT_START))

    # Inject latency into result JSON
    SKILL_RESULT_WITH_LATENCY=$(
      jq --arg latency "$LATENCY" '. + {latency_ms: ($latency | tonumber)}' <<< "$SKILL_RESULT"
    )

    # Write result to temp file
    echo "$SKILL_RESULT_WITH_LATENCY" > "$OUTPUT_FILE"

    # Also push to Redis for compatibility with existing tools
    redis-cli -x LPUSH "swarm:${TASK_ID}:${UNIQUE_VALIDATOR_ID}:result" >/dev/null <<< "$SKILL_RESULT_WITH_LATENCY"

    # Signal completion
    redis-cli LPUSH "swarm:${TASK_ID}:${UNIQUE_VALIDATOR_ID}:done" "complete" >/dev/null
  ) &

  # Track background PID
  VALIDATOR_PIDS["$UNIQUE_VALIDATOR_ID"]=$!
  echo " ✅ Spawned $UNIQUE_VALIDATOR_ID (PID: ${VALIDATOR_PIDS[$UNIQUE_VALIDATOR_ID]})"
done

echo ""
echo "[Loop 2] All validators spawned, waiting for completion..."
echo ""
|
|
1341
|
+
|
|
1342
|
+
# Step 3b: Wait for all validators to complete and collect results
#
# Completed validators are appended to LOOP2_COMPLETED_AGENTS by unique ID and
# their confidence is stored in LOOP2_CONFIDENCES. Failed ones are appended to
# LOOP2_FAILED_AGENTS by validator type.
LOOP2_COMPLETED_AGENTS=()
LOOP2_FAILED_AGENTS=()
declare -A LOOP2_CONFIDENCES # Map from validator ID to confidence score

for i in "${!VALIDATORS[@]}"; do
  VALIDATOR="${VALIDATORS[$i]}"
  UNIQUE_VALIDATOR_ID="${VALIDATOR_IDS[$i]}"
  VALIDATOR_PID="${VALIDATOR_PIDS[$UNIQUE_VALIDATOR_ID]}"
  OUTPUT_FILE="${VALIDATOR_OUTPUT_FILES[$UNIQUE_VALIDATOR_ID]}"

  echo " Waiting for $UNIQUE_VALIDATOR_ID (PID: $VALIDATOR_PID)..."

  # Wait for background process to complete
  if wait "$VALIDATOR_PID" 2>/dev/null; then
    # Process completed successfully, read result from temp file
    if [ -f "$OUTPUT_FILE" ] && [ -s "$OUTPUT_FILE" ]; then
      SKILL_RESULT=$(cat "$OUTPUT_FILE")

      # Validate JSON structure: the result must be a JSON object.
      # `jq empty` is not enough here because it also succeeds on blank
      # input, and a file holding only a newline passes the -s test above.
      # That let a validator with no usable result count towards quorum.
      # `jq -e` exits non-zero both for non-objects and when there is no
      # input value at all.
      if echo "$SKILL_RESULT" | jq -e 'type == "object"' >/dev/null 2>&1; then
        # Extract confidence score and companions (defaults apply when absent)
        CONFIDENCE=$(echo "$SKILL_RESULT" | jq -r '.confidence // 0.0')
        CONFIDENCE_SOURCE=$(echo "$SKILL_RESULT" | jq -r '.confidence_source // "unknown"')
        FEEDBACK=$(echo "$SKILL_RESULT" | jq -r '.feedback // {}')
        LATENCY=$(echo "$SKILL_RESULT" | jq -r '.latency_ms // 0')

        # Store confidence for consensus calculation
        LOOP2_CONFIDENCES["$UNIQUE_VALIDATOR_ID"]="$CONFIDENCE"

        # Store latency metric
        METRIC=$(jq -nc \
          --arg agent "$UNIQUE_VALIDATOR_ID" \
          --arg latency "$LATENCY" \
          --arg loop "loop2" \
          --arg iteration "$ITERATION" \
          '{agent: $agent, latency_ms: ($latency | tonumber), loop: $loop, iteration: ($iteration | tonumber)}')
        echo "$METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:agent_latency" >/dev/null

        # Count feedback items per severity
        CRITICAL_COUNT=$(echo "$FEEDBACK" | jq -r '.critical | length')
        WARNINGS_COUNT=$(echo "$FEEDBACK" | jq -r '.warnings | length')
        SUGGESTIONS_COUNT=$(echo "$FEEDBACK" | jq -r '.suggestions | length')

        echo " ✅ $UNIQUE_VALIDATOR_ID complete (${LATENCY}ms, confidence: $CONFIDENCE [$CONFIDENCE_SOURCE], feedback: ${CRITICAL_COUNT}C/${WARNINGS_COUNT}W/${SUGGESTIONS_COUNT}S)"

        LOOP2_COMPLETED_AGENTS+=("$UNIQUE_VALIDATOR_ID")
      else
        echo " ⚠️ $UNIQUE_VALIDATOR_ID returned invalid JSON, treating as failed"
        LOOP2_FAILED_AGENTS+=("$VALIDATOR")

        # METRICS: counted under timeout_count, the same key the other
        # failure branches use
        redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
      fi
    else
      echo " ⚠️ $UNIQUE_VALIDATOR_ID completed but no output file found"
      LOOP2_FAILED_AGENTS+=("$VALIDATOR")

      # METRICS: Increment timeout counter
      redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
    fi
  else
    echo " ❌ $UNIQUE_VALIDATOR_ID failed (process exited with error)"
    LOOP2_FAILED_AGENTS+=("$VALIDATOR")

    # METRICS: Increment timeout counter
    redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
  fi

  # Cleanup temp file
  rm -f "$OUTPUT_FILE"
done

echo ""
|
|
1416
|
+
|
|
1417
|
+
# Validate quorum
# Stop the run (exit 1) when fewer validators completed than LOOP2_REQUIRED.
# Otherwise continue, reporting any failed validators and bumping the
# quorum_fallback counter in Redis.
if ! [ ${#LOOP2_COMPLETED_AGENTS[@]} -ge "$LOOP2_REQUIRED" ]; then
  echo "[Loop 2] ❌ Quorum FAILED: ${#LOOP2_COMPLETED_AGENTS[@]} < $LOOP2_REQUIRED"
  echo "[Loop 2] Failed validators: ${LOOP2_FAILED_AGENTS[*]}"
  exit 1
fi

echo "[Loop 2] ✅ Quorum met: ${#LOOP2_COMPLETED_AGENTS[@]}/$LOOP2_REQUIRED validators completed"
if [ ${#LOOP2_FAILED_AGENTS[@]} -ne 0 ]; then
  echo "[Loop 2] ⚠️ Failed validators (continuing with quorum): ${LOOP2_FAILED_AGENTS[*]}"

  # METRICS: Increment quorum fallback counter
  redis-cli INCR "swarm:${TASK_ID}:metrics:quorum_fallback" >/dev/null
fi
echo ""
|
|
1432
|
+
|
|
1433
|
+
# Step 3c: Calculate Loop 2 consensus from extracted confidence scores
#
# LOOP2_CONSENSUS is the average of the numeric confidences reported by the
# completed validators, computed by bc with scale=2, or 0.0 when none is usable.
echo "[Loop 2] Calculating consensus from ${#LOOP2_COMPLETED_AGENTS[@]} validators..."

# Calculate average confidence from completed validators
LOOP2_TOTAL_CONFIDENCE=0
LOOP2_CONFIDENCE_COUNT=0

for VALIDATOR_ID in "${LOOP2_COMPLETED_AGENTS[@]}"; do
  CONFIDENCE="${LOOP2_CONFIDENCES[$VALIDATOR_ID]}"
  # Only plain decimals are summed. Any other text would make bc fail and
  # leave the running total empty, corrupting every later addition.
  if [[ "$CONFIDENCE" =~ ^[0-9]*\.?[0-9]+$ ]]; then
    LOOP2_TOTAL_CONFIDENCE=$(echo "$LOOP2_TOTAL_CONFIDENCE + $CONFIDENCE" | bc -l)
    LOOP2_CONFIDENCE_COUNT=$((LOOP2_CONFIDENCE_COUNT + 1))
  elif [ -n "$CONFIDENCE" ] && [ "$CONFIDENCE" != "null" ]; then
    echo "⚠️ Ignoring non-numeric confidence '$CONFIDENCE' from $VALIDATOR_ID"
  fi
done

if [ "$LOOP2_CONFIDENCE_COUNT" -gt 0 ]; then
  LOOP2_CONSENSUS=$(echo "scale=2; $LOOP2_TOTAL_CONFIDENCE / $LOOP2_CONFIDENCE_COUNT" | bc -l)
  # bc prints values below 1 without the leading zero (".85"); restore it so
  # the value reads as a normal decimal in messages and in JSON.
  case "$LOOP2_CONSENSUS" in
    .*) LOOP2_CONSENSUS="0$LOOP2_CONSENSUS" ;;
  esac
else
  echo "⚠️ No valid confidence scores found, defaulting to 0.0"
  LOOP2_CONSENSUS=0.0
fi

echo "[Loop 2] Average consensus: $LOOP2_CONSENSUS (from ${LOOP2_CONFIDENCE_COUNT} validators)"

# METRICS: Store Loop 2 consensus score
LOOP2_METRIC=$(jq -nc \
  --arg consensus "$LOOP2_CONSENSUS" \
  --arg iteration "$ITERATION" \
  '{consensus: ($consensus | tonumber), iteration: ($iteration | tonumber)}')
echo "$LOOP2_METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:loop2_consensus" >/dev/null

# Display consensus status (informational only; no branching happens here)
echo ""
if (( $(echo "$LOOP2_CONSENSUS >= $CONSENSUS" | bc -l) )); then
  echo "✅ CONSENSUS REACHED ($LOOP2_CONSENSUS >= $CONSENSUS)"
else
  echo "⚠️ CONSENSUS NOT REACHED ($LOOP2_CONSENSUS < $CONSENSUS)"
fi
echo ""
|
|
1472
|
+
|
|
1473
|
+
# [BUG #11 FIX] Product Owner decision via output parsing (not Redis wait)
#
# Runs the Product Owner agent synchronously, then reads the structured
# decision JSON from the Redis list "swarm:<task>:<po-id>:decision" (index 0).
# Exits 1 when no decision is stored or when it has no usable .decision field.
echo "[Product Owner] Spawning Product Owner for strategic decision..."

# BUG #19 FIX: Define PO_UNIQUE_ID BEFORE building context string
PO_UNIQUE_ID="${PRODUCT_OWNER}-${ITERATION}-decision"

# Build Product Owner context
PO_CONTEXT="CFN Loop iteration $ITERATION complete.

Loop 2 Consensus: $LOOP2_CONSENSUS (threshold: $CONSENSUS)
Task ID: $TASK_ID
Agent ID: $PO_UNIQUE_ID

Make your strategic decision: PROCEED, ITERATE, or ABORT

Decision Framework:
- PROCEED: Consensus >= $CONSENSUS AND deliverables verified
- ITERATE: Consensus < $CONSENSUS AND iteration < $MAX_ITERATIONS
- ABORT: Max iterations reached without consensus

Output your decision clearly with reasoning."

# Spawn Product Owner and capture output
PO_TIMEOUT=$(get_agent_timeout "$PRODUCT_OWNER" "$TASK_ID")
echo "[Product Owner] Spawning with timeout: ${PO_TIMEOUT}s"

# A non-zero exit (including a timeout) is tolerated here; the Redis lookup
# below decides whether a decision was produced.
PO_OUTPUT=$(
  timeout "$PO_TIMEOUT" npx claude-flow-novice agent "$PRODUCT_OWNER" \
    --task-id "$TASK_ID" \
    --agent-id "$PO_UNIQUE_ID" \
    --context "$PO_CONTEXT" 2>&1 || true
)

# Parse structured decision JSON from Redis (created by execute-product-owner-decision.sh)
echo "[Product Owner] Retrieving structured decision from Redis..."
DECISION=$(redis-cli lindex "swarm:${TASK_ID}:${PO_UNIQUE_ID}:decision" 0)

case "$DECISION" in
  "" | "(nil)")
    echo "❌ ERROR: Could not retrieve Product Owner decision from Redis"
    echo "Expected key: swarm:${TASK_ID}:${PO_UNIQUE_ID}:decision"
    echo "Product Owner output:"
    echo "$PO_OUTPUT"
    exit 1
    ;;
esac

# Extract fields from structured JSON
DECISION_TYPE=$(jq -r '.decision' <<< "$DECISION")
DECISION_REASONING=$(jq -r '.reasoning' <<< "$DECISION")
DECISION_CONFIDENCE=$(jq -r '.confidence' <<< "$DECISION")
IN_SCOPE_CONSENSUS=$(jq -r '.scope_analysis.in_scope_consensus // 0' <<< "$DECISION")
BACKLOG_COUNT=$(jq -r '.backlog_items | length' <<< "$DECISION")

case "$DECISION_TYPE" in
  "" | "null")
    echo "❌ ERROR: Invalid Product Owner decision JSON"
    echo "Received: $DECISION"
    exit 1
    ;;
esac

printf ' Decision Type: %s\n Confidence: %s\n In-Scope Consensus: %s\n Backlog Items: %s\n\n' \
  "$DECISION_TYPE" "$DECISION_CONFIDENCE" "$IN_SCOPE_CONSENSUS" "$BACKLOG_COUNT"

# LOG: Product Owner decision (best effort: failures are deliberately ignored)
./.claude/skills/redis-coordination/log-event.sh \
  --task-id "$TASK_ID" \
  --event-type "po_decision" \
  --agent-id "$PO_UNIQUE_ID" \
  --iteration "$ITERATION" \
  --details "$DECISION" \
  --level "INFO" 2>/dev/null || true

printf '[Product Owner] Decision: %s\n\n' "$DECISION_TYPE"
|
|
1546
|
+
|
|
1547
|
+
# Handle Product Owner decision
|
|
1548
|
+
if [ "$DECISION_TYPE" = "PROCEED" ] || [ "$DECISION_TYPE" = "DEFER_AND_PROCEED" ]; then
|
|
1549
|
+
# Handle backlog items if DEFER_AND_PROCEED
|
|
1550
|
+
if [ "$DECISION_TYPE" = "DEFER_AND_PROCEED" ]; then
|
|
1551
|
+
echo "📋 Product Owner Decision: DEFER_AND_PROCEED"
|
|
1552
|
+
echo " In-scope work complete (consensus: $IN_SCOPE_CONSENSUS)"
|
|
1553
|
+
echo " Deferred $BACKLOG_COUNT out-of-scope items to backlog"
|
|
1554
|
+
echo ""
|
|
1555
|
+
fi
|
|
1556
|
+
|
|
1557
|
+
# DELIVERABLE VERIFICATION (Sprint 8 - prevent "consensus on vapor")
#
# Before the loop is declared complete, check that a task whose description
# looks like implementation work left at least one added/modified/untracked
# file in the git working tree. The outcome is informational only: it is
# printed and stored in Redis under swarm:<task>:deliverable_verification
# (EX 86400 = 24h TTL); DECISION_TYPE is not changed here.

# Print the number of added, modified or untracked paths reported by git.
# `--porcelain` is used because its format does not depend on user config
# (colour, relative paths). Both status columns are inspected so that
# unstaged edits (" M path") are counted as well as staged changes
# ("M  path", "A  path") and untracked files ("?? path").
# Prints 0 when git fails or nothing matches; grep's "no match" exit status
# is swallowed so the helper never fails the caller.
_count_changed_files() {
  git status --porcelain 2>/dev/null \
    | grep -cE '^([AM].|.[AM]|\?\?)' || true
}

echo "[Deliverable Verification] Checking success criteria..."

SUCCESS_CRITERIA_RAW=$(redis-cli GET "swarm:${TASK_ID}:success-criteria" 2>/dev/null)
if [ -n "$SUCCESS_CRITERIA_RAW" ]; then
  # Check if task description includes file/deliverable keywords
  TASK_DESC=$(redis-cli GET "swarm:${TASK_ID}:task" 2>/dev/null)

  if printf '%s\n' "$TASK_DESC" | grep -qiE "create|build|implement|generate|file|component|module|test"; then
    echo "[Deliverable Verification] Task involves implementation - checking for file changes..."

    # Count uncommitted created/modified files. NOTE: this is every pending
    # change in the working tree, not only changes made during this run.
    FILES_CREATED=$(_count_changed_files)
    FILES_CREATED=${FILES_CREATED:-0}

    if [ "$FILES_CREATED" -eq 0 ]; then
      echo "⚠️ DELIVERABLE VERIFICATION FAILED"
      echo " Task requires implementation but no files were created/modified"
      echo " Consensus reached on plans without actual deliverables"
      echo ""
      echo " Options:"
      echo " 1. Force ITERATE to create actual implementation"
      echo " 2. Override verification (--skip-deliverable-check flag)"
      echo " 3. Manual intervention to verify work was done"
      echo ""
      echo " Recommendation: Force ITERATE with explicit deliverable requirement"

      # Store verification failure
      redis-cli SET "swarm:${TASK_ID}:deliverable_verification" "failed" EX 86400 >/dev/null

      # Optional: Force ITERATE (commented for now - requires flag)
      # echo "[Forced Override] Changing PROCEED → ITERATE due to missing deliverables"
      # DECISION_TYPE="ITERATE"
      # DECISION_REASONING="No deliverables created despite implementation task"
    else
      echo "✅ Deliverable verification passed ($FILES_CREATED files created/modified)"
      redis-cli SET "swarm:${TASK_ID}:deliverable_verification" "passed:$FILES_CREATED" EX 86400 >/dev/null
    fi
  else
    echo "[Deliverable Verification] Task is analysis/planning - skipping file check"
  fi
fi
|
|
1598
|
+
|
|
1599
|
+
# Announce successful completion together with the consensus value and the
# iteration number it was reached on.
printf '%s\n' \
  "🎉 CFN Loop Complete (Product Owner: PROCEED)" \
  "Final Consensus: $LOOP2_CONSENSUS (Iteration $ITERATION)"

# METRICS: Iteration end timestamp and duration
# With GNU date, +%s%N is epoch seconds followed by nanoseconds; keeping the
# first 13 bytes leaves epoch milliseconds (for a 10-digit seconds value).
# ITERATION_START is assigned outside this section - presumably in the same
# millisecond format; confirm at its assignment.
ITERATION_END=$(date +%s%N | cut -b-13)
ITERATION_DURATION=$((ITERATION_END - ITERATION_START))

# Store final iteration duration metric
# jq builds {"duration_ms": <number>, "iteration": <number>}; the JSON line is
# then pushed onto the task's iteration_duration list, with redis-cli -x
# reading the value from stdin.
DURATION_METRIC=$(
  jq -nc \
    --arg duration "$ITERATION_DURATION" \
    --arg iteration "$ITERATION" \
    '{duration_ms: ($duration | tonumber), iteration: ($iteration | tonumber)}'
)
printf '%s\n' "$DURATION_METRIC" \
  | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:iteration_duration" >/dev/null
|
|
1612
|
+
|
|
1613
|
+
# BUGFIX #22: no wake-up is sent here. Per the P3 clean-exit pattern the
# agents already exited when they reported confidence, so only the swarm
# record itself is left to close out.
printf '%s\n' \
  "[Coordinator] Task complete (PROCEED decision)" \
  " All agents have already exited cleanly per P3 lifecycle"

# Use general complete-swarm primitive
# It is given the final consensus value and the iteration count as final
# metrics; its stdout is discarded.
./.claude/skills/redis-coordination/complete-swarm.sh \
  --swarm-id "$SWARM_ID" \
  --final-metric "final_consensus=$LOOP2_CONSENSUS" \
  --final-metric "total_iterations=$ITERATION" >/dev/null

# Terminate with a success status.
exit 0
|
|
1625
|
+
|
|
1626
|
+
elif [ "$DECISION_TYPE" = "ITERATE" ]; then
|
|
1627
|
+
printf '%s\n' "⚠️ Product Owner Decision: ITERATE (improve quality)"

# METRICS: Iteration end timestamp and duration
# With GNU date, +%s%N is epoch seconds followed by nanoseconds; the first 13
# bytes are epoch milliseconds (for a 10-digit seconds value).
# ITERATION_START comes from outside this section - presumably in the same
# millisecond format; confirm at its assignment.
ITERATION_END=$(date +%s%N | cut -b-13)
ITERATION_DURATION=$((ITERATION_END - ITERATION_START))

# Store iteration duration metric
# The metric is a one-line JSON object {"duration_ms": <number>,
# "iteration": <number>} pushed onto the task's iteration_duration list;
# redis-cli -x reads the value from stdin.
DURATION_METRIC=$(
  jq -nc \
    --arg duration "$ITERATION_DURATION" \
    --arg iteration "$ITERATION" \
    '{duration_ms: ($duration | tonumber), iteration: ($iteration | tonumber)}'
)
printf '%s\n' "$DURATION_METRIC" \
  | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:iteration_duration" >/dev/null
|
|
1639
|
+
|
|
1640
|
+
# Check max iterations
#
# An ITERATE decision on (or beyond) the last allowed iteration cannot be
# honoured, so the run stops with a failure status instead of looping again.

# Succeeds (returns 0) when iteration $1 has reached or passed the limit $2.
# Both arguments must be non-negative decimal integers; an empty or
# non-numeric value counts as "not reached" instead of triggering a `[`
# "unary operator expected" / "integer expression expected" error.
_max_iterations_reached() {
  case "$1" in '' | *[!0-9]*) return 1 ;; esac
  case "$2" in '' | *[!0-9]*) return 1 ;; esac
  [ "$1" -ge "$2" ]
}

if _max_iterations_reached "$ITERATION" "$MAX_ITERATIONS"; then
  echo "❌ Maximum iterations ($MAX_ITERATIONS) reached - cannot iterate further"
  echo " Product Owner wanted ITERATE but max iterations exhausted"
  exit 1
fi
|
|
1646
|
+
|
|
1647
|
+
# PHASE 1 (BUG #23): feedback is accumulated across iterations for learning.
# BUGFIX #22: it is stored for the next iteration rather than sent to live
# agents - per the P3 agent lifecycle the agents exit cleanly and fresh ones
# are spawned for the next iteration (re-spawned, not woken).
printf '[Coordinator] Storing feedback for iteration %s...\n' "$((ITERATION + 1))"

# accumulate_feedback is defined outside this section; it receives the task
# id, the current iteration, a source tag and the message text.
# NOTE(review): $CONSENSUS looks like the target threshold - confirm where it
# is set.
FEEDBACK_MSG="Product Owner decision: ITERATE - Improve consensus from $LOOP2_CONSENSUS to >=$CONSENSUS"
accumulate_feedback "$TASK_ID" "$ITERATION" "product_owner_iterate" "$FEEDBACK_MSG"

printf '%s\n' \
  " Reason: cfn_loop_iteration (Product Owner ITERATE decision)" \
  " Priority: 30 (MEDIUM - Loop 3), 10 (HIGH - Loop 2)" \
  ""
|
|
1658
|
+
|
|
1659
|
+
elif [ "$DECISION_TYPE" = "ABORT" ]; then
|
|
1660
|
+
# ABORT: report the decision with the consensus value and iteration at which
# it was made, then stop with a failure status.
printf '%s\n' \
  "❌ Product Owner Decision: ABORT (scope too large or out of scope)" \
  " Consensus: $LOOP2_CONSENSUS, Iteration: $ITERATION"
exit 1
|
|
1663
|
+
|
|
1664
|
+
else
|
|
1665
|
+
# Fallback for a decision type that is none of the four handled values:
# show what was received and what was expected, then stop with a failure
# status.
printf '%s\n' \
  "❌ ERROR: Unknown Product Owner decision: $DECISION_TYPE" \
  " Expected: PROCEED, DEFER_AND_PROCEED, ITERATE, or ABORT"
exit 1
|
|
1668
|
+
fi
|
|
1669
|
+
done
|
|
1670
|
+
|
|
1671
|
+
# Reached only when the enclosing loop (closed by the `done` just above) runs
# out without an earlier exit: report the exhausted iteration budget and stop
# with a failure status.
printf '%s\n' "❌ CFN Loop failed after $MAX_ITERATIONS iterations"
exit 1
|