claude-flow-novice 2.9.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353)
  1. package/.claude/agents/cfn-dev-team/CLAUDE.md +1086 -0
  2. package/.claude/agents/cfn-dev-team/README.md +116 -0
  3. package/.claude/agents/cfn-dev-team/architecture/api-designer-persona.md +149 -0
  4. package/.claude/agents/cfn-dev-team/architecture/base-template-generator.md +196 -0
  5. package/.claude/agents/cfn-dev-team/architecture/goal-planner.md +183 -0
  6. package/.claude/agents/cfn-dev-team/architecture/planner.md +182 -0
  7. package/.claude/agents/cfn-dev-team/architecture/system-architect.md +162 -0
  8. package/.claude/agents/cfn-dev-team/coordinators/cfn-frontend-coordinator.md +540 -0
  9. package/.claude/agents/cfn-dev-team/coordinators/cfn-v3-coordinator.md +20 -14
  10. package/.claude/agents/cfn-dev-team/coordinators/consensus-builder.md +167 -0
  11. package/.claude/agents/cfn-dev-team/dev-ops/devops-engineer.md +148 -0
  12. package/.claude/agents/cfn-dev-team/dev-ops/github-commit-agent.md +118 -0
  13. package/.claude/agents/cfn-dev-team/dev-ops/kubernetes-specialist.md +540 -0
  14. package/.claude/agents/cfn-dev-team/developers/backend-dev.md +20 -0
  15. package/.claude/agents/cfn-dev-team/developers/data/data-engineer.md +585 -0
  16. package/.claude/agents/cfn-dev-team/developers/database/database-architect.md +276 -0
  17. package/.claude/agents/cfn-dev-team/developers/dev-backend-api.md +147 -0
  18. package/.claude/agents/cfn-dev-team/developers/frontend/mobile-dev.md +218 -0
  19. package/.claude/agents/cfn-dev-team/developers/{react-frontend-engineer.md → frontend/react-frontend-engineer.md} +53 -5
  20. package/.claude/agents/cfn-dev-team/developers/frontend/spec-mobile-react-native.md +199 -0
  21. package/.claude/agents/cfn-dev-team/developers/graphql-specialist.md +615 -0
  22. package/.claude/agents/cfn-dev-team/developers/rust-developer.md +174 -0
  23. package/.claude/agents/cfn-dev-team/documentation/README-VALIDATION.md +243 -0
  24. package/.claude/agents/cfn-dev-team/documentation/agent-type-guidelines.md +465 -0
  25. package/.claude/agents/cfn-dev-team/documentation/api-docs.md +103 -0
  26. package/.claude/agents/cfn-dev-team/documentation/docs-api-openapi.md +98 -0
  27. package/.claude/agents/cfn-dev-team/documentation/pseudocode.md +159 -0
  28. package/.claude/agents/cfn-dev-team/documentation/specification.md +157 -0
  29. package/.claude/agents/cfn-dev-team/product-owners/accessibility-advocate-persona.md +109 -0
  30. package/.claude/agents/cfn-dev-team/{coordinators → product-owners}/cto-agent.md +8 -6
  31. package/.claude/agents/cfn-dev-team/product-owners/power-user-persona.md +190 -0
  32. package/.claude/agents/cfn-dev-team/{coordinators → product-owners}/product-owner.md +85 -59
  33. package/.claude/agents/cfn-dev-team/reviewers/quality/analyze-code-quality.md +141 -0
  34. package/.claude/agents/cfn-dev-team/reviewers/quality/code-analyzer.md +200 -0
  35. package/.claude/agents/cfn-dev-team/reviewers/quality/cyclomatic-complexity-reducer.md +321 -0
  36. package/.claude/agents/cfn-dev-team/reviewers/quality/perf-analyzer.md +238 -0
  37. package/.claude/agents/cfn-dev-team/reviewers/quality/performance-benchmarker.md +101 -0
  38. package/.claude/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +375 -0
  39. package/.claude/agents/cfn-dev-team/reviewers/quality/security-specialist.md +193 -0
  40. package/.claude/agents/cfn-dev-team/reviewers/reviewer.md +39 -0
  41. package/.claude/agents/cfn-dev-team/testers/interaction-tester.md +31 -0
  42. package/.claude/agents/cfn-dev-team/testers/load-testing-specialist.md +469 -0
  43. package/.claude/agents/cfn-dev-team/testers/playwright-tester.md +24 -0
  44. package/.claude/agents/cfn-dev-team/testers/tester.md +20 -0
  45. package/.claude/agents/cfn-dev-team/utility/agent-builder.md +151 -0
  46. package/.claude/agents/cfn-dev-team/utility/analyst.md +178 -0
  47. package/.claude/agents/cfn-dev-team/utility/claude-code-expert.md +1043 -0
  48. package/.claude/agents/cfn-dev-team/utility/code-booster.md +139 -0
  49. package/.claude/agents/cfn-dev-team/utility/context-curator.md +99 -0
  50. package/.claude/agents/cfn-dev-team/{developers → utility}/researcher.md +6 -4
  51. package/.claude/commands/cfn/CFN_LOOP_FRONTEND.md +741 -0
  52. package/.claude/commands/cfn/CFN_LOOP_TASK_MODE.md +353 -0
  53. package/.claude/commands/cfn/cfn-loop-frontend.md +555 -0
  54. package/.claude/commands/cfn/cfn-loop.md +168 -7
  55. package/{CFN-CLAUDE.md → .claude/root-claude-distribute/CFN-CLAUDE.md} +23 -3
  56. package/.claude/skills/cfn-ace-system/SKILL.md +364 -0
  57. package/.claude/skills/cfn-ace-system/add-bullet.sh +145 -0
  58. package/.claude/skills/cfn-ace-system/analyze-anti-pattern-effectiveness.sh +56 -0
  59. package/.claude/skills/cfn-ace-system/classify-task.sh +18 -0
  60. package/.claude/skills/cfn-ace-system/export-ace-metrics.sh +48 -0
  61. package/.claude/skills/cfn-ace-system/extract-tags.sh +385 -0
  62. package/.claude/skills/cfn-ace-system/format-negative-context.sh +180 -0
  63. package/.claude/skills/cfn-ace-system/init-indexes.sql +160 -0
  64. package/.claude/skills/cfn-ace-system/invoke-context-curate.sh +192 -0
  65. package/.claude/skills/cfn-ace-system/invoke-context-inject.sh +361 -0
  66. package/.claude/skills/cfn-ace-system/invoke-context-query.sh +139 -0
  67. package/.claude/skills/cfn-ace-system/invoke-context-reflect.sh +343 -0
  68. package/.claude/skills/cfn-ace-system/invoke-context-stats.sh +227 -0
  69. package/.claude/skills/cfn-ace-system/log-merge.sh +67 -0
  70. package/.claude/skills/cfn-ace-system/monitor-injection-performance.sh +138 -0
  71. package/.claude/skills/cfn-ace-system/optimize-injection-pipeline.sh +169 -0
  72. package/.claude/skills/cfn-ace-system/query-anti-patterns.sh +276 -0
  73. package/.claude/skills/cfn-ace-system/query-contexts.sh +150 -0
  74. package/.claude/skills/cfn-ace-system/query-reflections.sh +35 -0
  75. package/.claude/skills/cfn-ace-system/schema/001-create-context-reflections.sql +237 -0
  76. package/.claude/skills/cfn-ace-system/schema/README.md +723 -0
  77. package/.claude/skills/cfn-ace-system/schema/SCHEMA_DESIGN_SUMMARY.md +564 -0
  78. package/.claude/skills/cfn-ace-system/schema/populate-test-data-simple.sh +62 -0
  79. package/.claude/skills/cfn-ace-system/schema/populate-test-data.sh +247 -0
  80. package/.claude/skills/cfn-ace-system/schema/run-migration.sh +231 -0
  81. package/.claude/skills/cfn-ace-system/schema/validate-schema.sql +280 -0
  82. package/.claude/skills/cfn-ace-system/score-relevance-adapter.sh +138 -0
  83. package/.claude/skills/cfn-ace-system/score-relevance.sh +253 -0
  84. package/.claude/skills/cfn-ace-system/sprint-7-lessons.json +46 -0
  85. package/.claude/skills/cfn-ace-system/store-reflection.sh +46 -0
  86. package/.claude/skills/cfn-ace-system/test-ace-skill.sh +312 -0
  87. package/.claude/skills/cfn-ace-system/track-ab-test.sh +42 -0
  88. package/.claude/skills/cfn-ace-system/update-reflection.sh +41 -0
  89. package/.claude/skills/cfn-agent-discovery/SKILL.md +40 -0
  90. package/.claude/skills/cfn-agent-discovery/agents-registry-clean.json +0 -0
  91. package/.claude/skills/cfn-agent-discovery/agents-registry-fixed.json +19 -0
  92. package/.claude/skills/cfn-agent-discovery/agents-registry.json +718 -0
  93. package/.claude/skills/cfn-agent-discovery/discover-agents.py +184 -0
  94. package/.claude/skills/cfn-agent-discovery/discover-agents.sh +87 -0
  95. package/.claude/skills/cfn-agent-discovery/invoke-registry.sh +11 -0
  96. package/.claude/skills/cfn-agent-discovery/temp_script.py +0 -0
  97. package/.claude/skills/cfn-agent-execution/execute-agent.sh +126 -0
  98. package/.claude/skills/cfn-agent-output-processing/SKILL.md +359 -0
  99. package/.claude/skills/cfn-agent-selector/SKILL.md +90 -0
  100. package/.claude/skills/cfn-agent-selector/select-agents.sh +112 -0
  101. package/.claude/skills/cfn-agent-spawning/SKILL.md +135 -0
  102. package/.claude/skills/cfn-agent-spawning/agent-selection-guide.md +814 -0
  103. package/.claude/skills/cfn-agent-spawning/check-dependencies.sh +30 -0
  104. package/.claude/skills/cfn-agent-spawning/spawn-agent.sh +263 -0
  105. package/.claude/skills/cfn-agent-spawning/spawn-templates.sh +613 -0
  106. package/.claude/skills/cfn-analytics/description-refinement-guide.md +164 -0
  107. package/.claude/skills/cfn-analytics/log-skill-invocation.js +122 -0
  108. package/.claude/skills/cfn-analytics/run-production-criteria-tests.sh +126 -0
  109. package/.claude/skills/cfn-analytics/skill-analytics-dashboard.js +113 -0
  110. package/.claude/skills/cfn-analytics/skill-invocation-hook.sh +28 -0
  111. package/.claude/skills/cfn-analytics/skill-invocations.sql +58 -0
  112. package/.claude/skills/cfn-analytics/test-corpus.json +32 -0
  113. package/.claude/skills/cfn-analytics/test-data-generator.js +115 -0
  114. package/.claude/skills/cfn-analytics/test-manual-override-rate.js +285 -0
  115. package/.claude/skills/cfn-analytics/validate-skill-selection.js +188 -0
  116. package/.claude/skills/cfn-config-management/SKILL.md +34 -0
  117. package/.claude/skills/cfn-config-management/check-dependencies.sh +56 -0
  118. package/.claude/skills/cfn-config-management/config.json +32 -0
  119. package/.claude/skills/cfn-config-management/manage-config.sh +113 -0
  120. package/.claude/skills/cfn-event-bus/SKILL.md +412 -0
  121. package/.claude/skills/cfn-event-bus/config.json +111 -0
  122. package/.claude/skills/cfn-event-bus/eventbus-wrapper.cjs +69 -0
  123. package/.claude/skills/cfn-event-bus/invoke-event-publish.sh +147 -0
  124. package/.claude/skills/cfn-event-bus/invoke-event-subscribe.sh +171 -0
  125. package/.claude/skills/cfn-event-bus/invoke-lifecycle-track.sh +201 -0
  126. package/.claude/skills/cfn-event-bus/test-event-bus.sh +280 -0
  127. package/.claude/skills/cfn-fleet-manager/SKILL.md +412 -0
  128. package/.claude/skills/cfn-fleet-manager/config.json +60 -0
  129. package/.claude/skills/cfn-fleet-manager/invoke-fleet-allocate.sh +182 -0
  130. package/.claude/skills/cfn-fleet-manager/invoke-fleet-balance.sh +239 -0
  131. package/.claude/skills/cfn-fleet-manager/invoke-fleet-metrics.sh +193 -0
  132. package/.claude/skills/cfn-fleet-manager/invoke-fleet-register.sh +124 -0
  133. package/.claude/skills/cfn-fleet-manager/test-fleet-manager.sh +345 -0
  134. package/.claude/skills/cfn-hook-pipeline/SKILL.md +148 -0
  135. package/.claude/skills/cfn-hook-pipeline/auto-resolve.sh +66 -0
  136. package/.claude/skills/cfn-hook-pipeline/check-dependencies.sh +40 -0
  137. package/.claude/skills/cfn-hook-pipeline/feedback-resolver.sh +452 -0
  138. package/.claude/skills/cfn-hook-pipeline/post-edit-handler.sh +154 -0
  139. package/.claude/skills/cfn-hook-pipeline/security-scan.json +60 -0
  140. package/.claude/skills/cfn-hook-pipeline/security-scanner.sh +121 -0
  141. package/.claude/skills/cfn-hook-pipeline/test-root-warning-resolution.sh +148 -0
  142. package/.claude/skills/cfn-hybrid-routing/SKILL.md +46 -0
  143. package/.claude/skills/cfn-hybrid-routing/check-dependencies.sh +52 -0
  144. package/.claude/skills/cfn-hybrid-routing/config.json +26 -0
  145. package/.claude/skills/cfn-hybrid-routing/spawn-worker.sh +44 -0
  146. package/.claude/skills/cfn-loop-orchestration/SKILL.md +299 -0
  147. package/.claude/skills/cfn-loop-orchestration/helpers/auto-tune-timeouts.sh +228 -0
  148. package/.claude/skills/cfn-loop-orchestration/helpers/consensus.sh +84 -0
  149. package/.claude/skills/cfn-loop-orchestration/helpers/context-injection.sh +142 -0
  150. package/.claude/skills/cfn-loop-orchestration/helpers/context-lookup.sh +359 -0
  151. package/.claude/skills/cfn-loop-orchestration/helpers/deliverable-verifier.sh +71 -0
  152. package/.claude/skills/cfn-loop-orchestration/helpers/gate-check.sh +90 -0
  153. package/.claude/skills/cfn-loop-orchestration/helpers/iteration-manager.sh +87 -0
  154. package/.claude/skills/cfn-loop-orchestration/helpers/spawn-agents.sh +271 -0
  155. package/.claude/skills/cfn-loop-orchestration/helpers/timeout-calculator.sh +51 -0
  156. package/.claude/skills/cfn-loop-orchestration/inject-loop-context.sh +41 -0
  157. package/.claude/skills/cfn-loop-orchestration/monitor-execution.sh +156 -0
  158. package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +884 -0
  159. package/.claude/skills/cfn-loop-orchestration/orchestrate.sh.backup +840 -0
  160. package/.claude/skills/cfn-loop-orchestration/security_utils.sh +99 -0
  161. package/.claude/skills/cfn-loop-orchestration/test-cfn-orchestration.sh +281 -0
  162. package/.claude/skills/cfn-loop-orchestration/test-edge-cases.sh +188 -0
  163. package/.claude/skills/cfn-loop-validation/SKILL.md +353 -0
  164. package/.claude/skills/cfn-loop-validation/check-dependencies.sh +31 -0
  165. package/.claude/skills/cfn-loop-validation/config.json +161 -0
  166. package/.claude/skills/cfn-loop-validation/consensus-calculator.js +477 -0
  167. package/.claude/skills/cfn-loop-validation/evidence-chain.sql +163 -0
  168. package/.claude/skills/cfn-loop-validation/examples/README.md +453 -0
  169. package/.claude/skills/cfn-loop-validation/examples/coordinator-full-cfn-loop.sh +234 -0
  170. package/.claude/skills/cfn-loop-validation/examples/coordinator-loop2-consensus.sh +132 -0
  171. package/.claude/skills/cfn-loop-validation/examples/coordinator-loop3-gate.sh +115 -0
  172. package/.claude/skills/cfn-loop-validation/examples/coordinator-redis-integration.sh +186 -0
  173. package/.claude/skills/cfn-loop-validation/orchestrate-cfn-loop.sh +252 -0
  174. package/.claude/skills/cfn-loop-validation/validate-iteration.sh +134 -0
  175. package/.claude/skills/cfn-process-lifecycle/SKILL.md +39 -0
  176. package/.claude/skills/cfn-process-lifecycle/check-dependencies.sh +58 -0
  177. package/.claude/skills/cfn-process-lifecycle/config.json +39 -0
  178. package/.claude/skills/cfn-process-lifecycle/process-manager.sh +144 -0
  179. package/.claude/skills/cfn-product-owner-decision/SKILL.md +332 -0
  180. package/.claude/skills/cfn-product-owner-decision/execute-decision.sh +176 -0
  181. package/.claude/skills/cfn-product-owner-decision/parse-decision.sh +66 -0
  182. package/.claude/skills/cfn-product-owner-decision/validate-deliverables.sh +82 -0
  183. package/.claude/skills/cfn-redis-coordination/AGENT_LOGGING.md +280 -0
  184. package/.claude/skills/cfn-redis-coordination/BZPOPMIN_FIX_SUMMARY.md +209 -0
  185. package/.claude/skills/cfn-redis-coordination/HEARTBEAT.md +57 -0
  186. package/.claude/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +267 -0
  187. package/.claude/skills/cfn-redis-coordination/LOGGING.md +260 -0
  188. package/.claude/skills/cfn-redis-coordination/SECURITY_REVIEW.md +25 -0
  189. package/.claude/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +164 -0
  190. package/.claude/skills/cfn-redis-coordination/SKILL.md +720 -0
  191. package/.claude/skills/cfn-redis-coordination/agent-log.sh +124 -0
  192. package/.claude/skills/cfn-redis-coordination/agent-recovery.sh +75 -0
  193. package/.claude/skills/cfn-redis-coordination/analyze-task-complexity.sh +277 -0
  194. package/.claude/skills/cfn-redis-coordination/cancel-swarm.sh +221 -0
  195. package/.claude/skills/cfn-redis-coordination/cfn-loop-exec.sh +468 -0
  196. package/.claude/skills/cfn-redis-coordination/cfn-loop-relaunch.sh +29 -0
  197. package/.claude/skills/cfn-redis-coordination/check-dependencies.sh +32 -0
  198. package/.claude/skills/cfn-redis-coordination/collect-confidence-scores.sh +179 -0
  199. package/.claude/skills/cfn-redis-coordination/collect-results.sh +75 -0
  200. package/.claude/skills/cfn-redis-coordination/complete-swarm.sh +75 -0
  201. package/.claude/skills/cfn-redis-coordination/config.json +61 -0
  202. package/.claude/skills/cfn-redis-coordination/data/cfn-loop.db +0 -0
  203. package/.claude/skills/cfn-redis-coordination/demos/phase4-wake-queue-test-report.md +82 -0
  204. package/.claude/skills/cfn-redis-coordination/demos/test-bzpopmin-fix.sh +274 -0
  205. package/.claude/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +276 -0
  206. package/.claude/skills/cfn-redis-coordination/demos/test-dlq.sh +129 -0
  207. package/.claude/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +320 -0
  208. package/.claude/skills/cfn-redis-coordination/demos/test-orchestrator.sh +249 -0
  209. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +148 -0
  210. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +163 -0
  211. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake.sh +138 -0
  212. package/.claude/skills/cfn-redis-coordination/demos/test-quick-fix.sh +81 -0
  213. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +45 -0
  214. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +68 -0
  215. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +56 -0
  216. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +81 -0
  217. package/.claude/skills/cfn-redis-coordination/demos/test-quorum.sh +57 -0
  218. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +187 -0
  219. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown.sh +160 -0
  220. package/.claude/skills/cfn-redis-coordination/demos/test-utils-unix.sh +97 -0
  221. package/.claude/skills/cfn-redis-coordination/demos/test-utils.sh +97 -0
  222. package/.claude/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +59 -0
  223. package/.claude/skills/cfn-redis-coordination/examples/README.md +73 -0
  224. package/.claude/skills/cfn-redis-coordination/examples/grafana-dashboard.json +352 -0
  225. package/.claude/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +127 -0
  226. package/.claude/skills/cfn-redis-coordination/examples/mesh-pattern.sh +171 -0
  227. package/.claude/skills/cfn-redis-coordination/examples/timeout-handling.sh +227 -0
  228. package/.claude/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +239 -0
  229. package/.claude/skills/cfn-redis-coordination/execute-product-owner-decision.sh +258 -0
  230. package/.claude/skills/cfn-redis-coordination/get-agent-timeout.sh +177 -0
  231. package/.claude/skills/cfn-redis-coordination/heartbeat-functions.sh +137 -0
  232. package/.claude/skills/cfn-redis-coordination/heartbeat-protocol.md +106 -0
  233. package/.claude/skills/cfn-redis-coordination/heartbeat.sh +126 -0
  234. package/.claude/skills/cfn-redis-coordination/init-swarm.sh +148 -0
  235. package/.claude/skills/cfn-redis-coordination/invoke-redis-pattern.sh +220 -0
  236. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +283 -0
  237. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh.backup-p7 +423 -0
  238. package/.claude/skills/cfn-redis-coordination/list-active-swarms.sh +147 -0
  239. package/.claude/skills/cfn-redis-coordination/log-event.sh +109 -0
  240. package/.claude/skills/cfn-redis-coordination/metrics-export.sh +674 -0
  241. package/.claude/skills/cfn-redis-coordination/metrics-schema.json +66 -0
  242. package/.claude/skills/cfn-redis-coordination/metrics-storage.md +31 -0
  243. package/.claude/skills/cfn-redis-coordination/monitor-cfn-violations.sh +391 -0
  244. package/.claude/skills/cfn-redis-coordination/monitor-heartbeats.sh +101 -0
  245. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +141 -0
  246. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +31 -0
  247. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup +38 -0
  248. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup-1761167675 +1672 -0
  249. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup-p5 +1604 -0
  250. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup-phase1 +1550 -0
  251. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup-phase2 +1621 -0
  252. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.backup-phase3 +1621 -0
  253. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  254. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.broken +1627 -0
  255. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.corrupted +80 -0
  256. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.deprecated +1864 -0
  257. package/.claude/skills/cfn-redis-coordination/priority-wake-mechanism.md +75 -0
  258. package/.claude/skills/cfn-redis-coordination/priority_wake.py +134 -0
  259. package/.claude/skills/cfn-redis-coordination/query-dlq.sh +162 -0
  260. package/.claude/skills/cfn-redis-coordination/query-logs.sh +103 -0
  261. package/.claude/skills/cfn-redis-coordination/redis-pattern.sh +619 -0
  262. package/.claude/skills/cfn-redis-coordination/retrieve-context.sh +58 -0
  263. package/.claude/skills/cfn-redis-coordination/select-specialist-agent.sh +371 -0
  264. package/.claude/skills/cfn-redis-coordination/semantic-match-tfidf.py +252 -0
  265. package/.claude/skills/cfn-redis-coordination/send-heartbeat.sh +165 -0
  266. package/.claude/skills/cfn-redis-coordination/signal.sh +38 -0
  267. package/.claude/skills/cfn-redis-coordination/store-context.sh +86 -0
  268. package/.claude/skills/cfn-redis-coordination/store-epic-context.sh +123 -0
  269. package/.claude/skills/cfn-redis-coordination/test-context-injection.sh +354 -0
  270. package/.claude/skills/cfn-redis-coordination/test-timeout-enforcement.sh +513 -0
  271. package/.claude/skills/cfn-redis-coordination/tests/convert-line-endings.sh +15 -0
  272. package/.claude/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +102 -0
  273. package/.claude/skills/cfn-redis-coordination/tests/edge-cases-test.sh +99 -0
  274. package/.claude/skills/cfn-redis-coordination/tests/integration-test.sh +170 -0
  275. package/.claude/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +82 -0
  276. package/.claude/skills/cfn-redis-coordination/tests/run-test-suite.sh +92 -0
  277. package/.claude/skills/cfn-redis-coordination/tests/run-tests.sh +4 -0
  278. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +418 -0
  279. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +124 -0
  280. package/.claude/skills/cfn-redis-coordination/tests/test-primitives.sh +166 -0
  281. package/.claude/skills/cfn-redis-coordination/tests/test-utils.sh +54 -0
  282. package/.claude/skills/cfn-redis-coordination/tests/test_coordination_primitives.sh.deprecated +20 -0
  283. package/.claude/skills/cfn-redis-coordination/tests/test_utils.sh +49 -0
  284. package/.claude/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +76 -0
  285. package/.claude/skills/cfn-redis-coordination/validate-parameters.sh +492 -0
  286. package/.claude/skills/cfn-sqlite-memory/IMPLEMENTATION_REPORT.md +393 -0
  287. package/.claude/skills/cfn-sqlite-memory/QUICK_REFERENCE.md +204 -0
  288. package/.claude/skills/cfn-sqlite-memory/SKILL.md +415 -0
  289. package/.claude/skills/cfn-sqlite-memory/acl-queries.sql +452 -0
  290. package/.claude/skills/cfn-sqlite-memory/check-dependencies.sh +36 -0
  291. package/.claude/skills/cfn-sqlite-memory/config.json +45 -0
  292. package/.claude/skills/cfn-sqlite-memory/memory-cli.sh +88 -0
  293. package/.claude/skills/cfn-sqlite-memory/test-state-persistence.js +187 -0
  294. package/.claude/skills/cfn-sqlite-memory/ttl-cleanup.sh +274 -0
  295. package/.claude/skills/cfn-test-execution/SKILL.md +128 -0
  296. package/.claude/skills/cfn-test-execution/check-dependencies.sh +36 -0
  297. package/.claude/skills/cfn-test-execution/test-cache-reader.sh +134 -0
  298. package/.claude/skills/cfn-test-execution/test-concurrent-conflicts.sh +115 -0
  299. package/.claude/skills/cfn-test-execution/test-coordinator-pattern.sh +109 -0
  300. package/.claude/skills/cfn-transparency-middleware/Cargo.toml +18 -0
  301. package/.claude/skills/cfn-transparency-middleware/SECURITY.md +41 -0
  302. package/.claude/skills/cfn-transparency-middleware/SKILL.md +91 -0
  303. package/.claude/skills/cfn-transparency-middleware/TEST_RESULTS.md +174 -0
  304. package/.claude/skills/cfn-transparency-middleware/config.json +31 -0
  305. package/.claude/skills/cfn-transparency-middleware/examples/basic-usage.ts +39 -0
  306. package/.claude/skills/cfn-transparency-middleware/examples/batch-processing.ts +52 -0
  307. package/.claude/skills/cfn-transparency-middleware/examples/custom-filtering.ts +61 -0
  308. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-filter.sh +98 -0
  309. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-init.sh +224 -0
  310. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-level.sh +333 -0
  311. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-metrics.sh +345 -0
  312. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-observe.sh +140 -0
  313. package/.claude/skills/cfn-transparency-middleware/invoke-transparency-stop.sh +235 -0
  314. package/.claude/skills/cfn-transparency-middleware/memory_query.rs +85 -0
  315. package/.claude/skills/cfn-transparency-middleware/memory_repository.rs +140 -0
  316. package/.claude/skills/cfn-transparency-middleware/memory_schema.rs +64 -0
  317. package/.claude/skills/cfn-transparency-middleware/middleware-config.sh +29 -0
  318. package/.claude/skills/cfn-transparency-middleware/performance-benchmark.sh +79 -0
  319. package/.claude/skills/cfn-transparency-middleware/test-e2e.sh +406 -0
  320. package/.claude/skills/cfn-transparency-middleware/test-integration.sh +162 -0
  321. package/.claude/skills/cfn-transparency-middleware/test-transparency-skill.sh +368 -0
  322. package/.claude/skills/cfn-transparency-middleware/test-transparency-skill.sh.unix +126 -0
  323. package/.claude/skills/cfn-transparency-middleware/tests/input-validation.sh +93 -0
  324. package/.claude/skills/cfn-transparency-middleware/wrap-agent.sh +132 -0
  325. package/.claude/skills/cfn-webapp-testing/SCREENSHOT_NAMING_CONVENTION.md +547 -0
  326. package/.claude/skills/cfn-webapp-testing/SKILL.md +877 -0
  327. package/.claude/skills/cfn-webapp-testing/capture-screenshot.sh +238 -0
  328. package/.claude/skills/cfn-webapp-testing/cfn-loop-integration.sh +265 -0
  329. package/.claude/skills/cfn-webapp-testing/compare-screenshots.sh +199 -0
  330. package/.claude/skills/cfn-webapp-testing/init-storage.sh +150 -0
  331. package/.claude/skills/cfn-webapp-testing/set-baseline.sh +196 -0
  332. package/.claude/skills/cfn-webapp-testing/test-webapp-testing.sh +233 -0
  333. package/README.md +51 -2
  334. package/dist/ace/ace-reflector.js +109 -10
  335. package/dist/ace/ace-reflector.js.map +1 -1
  336. package/dist/agents/agent-loader.js +165 -146
  337. package/dist/agents/agent-loader.js.map +1 -1
  338. package/dist/cli/agent-executor.js +1 -1
  339. package/dist/cli/agent-executor.js.map +1 -1
  340. package/dist/cli/config-manager.js +109 -91
  341. package/dist/cli/config-manager.js.map +1 -1
  342. package/package.json +43 -7
  343. package/readme/README.md +15 -4
  344. package/scripts/init-project.js +84 -29
  345. package/scripts/run-marketing-tests.sh +43 -0
  346. package/scripts/update_paths.sh +47 -0
  347. package/tools/install-lizard.sh +37 -0
  348. package/tools/simple-complexity.sh +44 -0
  349. package/.claude/agents/cfn-dev-team/developers/coder.md +0 -270
  350. package/.claude/agents/cfn-dev-team/developers/state-architect.md +0 -127
  351. package/.claude/agents/cfn-dev-team/reviewers/code-quality-validator.md +0 -128
  352. package/.claude/agents/cfn-dev-team/developers/{ui-designer.md → frontend/ui-designer.md} +0 -0
  353. package/.claude/agents/cfn-dev-team/{coordinators → product-owners}/product-owner-agent.md +0 -0
@@ -0,0 +1,1627 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ # CFN Loop Orchestration v2.0.0
5
+ # Manages multi-loop CFN execution with dependency tracking and consensus
6
+ #
7
+ # Usage:
8
+ # ./orchestrate-cfn-loop.sh --task-id <id> \
9
+ # --mode <mvp|standard|enterprise> \
10
+ # --loop3-agents <agent1,agent2,...> \
11
+ # --loop2-agents <agent1,agent2,...> \
12
+ # --product-owner <agent-id> \
13
+ # [--max-iterations <n>] \
14
+ # [--min-quorum-loop3 <n|n%|0.n>] \
15
+ # [--min-quorum-loop2 <n|n%|0.n>] \
16
+ # [--epic-context <json>] \
17
+ # [--phase-context <json>] \
18
+ # [--success-criteria <json>]
19
+ #
20
+ # CFN Loop Structure (CORRECTED):
21
+ # Loop 3 (Primary Swarm - Self Validation)
22
+ # ↓
23
+ # IF Loop 3 self-validation gate FAILS → RELAUNCH Loop 3 (skip Loop 2)
24
+ # IF Loop 3 self-validation gate PASSES → Proceed to Loop 2
25
+ # ↓
26
+ # Loop 2 (Consensus Validators)
27
+ # ↓
28
+ # Product Owner Decision
29
+ #
30
+ # Dependency Enforcement:
31
+ # - Loop 3 agents self-validate via confidence scores
32
+ # - Gate check determines if Loop 2 validators should be engaged
33
+ # - Loop 2 agents WAIT for gate pass signal before starting work
34
+ # - Product Owner BLOCKS until all Loop 2 agents signal completion
35
+ # - Uses Redis BLPOP for zero-token waiting
36
+ #
37
+ # Quorum Configuration:
38
+ # - Absolute: --min-quorum-loop3 3 (requires exactly 3 agents)
39
+ # - Percentage: --min-quorum-loop3 85% (requires 85% of agents)
40
+ # - Decimal: --min-quorum-loop3 0.66 (requires 66% of agents)
41
+ # - Default: 0.66 (2/3 majority) if not specified
42
+ #
43
+ # Agent Requirements:
44
+ # Loop 3 (Implementers):
45
+ # 1. Complete work
46
+ # 2. Signal done: redis-cli lpush "swarm:${TASK_ID}:${AGENT_ID}:done" "complete"
47
+ # 3. Report confidence: invoke-waiting-mode.sh report --confidence <0.0-1.0>
48
+ # 4. Enter waiting: invoke-waiting-mode.sh enter (for potential iteration)
49
+ #
50
+ # Loop 2 (Validators):
51
+ # 1. WAIT for gate pass: redis-cli blpop "swarm:${TASK_ID}:gate-passed" 0
52
+ # 2. Retrieve Loop 3 results for review
53
+ # 3. Perform validation
54
+ # 4. Signal done: redis-cli lpush "swarm:${TASK_ID}:${AGENT_ID}:done" "complete"
55
+ # 5. Report consensus: invoke-waiting-mode.sh report --confidence <0.0-1.0>
56
+ # 6. Enter waiting: invoke-waiting-mode.sh enter (for potential iteration)
57
+ ##############################################################################
58
+
59
# Strict mode: exit on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ---------------------------------------------------------------------------
# Configuration defaults (the argument parser below overrides most of these)
# ---------------------------------------------------------------------------

# Swarm identity and participants
TASK_ID=""
MODE="standard"
LOOP3_AGENTS=""
LOOP2_AGENTS=""
PRODUCT_OWNER=""

# Iteration, timeout and retry tuning
MAX_ITERATIONS=10
TIMEOUT=3600     # 60 minute default timeout for agent completion
RETRY_COUNT=3
RETRY_DELAY=5000 # Base delay in milliseconds

# Quorum specifications (absolute count, "N%" or "0.N"); empty until parsed
MIN_QUORUM_LOOP3="" # Minimum agents required for Loop 3
MIN_QUORUM_LOOP2="" # Minimum agents required for Loop 2

# Process bookkeeping used by the shutdown and heartbeat machinery
ORCHESTRATOR_PID=$$
SHUTDOWN_MONITOR_PID=""
SHUTDOWN_REQUESTED=0
LOOP3_HEARTBEAT_MONITOR_PID=""
LOOP2_HEARTBEAT_MONITOR_PID=""

# Epic Context (optional - for agent system prompts)
EPIC_CONTEXT=""
PHASE_CONTEXT=""
SUCCESS_CRITERIA=""
EXPECTED_FILES="" # BUG #12 FIX: Explicit file verification
PHASE_ID=""       # BUG #16 FIX: Phase identifier for timeout configuration

# Per-mode thresholds: GATE_THRESHOLD is looked up for the Loop 3 gate,
# CONSENSUS_THRESHOLD for the Loop 2 consensus.
declare -A GATE_THRESHOLD=([mvp]=0.70 [standard]=0.75 [enterprise]=0.75)
declare -A CONSENSUS_THRESHOLD=([mvp]=0.80 [standard]=0.90 [enterprise]=0.95)
98
+
99
# Parse arguments
#
# Every supported option takes exactly one value and is stored verbatim in the
# configuration global listed in the table below. The script exits with
# status 1 on an unknown option or when an option is missing its value
# (previously the latter surfaced as a bare "unbound variable" error under
# `set -u`).
function parse_args() {
  local -A option_targets=(
    ["--task-id"]=TASK_ID
    ["--mode"]=MODE
    ["--loop3-agents"]=LOOP3_AGENTS
    ["--loop2-agents"]=LOOP2_AGENTS
    ["--product-owner"]=PRODUCT_OWNER
    ["--max-iterations"]=MAX_ITERATIONS
    ["--retry-count"]=RETRY_COUNT
    ["--retry-delay"]=RETRY_DELAY
    ["--timeout"]=TIMEOUT
    ["--min-quorum-loop3"]=MIN_QUORUM_LOOP3
    ["--min-quorum-loop2"]=MIN_QUORUM_LOOP2
    ["--epic-context"]=EPIC_CONTEXT
    ["--phase-context"]=PHASE_CONTEXT
    ["--success-criteria"]=SUCCESS_CRITERIA
    ["--expected-files"]=EXPECTED_FILES
    ["--phase-id"]=PHASE_ID
  )
  local option target

  while [[ $# -gt 0 ]]; do
    option="$1"

    # An empty word is not a valid associative-array subscript, so guard it.
    target=""
    if [[ -n "$option" ]]; then
      target="${option_targets[$option]:-}"
    fi

    if [[ -z "$target" ]]; then
      echo "Unknown option: $option" >&2
      exit 1
    fi

    if [[ $# -lt 2 ]]; then
      echo "Error: Option $option requires a value" >&2
      exit 1
    fi

    # printf -v assigns to the global named by $target without using eval.
    printf -v "$target" '%s' "$2"
    shift 2
  done
}

parse_args "$@"
172
+
173
# Validation: the task id, both agent lists and the product owner are mandatory.
if [ -z "$TASK_ID" ] || [ -z "$LOOP3_AGENTS" ] || [ -z "$LOOP2_AGENTS" ] || [ -z "$PRODUCT_OWNER" ]; then
  echo "Error: Required parameters missing"
  echo "Usage: $0 --task-id <id> --mode <mode> --loop3-agents <agents> --loop2-agents <agents> --product-owner <agent>"
  exit 1
fi

# Reject modes that have no threshold entry. Without this check the lookups
# below abort with an "unbound variable" error under `set -u`.
if [[ -z "$MODE" || -z "${GATE_THRESHOLD[$MODE]:-}" || -z "${CONSENSUS_THRESHOLD[$MODE]:-}" ]]; then
  echo "Error: Unknown mode '$MODE' (expected one of: ${!GATE_THRESHOLD[*]})" >&2
  exit 1
fi

GATE=${GATE_THRESHOLD[$MODE]}
CONSENSUS=${CONSENSUS_THRESHOLD[$MODE]}

# Set default quorum values if not specified (66% = 2/3 majority)
MIN_QUORUM_LOOP3=${MIN_QUORUM_LOOP3:-0.66}
MIN_QUORUM_LOOP2=${MIN_QUORUM_LOOP2:-0.66}
186
+
187
+ ##############################################################################
188
+ # Shutdown Handling Functions
189
+ ##############################################################################
190
# Shut the orchestrator down in an orderly way and terminate the process.
#
# Arguments:
#   $1 - exit code to terminate with (default: 130)
#   $2 - shutdown reason, echoed and recorded as a swarm metric
#        (default: user_interrupt)
# Globals read: SHUTDOWN_MONITOR_PID, LOOP3_HEARTBEAT_MONITOR_PID,
#               LOOP2_HEARTBEAT_MONITOR_PID, TASK_ID, SWARM_ID
# Globals set:  SHUTDOWN_REQUESTED (set to 1)
# Never returns; always ends with `exit "$exit_code"`.
function cleanup_and_exit() {
  local exit_code="${1:-130}"
  local reason="${2:-user_interrupt}"

  # Ignore further SIGINT/SIGTERM: a second signal arriving during cleanup
  # would otherwise re-enter this function through the traps installed below.
  trap '' SIGINT SIGTERM

  # Set shutdown flag to stop any ongoing operations
  SHUTDOWN_REQUESTED=1

  echo ""
  echo "=============================================="
  echo "🛑 Orchestrator shutting down gracefully..."
  echo "=============================================="
  echo "Reason: $reason"
  echo "Exit Code: $exit_code"

  # Kill shutdown monitor if running
  if [ -n "${SHUTDOWN_MONITOR_PID:-}" ] && kill -0 "$SHUTDOWN_MONITOR_PID" 2>/dev/null; then
    kill "$SHUTDOWN_MONITOR_PID" 2>/dev/null || true
    wait "$SHUTDOWN_MONITOR_PID" 2>/dev/null || true
  fi

  # Stop heartbeat monitors if running
  if [ -n "${LOOP3_HEARTBEAT_MONITOR_PID:-}" ]; then
    echo "Stopping Loop 3 heartbeat monitor..."
    stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
  fi
  if [ -n "${LOOP2_HEARTBEAT_MONITOR_PID:-}" ]; then
    echo "Stopping Loop 2 heartbeat monitor..."
    stop_heartbeat_monitor "$TASK_ID" "loop2" "$LOOP2_HEARTBEAT_MONITOR_PID"
  fi

  # Mark swarm as cancelled if initialized (best effort)
  if [ -n "${TASK_ID:-}" ] && [ -n "${SWARM_ID:-}" ]; then
    echo "Marking swarm as cancelled..."
    ./.claude/skills/redis-coordination/complete-swarm.sh \
      --swarm-id "$SWARM_ID" \
      --final-metric "status=cancelled" \
      --final-metric "shutdown_reason=$reason" 2>/dev/null || echo " ⚠️ Failed to mark swarm as cancelled"
  fi

  # Clean up Redis keys
  if [ -n "${TASK_ID:-}" ]; then
    echo "Cleaning up Redis keys..."
    local -a swarm_keys=()
    local keys_deleted=0
    local batch_deleted offset

    # Read one key per line into an array so keys are passed to DEL verbatim
    # (xargs would re-interpret quotes, backslashes and whitespace).
    mapfile -t swarm_keys < <(redis-cli --scan --pattern "swarm:${TASK_ID}:*" 2>/dev/null || true)

    # Delete in batches to keep each command line bounded.
    for ((offset = 0; offset < ${#swarm_keys[@]}; offset += 500)); do
      batch_deleted=$(redis-cli DEL "${swarm_keys[@]:offset:500}" 2>/dev/null) || batch_deleted=0
      if [[ "$batch_deleted" =~ ^[0-9]+$ ]]; then
        keys_deleted=$((keys_deleted + batch_deleted))
      fi
    done
    echo " Deleted $keys_deleted Redis keys"
  fi

  # Clean up heartbeat monitor marker files
  rm -f -- "/tmp/heartbeat-monitor-${TASK_ID:-}-"*.active 2>/dev/null || true

  echo "=============================================="
  echo "Shutdown complete"
  echo "=============================================="

  exit "$exit_code"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'echo "[TRAP] Caught SIGINT" >&2; cleanup_and_exit 130 "SIGINT_received"' SIGINT
trap 'echo "[TRAP] Caught SIGTERM" >&2; cleanup_and_exit 143 "SIGTERM_received"' SIGTERM
249
+
250
+ ##############################################################################
251
+ # Start Shutdown Monitor (Background Process)
252
+ ##############################################################################
253
# Launch a background watcher that blocks on the Redis list
# "swarm:<task_id>:shutdown" and, once an entry arrives, sends SIGTERM to the
# orchestrator process.
#
# Arguments:
#   $1 - task id used to build the shutdown key
# Globals read: ORCHESTRATOR_PID
# Globals set:  SHUTDOWN_MONITOR_PID (PID of the background watcher)
function start_shutdown_monitor() {
  local task_id="$1"

  (
    # Block on shutdown channel (zero-token waiting)
    channel="swarm:${task_id}:shutdown"
    reply=$(redis-cli BLPOP "$channel" 0 2>/dev/null || echo "")

    # Nothing received (e.g. redis-cli failed): the watcher simply ends.
    [ -n "$reply" ] || exit 0

    # The last line of the reply is the pushed value; the reason is read from
    # its JSON "reason" field, falling back to "external_shutdown".
    payload=$(echo "$reply" | tail -1)
    why=$(echo "$payload" | jq -r '.reason // "external_shutdown"' 2>/dev/null || echo "external_shutdown")

    echo ""
    echo "🛑 Shutdown signal received from Redis channel: $why"
    echo " Sending SIGTERM to orchestrator PID: $ORCHESTRATOR_PID"

    # Send SIGTERM to main orchestrator process
    if ! kill -TERM "$ORCHESTRATOR_PID" 2>/dev/null; then
      echo " ❌ Failed to send SIGTERM (process may have already exited)"
      exit 0
    fi
    echo " ✅ SIGTERM sent successfully"
  ) &

  SHUTDOWN_MONITOR_PID=$!
  echo "Shutdown monitor started (PID: $SHUTDOWN_MONITOR_PID)"
}
283
+
284
+ ##############################################################################
285
+ # Quorum Calculation Function
286
+ ##############################################################################
287
# Translate a quorum specification into a required number of agents.
#
# Arguments:
#   $1 - quorum spec: empty (all agents), an absolute count ("3"),
#        a percentage ("85%" or "62.5%") or a fraction ("0.66", ".5", "1.0")
#   $2 - total number of agents (non-negative integer)
# Outputs: the required agent count on stdout
# Returns: 0 on success; 1 (with a message on stderr) when the spec is
#          malformed or resolves to more agents than exist
#
# Percentages and fractions are rounded UP, so the result never represents
# less than the requested share (10% of 3 agents is 1, not 0). The arithmetic
# is exact integer math; no external calculator is needed.
function calculate_quorum() {
  local quorum_spec="$1"
  local total_agents="$2"
  local percent_re='^([0-9]+)(\.([0-9]+))?%$'
  local fraction_re='^([01]?)\.([0-9]+)$'
  local count_re='^[0-9]+$'
  local whole_digits fraction_digits divisor numerator denominator required

  if ! [[ "$total_agents" =~ $count_re ]]; then
    echo "Error: Total agents ($total_agents) must be a non-negative integer" >&2
    return 1
  fi
  total_agents=$((10#$total_agents))

  # If no quorum specified, require all agents
  if [ -z "$quorum_spec" ]; then
    echo "$total_agents"
    return 0
  fi

  if [[ "$quorum_spec" =~ $count_re ]]; then
    # Absolute number (10# forces base 10 so "08" is not read as octal)
    required=$((10#$quorum_spec))
  else
    if [[ "$quorum_spec" =~ $percent_re ]]; then
      whole_digits="${BASH_REMATCH[1]}"
      fraction_digits="${BASH_REMATCH[3]:-}"
      divisor=100
    elif [[ "$quorum_spec" =~ $fraction_re ]]; then
      whole_digits="${BASH_REMATCH[1]:-0}"
      fraction_digits="${BASH_REMATCH[2]}"
      divisor=1
    else
      echo "Error: Invalid quorum specification ($quorum_spec)" >&2
      return 1
    fi

    # Represent the share as numerator/denominator, e.g. "0.66" -> 66/100 and
    # "85%" -> 85/100, then take ceil(total * numerator / denominator).
    numerator=$((10#${whole_digits}${fraction_digits}))
    denominator=$((divisor * 10 ** ${#fraction_digits}))
    required=$(((total_agents * numerator + denominator - 1) / denominator))
  fi

  # Validate the quorum doesn't exceed total
  if [ "$required" -gt "$total_agents" ]; then
    echo "Error: Quorum ($quorum_spec) exceeds total agents ($total_agents)" >&2
    return 1
  fi

  echo "$required"
}
316
+
317
+ ##############################################################################
318
+ # Dead Letter Queue (DLQ) Functions
319
+ ##############################################################################
320
# Record a permanently failed agent in the task's dead letter queue.
#
# Arguments:
#   $1 - agent identifier
#   $2 - failure reason
#   $3 - number of retries performed (converted to a JSON number by jq)
# Globals read: TASK_ID
# Side effects: LPUSHes a JSON entry {reason, retry_count, timestamp} onto
#               "swarm:<TASK_ID>:dlq:<agent>" and sets a 7-day expiry on it.
# Outputs: a one-line summary on stdout
function write_to_dlq() {
  local agent="$1"
  local reason="$2"
  local retry_count="$3"
  # Kept local so repeated calls do not leak state into the global scope.
  local dlq_key="swarm:${TASK_ID}:dlq:${agent}"
  local dlq_entry

  dlq_entry=$(jq -n \
    --arg reason "$reason" \
    --arg retries "$retry_count" \
    --arg ts "$(date +%s)" \
    '{reason: $reason, retry_count: ($retries | tonumber), timestamp: ($ts | tonumber)}')

  # -x makes redis-cli read the final argument (the entry) from stdin.
  echo "$dlq_entry" | redis-cli -x LPUSH "$dlq_key" >/dev/null
  redis-cli EXPIRE "$dlq_key" 604800 >/dev/null # 7 days TTL

  echo " ❌ $agent → DLQ (reason: $reason, retries: $retry_count)"
}
337
+
338
+ ##############################################################################
339
+ # Exponential Backoff Retry Function
340
+ ##############################################################################
341
# Sleep for an exponentially growing delay before the next retry, waking up
# early when a shutdown has been requested.
#
# Arguments:
#   $1 - agent name (used in log messages only)
#   $2 - attempt number (exponent of the backoff)
#   $3 - maximum number of retries (used in log messages only)
#   $4 - base delay in milliseconds
# Globals read: SHUTDOWN_REQUESTED
# Returns: always 0
function retry_with_backoff() {
  local agent="$1"
  local attempt="$2"
  local max_retries="$3"
  local base_delay="$4"

  # Check for shutdown before sleeping
  if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
    echo " [SHUTDOWN] Skipping backoff delay for $agent" >&2
    return 0
  fi

  # Exponential backoff: delay = base_delay * (2 ^ attempt)
  local wait_ms=$(bc <<< "$base_delay * (2 ^ $attempt)")
  local now_utc=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  echo " [$now_utc] [Retry $attempt/$max_retries] Waiting ${wait_ms}ms before retry for $agent..."

  # Interruptible sleep: nap in slices of at most 0.5s and re-check the
  # shutdown flag after every slice.
  local wait_sec=$(bc <<< "scale=3; $wait_ms / 1000")
  local slept=0
  local left slice
  while (( $(bc -l <<< "$slept < $wait_sec") )); do
    left=$(bc <<< "$wait_sec - $slept")
    slice=$(bc <<< "if ($left < 0.5) $left else 0.5")

    # Sleep in the background and wait on it, so a trapped signal can
    # interrupt the wait; in that case return immediately.
    sleep "$slice" &
    wait $! 2>/dev/null || return 0

    slept=$(bc <<< "$slept + $slice")

    if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
      echo " [SHUTDOWN] Interrupted backoff delay for $agent" >&2
      return 0
    fi
  done
}
379
+
380
+ ##############################################################################
381
+ # Heartbeat Monitoring Functions
382
+ ##############################################################################
383
declare -A MISSED_HEARTBEATS # Track missed heartbeats per agent

# Report whether an agent currently has a heartbeat recorded in Redis.
#
# Arguments:
#   $1 - agent name
#   $2 - task id
#   $3 - iteration number (appended to the agent name to form the agent id)
# Globals set: HB_KEY, HB_DATA
# Returns: 0 when the "heartbeat" field of the agent hash holds a value,
#          1 when it is empty or reads "(nil)"
function check_agent_heartbeat() {
  local agent="$1"
  local task_id="$2"
  local iteration="$3"

  # Agents create heartbeat as: swarm:${task_id}:agent:${agent_id} (HASH with heartbeat field)
  # Agent ID includes iteration suffix: react-frontend-engineer-1
  HB_KEY="swarm:${task_id}:agent:${agent}-${iteration}"
  HB_DATA=$(redis-cli HGET "$HB_KEY" heartbeat 2>/dev/null || echo "")

  case "$HB_DATA" in
    "" | "(nil)")
      return 1 # Dead
      ;;
  esac

  return 0 # Alive
}
401
+
402
# Run one heartbeat sweep over a set of agents and warn about hung ones.
#
# Arguments:
#   $1 - task id
#   $2 - loop name (used in log messages)
#   $3 - iteration number
#   $4.. - agent names to check
# Globals read:  LOOP3_AGENTS, LOOP2_AGENTS, LOOP3_FAILED_AGENTS,
#                LOOP2_FAILED_AGENTS, LOOP3_COMPLETED_AGENTS,
#                LOOP2_COMPLETED_AGENTS, LOOP3_TOTAL, LOOP2_TOTAL,
#                MIN_QUORUM_LOOP3, MIN_QUORUM_LOOP2
# Globals set:   MISSED_HEARTBEATS (per-agent miss counter)
# Outputs: warnings on stderr once an agent has missed two consecutive sweeps
function check_heartbeats_loop() {
  local task_id="$1"
  local loop_name="$2"
  local iteration="$3"
  shift 3

  local IFS=' '
  # The ":-" defaults keep `set -u` quiet while a loop's arrays do not exist yet.
  local failed_agents=" ${LOOP3_FAILED_AGENTS[*]:-} ${LOOP2_FAILED_AGENTS[*]:-} "
  # LOOP*_AGENTS arrive comma-separated from the command line; whitespace is
  # normalised to commas so space-separated lists match as well.
  local loop3_members=",${LOOP3_AGENTS//[[:space:]]/,},"
  local loop2_members=",${LOOP2_AGENTS//[[:space:]]/,},"
  local agent missed completed required timestamp

  for agent in "$@"; do
    # Skip agents already marked as failed
    if [[ "$failed_agents" == *" ${agent} "* ]]; then
      continue
    fi

    if check_agent_heartbeat "$agent" "$task_id" "$iteration"; then
      MISSED_HEARTBEATS["$agent"]=0 # Reset counter
      continue
    fi

    missed=$((${MISSED_HEARTBEATS["$agent"]:-0} + 1))
    MISSED_HEARTBEATS["$agent"]=$missed
    if ((missed < 2)); then
      continue
    fi

    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    echo " [$timestamp] [$loop_name] ⚠️ $agent appears hung (no heartbeat for 60s)" >&2

    # Determine which loop this agent belongs to and check quorum
    if [[ "$loop3_members" == *",${agent},"* ]]; then
      completed=0
      if declare -p LOOP3_COMPLETED_AGENTS >/dev/null 2>&1; then
        completed=${#LOOP3_COMPLETED_AGENTS[@]}
      fi
      required=$(calculate_quorum "$MIN_QUORUM_LOOP3" "${LOOP3_TOTAL:-0}") || continue
    elif [[ "$loop2_members" == *",${agent},"* ]]; then
      # Safety check: skip if Loop 2 hasn't been initialized yet. declare -p
      # distinguishes an unset array from an initialised-but-empty one.
      if ! declare -p LOOP2_COMPLETED_AGENTS >/dev/null 2>&1; then
        continue
      fi
      completed=${#LOOP2_COMPLETED_AGENTS[@]}
      required=$(calculate_quorum "$MIN_QUORUM_LOOP2" "${LOOP2_TOTAL:-0}") || continue
    else
      continue
    fi

    if [ "$completed" -ge "$required" ]; then
      echo " [$timestamp] [$loop_name] ℹ️ Continuing with quorum (${completed}/${required} agents)" >&2
    else
      echo " [$timestamp] [$loop_name] ⚠️ Cannot meet quorum without $agent (${completed}/${required})" >&2
    fi
  done
}
448
+
449
# Start a background heartbeat sweep for a loop's agents.
#
# Arguments:
#   $1 - task id
#   $2 - loop name (part of the marker file name)
#   $3 - iteration number
#   $4.. - agent names to watch
# Globals read: SHUTDOWN_REQUESTED
# Side effects: creates /tmp/heartbeat-monitor-<task>-<loop>.active; the sweep
#               keeps running while that file exists and no shutdown has been
#               requested, calling check_heartbeats_loop every 30 seconds.
# Prints nothing: the caller reads the monitor PID from $! right after the call.
function start_heartbeat_monitor() {
  local task_id="$1" loop_name="$2" iteration="$3"
  shift 3
  local -a watched=("$@")

  # Create marker file for this monitor
  local marker_path
  printf -v marker_path '/tmp/heartbeat-monitor-%s-%s.active' "$task_id" "$loop_name"
  touch "$marker_path"

  # [BUG #7 FIX] Spawn background process and let caller capture $!
  (
    until [ ! -f "$marker_path" ] || [ "$SHUTDOWN_REQUESTED" -eq 1 ]; do
      check_heartbeats_loop "$task_id" "$loop_name" "$iteration" "${watched[@]}"
      sleep 30
    done
  ) &

  # No echo - caller will use $! to get PID
}
475
+
476
# Stop a heartbeat monitor started by start_heartbeat_monitor.
#
# Arguments:
#   $1 - task id
#   $2 - loop name
#   $3 - PID of the monitor process (may be empty)
# Side effects: removes the monitor's marker file, then kills and reaps the
#               monitor process if it is still alive.
function stop_heartbeat_monitor() {
  local task_id="$1" loop_name="$2" monitor_pid="$3"
  local marker_path="/tmp/heartbeat-monitor-${task_id}-${loop_name}.active"

  # Removing the marker file makes the monitor loop end on its own
  rm -f "$marker_path"

  # Nothing more to do without a PID or when the process is already gone
  [ -n "$monitor_pid" ] || return 0
  kill -0 "$monitor_pid" 2>/dev/null || return 0

  # Kill monitor process if still running
  kill "$monitor_pid" 2>/dev/null || true
  wait "$monitor_pid" 2>/dev/null || true
}
490
+
491
+ ##############################################################################
492
+ # Get Agent-Specific Timeout
493
+ ##############################################################################
494
# Resolve the completion timeout for one agent.
#
# Arguments:
#   $1 - agent name
#   $2 - task id
# Globals read: TIMEOUT (fallback value)
# Outputs: the timeout on stdout. The value comes from the get-agent-timeout.sh
#          helper located next to this script; TIMEOUT is used instead when the
#          helper fails, is missing, or prints something that is not a plain
#          number.
function get_agent_timeout() {
  local agent="$1"
  local task_id="$2"
  local script_dir resolved

  # Use get-agent-timeout.sh helper script
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || script_dir="."
  resolved=$("$script_dir/get-agent-timeout.sh" --task-id "$task_id" --agent-id "$agent" 2>/dev/null) || resolved=""

  # Callers splice this value into JSON and pass it as --timeout, so anything
  # that is not a number (including empty output) falls back to the default.
  if ! [[ "$resolved" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    resolved="$TIMEOUT"
  fi

  echo "$resolved"
}
504
+
505
+ ##############################################################################
506
+ # Process-Based Completion Monitoring
507
+ ##############################################################################
508
# Watch an agent process in the background and publish a completion marker if
# it disappears without having signalled "done" itself.
#
# Arguments:
#   $1 - agent id (used in log messages)
#   $2 - PID of the agent process
#   $3 - task id (used for the error metric key)
#   $4 - Redis list key on which the agent signals completion
#
# The watcher runs in a background subshell. The agent is a child of the
# orchestrator, not of that subshell, so `wait <pid>` cannot be used there:
# bash rejects a non-child PID with status 127 immediately, which under
# `set -e` ended the watcher before it did anything. Liveness is therefore
# polled with `kill -0`. The exit status of a non-child process is not
# observable, so an unsignalled exit is reported conservatively as an error
# with code "unknown".
function monitor_agent_process() {
  local agent_id="$1"
  local agent_pid="$2"
  local task_id="$3"
  local done_key="$4"

  # Monitor agent process in background
  (
    # Wait for process to exit
    while kill -0 "$agent_pid" 2>/dev/null; do
      sleep 1
    done

    # Check if done signal already sent (agent may have signaled normally)
    done_count=$(redis-cli LLEN "$done_key" 2>/dev/null || echo "0")
    if [[ "$done_count" =~ ^[0-9]+$ ]] && [ "$done_count" -gt 0 ]; then
      # Agent signaled normally - nothing to do
      exit 0
    fi

    # Process exited without signaling - auto-complete as a failure
    echo " [Process Monitor] $agent_id exited without signaling (exit code unknown) - auto-signaling failure" >&2
    redis-cli LPUSH "$done_key" "auto-completed-error:unknown" >/dev/null

    # METRICS: Increment error counter
    redis-cli INCR "swarm:${task_id}:metrics:agent_errors" >/dev/null
  ) &
}
540
+
541
+ ##############################################################################
542
+ # BLPOP with Retry Logic + Process Monitoring
543
+ ##############################################################################
544
# Wait for an agent's completion signal, retrying with exponential backoff.
#
# Arguments:
#   $1 - agent name (log messages, heartbeat key, DLQ entry)
#   $2 - Redis list key the agent pushes its completion signal to
#   $3 - BLPOP timeout in seconds for each attempt
#   $4 - maximum number of attempts
#   $5 - base backoff delay in milliseconds
#   $6 - optional agent PID; enables process liveness checks and kill-on-hang
# Globals read: SHUTDOWN_REQUESTED, TASK_ID
# Outputs: the raw redis-cli reply on stdout on success. All diagnostics,
#          including the DLQ summary, go to stderr so that callers capturing
#          stdout receive only the reply.
# Returns: 0 when a completion signal was retrieved; 1 on shutdown or after
#          the final failed attempt (the agent is then written to the DLQ)
function blpop_with_retry() {
  local agent="$1"
  local done_key="$2"
  local timeout="$3"
  local retry_count="$4"
  local retry_delay="$5"
  local agent_pid="${6:-}" # Optional: PID for process monitoring
  local attempt result heartbeat_key heartbeat_exists timestamp

  for ((attempt = 1; attempt <= retry_count; attempt++)); do
    # Check for shutdown before attempting BLPOP
    if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
      echo " [SHUTDOWN] Aborting BLPOP for $agent" >&2
      return 1
    fi

    # Use Redis's native BLPOP timeout instead of shell timeout command
    # This allows SIGTERM to properly interrupt the process
    result=$(redis-cli blpop "$done_key" "$timeout" 2>/dev/null || echo "")

    if [ -n "$result" ]; then
      echo "$result"
      return 0 # Success
    fi

    # BLPOP timeout - check if process is still alive
    if [ -n "$agent_pid" ] && ! kill -0 "$agent_pid" 2>/dev/null; then
      echo " [Process Check] Agent process $agent_pid no longer running" >&2

      # Process exited - check if done signal was auto-generated
      result=$(redis-cli LPOP "$done_key" 2>/dev/null || echo "")
      if [ -n "$result" ]; then
        echo " [Auto-Complete] Retrieved: $result" >&2
        echo "$result"
        return 0
      fi
    fi

    # Check for shutdown after BLPOP timeout
    if [ "$SHUTDOWN_REQUESTED" -eq 1 ]; then
      echo " [SHUTDOWN] Aborting retry for $agent" >&2
      return 1
    fi

    # Check heartbeat status
    # NOTE(review): this key differs from the "swarm:<task>:agent:<id>" hash
    # read by check_agent_heartbeat - confirm which one agents actually write.
    heartbeat_key="swarm:${TASK_ID}:${agent}:heartbeat"
    heartbeat_exists=$(redis-cli EXISTS "$heartbeat_key" 2>/dev/null || echo "0")

    # Only an explicit "0" counts as "no heartbeat"; an empty or unexpected
    # reply does not trigger the kill path.
    if [[ "$heartbeat_exists" == "0" ]]; then
      echo " ⚠️ No heartbeat from $agent - agent may be stuck or crashed" >&2

      # If we have PID and process is stuck, kill it
      if [ -n "$agent_pid" ] && kill -0 "$agent_pid" 2>/dev/null; then
        echo " [Timeout Kill] Terminating stuck process $agent_pid" >&2
        kill "$agent_pid" 2>/dev/null || true
        sleep 2

        # Force kill if still alive
        if kill -0 "$agent_pid" 2>/dev/null; then
          kill -9 "$agent_pid" 2>/dev/null || true
        fi

        # METRICS: Increment timeout counter
        redis-cli INCR "swarm:${TASK_ID}:metrics:agent_killed" >/dev/null
      fi
    fi

    # Log retry attempt (to stderr so it's visible during command substitution)
    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    echo " [$timestamp] ⚠️ BLPOP attempt $attempt/$retry_count failed for $agent" >&2

    if [ "$attempt" -lt "$retry_count" ]; then
      # METRICS: Increment retry counter
      redis-cli INCR "swarm:${TASK_ID}:metrics:retry_count" >/dev/null

      retry_with_backoff "$agent" "$attempt" "$retry_count" "$retry_delay" >&2
    else
      # Final failure - write to DLQ (summary redirected to stderr so it does
      # not end up in the caller's captured result)
      echo " [$timestamp] ❌ FINAL FAILURE: $agent after $retry_count attempts" >&2
      write_to_dlq "$agent" "timeout_after_retries" "$retry_count" >&2
      return 1
    fi
  done

  return 1
}
631
+
632
echo "=== CFN Loop Orchestration ==="
echo "Task ID: $TASK_ID"
echo "Mode: $MODE (Gate: $GATE, Consensus: $CONSENSUS)"
echo "Max Iterations: $MAX_ITERATIONS"
echo ""

# Initialize swarm using general Redis coordination primitive
SWARM_ID="swarm-${TASK_ID}"
ALL_AGENTS="${LOOP3_AGENTS},${LOOP2_AGENTS},${PRODUCT_OWNER}"

# LOG: Swarm initialization (best effort).
# The JSON is assembled by jq so that quotes or backslashes in agent names or
# ids cannot produce malformed JSON; if assembly fails an empty object is logged.
SWARM_INIT_DETAILS=$(jq -nc \
  --arg mode "$MODE" \
  --arg loop3 "$LOOP3_AGENTS" \
  --arg loop2 "$LOOP2_AGENTS" \
  --arg po "$PRODUCT_OWNER" \
  --arg max_iterations "$MAX_ITERATIONS" \
  --arg gate "$GATE" \
  --arg consensus "$CONSENSUS" \
  '{mode: $mode, loop3_agents: $loop3, loop2_agents: $loop2, product_owner: $po, max_iterations: ($max_iterations | tonumber), gate_threshold: ($gate | tonumber), consensus_threshold: ($consensus | tonumber)}' \
  2>/dev/null) || SWARM_INIT_DETAILS='{}'

./.claude/skills/redis-coordination/log-event.sh \
  --task-id "$TASK_ID" \
  --event-type "swarm_init" \
  --details "$SWARM_INIT_DETAILS" \
  --level "INFO" 2>/dev/null || true

# Build CFN-specific metadata
CFN_METADATA=$(jq -n \
  --arg mode "$MODE" \
  --arg loop3 "$LOOP3_AGENTS" \
  --arg loop2 "$LOOP2_AGENTS" \
  --arg po "$PRODUCT_OWNER" \
  '{mode: $mode, loop3_agents: $loop3, loop2_agents: $loop2, product_owner: $po, workflow_type: "cfn_loop"}')

# Use general init-swarm primitive
./.claude/skills/redis-coordination/init-swarm.sh \
  --swarm-id "$SWARM_ID" \
  --agents "$ALL_AGENTS" \
  --task-id "$TASK_ID" \
  --topology "hierarchical" \
  --metadata "$CFN_METADATA" > /dev/null

# Start shutdown monitor in background
start_shutdown_monitor "$TASK_ID"
671
+
672
# Store one optional context document in Redis with a 7-day TTL.
#
# Arguments:
#   $1 - key suffix; the value is stored under "swarm:<TASK_ID>:<suffix>"
#   $2 - capitalised label used in the progress messages
#   $3 - value to store; nothing happens when it is empty
# Globals read: TASK_ID
#
# The value is passed to redis-cli unmodified. redis-cli receives it as a
# single argv entry, so no shell-style quote escaping is needed; the escaping
# applied previously rewrote every apostrophe as '\'' and left the stored JSON
# unparseable for the jq calls that read it back.
function store_swarm_context() {
  local key_suffix="$1"
  local label="$2"
  local value="$3"

  [ -n "$value" ] || return 0

  echo "📋 Storing ${label,,} in Redis..."
  redis-cli setex "swarm:${TASK_ID}:${key_suffix}" 604800 "$value" >/dev/null
  echo " ✅ ${label} stored (TTL: 7 days)"
}

# Store epic context, phase context and success criteria in Redis (if provided)
store_swarm_context "epic-context" "Epic context" "$EPIC_CONTEXT"
store_swarm_context "phase-context" "Phase context" "$PHASE_CONTEXT"
store_swarm_context "success-criteria" "Success criteria" "$SUCCESS_CRITERIA"

echo ""

# [BUG #15 FIX] REMOVED: Early Product Owner spawn at iteration 0
# Product Owner now only spawned after Loop 2 completes (see line 1283)
# This prevents timeout issues with waiting mode initialization
echo "[Product Owner] Will spawn after Loop 2 consensus (just-in-time pattern)"
echo ""
702
+
703
+ # Iteration loop
704
+ for ITERATION in $(seq 1 $MAX_ITERATIONS); do
705
+ echo "=== Iteration $ITERATION/$MAX_ITERATIONS ==="
706
+
707
+ # METRICS: Iteration start timestamp
708
+ ITERATION_START=$(date +%s%N | cut -b1-13) # milliseconds
709
+ redis-cli LPUSH "swarm:${TASK_ID}:metrics:iteration_start" "$ITERATION_START" >/dev/null
710
+
711
+ # Step 1: Build detailed agent context from Redis (BUG #20 FIX - Option 2)
712
+ echo "[Loop 3] Building agent context from Redis..."
713
+
714
+ # Retrieve stored context
715
+ EPIC_CTX=$(redis-cli get "swarm:${TASK_ID}:epic-context" 2>/dev/null || echo "{}")
716
+ PHASE_CTX=$(redis-cli get "swarm:${TASK_ID}:phase-context" 2>/dev/null || echo "{}")
717
+ SUCCESS_CTX=$(redis-cli get "swarm:${TASK_ID}:success-criteria" 2>/dev/null || echo "{}")
718
+
719
+ # Extract key fields with jq (safe parsing)
720
+ EPIC_GOAL=$(echo "$EPIC_CTX" | jq -r '.epicGoal // "No epic goal specified"')
721
+ IN_SCOPE=$(echo "$EPIC_CTX" | jq -r '.inScope[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
722
+ OUT_SCOPE=$(echo "$EPIC_CTX" | jq -r '.outOfScope[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
723
+ DELIVERABLES=$(echo "$PHASE_CTX" | jq -r '.deliverables[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
724
+ DIRECTORY=$(echo "$PHASE_CTX" | jq -r '.directory // ""')
725
+ ACCEPTANCE=$(echo "$SUCCESS_CTX" | jq -r '.acceptanceCriteria[]? // empty' | sed 's/^/- /' || echo "- (not specified)")
726
+
727
+ # Build structured agent context
728
+ LOOP3_AGENT_CONTEXT="Loop 3 implementation for iteration $ITERATION
729
+
730
+ Epic Goal: $EPIC_GOAL
731
+
732
+ In Scope:
733
+ $IN_SCOPE
734
+
735
+ Out of Scope:
736
+ $OUT_SCOPE
737
+
738
+ Deliverables (CRITICAL - you MUST create these files):
739
+ $DELIVERABLES
740
+ $([ -n "$DIRECTORY" ] && echo "
741
+ Target Directory: $DIRECTORY")
742
+
743
+ Acceptance Criteria:
744
+ $ACCEPTANCE
745
+
746
+ IMPORTANT:
747
+ - Use Write tool to create each deliverable file
748
+ - Verify files created with 'ls -la \$DIRECTORY' after each Write
749
+ - All deliverables must exist for validation to pass
750
+ - Report confidence score based on actual file creation
751
+ "
752
+
753
+ echo " ✅ Agent context built ($(echo "$LOOP3_AGENT_CONTEXT" | wc -c) characters)"
754
+ echo ""
755
+
756
+ # Step 2: Spawn Loop 3 agents via CLI
757
+ echo "[Loop 3] Spawning implementers via CLI..."
758
+ IFS=',' read -ra AGENTS <<< "$LOOP3_AGENTS"
759
+
760
+ # Track instance counts to generate unique agent IDs for duplicate agent types
761
+ declare -A AGENT_INSTANCE_COUNTS
762
+ declare -A AGENT_IDS # Map from array index to unique agent ID
763
+
764
+ # Pre-calculate unique agent IDs
765
+ for i in "${!AGENTS[@]}"; do
766
+ AGENT="${AGENTS[$i]}"
767
+
768
+ # Increment instance counter for this agent type
769
+ AGENT_INSTANCE_COUNTS["$AGENT"]=$((${AGENT_INSTANCE_COUNTS["$AGENT"]:-0} + 1))
770
+ INSTANCE_NUM="${AGENT_INSTANCE_COUNTS["$AGENT"]}"
771
+
772
+ # Generate unique agent ID: agent-type-iteration-instance
773
+ UNIQUE_AGENT_ID="${AGENT}-${ITERATION}-${INSTANCE_NUM}"
774
+ AGENT_IDS["$i"]="$UNIQUE_AGENT_ID"
775
+
776
+ echo " [Instance Tracking] ${AGENT} #${INSTANCE_NUM} → ${UNIQUE_AGENT_ID}"
777
+ done
778
+
779
+ echo ""
780
+
781
+ # [PHASE 1 INTEGRATION] Loop 3 Skill-Based Output Processing (Parallel)
782
+ # Uses .claude/skills/loop3-output-processing/ for guaranteed confidence extraction
783
+ echo "[Loop 3] Using skill-based output processing (parallel execution)"
784
+
785
+ LOOP3_TOTAL=${#AGENTS[@]}
786
+ LOOP3_REQUIRED=$(calculate_quorum "$MIN_QUORUM_LOOP3" "$LOOP3_TOTAL")
787
+ LOOP3_COMPLETED_AGENTS=()
788
+ LOOP3_FAILED_AGENTS=()
789
+
790
+ echo "[Loop 3] Quorum: $LOOP3_REQUIRED/$LOOP3_TOTAL agents required"
791
+ echo ""
792
+
793
+ # Step 2a: Spawn all agents in parallel (background processes)
794
+ declare -A AGENT_PIDS
795
+ declare -A AGENT_OUTPUT_FILES
796
+
797
+ for i in "${!AGENTS[@]}"; do
798
+ AGENT="${AGENTS[$i]}"
799
+ UNIQUE_AGENT_ID="${AGENT_IDS[$i]}"
800
+
801
+ # Get agent-specific timeout
802
+ AGENT_TIMEOUT=$(get_agent_timeout "$AGENT" "$TASK_ID")
803
+
804
+ # Create temp file for agent output
805
+ OUTPUT_FILE="/tmp/loop3-${TASK_ID}-${UNIQUE_AGENT_ID}.json"
806
+ AGENT_OUTPUT_FILES["$UNIQUE_AGENT_ID"]="$OUTPUT_FILE"
807
+
808
+ echo " Spawning $AGENT (ID: $UNIQUE_AGENT_ID, timeout: ${AGENT_TIMEOUT}s)"
809
+
810
+ # LOG: Loop 3 agent spawn
811
+ ./.claude/skills/redis-coordination/log-event.sh \
812
+ --task-id "$TASK_ID" \
813
+ --event-type "agent_spawn" \
814
+ --loop "loop3" \
815
+ --agent-id "$UNIQUE_AGENT_ID" \
816
+ --iteration "$ITERATION" \
817
+ --details "{\"agent_type\": \"$AGENT\", \"timeout\": $AGENT_TIMEOUT}" \
818
+ --level "INFO" 2>/dev/null || true
819
+
820
+ # Execute agent via Loop 3 skill in background
821
+ (
822
+ # Record start time
823
+ START_TIME=$(date +%s%N | cut -b1-13)
824
+
825
+ # Execute skill (BUG #20 FIX - inject detailed context)
826
+ if SKILL_RESULT=$(./.claude/skills/loop3-output-processing/execute-and-extract.sh \
827
+ --agent-type "$AGENT" \
828
+ --task-id "$TASK_ID" \
829
+ --agent-id "$UNIQUE_AGENT_ID" \
830
+ --context "$LOOP3_AGENT_CONTEXT" \
831
+ --iteration "$ITERATION" \
832
+ --timeout "$AGENT_TIMEOUT" 2>&1); then
833
+
834
+ # Record end time
835
+ END_TIME=$(date +%s%N | cut -b1-13)
836
+ LATENCY=$((END_TIME - START_TIME))
837
+
838
+ # Add latency to result
839
+ RESULT_WITH_LATENCY=$(echo "$SKILL_RESULT" | jq --arg latency "$LATENCY" '. + {latency_ms: ($latency | tonumber)}')
840
+
841
+ # Save to temp file
842
+ echo "$RESULT_WITH_LATENCY" > "$OUTPUT_FILE"
843
+
844
+ # Store result in Redis
845
+ echo "$RESULT_WITH_LATENCY" | redis-cli -x LPUSH "swarm:${TASK_ID}:${UNIQUE_AGENT_ID}:result" >/dev/null
846
+ redis-cli LPUSH "swarm:${TASK_ID}:${UNIQUE_AGENT_ID}:done" "complete" >/dev/null
847
+
848
+ exit 0
849
+ else
850
+ # Skill failed - save error
851
+ echo "{\"error\": true, \"output\": \"$SKILL_RESULT\"}" > "$OUTPUT_FILE"
852
+ exit 1
853
+ fi
854
+ ) &
855
+
856
+ AGENT_PIDS["$UNIQUE_AGENT_ID"]=$!
857
+ echo " ✅ Spawned $UNIQUE_AGENT_ID (PID: ${AGENT_PIDS[$UNIQUE_AGENT_ID]})"
858
+ done
859
+
860
+ echo ""
861
+ echo "[Loop 3] All agents spawned, waiting for completion..."
862
+ echo ""
863
+
864
+ # Step 2b: Wait for all agents to complete
865
+ for i in "${!AGENTS[@]}"; do
866
+ AGENT="${AGENTS[$i]}"
867
+ UNIQUE_AGENT_ID="${AGENT_IDS[$i]}"
868
+ AGENT_PID="${AGENT_PIDS[$UNIQUE_AGENT_ID]}"
869
+ OUTPUT_FILE="${AGENT_OUTPUT_FILES[$UNIQUE_AGENT_ID]}"
870
+
871
+ echo " Waiting for $UNIQUE_AGENT_ID (PID: $AGENT_PID)..."
872
+
873
+ # Wait for specific agent process
874
+ if wait "$AGENT_PID" 2>/dev/null; then
875
+ # Success - read result from temp file
876
+ if [ -f "$OUTPUT_FILE" ]; then
877
+ SKILL_RESULT=$(cat "$OUTPUT_FILE")
878
+
879
+ # Check if result has error flag
880
+ HAS_ERROR=$(echo "$SKILL_RESULT" | jq -r '.error // false')
881
+
882
+ if [ "$HAS_ERROR" = "false" ]; then
883
+ # Extract metrics
884
+ CONFIDENCE=$(echo "$SKILL_RESULT" | jq -r '.confidence')
885
+ FILES_CHANGED=$(echo "$SKILL_RESULT" | jq -r '.files_changed')
886
+ CONFIDENCE_SOURCE=$(echo "$SKILL_RESULT" | jq -r '.confidence_source')
887
+ LATENCY=$(echo "$SKILL_RESULT" | jq -r '.latency_ms')
888
+
889
+ echo " ✅ $UNIQUE_AGENT_ID complete (${LATENCY}ms, confidence: $CONFIDENCE [$CONFIDENCE_SOURCE], files: $FILES_CHANGED)"
890
+
891
+ # LOG: Loop 3 agent completion
892
+ ./.claude/skills/redis-coordination/log-event.sh \
893
+ --task-id "$TASK_ID" \
894
+ --event-type "agent_complete" \
895
+ --loop "loop3" \
896
+ --agent-id "$UNIQUE_AGENT_ID" \
897
+ --iteration "$ITERATION" \
898
+ --details "{\"confidence\": $CONFIDENCE, \"confidence_source\": \"$CONFIDENCE_SOURCE\", \"files_changed\": $FILES_CHANGED, \"latency_ms\": $LATENCY}" \
899
+ --level "INFO" 2>/dev/null || true
900
+
901
+ # Store latency metric
902
+ METRIC=$(jq -nc \
903
+ --arg agent "$UNIQUE_AGENT_ID" \
904
+ --arg latency "$LATENCY" \
905
+ --arg loop "loop3" \
906
+ --arg iteration "$ITERATION" \
907
+ '{agent: $agent, latency_ms: ($latency | tonumber), loop: $loop, iteration: ($iteration | tonumber)}')
908
+ echo "$METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:agent_latency" >/dev/null
909
+
910
+ LOOP3_COMPLETED_AGENTS+=("$UNIQUE_AGENT_ID")
911
+ else
912
+ ERROR_OUTPUT=$(echo "$SKILL_RESULT" | jq -r '.output')
913
+ echo " ❌ $UNIQUE_AGENT_ID failed (skill execution error)"
914
+ echo " Error: $ERROR_OUTPUT"
915
+
916
+ # LOG: Loop 3 agent failure (details JSON is built with jq so raw agent output cannot break it)
916
+ ./.claude/skills/redis-coordination/log-event.sh \
917
+ --task-id "$TASK_ID" \
918
+ --event-type "agent_failure" \
919
+ --loop "loop3" \
920
+ --agent-id "$UNIQUE_AGENT_ID" \
921
+ --iteration "$ITERATION" \
922
+ --details "$(jq -nc --arg output "$ERROR_OUTPUT" '{error: "skill_execution_error", output: $output}')" \
923
+ --level "ERROR" 2>/dev/null || true
925
+
926
+ LOOP3_FAILED_AGENTS+=("$AGENT")
927
+ redis-cli INCR "swarm:${TASK_ID}:metrics:agent_failure_count" >/dev/null
928
+ fi
929
+
930
+ # Cleanup temp file
931
+ rm -f "$OUTPUT_FILE"
932
+ else
933
+ echo " ❌ $UNIQUE_AGENT_ID failed (no output file)"
934
+ LOOP3_FAILED_AGENTS+=("$AGENT")
935
+ redis-cli INCR "swarm:${TASK_ID}:metrics:agent_failure_count" >/dev/null
936
+ fi
937
+ else
938
+ echo " ❌ $UNIQUE_AGENT_ID failed (process error)"
939
+ LOOP3_FAILED_AGENTS+=("$AGENT")
940
+ redis-cli INCR "swarm:${TASK_ID}:metrics:agent_failure_count" >/dev/null
941
+ rm -f "$OUTPUT_FILE"
942
+ fi
943
+
944
+ echo ""
945
+ done
946
+
947
+ # Validate quorum
948
+ if [ ${#LOOP3_COMPLETED_AGENTS[@]} -ge "$LOOP3_REQUIRED" ]; then
949
+ echo "[Loop 3] ✅ Quorum met: ${#LOOP3_COMPLETED_AGENTS[@]}/$LOOP3_REQUIRED agents completed"
950
+ if [ ${#LOOP3_FAILED_AGENTS[@]} -gt 0 ]; then
951
+ echo "[Loop 3] ⚠️ Failed agents (continuing with quorum): ${LOOP3_FAILED_AGENTS[*]}"
952
+
953
+ # METRICS: Increment quorum fallback counter
954
+ redis-cli INCR "swarm:${TASK_ID}:metrics:quorum_fallback" >/dev/null
955
+ fi
956
+ else
957
+ echo "[Loop 3] ❌ Quorum FAILED: ${#LOOP3_COMPLETED_AGENTS[@]} < $LOOP3_REQUIRED"
958
+ echo "[Loop 3] Failed agents: ${LOOP3_FAILED_AGENTS[*]}"
959
+ exit 1
960
+ fi
961
+ echo ""
962
+
963
+ # Step 2: Collect Loop 3 confidence scores (only from completed agents)
964
+ echo "[Loop 3] Collecting confidence scores from ${#LOOP3_COMPLETED_AGENTS[@]} agents..."
965
+ LOOP3_COMPLETED_IDS=$(IFS=','; echo "${LOOP3_COMPLETED_AGENTS[*]}")
966
+ # Collect confidence scores from Redis (agents report before exiting)
967
+ TOTAL_CONFIDENCE=0
968
+ COUNT=0
969
+ IFS=',' read -ra AGENT_ARRAY <<< "$LOOP3_COMPLETED_IDS" # split the comma-joined ID list back into an array
970
+ for AGENT_ID in "${AGENT_ARRAY[@]}"; do
971
+ CONFIDENCE=$(redis-cli get "swarm:${TASK_ID}:${AGENT_ID}:confidence" 2>/dev/null || echo "0")
972
+ if [ "$CONFIDENCE" != "(nil)" ] && [ -n "$CONFIDENCE" ]; then
973
+ TOTAL_CONFIDENCE=$(echo "$TOTAL_CONFIDENCE + $CONFIDENCE" | bc -l)
974
+ COUNT=$((COUNT + 1))
975
+ fi
976
+ done
977
+
978
+ if [ "$COUNT" -gt 0 ]; then
979
+ LOOP3_CONSENSUS=$(echo "scale=2; $TOTAL_CONFIDENCE / $COUNT" | bc -l)
980
+ else
981
+ LOOP3_CONSENSUS="0.0"
982
+ fi
983
+
984
+
985
+ echo "[Loop 3] Average confidence: $LOOP3_CONSENSUS (from ${#LOOP3_COMPLETED_AGENTS[@]}/${LOOP3_TOTAL} agents)"
986
+
987
+ # METRICS: Store Loop 3 consensus score
988
+ LOOP3_METRIC=$(jq -nc \
989
+ --arg consensus "$LOOP3_CONSENSUS" \
990
+ --arg iteration "$ITERATION" \
991
+ '{consensus: ($consensus | tonumber), iteration: ($iteration | tonumber)}')
992
+ echo "$LOOP3_METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:loop3_consensus" >/dev/null
993
+
994
+ # SPRINT 4: Create conversation forks after iteration 1
995
+ if [ "$ITERATION" -eq 1 ]; then
996
+ echo "[Coordinator] Creating conversation forks for iteration 2..."
997
+ for AGENT in "${LOOP3_COMPLETED_AGENTS[@]}"; do
998
+ FORK_ID=$(npx cfn-fork create --task-id "$TASK_ID" --agent-id "$AGENT" --iteration 1 2>/dev/null || echo "")
999
+
1000
+ if [ -n "$FORK_ID" ] && [ "$FORK_ID" != "(nil)" ]; then
1001
+ # Store fork ID in Redis for this agent
1002
+ redis-cli setex "swarm:${TASK_ID}:${AGENT}:fork-id" 86400 "$FORK_ID" >/dev/null
1003
+ echo " ✓ Fork created for $AGENT: $FORK_ID"
1004
+ else
1005
+ echo " ⚠ Fork creation skipped for $AGENT (will use context rebuild)"
1006
+ fi
1007
+ done
1008
+ echo ""
1009
+ fi
1010
+
1011
+ # BUG #12 FIX: Deliverable Verification with explicit file checking
1012
+ echo "[Deliverable Check] Verifying implementation artifacts..."
1013
+
1014
+ # Use enhanced validate-deliverables.sh skill
1015
+ DELIVERABLE_ARGS="--task-id $TASK_ID"
1016
+ if [ -n "$EXPECTED_FILES" ]; then
1017
+ DELIVERABLE_ARGS="$DELIVERABLE_ARGS --expected-files $EXPECTED_FILES"
1018
+ echo " Expected files: $EXPECTED_FILES"
1019
+ fi
1020
+
1021
+ DELIVERABLE_STATUS=$(./.claude/skills/product-owner-decision/validate-deliverables.sh $DELIVERABLE_ARGS)
1022
+
1023
+ if [ "$DELIVERABLE_STATUS" = "FAILED" ]; then
1024
+ # Retrieve missing files from Redis (if available)
1025
+ MISSING_FILES_JSON=$(redis-cli get "swarm:${TASK_ID}:missing-files" 2>/dev/null || echo "[]")
1026
+ MISSING_FILES_LIST=$(echo "$MISSING_FILES_JSON" | jq -r '.[]' | tr '\n' ', ' | sed 's/,$//')
1027
+
1028
+ if [ -n "$MISSING_FILES_LIST" ]; then
1029
+ echo "❌ DELIVERABLE VERIFICATION FAILED: Missing files"
1030
+ echo " Expected but not found: $MISSING_FILES_LIST"
1031
+ else
1032
+ echo "❌ DELIVERABLE VERIFICATION FAILED: No files created or modified"
1033
+ fi
1034
+
1035
+ echo " This prevents 'consensus on vapor' - validators approving nothing"
1036
+ echo ""
1037
+ echo "Decision: RELAUNCH iteration $((ITERATION + 1)) (skip Loop 2 validation)"
1038
+ echo ""
1039
+
1040
+ # METRICS: Increment deliverable failure counter
1041
+ redis-cli INCR "swarm:${TASK_ID}:metrics:deliverable_failures" >/dev/null
1042
+
1043
+ # Override all Loop 3 confidence scores to 0.0 (prevent gate pass); the :confidence key is the one the recalculation below reads
1043
+ for AGENT in "${LOOP3_COMPLETED_AGENTS[@]}"; do
1044
+ redis-cli DEL "swarm:${TASK_ID}:${AGENT}:result" >/dev/null
1045
+ redis-cli LPUSH "swarm:${TASK_ID}:${AGENT}:result" "0.0" >/dev/null; redis-cli SET "swarm:${TASK_ID}:${AGENT}:confidence" "0.0" >/dev/null
1046
+ echo " [Override] ${AGENT} confidence: 1.0 → 0.0 (no deliverables)"
1047
+ done
1049
+
1050
+ # Recalculate consensus (should be 0.0 now)
1051
+ # Collect confidence scores from Redis (agents report before exiting)
1052
+ TOTAL_CONFIDENCE=0
1053
+ COUNT=0
1054
+ IFS=',' read -ra AGENT_ARRAY <<< "$LOOP3_COMPLETED_IDS" # comma is the only delimiter used when the ID list was joined
1055
+ for AGENT_ID in "${AGENT_ARRAY[@]}"; do
1056
+ CONFIDENCE=$(redis-cli get "swarm:${TASK_ID}:${AGENT_ID}:confidence" 2>/dev/null || echo "0")
1057
+ if [ "$CONFIDENCE" != "(nil)" ] && [ -n "$CONFIDENCE" ]; then
1058
+ TOTAL_CONFIDENCE=$(echo "$TOTAL_CONFIDENCE + $CONFIDENCE" | bc -l)
1059
+ COUNT=$((COUNT + 1))
1060
+ fi
1061
+ done
1062
+
1063
+ if [ "$COUNT" -gt 0 ]; then
1064
+ LOOP3_CONSENSUS=$(echo "scale=2; $TOTAL_CONFIDENCE / $COUNT" | bc -l)
1065
+ else
1066
+ LOOP3_CONSENSUS="0.0"
1067
+ fi
1068
+
1069
+
1070
+ echo ""
1071
+ echo "[Loop 3] Recalculated confidence after override: $LOOP3_CONSENSUS"
1072
+ echo ""
1073
+
1074
+ # Build specific feedback with missing files
1075
+ if [ -n "$MISSING_FILES_LIST" ]; then
1076
+ FEEDBACK="CRITICAL: Create these missing files: $MISSING_FILES_LIST
1077
+
1078
+ Use the Write tool for each file. Verify with 'ls -la' after each Write operation."
1079
+ else
1080
+ FEEDBACK="CRITICAL: You must create or modify files. No deliverables were produced in iteration $ITERATION."
1081
+ fi
1082
+
1083
+ # Wake Loop 3 agents for next iteration with HIGH priority (priority=40)
1084
+ IFS=',' read -ra AGENTS <<< "$LOOP3_AGENTS"
1085
+ for AGENT in "${AGENTS[@]}"; do
1086
+ # Get fork ID if exists
1087
+ FORK_ID=$(redis-cli get "swarm:${TASK_ID}:${AGENT}:fork-id" 2>/dev/null || echo "")
1088
+ if [ "$FORK_ID" = "(nil)" ]; then FORK_ID=""; fi
1089
+
1090
+ ./.claude/skills/redis-coordination/invoke-waiting-mode.sh wake \
1091
+ --task-id "$TASK_ID" \
1092
+ --agent-id "$AGENT" \
1093
+ --priority 40 \
1094
+ --reason "no_deliverables" \
1095
+ --iteration $((ITERATION + 1)) \
1096
+ --fork-id "$FORK_ID" \
1097
+ --feedback "$FEEDBACK"
1098
+ done
1099
+
1100
+ continue # Next iteration (skip gate check and Loop 2)
1101
+ fi
1102
+
1103
+ echo "[Deliverable Check] ✅ Deliverables verified - proceeding to gate check"
1104
+ echo ""
1105
+
1106
+ # Gate check
1107
+ if (( $(echo "$LOOP3_CONSENSUS < $GATE" | bc -l) )); then
1108
+ echo "❌ Gate FAILED ($LOOP3_CONSENSUS < $GATE)"
1109
+ echo "Decision: RELAUNCH iteration $((ITERATION + 1))"
1110
+
1111
+ # LOG: Gate check failure
1112
+ ./.claude/skills/redis-coordination/log-event.sh \
1113
+ --task-id "$TASK_ID" \
1114
+ --event-type "gate_check" \
1115
+ --iteration "$ITERATION" \
1116
+ --details "{\"consensus\": $LOOP3_CONSENSUS, \"threshold\": $GATE, \"result\": \"FAIL\", \"decision\": \"RELAUNCH\"}" \
1117
+ --level "WARN" 2>/dev/null || true
1118
+
1119
+ # METRICS: Increment gate failure counter
1120
+ redis-cli INCR "swarm:${TASK_ID}:metrics:gate_failures" >/dev/null
1121
+
1122
+ # Wake Loop 3 agents for next iteration with MEDIUM priority (priority=30)
1123
+ IFS=',' read -ra AGENTS <<< "$LOOP3_AGENTS"
1124
+ for AGENT in "${AGENTS[@]}"; do
1125
+ # SPRINT 4: Get fork ID if exists
1126
+ FORK_ID=$(redis-cli get "swarm:${TASK_ID}:${AGENT}:fork-id" 2>/dev/null || echo "")
1127
+ if [ "$FORK_ID" = "(nil)" ]; then FORK_ID=""; fi
1128
+
1129
+ ./.claude/skills/redis-coordination/invoke-waiting-mode.sh wake \
1130
+ --task-id "$TASK_ID" \
1131
+ --agent-id "$AGENT" \
1132
+ --priority 30 \
1133
+ --reason "gate_failed" \
1134
+ --iteration $((ITERATION + 1)) \
1135
+ --fork-id "$FORK_ID" \
1136
+ --feedback "Improve confidence from $LOOP3_CONSENSUS to >$GATE"
1137
+ done
1138
+
1139
+ continue # Next iteration
1140
+ fi
1141
+
1142
+ echo "✅ Gate PASSED ($LOOP3_CONSENSUS >= $GATE)"
1143
+
1144
+ # LOG: Gate check success
1145
+ ./.claude/skills/redis-coordination/log-event.sh \
1146
+ --task-id "$TASK_ID" \
1147
+ --event-type "gate_check" \
1148
+ --iteration "$ITERATION" \
1149
+ --details "{\"consensus\": $LOOP3_CONSENSUS, \"threshold\": $GATE, \"result\": \"PASS\"}" \
1150
+ --level "INFO" 2>/dev/null || true
1151
+
1152
+ echo ""
1153
+
1154
+ # Signal Loop 2 validators that gate has passed (they can start work)
1155
+ GATE_PASS_KEY="swarm:${TASK_ID}:gate-passed"
1156
+ redis-cli lpush "$GATE_PASS_KEY" "{\"iteration\": $ITERATION, \"loop3_confidence\": $LOOP3_CONSENSUS}" > /dev/null
1157
+ echo "[Loop 3] Gate pass signal sent to Loop 2 validators"
1158
+ echo ""
1159
+
1160
+ # Step 3: Build Loop 2 validator context (BUG #20 FIX - inject same deliverables)
1161
+ LOOP2_VALIDATOR_CONTEXT="Loop 2 validation for iteration $ITERATION
1162
+
1163
+ Review Loop 3 implementation against these requirements:
1164
+
1165
+ Epic Goal: $EPIC_GOAL
1166
+
1167
+ Expected Deliverables:
1168
+ $DELIVERABLES
1169
+ $([ -n "$DIRECTORY" ] && echo "
1170
+ Target Directory: $DIRECTORY")
1171
+
1172
+ Acceptance Criteria:
1173
+ $ACCEPTANCE
1174
+
1175
+ Your Validation Tasks:
1176
+ - Verify all deliverable files exist in correct directory
1177
+ - Check files contain actual implementation (not placeholders)
1178
+ - Validate against acceptance criteria
1179
+ - Provide structured feedback (critical/warnings/suggestions)
1180
+ - Report confidence score based on deliverable completeness
1181
+ "
1182
+
1183
+ echo "[Loop 2] Validator context built"
1184
+ echo ""
1185
+
1186
+ # Step 4: Spawn Loop 2 validators using skill-based output processing (parallel execution)
1187
+ echo "[Loop 2] Using skill-based output processing (parallel execution)"
1188
+ IFS=',' read -ra VALIDATORS <<< "$LOOP2_AGENTS"
1189
+
1190
+ # Track instance counts to generate unique validator IDs for duplicate validator types
1191
+ declare -A VALIDATOR_INSTANCE_COUNTS
1192
+ declare -A VALIDATOR_IDS # Map from array index to unique validator ID
1193
+
1194
+ # Pre-calculate unique validator IDs
1195
+ for i in "${!VALIDATORS[@]}"; do
1196
+ VALIDATOR="${VALIDATORS[$i]}"
1197
+
1198
+ # Increment instance counter for this validator type
1199
+ VALIDATOR_INSTANCE_COUNTS["$VALIDATOR"]=$((${VALIDATOR_INSTANCE_COUNTS["$VALIDATOR"]:-0} + 1))
1200
+ INSTANCE_NUM="${VALIDATOR_INSTANCE_COUNTS["$VALIDATOR"]}"
1201
+
1202
+ # Generate unique validator ID: validator-type-iteration-instance
1203
+ UNIQUE_VALIDATOR_ID="${VALIDATOR}-${ITERATION}-${INSTANCE_NUM}"
1204
+ VALIDATOR_IDS["$i"]="$UNIQUE_VALIDATOR_ID"
1205
+
1206
+ echo " [Instance Tracking] ${VALIDATOR} #${INSTANCE_NUM} → ${UNIQUE_VALIDATOR_ID}"
1207
+ done
1208
+
1209
+ echo ""
1210
+
1211
+ # Step 3a: Spawn all validators in parallel using skill
1212
+ echo "[Loop 2] Spawning validators in parallel..."
1213
+ declare -A VALIDATOR_PIDS # Map from validator ID to background PID
1214
+ declare -A VALIDATOR_OUTPUT_FILES # Map from validator ID to temp output file
1215
+
1216
+ LOOP2_TOTAL=${#VALIDATORS[@]}
1217
+ LOOP2_REQUIRED=$(calculate_quorum "$MIN_QUORUM_LOOP2" "$LOOP2_TOTAL")
1218
+
1219
+ echo "[Loop 2] Quorum: $LOOP2_REQUIRED/$LOOP2_TOTAL validators required"
1220
+ echo ""
1221
+
1222
+ for i in "${!VALIDATORS[@]}"; do
1223
+ VALIDATOR="${VALIDATORS[$i]}"
1224
+ UNIQUE_VALIDATOR_ID="${VALIDATOR_IDS[$i]}"
1225
+
1226
+ # Get agent-specific timeout (use base validator type, not unique ID)
1227
+ AGENT_TIMEOUT=$(get_agent_timeout "$VALIDATOR" "$TASK_ID")
1228
+
1229
+ # Create temp output file for this validator
1230
+ OUTPUT_FILE="/tmp/loop2-${TASK_ID}-${UNIQUE_VALIDATOR_ID}.json"
1231
+ VALIDATOR_OUTPUT_FILES["$UNIQUE_VALIDATOR_ID"]="$OUTPUT_FILE"
1232
+
1233
+ echo " Spawning: $VALIDATOR (ID: $UNIQUE_VALIDATOR_ID, timeout: ${AGENT_TIMEOUT}s)"
1234
+
1235
+ # Execute skill in background - captures agent output and extracts structured data
1236
+ (
1237
+ # METRICS: Agent latency start
1238
+ AGENT_START=$(date +%s%N | cut -b1-13)
1239
+
1240
+ # Execute skill to spawn validator and extract feedback (BUG #20 FIX - inject detailed context)
1241
+ SKILL_RESULT=$(./.claude/skills/loop2-output-processing/execute-and-extract.sh \
1242
+ --agent-type "$VALIDATOR" \
1243
+ --task-id "$TASK_ID" \
1244
+ --agent-id "$UNIQUE_VALIDATOR_ID" \
1245
+ --context "$LOOP2_VALIDATOR_CONTEXT" \
1246
+ --iteration "$ITERATION" \
1247
+ --timeout "$AGENT_TIMEOUT" 2>&1)
1248
+
1249
+ # METRICS: Agent latency end
1250
+ AGENT_END=$(date +%s%N | cut -b1-13)
1251
+ LATENCY=$((AGENT_END - AGENT_START))
1252
+
1253
+ # Inject latency into result JSON
1254
+ SKILL_RESULT_WITH_LATENCY=$(echo "$SKILL_RESULT" | jq --arg latency "$LATENCY" '. + {latency_ms: ($latency | tonumber)}')
1255
+
1256
+ # Write result to temp file
1257
+ echo "$SKILL_RESULT_WITH_LATENCY" > "$OUTPUT_FILE"
1258
+
1259
+ # Also push to Redis for compatibility with existing tools
1260
+ echo "$SKILL_RESULT_WITH_LATENCY" | redis-cli -x LPUSH "swarm:${TASK_ID}:${UNIQUE_VALIDATOR_ID}:result" >/dev/null
1261
+
1262
+ # Signal completion
1263
+ redis-cli LPUSH "swarm:${TASK_ID}:${UNIQUE_VALIDATOR_ID}:done" "complete" >/dev/null
1264
+ ) &
1265
+
1266
+ # Track background PID
1267
+ VALIDATOR_PIDS["$UNIQUE_VALIDATOR_ID"]=$!
1268
+ echo " ✅ Spawned $UNIQUE_VALIDATOR_ID (PID: ${VALIDATOR_PIDS[$UNIQUE_VALIDATOR_ID]})"
1269
+ done
1270
+
1271
+ echo ""
1272
+ echo "[Loop 2] All validators spawned, waiting for completion..."
1273
+ echo ""
1274
+
1275
+ # Step 3b: Wait for all validators to complete and collect results
1276
+ LOOP2_COMPLETED_AGENTS=()
1277
+ LOOP2_FAILED_AGENTS=()
1278
+ declare -A LOOP2_CONFIDENCES # Map from validator ID to confidence score
1279
+
1280
+ for i in "${!VALIDATORS[@]}"; do
1281
+ VALIDATOR="${VALIDATORS[$i]}"
1282
+ UNIQUE_VALIDATOR_ID="${VALIDATOR_IDS[$i]}"
1283
+ VALIDATOR_PID="${VALIDATOR_PIDS[$UNIQUE_VALIDATOR_ID]}"
1284
+ OUTPUT_FILE="${VALIDATOR_OUTPUT_FILES[$UNIQUE_VALIDATOR_ID]}"
1285
+
1286
+ echo " Waiting for $UNIQUE_VALIDATOR_ID (PID: $VALIDATOR_PID)..."
1287
+
1288
+ # Wait for background process to complete
1289
+ if wait "$VALIDATOR_PID" 2>/dev/null; then
1290
+ # Process completed successfully, read result from temp file
1291
+ if [ -f "$OUTPUT_FILE" ] && [ -s "$OUTPUT_FILE" ]; then
1292
+ SKILL_RESULT=$(cat "$OUTPUT_FILE")
1293
+
1294
+ # Validate JSON structure
1295
+ if echo "$SKILL_RESULT" | jq empty 2>/dev/null; then
1296
+ # Extract confidence score
1297
+ CONFIDENCE=$(echo "$SKILL_RESULT" | jq -r '.confidence // 0.0')
1298
+ CONFIDENCE_SOURCE=$(echo "$SKILL_RESULT" | jq -r '.confidence_source // "unknown"')
1299
+ FEEDBACK=$(echo "$SKILL_RESULT" | jq -r '.feedback // {}')
1300
+ LATENCY=$(echo "$SKILL_RESULT" | jq -r '.latency_ms // 0')
1301
+
1302
+ # Store confidence for consensus calculation
1303
+ LOOP2_CONFIDENCES["$UNIQUE_VALIDATOR_ID"]="$CONFIDENCE"
1304
+
1305
+ # Store latency metric
1306
+ METRIC=$(jq -nc \
1307
+ --arg agent "$UNIQUE_VALIDATOR_ID" \
1308
+ --arg latency "$LATENCY" \
1309
+ --arg loop "loop2" \
1310
+ --arg iteration "$ITERATION" \
1311
+ '{agent: $agent, latency_ms: ($latency | tonumber), loop: $loop, iteration: ($iteration | tonumber)}')
1312
+ echo "$METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:agent_latency" >/dev/null
1313
+
1314
+ # Count feedback items
1315
+ CRITICAL_COUNT=$(echo "$FEEDBACK" | jq -r '.critical | length')
1316
+ WARNINGS_COUNT=$(echo "$FEEDBACK" | jq -r '.warnings | length')
1317
+ SUGGESTIONS_COUNT=$(echo "$FEEDBACK" | jq -r '.suggestions | length')
1318
+
1319
+ echo " ✅ $UNIQUE_VALIDATOR_ID complete (${LATENCY}ms, confidence: $CONFIDENCE [$CONFIDENCE_SOURCE], feedback: ${CRITICAL_COUNT}C/${WARNINGS_COUNT}W/${SUGGESTIONS_COUNT}S)"
1320
+
1321
+ LOOP2_COMPLETED_AGENTS+=("$UNIQUE_VALIDATOR_ID")
1322
+ else
1323
+ echo " ⚠️ $UNIQUE_VALIDATOR_ID returned invalid JSON, treating as failed"
1324
+ LOOP2_FAILED_AGENTS+=("$VALIDATOR")
1325
+
1326
+ # METRICS: Increment timeout counter
1327
+ redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
1328
+ fi
1329
+ else
1330
+ echo " ⚠️ $UNIQUE_VALIDATOR_ID completed but no output file found"
1331
+ LOOP2_FAILED_AGENTS+=("$VALIDATOR")
1332
+
1333
+ # METRICS: Increment timeout counter
1334
+ redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
1335
+ fi
1336
+ else
1337
+ echo " ❌ $UNIQUE_VALIDATOR_ID failed (process exited with error)"
1338
+ LOOP2_FAILED_AGENTS+=("$VALIDATOR")
1339
+
1340
+ # METRICS: Increment timeout counter
1341
+ redis-cli INCR "swarm:${TASK_ID}:metrics:timeout_count" >/dev/null
1342
+ fi
1343
+
1344
+ # Cleanup temp file
1345
+ rm -f "$OUTPUT_FILE"
1346
+ done
1347
+
1348
+ echo ""
1349
+
1350
+ # Validate quorum
1351
+ if [ ${#LOOP2_COMPLETED_AGENTS[@]} -ge "$LOOP2_REQUIRED" ]; then
1352
+ echo "[Loop 2] ✅ Quorum met: ${#LOOP2_COMPLETED_AGENTS[@]}/$LOOP2_REQUIRED validators completed"
1353
+ if [ ${#LOOP2_FAILED_AGENTS[@]} -gt 0 ]; then
1354
+ echo "[Loop 2] ⚠️ Failed validators (continuing with quorum): ${LOOP2_FAILED_AGENTS[*]}"
1355
+
1356
+ # METRICS: Increment quorum fallback counter
1357
+ redis-cli INCR "swarm:${TASK_ID}:metrics:quorum_fallback" >/dev/null
1358
+ fi
1359
+ else
1360
+ echo "[Loop 2] ❌ Quorum FAILED: ${#LOOP2_COMPLETED_AGENTS[@]} < $LOOP2_REQUIRED"
1361
+ echo "[Loop 2] Failed validators: ${LOOP2_FAILED_AGENTS[*]}"
1362
+ exit 1
1363
+ fi
1364
+ echo ""
1365
+
1366
+ # Step 3c: Calculate Loop 2 consensus from extracted confidence scores
1367
+ echo "[Loop 2] Calculating consensus from ${#LOOP2_COMPLETED_AGENTS[@]} validators..."
1368
+
1369
+ # Calculate average confidence from completed validators
1370
+ LOOP2_TOTAL_CONFIDENCE=0
1371
+ LOOP2_CONFIDENCE_COUNT=0
1372
+
1373
+ for VALIDATOR_ID in "${LOOP2_COMPLETED_AGENTS[@]}"; do
1374
+ CONFIDENCE="${LOOP2_CONFIDENCES[$VALIDATOR_ID]}"
1375
+ if [ -n "$CONFIDENCE" ] && [ "$CONFIDENCE" != "null" ]; then
1376
+ LOOP2_TOTAL_CONFIDENCE=$(echo "$LOOP2_TOTAL_CONFIDENCE + $CONFIDENCE" | bc -l)
1377
+ LOOP2_CONFIDENCE_COUNT=$((LOOP2_CONFIDENCE_COUNT + 1))
1378
+ fi
1379
+ done
1380
+
1381
+ if [ "$LOOP2_CONFIDENCE_COUNT" -gt 0 ]; then
1382
+ LOOP2_CONSENSUS=$(echo "scale=2; $LOOP2_TOTAL_CONFIDENCE / $LOOP2_CONFIDENCE_COUNT" | bc -l)
1383
+ else
1384
+ echo "⚠️ No valid confidence scores found, defaulting to 0.0"
1385
+ LOOP2_CONSENSUS=0.0
1386
+ fi
1387
+
1388
+ echo "[Loop 2] Average consensus: $LOOP2_CONSENSUS (from ${LOOP2_CONFIDENCE_COUNT} validators)"
1389
+
1390
+ # METRICS: Store Loop 2 consensus score
1391
+ LOOP2_METRIC=$(jq -nc \
1392
+ --arg consensus "$LOOP2_CONSENSUS" \
1393
+ --arg iteration "$ITERATION" \
1394
+ '{consensus: ($consensus | tonumber), iteration: ($iteration | tonumber)}')
1395
+ echo "$LOOP2_METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:loop2_consensus" >/dev/null
1396
+
1397
+ # Display consensus status
1398
+ echo ""
1399
+ if (( $(echo "$LOOP2_CONSENSUS >= $CONSENSUS" | bc -l) )); then
1400
+ echo "✅ CONSENSUS REACHED ($LOOP2_CONSENSUS >= $CONSENSUS)"
1401
+ else
1402
+ echo "⚠️ CONSENSUS NOT REACHED ($LOOP2_CONSENSUS < $CONSENSUS)"
1403
+ fi
1404
+ echo ""
1405
+
1406
+ # [BUG #11 FIX] Product Owner decision via output parsing (not Redis wait)
1407
+ echo "[Product Owner] Spawning Product Owner for strategic decision..."
1408
+
1409
+ # BUG #19 FIX: Define PO_UNIQUE_ID BEFORE building context string
1410
+ PO_UNIQUE_ID="${PRODUCT_OWNER}-${ITERATION}-decision"
1411
+
1412
+ # Build Product Owner context
1413
+ PO_CONTEXT="CFN Loop iteration $ITERATION complete.
1414
+
1415
+ Loop 2 Consensus: $LOOP2_CONSENSUS (threshold: $CONSENSUS)
1416
+ Task ID: $TASK_ID
1417
+ Agent ID: $PO_UNIQUE_ID
1418
+
1419
+ Make your strategic decision: PROCEED, ITERATE, or ABORT
1420
+
1421
+ Decision Framework:
1422
+ - PROCEED: Consensus >= $CONSENSUS AND deliverables verified
1423
+ - ITERATE: Consensus < $CONSENSUS AND iteration < $MAX_ITERATIONS
1424
+ - ABORT: Max iterations reached without consensus
1425
+
1426
+ Output your decision clearly with reasoning."
1427
+
1428
+ # Spawn Product Owner and capture output
1429
+ PO_TIMEOUT=$(get_agent_timeout "$PRODUCT_OWNER" "$TASK_ID")
1430
+ echo "[Product Owner] Spawning with timeout: ${PO_TIMEOUT}s"
1431
+
1432
+ PO_OUTPUT=$(timeout "$PO_TIMEOUT" npx claude-flow-novice agent "$PRODUCT_OWNER" \
1433
+ --task-id "$TASK_ID" \
1434
+ --agent-id "$PO_UNIQUE_ID" \
1435
+ --context "$PO_CONTEXT" 2>&1 || true)
1436
+
1437
+ # Parse decision from output with multiple fallback patterns
1438
+ DECISION_TYPE=$(echo "$PO_OUTPUT" | grep -oiE "Decision:\s*(PROCEED|ITERATE|ABORT)" | \
1439
+ grep -oE "(PROCEED|ITERATE|ABORT)" | head -1)
1440
+
1441
+ if [ -z "$DECISION_TYPE" ]; then
1442
+ # Fallback: Look for standalone keywords
1443
+ DECISION_TYPE=$(echo "$PO_OUTPUT" | grep -oE "(PROCEED|ITERATE|ABORT)" | head -1)
1444
+ fi
1445
+
1446
+ if [ -z "$DECISION_TYPE" ]; then
1447
+ echo "❌ ERROR: Could not parse Product Owner decision from output"
1448
+ echo "Product Owner output:"
1449
+ echo "$PO_OUTPUT"
1450
+ exit 1
1451
+ fi
1452
+
1453
+ # Extract reasoning (text context around decision)
1454
+ REASONING=$(echo "$PO_OUTPUT" | grep -A5 -i "decision" | tail -5 | tr '\n' ' ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
1455
+
1456
+ # Build decision JSON and push to Redis (orchestrator's responsibility)
1457
+ DECISION=$(jq -n \
1458
+ --arg decision "$DECISION_TYPE" \
1459
+ --arg reasoning "${REASONING:-Parsed from Product Owner output}" \
1460
+ --arg confidence "0.90" \
1461
+ '{decision: $decision, reasoning: $reasoning, confidence: ($confidence | tonumber)}')
1462
+
1463
+ DECISION_KEY="swarm:${TASK_ID}:${PO_UNIQUE_ID}:decision"
1464
+ echo "$DECISION" | redis-cli -x LPUSH "$DECISION_KEY" >/dev/null
1465
+
1466
+ # Signal Product Owner completion
1467
+ redis-cli LPUSH "swarm:${TASK_ID}:${PO_UNIQUE_ID}:done" "complete" >/dev/null
1468
+
1469
+ # LOG: Product Owner decision
1470
+ ./.claude/skills/redis-coordination/log-event.sh \
1471
+ --task-id "$TASK_ID" \
1472
+ --event-type "po_decision" \
1473
+ --agent-id "$PO_UNIQUE_ID" \
1474
+ --iteration "$ITERATION" \
1475
+ --details "$DECISION" \
1476
+ --level "INFO" 2>/dev/null || true
1477
+
1478
+ echo "[Product Owner] Decision: $DECISION_TYPE"
1479
+ echo ""
1480
+
1481
+ # Handle Product Owner decision
1482
+ if [ "$DECISION_TYPE" = "PROCEED" ]; then
1483
+ # DELIVERABLE VERIFICATION (Sprint 8 - prevent "consensus on vapor")
1484
+ echo "[Deliverable Verification] Checking success criteria..."
1485
+
1486
+ SUCCESS_CRITERIA_RAW=$(redis-cli GET "swarm:${TASK_ID}:success-criteria" 2>/dev/null)
1487
+ if [ -n "$SUCCESS_CRITERIA_RAW" ]; then
1488
+ # Check if task description includes file/deliverable keywords
1489
+ TASK_DESC=$(redis-cli GET "swarm:${TASK_ID}:task" 2>/dev/null)
1490
+
1491
+ if echo "$TASK_DESC" | grep -qiE "create|build|implement|generate|file|component|module|test"; then
1492
+ echo "[Deliverable Verification] Task involves implementation - checking for file changes..."
1493
+
1494
+ # Count created/modified paths reported by git: staged (A/M in column 1), unstaged (A/M in column 2), or untracked (??)
1494
+ FILES_CREATED=$(git status --short 2>/dev/null | grep -E "^(A|M|.[AM]|\\?\\?)" | wc -l)
1496
+
1497
+ if [ "$FILES_CREATED" -eq 0 ]; then
1498
+ echo "⚠️ DELIVERABLE VERIFICATION FAILED"
1499
+ echo " Task requires implementation but no files were created/modified"
1500
+ echo " Consensus reached on plans without actual deliverables"
1501
+ echo ""
1502
+ echo " Options:"
1503
+ echo " 1. Force ITERATE to create actual implementation"
1504
+ echo " 2. Override verification (--skip-deliverable-check flag)"
1505
+ echo " 3. Manual intervention to verify work was done"
1506
+ echo ""
1507
+ echo " Recommendation: Force ITERATE with explicit deliverable requirement"
1508
+
1509
+ # Store verification failure
1510
+ redis-cli SET "swarm:${TASK_ID}:deliverable_verification" "failed" EX 86400 >/dev/null
1511
+
1512
+ # Optional: Force ITERATE (commented for now - requires flag)
1513
+ # echo "[Forced Override] Changing PROCEED → ITERATE due to missing deliverables"
1514
+ # DECISION_TYPE="ITERATE"
1515
+ # DECISION_REASONING="No deliverables created despite implementation task"
1516
+ else
1517
+ echo "✅ Deliverable verification passed ($FILES_CREATED files created/modified)"
1518
+ redis-cli SET "swarm:${TASK_ID}:deliverable_verification" "passed:$FILES_CREATED" EX 86400 >/dev/null
1519
+ fi
1520
+ else
1521
+ echo "[Deliverable Verification] Task is analysis/planning - skipping file check"
1522
+ fi
1523
+ fi
1524
+
1525
+ echo "🎉 CFN Loop Complete (Product Owner: PROCEED)"
1526
+ echo "Final Consensus: $LOOP2_CONSENSUS (Iteration $ITERATION)"
1527
+
1528
+ # METRICS: Iteration end timestamp and duration
1529
+ ITERATION_END=$(date +%s%N | cut -b1-13)
1530
+ ITERATION_DURATION=$((ITERATION_END - ITERATION_START))
1531
+
1532
+ # Store final iteration duration metric
1533
+ DURATION_METRIC=$(jq -nc \
1534
+ --arg duration "$ITERATION_DURATION" \
1535
+ --arg iteration "$ITERATION" \
1536
+ '{duration_ms: ($duration | tonumber), iteration: ($iteration | tonumber)}')
1537
+ echo "$DURATION_METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:iteration_duration" >/dev/null
1538
+
1539
+ # Wake all agents with completion signal - CRITICAL priority (priority=5)
1540
+ echo "[Coordinator] Waking all agents with CRITICAL priority for completion..."
1541
+ IFS=',' read -ra ALL_AGENTS <<< "$LOOP3_AGENTS,$LOOP2_AGENTS"
1542
+ for AGENT in "${ALL_AGENTS[@]}"; do
1543
+ ./.claude/skills/redis-coordination/invoke-waiting-mode.sh wake \
1544
+ --task-id "$TASK_ID" \
1545
+ --agent-id "$AGENT" \
1546
+ --priority 5 \
1547
+ --reason "cfn_complete" \
1548
+ --iteration "$ITERATION"
1549
+ done
1550
+
1551
+ # Use general complete-swarm primitive
1552
+ ./.claude/skills/redis-coordination/complete-swarm.sh \
1553
+ --swarm-id "$SWARM_ID" \
1554
+ --final-metric "final_consensus=$LOOP2_CONSENSUS" \
1555
+ --final-metric "total_iterations=$ITERATION" > /dev/null
1556
+
1557
+ exit 0
1558
+
1559
+ elif [ "$DECISION_TYPE" = "ITERATE" ]; then
1560
+ echo "⚠️ Product Owner Decision: ITERATE (improve quality)"
1561
+
1562
+ # METRICS: Iteration end timestamp and duration
1563
+ ITERATION_END=$(date +%s%N | cut -b1-13)
1564
+ ITERATION_DURATION=$((ITERATION_END - ITERATION_START))
1565
+
1566
+ # Store iteration duration metric
1567
+ DURATION_METRIC=$(jq -nc \
1568
+ --arg duration "$ITERATION_DURATION" \
1569
+ --arg iteration "$ITERATION" \
1570
+ '{duration_ms: ($duration | tonumber), iteration: ($iteration | tonumber)}')
1571
+ echo "$DURATION_METRIC" | redis-cli -x LPUSH "swarm:${TASK_ID}:metrics:iteration_duration" >/dev/null
1572
+
1573
+ # Check max iterations (-ge so an overshoot also stops; operands quoted so word splitting cannot alter the test)
1573
+ if [ "$ITERATION" -ge "$MAX_ITERATIONS" ]; then
1574
+ echo "❌ Maximum iterations ($MAX_ITERATIONS) reached - cannot iterate further"
1575
+ echo " Product Owner wanted ITERATE but max iterations exhausted"
1576
+ exit 1
1577
+ fi
1579
+
1580
+ # Wake agents for next iteration with role-based priorities
1581
+ echo "[Coordinator] Waking agents for iteration $((ITERATION + 1)) with priorities..."
1582
+
1583
+ # Wake Loop 3 implementers with MEDIUM priority (priority=30)
1584
+ IFS=',' read -ra LOOP3_ARRAY <<< "$LOOP3_AGENTS"
1585
+ for AGENT in "${LOOP3_ARRAY[@]}"; do
1586
+ # SPRINT 4: Get fork ID if exists
1587
+ FORK_ID=$(redis-cli get "swarm:${TASK_ID}:${AGENT}:fork-id" 2>/dev/null || echo "")
1588
+ if [ "$FORK_ID" = "(nil)" ]; then FORK_ID=""; fi
1589
+
1590
+ ./.claude/skills/redis-coordination/invoke-waiting-mode.sh wake \
1591
+ --task-id "$TASK_ID" \
1592
+ --agent-id "$AGENT" \
1593
+ --priority 30 \
1594
+ --reason "cfn_loop_iteration" \
1595
+ --iteration $((ITERATION + 1)) \
1596
+ --fork-id "$FORK_ID" \
1597
+ --feedback "Product Owner decision: ITERATE - Improve consensus from $LOOP2_CONSENSUS to >=$CONSENSUS"
1598
+ done
1599
+
1600
+ # REMOVED: Agents now exit cleanly, no wake needed # Wake Loop 2 validators with HIGH priority (priority=10)
1601
+ # REMOVED: Agents now exit cleanly, no wake needed IFS=',' read -ra LOOP2_ARRAY <<< "$LOOP2_AGENTS"
1602
+ # REMOVED: Agents now exit cleanly, no wake needed for AGENT in "${LOOP2_ARRAY[@]}"; do
1603
+ # REMOVED: Agents now exit cleanly, no wake needed ./.claude/skills/redis-coordination/invoke-waiting-mode.sh wake \
1604
+ # REMOVED: Agents now exit cleanly, no wake needed --task-id "$TASK_ID" \
1605
+ # REMOVED: Agents now exit cleanly, no wake needed --agent-id "$AGENT" \
1606
+ # REMOVED: Agents now exit cleanly, no wake needed --priority 10 \
1607
+ # REMOVED: Agents now exit cleanly, no wake needed --reason "cfn_loop_iteration" \
1608
+ # REMOVED: Agents now exit cleanly, no wake needed --iteration $((ITERATION + 1)) \
1609
+ # REMOVED: Agents now exit cleanly, no wake needed --feedback "Product Owner decision: ITERATE - Improve consensus from $LOOP2_CONSENSUS to >=$CONSENSUS"
1610
+ # REMOVED: Agents now exit cleanly, no wake needed done
1611
+ # REMOVED: Agents now exit cleanly, no wake needed
1612
+ echo ""
1613
+
1614
+ elif [ "$DECISION_TYPE" = "ABORT" ]; then
1615
+ echo "❌ Product Owner Decision: ABORT (scope too large or out of scope)"
1616
+ echo " Consensus: $LOOP2_CONSENSUS, Iteration: $ITERATION"
1617
+ exit 1
1618
+
1619
+ else
1620
+ echo "❌ ERROR: Unknown Product Owner decision: $DECISION_TYPE"
1621
+ echo " Expected: PROCEED, ITERATE, or ABORT"
1622
+ exit 1
1623
+ fi
1624
+ done
1625
+
1626
+ echo "❌ CFN Loop failed after $MAX_ITERATIONS iterations"
1627
+ exit 1