claude-flow-novice 2.14.3 → 2.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.claude/commands/CFN_LOOP_TASK_MODE.md +4 -47
  2. package/.claude/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  3. package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +4 -47
  4. package/claude-assets/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  5. package/dist/agents/agent-loader.js +165 -146
  6. package/dist/agents/agent-loader.js.map +1 -1
  7. package/dist/cli/agent-prompt-builder.js +25 -0
  8. package/dist/cli/agent-prompt-builder.js.map +1 -1
  9. package/dist/cli/config-manager.js +91 -109
  10. package/package.json +1 -1
  11. package/.claude/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  12. package/.claude/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  13. package/.claude/skills/cfn-redis-coordination/LOGGING.md +0 -260
  14. package/.claude/skills/cfn-redis-coordination/README.md +0 -65
  15. package/.claude/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  16. package/.claude/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  17. package/.claude/skills/cfn-redis-coordination/SKILL.md +0 -720
  18. package/.claude/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  19. package/.claude/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  20. package/.claude/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  21. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  22. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  23. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  24. package/.claude/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  25. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  26. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  27. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  28. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  29. package/.claude/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  30. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  31. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  32. package/.claude/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  33. package/.claude/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  34. package/.claude/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  35. package/.claude/skills/cfn-redis-coordination/examples/README.md +0 -73
  36. package/.claude/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  37. package/.claude/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  38. package/.claude/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  39. package/.claude/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  40. package/.claude/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  41. package/.claude/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  42. package/.claude/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  43. package/.claude/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  44. package/.claude/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  45. package/.claude/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  46. package/.claude/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  47. package/.claude/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  48. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  49. package/.claude/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  50. package/.claude/skills/cfn-redis-coordination/log-event.sh +0 -109
  51. package/.claude/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  52. package/.claude/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  53. package/.claude/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  54. package/.claude/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  55. package/.claude/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  56. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  57. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  58. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  59. package/.claude/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  60. package/.claude/skills/cfn-redis-coordination/priority_wake.py +0 -134
  61. package/.claude/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  62. package/.claude/skills/cfn-redis-coordination/query-logs.sh +0 -103
  63. package/.claude/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  64. package/.claude/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  65. package/.claude/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  66. package/.claude/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  67. package/.claude/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  68. package/.claude/skills/cfn-redis-coordination/signal.sh +0 -38
  69. package/.claude/skills/cfn-redis-coordination/store-context.sh +0 -86
  70. package/.claude/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  71. package/.claude/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  72. package/.claude/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  73. package/.claude/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  74. package/.claude/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  75. package/.claude/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  76. package/.claude/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  77. package/.claude/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  78. package/.claude/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  79. package/.claude/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  80. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  81. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  82. package/.claude/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  83. package/.claude/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  84. package/.claude/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  85. package/.claude/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  86. package/.claude/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
  87. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  88. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  89. package/claude-assets/skills/cfn-redis-coordination/LOGGING.md +0 -260
  90. package/claude-assets/skills/cfn-redis-coordination/README.md +0 -65
  91. package/claude-assets/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  92. package/claude-assets/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  93. package/claude-assets/skills/cfn-redis-coordination/SKILL.md +0 -720
  94. package/claude-assets/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  95. package/claude-assets/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  96. package/claude-assets/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  97. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  98. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  99. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  100. package/claude-assets/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  101. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  102. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  103. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  104. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  105. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  106. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  107. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  108. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  109. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  110. package/claude-assets/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  111. package/claude-assets/skills/cfn-redis-coordination/examples/README.md +0 -73
  112. package/claude-assets/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  113. package/claude-assets/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  114. package/claude-assets/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  115. package/claude-assets/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  116. package/claude-assets/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  117. package/claude-assets/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  118. package/claude-assets/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  119. package/claude-assets/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  120. package/claude-assets/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  121. package/claude-assets/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  122. package/claude-assets/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  123. package/claude-assets/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  124. package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  125. package/claude-assets/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  126. package/claude-assets/skills/cfn-redis-coordination/log-event.sh +0 -109
  127. package/claude-assets/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  128. package/claude-assets/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  129. package/claude-assets/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  130. package/claude-assets/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  131. package/claude-assets/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  132. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  133. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  134. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  135. package/claude-assets/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  136. package/claude-assets/skills/cfn-redis-coordination/priority_wake.py +0 -134
  137. package/claude-assets/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  138. package/claude-assets/skills/cfn-redis-coordination/query-logs.sh +0 -103
  139. package/claude-assets/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  140. package/claude-assets/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  141. package/claude-assets/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  142. package/claude-assets/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  143. package/claude-assets/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  144. package/claude-assets/skills/cfn-redis-coordination/signal.sh +0 -38
  145. package/claude-assets/skills/cfn-redis-coordination/store-context.sh +0 -86
  146. package/claude-assets/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  147. package/claude-assets/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  148. package/claude-assets/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  149. package/claude-assets/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  150. package/claude-assets/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  151. package/claude-assets/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  152. package/claude-assets/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  153. package/claude-assets/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  154. package/claude-assets/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  155. package/claude-assets/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  156. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  157. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  158. package/claude-assets/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  159. package/claude-assets/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  160. package/claude-assets/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  161. package/claude-assets/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  162. package/claude-assets/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
@@ -1,267 +0,0 @@
1
- # Heartbeat Monitoring for CFN Loop Orchestration
2
-
3
- ## Overview
4
-
5
- The orchestrator includes built-in heartbeat monitoring to detect hung or unresponsive agents during BLPOP waiting periods. This feature provides early warning of agent failures and enables quorum-aware decision making.
6
-
7
- ## Features
8
-
9
- - **Periodic Health Checks**: Monitors agent heartbeats every 30 seconds
10
- - **Missed Beat Tracking**: Tracks consecutive missed heartbeats per agent
11
- - **Quorum-Aware Decisions**: Determines if the loop can continue without hung agents
12
- - **Automatic Recovery**: Resets counters when agents recover
13
- - **Graceful Shutdown**: Monitors stop cleanly when orchestrator shuts down
14
-
15
- ## Architecture
16
-
17
- ### Components
18
-
19
- 1. **Heartbeat Check Function** (`check_agent_heartbeat`)
20
- - Checks Redis key: `swarm:{task_id}:{agent_id}:heartbeat`
21
- - Returns 0 if heartbeat exists, 1 if missing
22
-
23
- 2. **Loop Health Check** (`check_heartbeats_loop`)
24
- - Checks all agents in a loop
25
- - Increments missed heartbeat counter
26
- - Warns after 2 consecutive misses (60 seconds)
27
- - Evaluates quorum impact
28
-
29
- 3. **Background Monitor** (`start_heartbeat_monitor`)
30
- - Runs in background subprocess
31
- - Checks every 30 seconds
32
- - Stops via marker file removal
33
- - Respects SHUTDOWN_REQUESTED flag
34
-
35
- 4. **Monitor Cleanup** (`stop_heartbeat_monitor`)
36
- - Removes marker file
37
- - Terminates background process
38
- - Called during shutdown
39
-
40
- ### Heartbeat Data Format
41
-
42
- ```json
43
- {
44
- "timestamp": 1760898665,
45
- "status": "working",
46
- "iteration": 1,
47
- "task": "implementing feature X"
48
- }
49
- ```
50
-
51
- ## Usage
52
-
53
- ### Agent Side (Publishing Heartbeats)
54
-
55
- Agents should publish heartbeats every 20-30 seconds:
56
-
57
- ```bash
58
- # Set heartbeat with 60s TTL
59
- HEARTBEAT=$(jq -n \
60
- --arg ts "$(date +%s)" \
61
- --arg status "working" \
62
- --arg iteration "1" \
63
- '{timestamp: ($ts | tonumber), status: $status, iteration: ($iteration | tonumber)}')
64
-
65
- redis-cli SET "swarm:${TASK_ID}:${AGENT_ID}:heartbeat" "$HEARTBEAT" EX 60
66
- ```
67
-
68
- ### Orchestrator Side (Monitoring)
69
-
70
- The orchestrator automatically starts/stops monitors during each loop:
71
-
72
- ```bash
73
- # Loop 3 monitoring
74
- LOOP3_HEARTBEAT_MONITOR_PID=$(start_heartbeat_monitor "$TASK_ID" "loop3" "${LOOP3_AGENTS[@]}")
75
-
76
- # ... wait for agents ...
77
-
78
- stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
79
- ```
80
-
81
- ## Monitoring Output
82
-
83
- ### Normal Operation
84
-
85
- ```
86
- [Loop 3] Starting heartbeat monitor (checking every 30s)...
87
- ```
88
-
89
- ### Agent Appears Hung
90
-
91
- ```
92
- [2025-10-19T18:30:00Z] [loop3] ⚠️ agent-1 appears hung (no heartbeat for 60s)
93
- [2025-10-19T18:30:00Z] [loop3] ℹ️ Continuing with quorum (2/2 agents)
94
- ```
95
-
96
- ### Quorum at Risk
97
-
98
- ```
99
- [2025-10-19T18:30:00Z] [loop3] ⚠️ agent-2 appears hung (no heartbeat for 60s)
100
- [2025-10-19T18:30:00Z] [loop3] ⚠️ Cannot meet quorum without agent-2 (1/2)
101
- ```
102
-
103
- ## Configuration
104
-
105
- ### Monitoring Interval
106
-
107
- Default: 30 seconds
108
-
109
- To change, edit the `sleep` duration in `start_heartbeat_monitor`:
110
-
111
- ```bash
112
- sleep 30 # Check every 30s
113
- ```
114
-
115
- ### Missed Heartbeat Threshold
116
-
117
- Default: 2 consecutive misses (60 seconds)
118
-
119
- To change, edit the threshold in `check_heartbeats_loop`:
120
-
121
- ```bash
122
- if [ ${MISSED_HEARTBEATS["$AGENT"]} -ge 2 ]; then
123
- ```
124
-
125
- ### Heartbeat TTL
126
-
127
- Default: 60 seconds
128
-
129
- Agents should set TTL when publishing:
130
-
131
- ```bash
132
- redis-cli SET "swarm:${TASK_ID}:${AGENT_ID}:heartbeat" "$DATA" EX 60
133
- ```
134
-
135
- ## Integration Points
136
-
137
- ### Cleanup Handler
138
-
139
- Monitors are automatically stopped during shutdown:
140
-
141
- ```bash
142
- function cleanup_and_exit() {
143
- # Stop heartbeat monitors if running
144
- if [ -n "${LOOP3_HEARTBEAT_MONITOR_PID:-}" ]; then
145
- stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
146
- fi
147
- if [ -n "${LOOP2_HEARTBEAT_MONITOR_PID:-}" ]; then
148
- stop_heartbeat_monitor "$TASK_ID" "loop2" "$LOOP2_HEARTBEAT_MONITOR_PID"
149
- fi
150
- }
151
- ```
152
-
153
- ### Global Variables
154
-
155
- ```bash
156
- LOOP3_HEARTBEAT_MONITOR_PID=""
157
- LOOP2_HEARTBEAT_MONITOR_PID=""
158
- declare -A MISSED_HEARTBEATS
159
- ```
160
-
161
- ## Testing
162
-
163
- ### Unit Tests
164
-
165
- ```bash
166
- ./.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh
167
- ```
168
-
169
- Tests:
170
- 1. Active heartbeat detection
171
- 2. Missing heartbeat detection
172
- 3. Missed heartbeat counter increment
173
- 4. Counter reset on recovery
174
-
175
- ### Manual Testing
176
-
177
- ```bash
178
- # Set up test heartbeat
179
- redis-cli SET "swarm:test-task:test-agent:heartbeat" '{"timestamp": 1234567890}' EX 60
180
-
181
- # Source functions
182
- source ./.claude/skills/cfn-redis-coordination/heartbeat-functions.sh
183
-
184
- # Check heartbeat
185
- check_agent_heartbeat "test-agent" "test-task"
186
- echo $? # Should be 0 (success)
187
- ```
188
-
189
- ## Best Practices
190
-
191
- ### For Agents
192
-
193
- 1. **Publish regularly**: Every 20-30 seconds
194
- 2. **Use appropriate TTL**: 60 seconds (2-3x the 20-30 second publish interval)
195
- 3. **Include metadata**: Status, iteration, current task
196
- 4. **Handle errors**: Continue if Redis unavailable
197
-
198
- ### For Orchestrators
199
-
200
- 1. **Start monitors early**: Before waiting for agents
201
- 2. **Stop monitors promptly**: After agents complete
202
- 3. **Check quorum impact**: Before making decisions
203
- 4. **Log appropriately**: Warn for hung agents, not for normal timeouts
204
-
205
- ## Troubleshooting
206
-
207
- ### Monitor Not Detecting Hung Agents
208
-
209
- **Symptoms**: Agents appear hung but no warnings
210
-
211
- **Causes**:
212
- 1. Monitor not started
213
- 2. Heartbeat check interval too long
214
- 3. Missed heartbeat threshold too high
215
-
216
- **Solutions**:
217
- - Verify monitor PID is set
218
- - Check monitor marker file exists
219
- - Review threshold values
220
-
221
- ### False Positives
222
-
223
- **Symptoms**: Warnings for healthy agents
224
-
225
- **Causes**:
226
- 1. Heartbeat publish interval too long
227
- 2. Heartbeat TTL too short
228
- 3. Network latency issues
229
-
230
- **Solutions**:
231
- - Reduce heartbeat interval (e.g., 15s)
232
- - Increase TTL (e.g., 90s)
233
- - Increase missed heartbeat threshold
234
-
235
- ### Monitor Not Stopping
236
-
237
- **Symptoms**: Background processes remain after completion
238
-
239
- **Causes**:
240
- 1. Marker file not removed
241
- 2. Process not killed
242
- 3. Cleanup not called
243
-
244
- **Solutions**:
245
- - Check for marker files: `ls /tmp/heartbeat-monitor-*.active`
246
- - Kill processes: `pkill -f heartbeat-monitor`
247
- - Verify cleanup handler is registered
248
-
249
- ## Performance Impact
250
-
251
- - **CPU**: Negligible (~0.01% per monitor)
252
- - **Network**: ~1 Redis GET per agent per 30s
253
- - **Memory**: ~1KB per agent for tracking state
254
-
255
- ## Future Enhancements
256
-
257
- 1. **Adaptive Intervals**: Reduce check frequency for stable agents
258
- 2. **Health Scores**: Track reliability over time
259
- 3. **Auto-Retry**: Wake hung agents with lower priority
260
- 4. **Metrics Export**: Publish heartbeat stats to monitoring system
261
- 5. **Dead Letter Queue Integration**: Automatic DLQ writes for consistently hung agents
262
-
263
- ## Related Documentation
264
-
265
- - [Redis Coordination Skill](./SKILL.md)
266
- - [CFN Loop Orchestration](./orchestrate-cfn-loop.sh)
267
- - [Waiting Mode Documentation](../../CLAUDE.md#redis-waiting-mode-zero-token-agent-coordination)
@@ -1,260 +0,0 @@
1
- # CFN Loop Logging System
2
-
3
- ## Overview
4
-
5
- The CFN Loop logging system provides comprehensive visibility into agent execution, decisions, and errors. All logs are stored in SQLite for efficient querying and analysis by AI agents.
6
-
7
- ## Database Location
8
-
9
- ```text
10
- Default: claude-flow-novice/data/cfn-loop.db
11
- Custom: Set DB_PATH environment variable
12
- ```
13
-
14
- ## Schema
15
-
16
- ```sql
17
- CREATE TABLE cfn_loop_logs (
18
- id INTEGER PRIMARY KEY AUTOINCREMENT,
19
- task_id TEXT NOT NULL, -- Task/swarm identifier
20
- timestamp TEXT DEFAULT (datetime('now')), -- UTC timestamp; SQLite default format is 'YYYY-MM-DD HH:MM:SS'
21
- event_type TEXT NOT NULL, -- Event category
22
- loop TEXT, -- loop3, loop2, product_owner, coordinator
23
- agent_id TEXT, -- Agent identifier (e.g., coder-1-1)
24
- iteration INTEGER, -- CFN loop iteration number
25
- details TEXT, -- JSON payload with event-specific data
26
- level TEXT DEFAULT 'INFO' -- DEBUG, INFO, WARN, ERROR
27
- );
28
-
29
- -- Indexes for fast queries
30
- CREATE INDEX idx_task_id ON cfn_loop_logs(task_id);
31
- CREATE INDEX idx_event_type ON cfn_loop_logs(event_type);
32
- CREATE INDEX idx_timestamp ON cfn_loop_logs(timestamp);
33
- CREATE INDEX idx_level ON cfn_loop_logs(level);
34
- ```
35
-
36
- ## Event Types
37
-
38
- | Event Type | Description | Level | Details Payload |
39
- |-----------|-------------|-------|-----------------|
40
- | `swarm_init` | CFN loop initialization | INFO | `{mode, loop3_agents, loop2_agents, product_owner, max_iterations, gate_threshold, consensus_threshold}` |
41
- | `agent_spawn` | Agent process started | INFO | `{agent_type, timeout}` |
42
- | `agent_complete` | Agent successfully completed | INFO | `{confidence, confidence_source, files_changed, latency_ms}` |
43
- | `agent_failure` | Agent execution failed | ERROR | `{error, output}` |
44
- | `gate_check` | Loop 3 gate validation | INFO/WARN | `{consensus, threshold, result: PASS\|FAIL, decision?}` |
45
- | `po_decision` | Product Owner strategic decision | INFO | `{decision: PROCEED\|ITERATE\|ABORT, reasoning, confidence}` |
46
- | `parameter_error` | Invalid parameters passed to tool | ERROR | `{error, command}` |
47
-
48
- ## Usage
49
-
50
- ### Logging Events (Orchestrator/Scripts)
51
-
52
- ```bash
53
- # Log swarm initialization
54
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
55
- --task-id "cfn-task-123" \
56
- --event-type "swarm_init" \
57
- --details '{"mode": "standard", "loop3_agents": "coder", "loop2_agents": "reviewer"}' \
58
- --level "INFO"
59
-
60
- # Log agent spawn
61
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
62
- --task-id "cfn-task-123" \
63
- --event-type "agent_spawn" \
64
- --loop "loop3" \
65
- --agent-id "coder-1-1" \
66
- --iteration 1 \
67
- --details '{"agent_type": "coder", "timeout": 900}' \
68
- --level "INFO"
69
-
70
- # Log error
71
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
72
- --task-id "cfn-task-123" \
73
- --event-type "agent_failure" \
74
- --loop "loop3" \
75
- --agent-id "coder-1-1" \
76
- --iteration 1 \
77
- --details '{"error": "timeout", "output": "Agent exceeded 900s timeout"}' \
78
- --level "ERROR"
79
- ```
80
-
81
- ### Querying Logs (AI Agents/Debugging)
82
-
83
- ```bash
84
- # Get all logs for a task
85
- ./query-logs.sh --task-id "cfn-task-123"
86
-
87
- # Get only errors
88
- ./query-logs.sh --task-id "cfn-task-123" --level ERROR
89
-
90
- # Get Loop 3 agent spawns
91
- ./query-logs.sh --task-id "cfn-task-123" --event-type agent_spawn --loop loop3
92
-
93
- # Get Product Owner decisions
94
- ./query-logs.sh --task-id "cfn-task-123" --event-type po_decision
95
-
96
- # Get latest 10 events in table format
97
- ./query-logs.sh --task-id "cfn-task-123" --limit 10 --format table
98
-
99
- # Get events for specific iteration
100
- ./query-logs.sh --task-id "cfn-task-123" --iteration 2
101
-
102
- # Get events for specific agent
103
- ./query-logs.sh --task-id "cfn-task-123" --agent-id "coder-1-1"
104
- ```
105
-
106
- ### Output Formats
107
-
108
- **JSON (default):**
109
- ```json
110
- [
111
- {
112
- "id": 1,
113
- "task_id": "cfn-task-123",
114
- "timestamp": "2025-10-21T10:30:00Z",
115
- "event_type": "agent_spawn",
116
- "loop": "loop3",
117
- "agent_id": "coder-1-1",
118
- "iteration": 1,
119
- "details": "{\"agent_type\": \"coder\", \"timeout\": 900}",
120
- "level": "INFO"
121
- }
122
- ]
123
- ```
124
-
125
- **Table:**
126
- ```
127
- id task_id timestamp event_type loop agent_id iteration level
128
- 1 cfn-task-123 2025-10-21T10:30:00Z agent_spawn loop3 coder-1-1 1 INFO
129
- 2 cfn-task-123 2025-10-21T10:45:00Z agent_complete loop3 coder-1-1 1 INFO
130
- ```
131
-
132
- **CSV:**
133
- ```csv
134
- 1,cfn-task-123,2025-10-21T10:30:00Z,agent_spawn,loop3,coder-1-1,1,"{""agent_type"": ""coder"", ""timeout"": 900}",INFO
135
- ```
136
-
137
- ## AI Agent Consumption
138
-
139
- AI agents can query logs to improve workflows:
140
-
141
- ```bash
142
- # Example: Analyze agent failures
143
- ERRORS=$(./query-logs.sh --task-id "cfn-task-123" --level ERROR --format json)
144
-
145
- # Parse JSON with jq
146
- echo "$ERRORS" | jq -r '.[] | "\(.timestamp) [\(.agent_id)] \(.event_type): \(.details)"'
147
-
148
- # Example output:
149
- # 2025-10-21T10:45:00Z [coder-1-1] agent_failure: {"error": "skill_execution_error", "output": "Unknown parameter --invalid-param"}
150
- ```
151
-
152
- ### Common Queries for AI Analysis
153
-
154
- ```bash
155
- # Find agents with highest failure rate
156
- ./query-logs.sh --task-id "$TASK_ID" --event-type agent_failure --format json | \
157
- jq -r '.[].agent_id' | sort | uniq -c | sort -nr
158
-
159
- # Calculate average agent latency per loop
160
- ./query-logs.sh --task-id "$TASK_ID" --event-type agent_complete --format json | \
161
- jq -r '.[] | "\(.loop) \(.details | fromjson | .latency_ms)"' | \
162
- awk '{sum[$1]+=$2; count[$1]++} END {for (loop in sum) print loop, sum[loop]/count[loop]}'
163
-
164
- # Find parameter errors (for troubleshooting implementations)
165
- ./query-logs.sh --task-id "$TASK_ID" --event-type parameter_error --format json
166
-
167
- # Track decision history
168
- ./query-logs.sh --task-id "$TASK_ID" --event-type po_decision --format json | \
169
- jq -r '.[] | "\(.iteration): \(.details | fromjson | .decision) - \(.details | fromjson | .reasoning)"'
170
- ```
171
-
172
- ## Logged Events in Orchestrator
173
-
174
- The orchestrator automatically logs:
175
-
176
- 1. **Line ~643:** Swarm initialization with all configuration
177
- 2. **Line ~811:** Each Loop 3 agent spawn with timeout
178
- 3. **Line ~892:** Each Loop 3 agent completion with confidence and files changed
179
- 4. **Line ~917:** Each Loop 3 agent failure with error details
180
- 5. **Line ~1082:** Gate check failures with consensus scores
181
- 6. **Line ~1115:** Gate check successes
182
- 7. **Line ~1440:** Product Owner decisions with reasoning
183
-
184
- All logs include `2>/dev/null || true` to ensure logging failures don't break orchestration.
185
-
186
- ## Performance
187
-
188
- - **Write latency:** ~5-10ms per log entry
189
- - **Query latency:** ~10-50ms for typical queries (< 1000 events)
190
- - **Storage:** ~500 bytes per event (compressed SQLite)
191
- - **Indexes:** Optimized for task_id, event_type, timestamp, level queries
192
-
193
- ## Debugging
194
-
195
- ### Check if logging is working
196
-
197
- ```bash
198
- # Check database exists
199
- ls -lh data/cfn-loop.db
200
-
201
- # Count total log entries
202
- sqlite3 data/cfn-loop.db "SELECT COUNT(*) FROM cfn_loop_logs;"
203
-
204
- # Get latest 5 events
205
- ./query-logs.sh --task-id "YOUR_TASK_ID" --limit 5 --format table
206
- ```
207
-
208
- ### Common issues
209
-
210
- **Issue:** "Error: Database not found"
211
- - **Cause:** No logs written yet
212
- - **Fix:** Run a CFN loop task to generate logs
213
-
214
- **Issue:** "Error: --details must be valid JSON"
215
- - **Cause:** Malformed JSON in details parameter
216
- - **Fix:** Validate JSON with `echo "$DETAILS" | jq empty`
217
-
218
- **Issue:** Logging fails silently
219
- - **Cause:** `2>/dev/null || true` suppresses errors
220
- - **Fix:** Remove `2>/dev/null` temporarily to see error messages
221
-
222
- ## Web Portal Integration
223
-
224
- The web portal can query logs for real-time visibility:
225
-
226
- ```typescript
227
- // Example: Fetch logs for task
228
- const logs = await fetch('/api/logs?task_id=cfn-task-123&event_type=agent_spawn');
229
- const events = await logs.json();
230
-
231
- // Display in timeline
232
- events.forEach(event => {
233
- console.log(`${event.timestamp} [${event.loop}] ${event.agent_id}: ${event.event_type}`);
234
- });
235
- ```
236
-
237
- ## Retention
238
-
239
- - **Default:** Logs persist indefinitely in SQLite
240
- - **Recommended:** Implement cleanup job to delete logs older than 30 days for non-critical tasks
241
- - **Critical tasks:** Retain logs for audit trail
242
-
243
- ```bash
244
- # Example: Delete logs older than 30 days
245
- sqlite3 data/cfn-loop.db "DELETE FROM cfn_loop_logs WHERE timestamp < datetime('now', '-30 days');"
246
- ```
247
-
248
- ## Privacy & Security
249
-
250
- - **Sensitive data:** Avoid logging secrets, API keys, or PII in details field
251
- - **Access control:** Database file permissions (chmod 600) restrict access
252
- - **Audit trail:** Logs include full decision reasoning for compliance
253
-
254
- ## Future Enhancements
255
-
256
- - [ ] Structured logging levels (DEBUG for verbose agent output)
257
- - [ ] Log streaming to external systems (Elasticsearch, CloudWatch)
258
- - [ ] Automatic anomaly detection (high failure rates, long latencies)
259
- - [ ] Log rotation and archival
260
- - [ ] Web UI for log browsing and search
@@ -1,65 +0,0 @@
1
- # Redis Coordination Skill
2
-
3
- ## Quick Start
4
-
5
- ### Prerequisites
6
- - Redis 5.0+
7
- - bash
8
- - jq
9
- - redis-cli
10
-
11
- ### Installation
12
- 1. Ensure Redis is running
13
- 2. Configure Redis connection in `config.json`
14
- 3. Make scripts executable:
15
- ```bash
16
- chmod +x invoke-waiting-mode.sh
17
- ```
18
-
19
- ### Basic Usage
20
-
21
- #### Consensus Collection (Updated)
22
- ```bash
23
- # Agent reports results
24
- ./invoke-waiting-mode.sh report \
25
- --task-id "my-task" \
26
- --agent-id "agent-1" \
27
- --confidence 0.95
28
-
29
- # Collect and evaluate consensus
30
- ./invoke-waiting-mode.sh collect \
31
- --task-id "my-task" \
32
- --agent-ids "agent-1,agent-2,agent-3"
33
- ```
34
-
35
- ## Important Changes in P7 (Redis Script Cleanup)
36
-
37
- ### Deprecation Notices
38
- - 🚨 `enter` and `wake` subcommands are NO LONGER SUPPORTED
39
- - Agents should exit cleanly without waiting mode
40
- - Coordinator spawns agents directly
41
- - Fork-ID references have been removed
42
-
43
- ### Migration Guide
44
- - Update agent scripts to exit cleanly after task
45
- - Remove manual waiting mode calls
46
- - Use direct agent spawning in orchestrator
47
-
48
- ## Script Categories
49
- - **Production Scripts**:
50
- - `invoke-waiting-mode.sh`: Redis coordination wrapper
51
- - `orchestrate-cfn-loop.sh`: CFN Loop orchestration
52
- - **Demos and Tests**: Located in `./demos/`
53
-
54
- ## Performance
55
- - Zero-token waiting
56
- - Sub-100ms wake-up latency
57
- - Supports 10+ concurrent agents
58
- - Configurable consensus thresholds
59
-
60
- ## Configuration Options
61
- See `SKILL.md` for detailed configuration and usage instructions.
62
-
63
- ## Troubleshooting
64
- - If you encounter issues with old scripts, refer to the Migration Guide above
65
- - Test scripts are available in `./demos/` directory
@@ -1,25 +0,0 @@
1
- # Security Review: Metrics Export System (Phase 7)
2
-
3
- ## Overview
4
- Security review conducted by security-specialist-3 for Redis Coordination metrics export functionality.
5
-
6
- ## Confidence Score: 0.92 (High)
7
-
8
- ### Key Findings
9
- - ✅ Robust input validation
10
- - ✅ Secure file handling
11
- - ✅ Minimal data exposure
12
- - ⚠️ Recommended ACL improvements
13
-
14
- ### Recommendations
15
- 1. Implement optional PII sanitization
16
- 2. Enhance Redis key access controls
17
- 3. Create metrics export audit logging
18
- 4. Add optional export encryption
19
-
20
- ### Compliance
21
- - NIST SP 800-53 Alignment: Moderate Impact
22
- - SOC 2 Type II Ready
23
- - GDPR Data Minimization Compliant
24
-
25
- The full detailed report is available in the source code comments.