claude-flow-novice 2.14.2 → 2.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/.claude/commands/CFN_LOOP_TASK_MODE.md +4 -47
  2. package/.claude/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  3. package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +4 -47
  4. package/claude-assets/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  5. package/dist/cli/agent-prompt-builder.js +25 -0
  6. package/dist/cli/agent-prompt-builder.js.map +1 -1
  7. package/dist/cli/config-manager.js +91 -109
  8. package/package.json +1 -1
  9. package/scripts/init-project.js +1 -1
  10. package/.claude/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  11. package/.claude/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  12. package/.claude/skills/cfn-redis-coordination/LOGGING.md +0 -260
  13. package/.claude/skills/cfn-redis-coordination/README.md +0 -65
  14. package/.claude/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  15. package/.claude/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  16. package/.claude/skills/cfn-redis-coordination/SKILL.md +0 -720
  17. package/.claude/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  18. package/.claude/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  19. package/.claude/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  20. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  21. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  22. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  23. package/.claude/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  24. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  25. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  26. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  27. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  28. package/.claude/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  29. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  30. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  31. package/.claude/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  32. package/.claude/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  33. package/.claude/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  34. package/.claude/skills/cfn-redis-coordination/examples/README.md +0 -73
  35. package/.claude/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  36. package/.claude/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  37. package/.claude/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  38. package/.claude/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  39. package/.claude/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  40. package/.claude/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  41. package/.claude/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  42. package/.claude/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  43. package/.claude/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  44. package/.claude/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  45. package/.claude/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  46. package/.claude/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  47. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  48. package/.claude/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  49. package/.claude/skills/cfn-redis-coordination/log-event.sh +0 -109
  50. package/.claude/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  51. package/.claude/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  52. package/.claude/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  53. package/.claude/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  54. package/.claude/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  55. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  56. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  57. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  58. package/.claude/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  59. package/.claude/skills/cfn-redis-coordination/priority_wake.py +0 -134
  60. package/.claude/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  61. package/.claude/skills/cfn-redis-coordination/query-logs.sh +0 -103
  62. package/.claude/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  63. package/.claude/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  64. package/.claude/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  65. package/.claude/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  66. package/.claude/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  67. package/.claude/skills/cfn-redis-coordination/signal.sh +0 -38
  68. package/.claude/skills/cfn-redis-coordination/store-context.sh +0 -86
  69. package/.claude/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  70. package/.claude/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  71. package/.claude/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  72. package/.claude/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  73. package/.claude/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  74. package/.claude/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  75. package/.claude/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  76. package/.claude/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  77. package/.claude/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  78. package/.claude/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  79. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  80. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  81. package/.claude/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  82. package/.claude/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  83. package/.claude/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  84. package/.claude/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  85. package/.claude/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
  86. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  87. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  88. package/claude-assets/skills/cfn-redis-coordination/LOGGING.md +0 -260
  89. package/claude-assets/skills/cfn-redis-coordination/README.md +0 -65
  90. package/claude-assets/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  91. package/claude-assets/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  92. package/claude-assets/skills/cfn-redis-coordination/SKILL.md +0 -720
  93. package/claude-assets/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  94. package/claude-assets/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  95. package/claude-assets/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  96. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  97. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  98. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  99. package/claude-assets/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  100. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  101. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  102. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  103. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  104. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  105. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  106. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  107. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  108. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  109. package/claude-assets/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  110. package/claude-assets/skills/cfn-redis-coordination/examples/README.md +0 -73
  111. package/claude-assets/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  112. package/claude-assets/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  113. package/claude-assets/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  114. package/claude-assets/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  115. package/claude-assets/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  116. package/claude-assets/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  117. package/claude-assets/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  118. package/claude-assets/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  119. package/claude-assets/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  120. package/claude-assets/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  121. package/claude-assets/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  122. package/claude-assets/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  123. package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  124. package/claude-assets/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  125. package/claude-assets/skills/cfn-redis-coordination/log-event.sh +0 -109
  126. package/claude-assets/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  127. package/claude-assets/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  128. package/claude-assets/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  129. package/claude-assets/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  130. package/claude-assets/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  131. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  132. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  133. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  134. package/claude-assets/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  135. package/claude-assets/skills/cfn-redis-coordination/priority_wake.py +0 -134
  136. package/claude-assets/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  137. package/claude-assets/skills/cfn-redis-coordination/query-logs.sh +0 -103
  138. package/claude-assets/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  139. package/claude-assets/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  140. package/claude-assets/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  141. package/claude-assets/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  142. package/claude-assets/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  143. package/claude-assets/skills/cfn-redis-coordination/signal.sh +0 -38
  144. package/claude-assets/skills/cfn-redis-coordination/store-context.sh +0 -86
  145. package/claude-assets/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  146. package/claude-assets/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  147. package/claude-assets/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  148. package/claude-assets/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  149. package/claude-assets/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  150. package/claude-assets/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  151. package/claude-assets/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  152. package/claude-assets/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  153. package/claude-assets/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  154. package/claude-assets/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  155. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  156. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  157. package/claude-assets/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  158. package/claude-assets/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  159. package/claude-assets/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  160. package/claude-assets/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  161. package/claude-assets/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
@@ -1,57 +0,0 @@
1
- # Heartbeat Monitoring Script
2
-
3
- ## Overview
4
- This script provides a robust mechanism for tracking agent health and detecting unresponsive agents in a distributed system using Redis.
5
-
6
- ## Key Features
7
- - Real-time agent status tracking
8
- - Configurable TTL and missed heartbeat thresholds
9
- - Automatic quorum fallback mechanism
10
- - Detailed logging for debugging
11
-
12
- ## Usage
13
-
14
- ### Sending a Heartbeat
15
- ```bash
16
- ./heartbeat.sh send --task-id TASK_ID --agent-id AGENT_ID
17
- ```
18
-
19
- ### Checking Heartbeat Status
20
- ```bash
21
- ./heartbeat.sh check --task-id TASK_ID --agent-id AGENT_ID
22
- ```
23
-
24
- ## Configuration Parameters
25
- - `HEARTBEAT_TTL`: Heartbeat expiration time (default: 60 seconds)
26
- - `CHECK_INTERVAL`: Recommended check frequency (default: 30 seconds)
27
- - `MISSED_THRESHOLD`: Number of missed heartbeats before triggering fallback (default: 2)
28
-
29
- ## Redis Key Structure
30
- - Heartbeat Key: `swarm:agent_status:{task_id}:{agent_id}`
31
- - Missed Heartbeat Counter: `swarm:missed_heartbeats:{task_id}:{agent_id}`
32
- - Quorum Fallback List: `swarm:{task_id}:quorum_fallback`
33
-
34
- ## Fault Tolerance
35
- - Automatically detects agent disconnection
36
- - Supports graceful degradation via quorum fallback
37
- - Minimal overhead with Redis-based tracking
38
-
39
- ## Test Coverage
40
- - 100% Coverage of Core Scenarios
41
- - Basic Heartbeat Sending
42
- - TTL Expiration
43
- - Missed Heartbeat Detection
44
- - Quorum Fallback Mechanism
45
-
46
- ## Performance Characteristics
47
- - Low-latency Redis operations
48
- - Constant-time heartbeat tracking
49
- - Negligible system resource consumption
50
-
51
- ## Limitations
52
- - Requires Redis 3.2+ for SETEX functionality
53
- - Network reliability impacts heartbeat accuracy
54
-
55
- ## Security Considerations
56
- - Use in trusted network environments
57
- - Implement additional authentication for production use
@@ -1,267 +0,0 @@
1
- # Heartbeat Monitoring for CFN Loop Orchestration
2
-
3
- ## Overview
4
-
5
- The orchestrator includes built-in heartbeat monitoring to detect hung or unresponsive agents during BLPOP waiting periods. This feature provides early warning of agent failures and enables quorum-aware decision-making.
6
-
7
- ## Features
8
-
9
- - **Periodic Health Checks**: Monitors agent heartbeats every 30 seconds
10
- - **Missed Beat Tracking**: Tracks consecutive missed heartbeats per agent
11
- - **Quorum-Aware Decisions**: Determines if the loop can continue without hung agents
12
- - **Automatic Recovery**: Resets counters when agents recover
13
- - **Graceful Shutdown**: Monitors stop cleanly when orchestrator shuts down
14
-
15
- ## Architecture
16
-
17
- ### Components
18
-
19
- 1. **Heartbeat Check Function** (`check_agent_heartbeat`)
20
- - Checks Redis key: `swarm:{task_id}:{agent_id}:heartbeat`
21
- - Returns 0 if heartbeat exists, 1 if missing
22
-
23
- 2. **Loop Health Check** (`check_heartbeats_loop`)
24
- - Checks all agents in a loop
25
- - Increments missed heartbeat counter
26
- - Warns after 2 consecutive misses (60 seconds)
27
- - Evaluates quorum impact
28
-
29
- 3. **Background Monitor** (`start_heartbeat_monitor`)
30
- - Runs in background subprocess
31
- - Checks every 30 seconds
32
- - Stops via marker file removal
33
- - Respects SHUTDOWN_REQUESTED flag
34
-
35
- 4. **Monitor Cleanup** (`stop_heartbeat_monitor`)
36
- - Removes marker file
37
- - Terminates background process
38
- - Called during shutdown
39
-
40
- ### Heartbeat Data Format
41
-
42
- ```json
43
- {
44
- "timestamp": 1760898665,
45
- "status": "working",
46
- "iteration": 1,
47
- "task": "implementing feature X"
48
- }
49
- ```
50
-
51
- ## Usage
52
-
53
- ### Agent Side (Publishing Heartbeats)
54
-
55
- Agents should publish heartbeats every 20-30 seconds:
56
-
57
- ```bash
58
- # Set heartbeat with 60s TTL
59
- HEARTBEAT=$(jq -n \
60
- --arg ts "$(date +%s)" \
61
- --arg status "working" \
62
- --arg iteration "1" \
63
- '{timestamp: ($ts | tonumber), status: $status, iteration: ($iteration | tonumber)}')
64
-
65
- redis-cli SET "swarm:${TASK_ID}:${AGENT_ID}:heartbeat" "$HEARTBEAT" EX 60
66
- ```
67
-
68
- ### Orchestrator Side (Monitoring)
69
-
70
- The orchestrator automatically starts/stops monitors during each loop:
71
-
72
- ```bash
73
- # Loop 3 monitoring
74
- LOOP3_HEARTBEAT_MONITOR_PID=$(start_heartbeat_monitor "$TASK_ID" "loop3" "${LOOP3_AGENTS[@]}")
75
-
76
- # ... wait for agents ...
77
-
78
- stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
79
- ```
80
-
81
- ## Monitoring Output
82
-
83
- ### Normal Operation
84
-
85
- ```
86
- [Loop 3] Starting heartbeat monitor (checking every 30s)...
87
- ```
88
-
89
- ### Agent Appears Hung
90
-
91
- ```
92
- [2025-10-19T18:30:00Z] [loop3] ⚠️ agent-1 appears hung (no heartbeat for 60s)
93
- [2025-10-19T18:30:00Z] [loop3] ℹ️ Continuing with quorum (2/2 agents)
94
- ```
95
-
96
- ### Quorum at Risk
97
-
98
- ```
99
- [2025-10-19T18:30:00Z] [loop3] ⚠️ agent-2 appears hung (no heartbeat for 60s)
100
- [2025-10-19T18:30:00Z] [loop3] ⚠️ Cannot meet quorum without agent-2 (1/2)
101
- ```
102
-
103
- ## Configuration
104
-
105
- ### Monitoring Interval
106
-
107
- Default: 30 seconds
108
-
109
- To change, edit the `sleep` duration in `start_heartbeat_monitor`:
110
-
111
- ```bash
112
- sleep 30 # Check every 30s
113
- ```
114
-
115
- ### Missed Heartbeat Threshold
116
-
117
- Default: 2 consecutive misses (60 seconds)
118
-
119
- To change, edit the threshold in `check_heartbeats_loop`:
120
-
121
- ```bash
122
- if [ ${MISSED_HEARTBEATS["$AGENT"]} -ge 2 ]; then
123
- ```
124
-
125
- ### Heartbeat TTL
126
-
127
- Default: 60 seconds
128
-
129
- Agents should set TTL when publishing:
130
-
131
- ```bash
132
- redis-cli SET "swarm:${TASK_ID}:${AGENT_ID}:heartbeat" "$DATA" EX 60
133
- ```
134
-
135
- ## Integration Points
136
-
137
- ### Cleanup Handler
138
-
139
- Monitors are automatically stopped during shutdown:
140
-
141
- ```bash
142
- function cleanup_and_exit() {
143
- # Stop heartbeat monitors if running
144
- if [ -n "${LOOP3_HEARTBEAT_MONITOR_PID:-}" ]; then
145
- stop_heartbeat_monitor "$TASK_ID" "loop3" "$LOOP3_HEARTBEAT_MONITOR_PID"
146
- fi
147
- if [ -n "${LOOP2_HEARTBEAT_MONITOR_PID:-}" ]; then
148
- stop_heartbeat_monitor "$TASK_ID" "loop2" "$LOOP2_HEARTBEAT_MONITOR_PID"
149
- fi
150
- }
151
- ```
152
-
153
- ### Global Variables
154
-
155
- ```bash
156
- LOOP3_HEARTBEAT_MONITOR_PID=""
157
- LOOP2_HEARTBEAT_MONITOR_PID=""
158
- declare -A MISSED_HEARTBEATS
159
- ```
160
-
161
- ## Testing
162
-
163
- ### Unit Tests
164
-
165
- ```bash
166
- ./.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh
167
- ```
168
-
169
- Tests:
170
- 1. Active heartbeat detection
171
- 2. Missing heartbeat detection
172
- 3. Missed heartbeat counter increment
173
- 4. Counter reset on recovery
174
-
175
- ### Manual Testing
176
-
177
- ```bash
178
- # Set up test heartbeat
179
- redis-cli SET "swarm:test-task:test-agent:heartbeat" '{"timestamp": 1234567890}' EX 60
180
-
181
- # Source functions
182
- source ./.claude/skills/cfn-redis-coordination/heartbeat-functions.sh
183
-
184
- # Check heartbeat
185
- check_agent_heartbeat "test-agent" "test-task"
186
- echo $? # Should be 0 (success)
187
- ```
188
-
189
- ## Best Practices
190
-
191
- ### For Agents
192
-
193
- 1. **Publish regularly**: Every 20-30 seconds
194
- 2. **Use appropriate TTL**: 60 seconds (2–3x the 20-30 second publish interval)
195
- 3. **Include metadata**: Status, iteration, current task
196
- 4. **Handle errors**: Continue if Redis unavailable
197
-
198
- ### For Orchestrators
199
-
200
- 1. **Start monitors early**: Before waiting for agents
201
- 2. **Stop monitors promptly**: After agents complete
202
- 3. **Check quorum impact**: Before making decisions
203
- 4. **Log appropriately**: Warn for hung agents, not for normal timeouts
204
-
205
- ## Troubleshooting
206
-
207
- ### Monitor Not Detecting Hung Agents
208
-
209
- **Symptoms**: Agents appear hung but no warnings
210
-
211
- **Causes**:
212
- 1. Monitor not started
213
- 2. Heartbeat check interval too long
214
- 3. Missed heartbeat threshold too high
215
-
216
- **Solutions**:
217
- - Verify monitor PID is set
218
- - Check monitor marker file exists
219
- - Review threshold values
220
-
221
- ### False Positives
222
-
223
- **Symptoms**: Warnings for healthy agents
224
-
225
- **Causes**:
226
- 1. Heartbeat publish interval too long
227
- 2. Heartbeat TTL too short
228
- 3. Network latency issues
229
-
230
- **Solutions**:
231
- - Reduce heartbeat interval (e.g., 15s)
232
- - Increase TTL (e.g., 90s)
233
- - Increase missed heartbeat threshold
234
-
235
- ### Monitor Not Stopping
236
-
237
- **Symptoms**: Background processes remain after completion
238
-
239
- **Causes**:
240
- 1. Marker file not removed
241
- 2. Process not killed
242
- 3. Cleanup not called
243
-
244
- **Solutions**:
245
- - Check for marker files: `ls /tmp/heartbeat-monitor-*.active`
246
- - Kill processes: `pkill -f heartbeat-monitor`
247
- - Verify cleanup handler is registered
248
-
249
- ## Performance Impact
250
-
251
- - **CPU**: Negligible (~0.01% per monitor)
252
- - **Network**: ~1 Redis GET per agent per 30s
253
- - **Memory**: ~1KB per agent for tracking state
254
-
255
- ## Future Enhancements
256
-
257
- 1. **Adaptive Intervals**: Reduce check frequency for stable agents
258
- 2. **Health Scores**: Track reliability over time
259
- 3. **Auto-Retry**: Wake hung agents with lower priority
260
- 4. **Metrics Export**: Publish heartbeat stats to monitoring system
261
- 5. **Dead Letter Queue Integration**: Automatic DLQ writes for consistently hung agents
262
-
263
- ## Related Documentation
264
-
265
- - [Redis Coordination Skill](./SKILL.md)
266
- - [CFN Loop Orchestration](./orchestrate-cfn-loop.sh)
267
- - [Waiting Mode Documentation](../../CLAUDE.md#redis-waiting-mode-zero-token-agent-coordination)
@@ -1,260 +0,0 @@
1
- # CFN Loop Logging System
2
-
3
- ## Overview
4
-
5
- The CFN Loop logging system provides comprehensive visibility into agent execution, decisions, and errors. All logs are stored in SQLite for efficient querying and analysis by AI agents.
6
-
7
- ## Database Location
8
-
9
- ```bash
10
- Default: claude-flow-novice/data/cfn-loop.db
11
- Custom: Set DB_PATH environment variable
12
- ```
13
-
14
- ## Schema
15
-
16
- ```sql
17
- CREATE TABLE cfn_loop_logs (
18
- id INTEGER PRIMARY KEY AUTOINCREMENT,
19
- task_id TEXT NOT NULL, -- Task/swarm identifier
20
- timestamp TEXT DEFAULT (datetime('now')), -- ISO 8601 timestamp
21
- event_type TEXT NOT NULL, -- Event category
22
- loop TEXT, -- loop3, loop2, product_owner, coordinator
23
- agent_id TEXT, -- Agent identifier (e.g., coder-1-1)
24
- iteration INTEGER, -- CFN loop iteration number
25
- details TEXT, -- JSON payload with event-specific data
26
- level TEXT DEFAULT 'INFO' -- DEBUG, INFO, WARN, ERROR
27
- );
28
-
29
- -- Indexes for fast queries
30
- CREATE INDEX idx_task_id ON cfn_loop_logs(task_id);
31
- CREATE INDEX idx_event_type ON cfn_loop_logs(event_type);
32
- CREATE INDEX idx_timestamp ON cfn_loop_logs(timestamp);
33
- CREATE INDEX idx_level ON cfn_loop_logs(level);
34
- ```
35
-
36
- ## Event Types
37
-
38
- | Event Type | Description | Level | Details Payload |
39
- |-----------|-------------|-------|-----------------|
40
- | `swarm_init` | CFN loop initialization | INFO | `{mode, loop3_agents, loop2_agents, product_owner, max_iterations, gate_threshold, consensus_threshold}` |
41
- | `agent_spawn` | Agent process started | INFO | `{agent_type, timeout}` |
42
- | `agent_complete` | Agent successfully completed | INFO | `{confidence, confidence_source, files_changed, latency_ms}` |
43
- | `agent_failure` | Agent execution failed | ERROR | `{error, output}` |
44
- | `gate_check` | Loop 3 gate validation | INFO/WARN | `{consensus, threshold, result: PASS\|FAIL, decision?}` |
45
- | `po_decision` | Product Owner strategic decision | INFO | `{decision: PROCEED\|ITERATE\|ABORT, reasoning, confidence}` |
46
- | `parameter_error` | Invalid parameters passed to tool | ERROR | `{error, command}` |
47
-
48
- ## Usage
49
-
50
- ### Logging Events (Orchestrator/Scripts)
51
-
52
- ```bash
53
- # Log swarm initialization
54
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
55
- --task-id "cfn-task-123" \
56
- --event-type "swarm_init" \
57
- --details '{"mode": "standard", "loop3_agents": "coder", "loop2_agents": "reviewer"}' \
58
- --level "INFO"
59
-
60
- # Log agent spawn
61
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
62
- --task-id "cfn-task-123" \
63
- --event-type "agent_spawn" \
64
- --loop "loop3" \
65
- --agent-id "coder-1-1" \
66
- --iteration 1 \
67
- --details '{"agent_type": "coder", "timeout": 900}' \
68
- --level "INFO"
69
-
70
- # Log error
71
- ./.claude/skills/cfn-redis-coordination/log-event.sh \
72
- --task-id "cfn-task-123" \
73
- --event-type "agent_failure" \
74
- --loop "loop3" \
75
- --agent-id "coder-1-1" \
76
- --iteration 1 \
77
- --details '{"error": "timeout", "output": "Agent exceeded 900s timeout"}' \
78
- --level "ERROR"
79
- ```
80
-
81
- ### Querying Logs (AI Agents/Debugging)
82
-
83
- ```bash
84
- # Get all logs for a task
85
- ./query-logs.sh --task-id "cfn-task-123"
86
-
87
- # Get only errors
88
- ./query-logs.sh --task-id "cfn-task-123" --level ERROR
89
-
90
- # Get Loop 3 agent spawns
91
- ./query-logs.sh --task-id "cfn-task-123" --event-type agent_spawn --loop loop3
92
-
93
- # Get Product Owner decisions
94
- ./query-logs.sh --task-id "cfn-task-123" --event-type po_decision
95
-
96
- # Get latest 10 events in table format
97
- ./query-logs.sh --task-id "cfn-task-123" --limit 10 --format table
98
-
99
- # Get events for specific iteration
100
- ./query-logs.sh --task-id "cfn-task-123" --iteration 2
101
-
102
- # Get events for specific agent
103
- ./query-logs.sh --task-id "cfn-task-123" --agent-id "coder-1-1"
104
- ```
105
-
106
- ### Output Formats
107
-
108
- **JSON (default):**
109
- ```json
110
- [
111
- {
112
- "id": 1,
113
- "task_id": "cfn-task-123",
114
- "timestamp": "2025-10-21T10:30:00Z",
115
- "event_type": "agent_spawn",
116
- "loop": "loop3",
117
- "agent_id": "coder-1-1",
118
- "iteration": 1,
119
- "details": "{\"agent_type\": \"coder\", \"timeout\": 900}",
120
- "level": "INFO"
121
- }
122
- ]
123
- ```
124
-
125
- **Table:**
126
- ```
127
- id task_id timestamp event_type loop agent_id iteration level
128
- 1 cfn-task-123 2025-10-21T10:30:00Z agent_spawn loop3 coder-1-1 1 INFO
129
- 2 cfn-task-123 2025-10-21T10:45:00Z agent_complete loop3 coder-1-1 1 INFO
130
- ```
131
-
132
- **CSV:**
133
- ```csv
134
- 1,cfn-task-123,2025-10-21T10:30:00Z,agent_spawn,loop3,coder-1-1,1,"{""agent_type"": ""coder"", ""timeout"": 900}",INFO
135
- ```
136
-
137
- ## AI Agent Consumption
138
-
139
- AI agents can query logs to improve workflows:
140
-
141
- ```bash
142
- # Example: Analyze agent failures
143
- ERRORS=$(./query-logs.sh --task-id "cfn-task-123" --level ERROR --format json)
144
-
145
- # Parse JSON with jq
146
- echo "$ERRORS" | jq -r '.[] | "\(.timestamp) [\(.agent_id)] \(.event_type): \(.details)"'
147
-
148
- # Example output:
149
- # 2025-10-21T10:45:00Z [coder-1-1] agent_failure: {"error": "skill_execution_error", "output": "Unknown parameter --invalid-param"}
150
- ```
151
-
152
- ### Common Queries for AI Analysis
153
-
154
- ```bash
155
- # Find agents with highest failure rate
156
- ./query-logs.sh --task-id "$TASK_ID" --event-type agent_failure --format json | \
157
- jq -r '.[].agent_id' | sort | uniq -c | sort -nr
158
-
159
- # Calculate average agent latency per loop
160
- ./query-logs.sh --task-id "$TASK_ID" --event-type agent_complete --format json | \
161
- jq -r '.[] | "\(.loop) \(.details | fromjson | .latency_ms)"' | \
162
- awk '{sum[$1]+=$2; count[$1]++} END {for (loop in sum) print loop, sum[loop]/count[loop]}'
163
-
164
- # Find parameter errors (for troubleshooting implementations)
165
- ./query-logs.sh --task-id "$TASK_ID" --event-type parameter_error --format json
166
-
167
- # Track decision history
168
- ./query-logs.sh --task-id "$TASK_ID" --event-type po_decision --format json | \
169
- jq -r '.[] | "\(.iteration): \(.details | fromjson | .decision) - \(.details | fromjson | .reasoning)"'
170
- ```
171
-
172
- ## Logged Events in Orchestrator
173
-
174
- The orchestrator automatically logs:
175
-
176
- 1. **Line ~643:** Swarm initialization with all configuration
177
- 2. **Line ~811:** Each Loop 3 agent spawn with timeout
178
- 3. **Line ~892:** Each Loop 3 agent completion with confidence and files changed
179
- 4. **Line ~917:** Each Loop 3 agent failure with error details
180
- 5. **Line ~1082:** Gate check failures with consensus scores
181
- 6. **Line ~1115:** Gate check successes
182
- 7. **Line ~1440:** Product Owner decisions with reasoning
183
-
184
- All logging calls are suffixed with `2>/dev/null || true` so that logging failures don't break orchestration.
185
-
186
- ## Performance
187
-
188
- - **Write latency:** ~5-10ms per log entry
189
- - **Query latency:** ~10-50ms for typical queries (< 1000 events)
190
- - **Storage:** ~500 bytes per event (compressed SQLite)
191
- - **Indexes:** Optimized for task_id, event_type, timestamp, level queries
192
-
193
- ## Debugging
194
-
195
- ### Check if logging is working
196
-
197
- ```bash
198
- # Check database exists
199
- ls -lh data/cfn-loop.db
200
-
201
- # Count total log entries
202
- sqlite3 data/cfn-loop.db "SELECT COUNT(*) FROM cfn_loop_logs;"
203
-
204
- # Get latest 5 events
205
- ./query-logs.sh --task-id "YOUR_TASK_ID" --limit 5 --format table
206
- ```
207
-
208
- ### Common issues
209
-
210
- **Issue:** "Error: Database not found"
211
- - **Cause:** No logs written yet
212
- - **Fix:** Run a CFN loop task to generate logs
213
-
214
- **Issue:** "Error: --details must be valid JSON"
215
- - **Cause:** Malformed JSON in details parameter
216
- - **Fix:** Validate JSON with `echo "$DETAILS" | jq empty`
217
-
218
- **Issue:** Logging fails silently
219
- - **Cause:** `2>/dev/null || true` suppresses errors
220
- - **Fix:** Remove `2>/dev/null` temporarily to see error messages
221
-
222
- ## Web Portal Integration
223
-
224
- The web portal can query logs for real-time visibility:
225
-
226
- ```typescript
227
- // Example: Fetch logs for task
228
- const logs = await fetch('/api/logs?task_id=cfn-task-123&event_type=agent_spawn');
229
- const events = await logs.json();
230
-
231
- // Display in timeline
232
- events.forEach(event => {
233
- console.log(`${event.timestamp} [${event.loop}] ${event.agent_id}: ${event.event_type}`);
234
- });
235
- ```
236
-
237
- ## Retention
238
-
239
- - **Default:** Logs persist indefinitely in SQLite
240
- - **Recommended:** Implement a cleanup job to delete logs older than 30 days for non-critical tasks
241
- - **Critical tasks:** Retain logs for audit trail
242
-
243
- ```bash
244
- # Example: Delete logs older than 30 days
245
- sqlite3 data/cfn-loop.db "DELETE FROM cfn_loop_logs WHERE timestamp < datetime('now', '-30 days');"
246
- ```
247
-
248
- ## Privacy & Security
249
-
250
- - **Sensitive data:** Avoid logging secrets, API keys, or PII in details field
251
- - **Access control:** Database file permissions (chmod 600) restrict access
252
- - **Audit trail:** Logs include full decision reasoning for compliance
253
-
254
- ## Future Enhancements
255
-
256
- - [ ] Structured logging levels (DEBUG for verbose agent output)
257
- - [ ] Log streaming to external systems (Elasticsearch, CloudWatch)
258
- - [ ] Automatic anomaly detection (high failure rates, long latencies)
259
- - [ ] Log rotation and archival
260
- - [ ] Web UI for log browsing and search
@@ -1,65 +0,0 @@
1
- # Redis Coordination Skill
2
-
3
- ## Quick Start
4
-
5
- ### Prerequisites
6
- - Redis 5.0+
7
- - bash
8
- - jq
9
- - redis-cli
10
-
11
- ### Installation
12
- 1. Ensure Redis is running
13
- 2. Configure the Redis connection in `config.json`
14
- 3. Make scripts executable:
15
- ```bash
16
- chmod +x invoke-waiting-mode.sh
17
- ```
18
-
19
- ### Basic Usage
20
-
21
- #### Consensus Collection (Updated)
22
- ```bash
23
- # Agent reports results
24
- ./invoke-waiting-mode.sh report \
25
- --task-id "my-task" \
26
- --agent-id "agent-1" \
27
- --confidence 0.95
28
-
29
- # Collect and evaluate consensus
30
- ./invoke-waiting-mode.sh collect \
31
- --task-id "my-task" \
32
- --agent-ids "agent-1,agent-2,agent-3"
33
- ```
34
-
35
- ## Important Changes in P7 (Redis Script Cleanup)
36
-
37
- ### Deprecation Notices
38
- - 🚨 `enter` and `wake` subcommands are NO LONGER SUPPORTED
39
- - Agents should exit cleanly without waiting mode
40
- - Coordinator spawns agents directly
41
- - Fork-ID references have been removed
42
-
43
- ### Migration Guide
44
- - Update agent scripts to exit cleanly after completing their task
45
- - Remove manual waiting mode calls
46
- - Use direct agent spawning in orchestrator
47
-
48
- ## Script Categories
49
- - **Production Scripts**:
50
- - `invoke-waiting-mode.sh`: Redis coordination wrapper
51
- - `orchestrate-cfn-loop.sh`: CFN Loop orchestration
52
- - **Demos and Tests**: Located in `./demos/`
53
-
54
- ## Performance
55
- - Zero-token waiting
56
- - Sub-100ms wake-up latency
57
- - Supports 10+ concurrent agents
58
- - Configurable consensus thresholds
59
-
60
- ## Configuration Options
61
- See `SKILL.md` for detailed configuration and usage instructions.
62
-
63
- ## Troubleshooting
64
- - If you encounter issues with old scripts, refer to the Migration Guide section above
65
- - Test scripts are available in `./demos/` directory
@@ -1,25 +0,0 @@
1
- # Security Review: Metrics Export System (Phase 7)
2
-
3
- ## Overview
4
- Security review conducted by security-specialist-3 for Redis Coordination metrics export functionality.
5
-
6
- ## Confidence Score: 0.92 (High)
7
-
8
- ### Key Findings
9
- - ✅ Robust input validation
10
- - ✅ Secure file handling
11
- - ✅ Minimal data exposure
12
- - ⚠️ Recommended ACL improvements
13
-
14
- ### Recommendations
15
- 1. Implement optional PII sanitization
16
- 2. Enhance Redis key access controls
17
- 3. Create metrics export audit logging
18
- 4. Add optional export encryption
19
-
20
- ### Compliance
21
- - NIST SP 800-53 Alignment: Moderate Impact
22
- - SOC 2 Type II Ready
23
- - GDPR Data Minimization Compliant
24
-
25
- The full detailed report is available in the source code comments.