claude-flow-novice 2.14.2 → 2.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/.claude/commands/CFN_LOOP_TASK_MODE.md +4 -47
  2. package/.claude/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  3. package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +4 -47
  4. package/claude-assets/skills/cfn-redis-coordination/demos/test-cancel-swarm.sh +0 -276
  5. package/dist/cli/agent-prompt-builder.js +25 -0
  6. package/dist/cli/agent-prompt-builder.js.map +1 -1
  7. package/dist/cli/config-manager.js +91 -109
  8. package/package.json +1 -1
  9. package/scripts/init-project.js +1 -1
  10. package/.claude/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  11. package/.claude/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  12. package/.claude/skills/cfn-redis-coordination/LOGGING.md +0 -260
  13. package/.claude/skills/cfn-redis-coordination/README.md +0 -65
  14. package/.claude/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  15. package/.claude/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  16. package/.claude/skills/cfn-redis-coordination/SKILL.md +0 -720
  17. package/.claude/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  18. package/.claude/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  19. package/.claude/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  20. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  21. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  22. package/.claude/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  23. package/.claude/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  24. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  25. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  26. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  27. package/.claude/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  28. package/.claude/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  29. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  30. package/.claude/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  31. package/.claude/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  32. package/.claude/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  33. package/.claude/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  34. package/.claude/skills/cfn-redis-coordination/examples/README.md +0 -73
  35. package/.claude/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  36. package/.claude/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  37. package/.claude/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  38. package/.claude/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  39. package/.claude/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  40. package/.claude/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  41. package/.claude/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  42. package/.claude/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  43. package/.claude/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  44. package/.claude/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  45. package/.claude/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  46. package/.claude/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  47. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  48. package/.claude/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  49. package/.claude/skills/cfn-redis-coordination/log-event.sh +0 -109
  50. package/.claude/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  51. package/.claude/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  52. package/.claude/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  53. package/.claude/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  54. package/.claude/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  55. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  56. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  57. package/.claude/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  58. package/.claude/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  59. package/.claude/skills/cfn-redis-coordination/priority_wake.py +0 -134
  60. package/.claude/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  61. package/.claude/skills/cfn-redis-coordination/query-logs.sh +0 -103
  62. package/.claude/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  63. package/.claude/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  64. package/.claude/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  65. package/.claude/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  66. package/.claude/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  67. package/.claude/skills/cfn-redis-coordination/signal.sh +0 -38
  68. package/.claude/skills/cfn-redis-coordination/store-context.sh +0 -86
  69. package/.claude/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  70. package/.claude/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  71. package/.claude/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  72. package/.claude/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  73. package/.claude/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  74. package/.claude/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  75. package/.claude/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  76. package/.claude/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  77. package/.claude/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  78. package/.claude/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  79. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  80. package/.claude/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  81. package/.claude/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  82. package/.claude/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  83. package/.claude/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  84. package/.claude/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  85. package/.claude/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
  86. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT.md +0 -57
  87. package/claude-assets/skills/cfn-redis-coordination/HEARTBEAT_MONITORING.md +0 -267
  88. package/claude-assets/skills/cfn-redis-coordination/LOGGING.md +0 -260
  89. package/claude-assets/skills/cfn-redis-coordination/README.md +0 -65
  90. package/claude-assets/skills/cfn-redis-coordination/SECURITY_REVIEW.md +0 -25
  91. package/claude-assets/skills/cfn-redis-coordination/SHUTDOWN_HANDLING.md +0 -164
  92. package/claude-assets/skills/cfn-redis-coordination/SKILL.md +0 -720
  93. package/claude-assets/skills/cfn-redis-coordination/demos/test-dlq.sh +0 -129
  94. package/claude-assets/skills/cfn-redis-coordination/demos/test-iteration-feedback.sh +0 -320
  95. package/claude-assets/skills/cfn-redis-coordination/demos/test-orchestrator.sh +0 -249
  96. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4-unix.sh +0 -148
  97. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake-phase4.sh +0 -163
  98. package/claude-assets/skills/cfn-redis-coordination/demos/test-priority-wake.sh +0 -138
  99. package/claude-assets/skills/cfn-redis-coordination/demos/test-quick-fix.sh +0 -81
  100. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-absolute.sh +0 -45
  101. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-fallback.sh +0 -68
  102. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-percentage.sh +0 -56
  103. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum-with-retry.sh +0 -81
  104. package/claude-assets/skills/cfn-redis-coordination/demos/test-quorum.sh +0 -57
  105. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown-handling.sh +0 -187
  106. package/claude-assets/skills/cfn-redis-coordination/demos/test-shutdown.sh +0 -160
  107. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils-unix.sh +0 -97
  108. package/claude-assets/skills/cfn-redis-coordination/demos/test-utils.sh +0 -97
  109. package/claude-assets/skills/cfn-redis-coordination/demos/test-waiting-mode.sh +0 -59
  110. package/claude-assets/skills/cfn-redis-coordination/examples/README.md +0 -73
  111. package/claude-assets/skills/cfn-redis-coordination/examples/grafana-dashboard.json +0 -352
  112. package/claude-assets/skills/cfn-redis-coordination/examples/hierarchical-pattern.sh +0 -127
  113. package/claude-assets/skills/cfn-redis-coordination/examples/mesh-pattern.sh +0 -171
  114. package/claude-assets/skills/cfn-redis-coordination/examples/timeout-handling.sh +0 -227
  115. package/claude-assets/skills/cfn-redis-coordination/examples/waiting-mode-pattern.sh +0 -239
  116. package/claude-assets/skills/cfn-redis-coordination/execute-product-owner-decision.sh +0 -258
  117. package/claude-assets/skills/cfn-redis-coordination/get-agent-timeout.sh +0 -177
  118. package/claude-assets/skills/cfn-redis-coordination/heartbeat-functions.sh +0 -137
  119. package/claude-assets/skills/cfn-redis-coordination/heartbeat-protocol.md +0 -106
  120. package/claude-assets/skills/cfn-redis-coordination/heartbeat.sh +0 -126
  121. package/claude-assets/skills/cfn-redis-coordination/init-swarm.sh +0 -148
  122. package/claude-assets/skills/cfn-redis-coordination/invoke-redis-pattern.sh +0 -220
  123. package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +0 -283
  124. package/claude-assets/skills/cfn-redis-coordination/list-active-swarms.sh +0 -147
  125. package/claude-assets/skills/cfn-redis-coordination/log-event.sh +0 -109
  126. package/claude-assets/skills/cfn-redis-coordination/metrics-export.sh +0 -674
  127. package/claude-assets/skills/cfn-redis-coordination/metrics-schema.json +0 -66
  128. package/claude-assets/skills/cfn-redis-coordination/metrics-storage.md +0 -31
  129. package/claude-assets/skills/cfn-redis-coordination/monitor-cfn-violations.sh +0 -391
  130. package/claude-assets/skills/cfn-redis-coordination/monitor-heartbeats.sh +0 -101
  131. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop-v3.sh +0 -141
  132. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh +0 -31
  133. package/claude-assets/skills/cfn-redis-coordination/orchestrate-cfn-loop.sh.bak +0 -0
  134. package/claude-assets/skills/cfn-redis-coordination/priority-wake-mechanism.md +0 -75
  135. package/claude-assets/skills/cfn-redis-coordination/priority_wake.py +0 -134
  136. package/claude-assets/skills/cfn-redis-coordination/query-dlq.sh +0 -162
  137. package/claude-assets/skills/cfn-redis-coordination/query-logs.sh +0 -103
  138. package/claude-assets/skills/cfn-redis-coordination/redis-pattern.sh +0 -619
  139. package/claude-assets/skills/cfn-redis-coordination/retrieve-context.sh +0 -58
  140. package/claude-assets/skills/cfn-redis-coordination/select-specialist-agent.sh +0 -371
  141. package/claude-assets/skills/cfn-redis-coordination/semantic-match-tfidf.py +0 -252
  142. package/claude-assets/skills/cfn-redis-coordination/send-heartbeat.sh +0 -165
  143. package/claude-assets/skills/cfn-redis-coordination/signal.sh +0 -38
  144. package/claude-assets/skills/cfn-redis-coordination/store-context.sh +0 -86
  145. package/claude-assets/skills/cfn-redis-coordination/store-epic-context.sh +0 -123
  146. package/claude-assets/skills/cfn-redis-coordination/test-context-injection.sh +0 -354
  147. package/claude-assets/skills/cfn-redis-coordination/test-timeout-enforcement.sh +0 -513
  148. package/claude-assets/skills/cfn-redis-coordination/tests/convert-line-endings.sh +0 -15
  149. package/claude-assets/skills/cfn-redis-coordination/tests/dlq-functionality-test.sh +0 -102
  150. package/claude-assets/skills/cfn-redis-coordination/tests/edge-cases-test.sh +0 -99
  151. package/claude-assets/skills/cfn-redis-coordination/tests/integration-test.sh +0 -170
  152. package/claude-assets/skills/cfn-redis-coordination/tests/retry-mechanism-test.sh +0 -82
  153. package/claude-assets/skills/cfn-redis-coordination/tests/run-test-suite.sh +0 -92
  154. package/claude-assets/skills/cfn-redis-coordination/tests/run-tests.sh +0 -4
  155. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-monitoring.sh +0 -418
  156. package/claude-assets/skills/cfn-redis-coordination/tests/test-heartbeat-simple.sh +0 -124
  157. package/claude-assets/skills/cfn-redis-coordination/tests/test-primitives.sh +0 -166
  158. package/claude-assets/skills/cfn-redis-coordination/tests/test-utils.sh +0 -54
  159. package/claude-assets/skills/cfn-redis-coordination/tests/test_utils.sh +0 -49
  160. package/claude-assets/skills/cfn-redis-coordination/v2_modularization/core_orchestration.sh +0 -76
  161. package/claude-assets/skills/cfn-redis-coordination/validate-parameters.sh +0 -492
@@ -1,674 +0,0 @@
1
#!/bin/bash
# metrics-export.sh - Export CFN Loop metrics in multiple formats
# Supports: JSON, Prometheus, OpenTelemetry (OTLP), CSV
# Features: Time range filtering, remote push, TTL cleanup

set -euo pipefail

# ============================================================================
# Configuration & Default Values
# ============================================================================

# Directory containing this script. Nothing else in this file reads it.
# shellcheck disable=SC2034
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Command-line options; main() overwrites these while parsing arguments.
TASK_ID=""
FORMAT="json"
SINCE=""
UNTIL=""
PUSH_GATEWAY=""
OUTPUT_FILE=""
CLEANUP_TTL=""
VERBOSE=false

# Metric categories (matching orchestrate-cfn-loop.sh). Each name is the last
# segment of a Redis key of the form swarm:<task_id>:metrics:<category>.
# The list is only ever read, so it is marked readonly.
readonly -a METRICS_CATEGORIES=(
  "iteration_start"
  "iteration_duration"
  "loop3_consensus"
  "loop2_consensus"
  "agent_latency"
  "gate_failures"
  "timeout_count"
  "quorum_fallback"
  "retry_count"
)
34
-
35
- # ============================================================================
36
- # Utility Functions
37
- # ============================================================================
38
-
39
#######################################
# Print a timestamped diagnostic message when verbose mode is enabled.
# Globals:   VERBOSE (read) - output is produced only when it equals "true"
# Arguments: $@ - message words, joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stderr; nothing on stdout
#######################################
log() {
  if [[ "${VERBOSE:-false}" == true ]]; then
    # printf instead of echo so a message starting with "-n"/"-e" is printed
    # literally rather than being taken as an option.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
  fi
}
44
-
45
#######################################
# Report a fatal error and terminate the script.
# Arguments: $@ - message words, joined with single spaces
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never returns; exits the current shell with status 1
#######################################
error() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
49
-
50
#######################################
# Print the command-line help text and exit.
# Arguments: $1 - exit status to use (optional, default 1 so that existing
#                 argument-less calls keep signalling an error)
# Outputs:   help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  # Unquoted heredoc: $0 expands to the script name and "\\" yields a single
  # backslash (the line-continuation marker shown in the examples).
  cat <<EOF
Usage: $0 --task-id <id> [OPTIONS]

Export CFN Loop metrics in multiple formats.

Required:
  --task-id <id>          Task ID to export metrics for

Optional:
  --format <type>         Export format: json|prometheus|otlp|csv (default: json)
  --since <timestamp>     Filter metrics since timestamp (ISO 8601 or Unix ms)
  --until <timestamp>     Filter metrics until timestamp (ISO 8601 or Unix ms)
  --output <file>         Write output to file instead of stdout
  --push-gateway <url>    Push Prometheus metrics to remote gateway
  --cleanup-ttl <days>    Cleanup metrics older than N days
  --verbose               Enable verbose logging

Examples:
  # Export all metrics as JSON
  $0 --task-id redis-phase7-1760900252 --format json

  # Export to Prometheus with time range
  $0 --task-id task-123 --format prometheus \\
    --since "2025-10-19T00:00:00Z" --until "2025-10-19T23:59:59Z"

  # Push to Prometheus Pushgateway
  $0 --task-id task-123 --format prometheus \\
    --push-gateway http://localhost:9091

  # Export to file with cleanup
  $0 --task-id task-123 --format json \\
    --output metrics.json --cleanup-ttl 30

  # Export OTLP for OpenTelemetry
  $0 --task-id task-123 --format otlp --output traces.json

EOF
  exit "${1:-1}"
}
90
-
91
- # ============================================================================
92
- # Time Conversion Functions
93
- # ============================================================================
94
-
95
- # Convert ISO 8601 timestamp to Unix milliseconds
96
#######################################
# Convert a timestamp to Unix milliseconds.
#
# Accepts the two input forms promised by the --since/--until help text:
#   * Unix milliseconds (digits only, 9+ characters) - returned unchanged;
#     shorter digit strings such as 20251019 are still handed to date(1)
#   * ISO 8601 (e.g. 2025-10-19T00:00:00Z) - converted with date(1)
#
# Arguments: $1 - timestamp string (may be empty)
# Outputs:   milliseconds since the epoch on stdout, or "0" when the input
#            is empty or cannot be parsed
# Returns:   0 always
#######################################
iso_to_unix_ms() {
  local iso_timestamp="${1:-}"
  local result

  # An empty string must not reach GNU date: `date -d ""` succeeds and
  # resolves to midnight today instead of failing.
  if [[ -z "$iso_timestamp" ]]; then
    echo "0"
    return 0
  fi

  # Already Unix milliseconds: pass through instead of letting date(1)
  # reject or misparse a bare number.
  if [[ "$iso_timestamp" =~ ^[0-9]{9,}$ ]]; then
    printf '%s\n' "$iso_timestamp"
    return 0
  fi

  if date --version >/dev/null 2>&1; then
    # GNU date
    result=$(date -d "$iso_timestamp" +%s%3N 2>/dev/null) || result="0"
  elif [[ "$iso_timestamp" == *Z ]]; then
    # BSD/macOS date; a trailing "Z" means UTC, so parse with -u rather than
    # in the local timezone.
    result=$(date -j -u -f "%Y-%m-%dT%H:%M:%S" "${iso_timestamp%%Z*}" +%s000 2>/dev/null) || result="0"
  else
    # BSD/macOS date, no zone designator: interpret as local time.
    result=$(date -j -f "%Y-%m-%dT%H:%M:%S" "$iso_timestamp" +%s000 2>/dev/null) || result="0"
  fi

  printf '%s\n' "${result:-0}"
}
108
-
109
- # Filter metrics by time range
110
#######################################
# Echo a value only if its timestamp lies inside the --since/--until window.
# Globals:   SINCE, UNTIL (read) - optional inclusive bounds; empty = no bound
# Arguments: $1 - value to pass through
#            $2 - timestamp of the value, in Unix milliseconds
# Outputs:   the value on stdout when it is inside the window
# Returns:   0 when the value was echoed, 1 when it was filtered out.
#            When a bound is active, a timestamp that is not a plain
#            non-negative integer cannot be placed in the window and is
#            filtered out.
#######################################
filter_by_time() {
  local value="$1"
  local timestamp="$2"
  local bound_ms

  # If no time filters, include all
  if [[ -z "${SINCE:-}" && -z "${UNTIL:-}" ]]; then
    printf '%s\n' "$value"
    return 0
  fi

  # Validate before the value reaches (( )): arithmetic evaluation of
  # arbitrary text would expand variable names and array subscripts.
  [[ "$timestamp" =~ ^[0-9]+$ ]] || return 1

  # Only convert the bounds that are actually set. 10# forces base 10 so a
  # leading zero is not read as octal.
  if [[ -n "${SINCE:-}" ]]; then
    bound_ms=$(iso_to_unix_ms "$SINCE")
    ((10#$timestamp >= 10#$bound_ms)) || return 1
  fi

  if [[ -n "${UNTIL:-}" ]]; then
    bound_ms=$(iso_to_unix_ms "$UNTIL")
    ((10#$timestamp <= 10#$bound_ms)) || return 1
  fi

  printf '%s\n' "$value"
}
130
-
131
- # ============================================================================
132
- # Metric Collection Functions
133
- # ============================================================================
134
-
135
- # Collect all metrics from Redis for a task
136
#######################################
# Load all metrics of one task from Redis into the global METRICS map.
# Globals:   METRICS_CATEGORIES (read)
#            METRICS (written) - associative array, recreated on every call:
#              METRICS[<category>]  raw `LRANGE key 0 -1` output for
#                                   swarm:<task_id>:metrics:<category>
#                                   (empty string when nothing was returned)
#              METRICS[agent_keys]  names of keys matching
#                                   swarm:<task_id>:agent:*:metrics:*, one per
#                                   line; set only when at least one exists
# Arguments: $1 - task ID
# Outputs:   progress messages via log()
# Returns:   0; Redis failures are treated as "no data" (best effort)
#######################################
collect_metrics() {
  local task_id="$1"
  local category key values agent_pattern agent_keys count

  # Start from an empty map so entries from an earlier call cannot leak in.
  declare -gA METRICS=()

  log "Collecting metrics for task: $task_id"

  for category in "${METRICS_CATEGORIES[@]}"; do
    key="swarm:${task_id}:metrics:${category}"
    # Declaration and assignment are separate so the redis-cli status is not
    # masked by `local`.
    if ! values=$(redis-cli LRANGE "$key" 0 -1 2>/dev/null); then
      log "redis-cli LRANGE failed for $key; treating as empty"
      values=""
    fi

    METRICS["$category"]="$values"
    if [[ -n "$values" ]]; then
      count=$(($(wc -l <<<"$values")))
      log "Collected ${category}: $count entries"
    fi
  done

  # Collect agent-specific metrics. --scan iterates incrementally with SCAN
  # instead of KEYS, which blocks the server while it walks the keyspace.
  agent_pattern="swarm:${task_id}:agent:*:metrics:*"
  agent_keys=$(redis-cli --scan --pattern "$agent_pattern" 2>/dev/null) || agent_keys=""

  if [[ -n "$agent_keys" ]]; then
    METRICS["agent_keys"]="$agent_keys"
    count=$(($(wc -w <<<"$agent_keys")))
    log "Found agent-specific metric keys: $count"
  fi
}
165
-
166
- # Calculate statistical summaries from JSON array
167
#######################################
# Calculate count/sum/avg/min/max over the numeric values of a JSON array.
# Arguments: $1 - JSON array (of scalars, or of objects when $2 is given)
#            $2 - optional field to read from each element; a dotted name is
#                 treated as a nested path
# Outputs:   a JSON object {"count","sum","avg","min","max"} on stdout.
#            Values that are not plain non-negative decimals are skipped;
#            with no usable values every statistic is 0. "avg" is rounded to
#            two decimals.
# Returns:   0; unparsable input yields the all-zero object
#######################################
calculate_stats() {
  local json_array="$1"
  local field="${2:-}"
  local values=""

  # Extract the candidate values. The field name is passed with --arg
  # instead of being spliced into the jq program text.
  if [[ -n "$field" ]]; then
    values=$(jq -r --arg f "$field" \
      '.[] | getpath($f | split(".")) // empty' <<<"$json_array" 2>/dev/null) || values=""
  else
    values=$(jq -r '.[] // empty' <<<"$json_array" 2>/dev/null) || values=""
  fi

  # One awk pass does all the arithmetic. Unlike bc, whose output drops the
  # leading zero (".85") and is therefore not valid JSON, num() always
  # renders a well-formed JSON number.
  awk '
    # Render x without trailing zeros: 2 -> "2", 0.5 -> "0.5".
    function num(x,    s) {
      s = sprintf("%.6f", x)
      sub(/0+$/, "", s)
      sub(/\.$/, "", s)
      return s
    }
    {
      for (i = 1; i <= NF; i++) {
        # Skip non-numeric values
        if ($i !~ /^[0-9]+\.?[0-9]*$/) continue
        v = $i + 0
        if (n == 0 || v < min) min = v
        if (n == 0 || v > max) max = v
        sum += v
        n++
      }
    }
    END {
      avg = (n > 0) ? sprintf("%.2f", sum / n) : "0"
      printf "{\n  \"count\": %d,\n  \"sum\": %s,\n  \"avg\": %s,\n  \"min\": %s,\n  \"max\": %s\n}\n", \
        n, num(sum), avg, num(min), num(max)
    }
  ' <<<"$values"
}
217
-
218
- # ============================================================================
219
- # Export Format Functions
220
- # ============================================================================
221
-
222
- # Export as JSON
223
#######################################
# Export the collected metrics as one JSON document.
# Globals:   METRICS_CATEGORIES (read), METRICS (read)
# Arguments: $1 - task ID
# Outputs:   JSON on stdout with "task_id", "export_timestamp", a "metrics"
#            object holding {"values","stats"} for every non-empty category,
#            and a "metadata" object with category counts
#######################################
export_json() {
  local task_id="$1"
  local category values json_array stats field escaped_id
  local first=true
  local non_empty=0

  log "Exporting metrics in JSON format"

  # Escape backslashes and double quotes so the task ID cannot break out of
  # its JSON string.
  escaped_id=${task_id//\\/\\\\}
  escaped_id=${escaped_id//\"/\\\"}

  # Build JSON structure
  cat <<EOF
{
  "task_id": "$escaped_id",
  "export_timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "metrics": {
EOF

  for category in "${METRICS_CATEGORIES[@]}"; do
    values="${METRICS[$category]:-}"
    [[ -n "$values" ]] || continue
    non_empty=$((non_empty + 1))

    # Comma between members, never before the first one.
    if [[ "$first" == true ]]; then
      first=false
    else
      echo ","
    fi

    # Convert to JSON array (Redis LRANGE returns newline-separated values)
    json_array=$(jq -s -c '.' <<<"$values" 2>/dev/null) || json_array="[]"

    # Field that carries the number for each category. agent_latency entries
    # store it as latency_ms (the field the Prometheus and CSV exporters
    # read), not duration_ms.
    case "$category" in
      iteration_duration) field="duration_ms" ;;
      agent_latency) field="latency_ms" ;;
      loop3_consensus | loop2_consensus) field="consensus" ;;
      *) field="" ;; # scalar metrics: the entries are the values
    esac
    stats=$(calculate_stats "$json_array" "$field")

    cat <<EOF
    "$category": {
      "values": $json_array,
      "stats": $stats
    }
EOF
  done

  cat <<EOF

  },
  "metadata": {
    "total_categories": ${#METRICS_CATEGORIES[@]},
    "non_empty_categories": $non_empty
  }
}
EOF
}
282
-
283
- # Export as Prometheus text format
284
# Emit HELP/TYPE plus one sample line per stored JSON entry of a list metric.
# $1 task ID, $2 category, $3 metric name, $4 help text, $5 jq program that
# renders one entry as a sample line ($metric and $task are bound for it).
_prometheus_emit_series() {
  local task_id="$1" category="$2" metric="$3" help="$4" jq_sample="$5"

  [[ -n "${METRICS[$category]:-}" ]] || return 0

  echo "# HELP $metric $help"
  # Samples are individual observations without _bucket/_sum/_count series,
  # so they are exposed as a gauge; declaring them a histogram is not valid
  # exposition format.
  echo "# TYPE $metric gauge"
  # jq renders the whole line, so label values containing spaces survive.
  jq -r --arg metric "$metric" --arg task "$task_id" "$jq_sample" \
    <<<"${METRICS[$category]}" 2>/dev/null || true
  echo ""
}

# Emit HELP/TYPE plus the single sample of a counter read with Redis GET.
# $1 task ID, $2 category, $3 metric name, $4 help text.
_prometheus_emit_counter() {
  local task_id="$1" category="$2" metric="$3" help="$4"
  local value

  value=$(redis-cli GET "swarm:${task_id}:metrics:${category}" 2>/dev/null) || value=""
  # A missing key or an error reply (e.g. WRONGTYPE text) must not end up as
  # the sample value.
  [[ "$value" =~ ^[0-9]+(\.[0-9]+)?$ ]] || value=0

  echo "# HELP $metric $help"
  echo "# TYPE $metric counter"
  echo "${metric}{task_id=\"$task_id\"} $value"
  echo ""
}

#######################################
# Export the collected metrics in Prometheus text exposition format.
# Globals:   METRICS (read)
# Arguments: $1 - task ID
# Outputs:   on stdout, for every non-empty list category one sample per
#            entry (iteration duration, agent latency, loop 3/2 consensus),
#            followed by four counters read directly from Redis (gate
#            failures, timeouts, quorum fallbacks, retries)
#######################################
export_prometheus() {
  local task_id="$1"

  log "Exporting metrics in Prometheus format"

  _prometheus_emit_series "$task_id" iteration_duration cfn_iteration_duration_ms \
    "CFN Loop iteration duration in milliseconds" \
    '"\($metric){task_id=\"\($task)\",iteration=\"\(.iteration)\"} \(.duration_ms)"'

  _prometheus_emit_series "$task_id" agent_latency cfn_agent_latency_ms \
    "Agent completion latency in milliseconds" \
    '"\($metric){task_id=\"\($task)\",agent=\"\(.agent)\",loop=\"\(.loop)\",iteration=\"\(.iteration)\"} \(.latency_ms)"'

  _prometheus_emit_series "$task_id" loop3_consensus cfn_loop3_consensus \
    "Loop 3 consensus score (0.0-1.0)" \
    '"\($metric){task_id=\"\($task)\",iteration=\"\(.iteration)\"} \(.consensus)"'

  _prometheus_emit_series "$task_id" loop2_consensus cfn_loop2_consensus \
    "Loop 2 consensus score (0.0-1.0)" \
    '"\($metric){task_id=\"\($task)\",iteration=\"\(.iteration)\"} \(.consensus)"'

  _prometheus_emit_counter "$task_id" gate_failures cfn_gate_failures_total \
    "Total number of gate failures"
  _prometheus_emit_counter "$task_id" timeout_count cfn_timeout_total \
    "Total number of agent timeouts"
  _prometheus_emit_counter "$task_id" quorum_fallback cfn_quorum_fallback_total \
    "Total number of quorum fallbacks"
  _prometheus_emit_counter "$task_id" retry_count cfn_retry_total \
    "Total number of retry attempts"
}
365
-
366
- # Export as OpenTelemetry OTLP JSON
367
# Print $1 random bytes as lowercase hex. Uses openssl when available and
# falls back to bash's $RANDOM, so an ID is never the all-zero placeholder.
_otlp_random_hex() {
  local bytes="$1"
  local hex="" chunk

  if hex=$(openssl rand -hex "$bytes" 2>/dev/null) && ((${#hex} == bytes * 2)); then
    printf '%s' "$hex"
    return 0
  fi

  hex=""
  while ((${#hex} < bytes * 2)); do
    printf -v chunk '%04x' "$RANDOM"
    hex+="$chunk"
  done
  printf '%s' "${hex:0:bytes * 2}"
}

#######################################
# Export iterations as an OpenTelemetry OTLP JSON document (one span each).
# Globals:   METRICS (read) - iteration_start (one start time in Unix ms per
#            line) is paired by position with iteration_duration entries
#            (JSON objects carrying duration_ms)
# Arguments: $1 - task ID
# Outputs:   OTLP "resourceSpans" JSON on stdout. Spans are produced only
#            when both categories are non-empty. A start time that is not a
#            plain integer is skipped; a missing or non-integer duration
#            counts as 0 ms.
#######################################
export_otlp() {
  local task_id="$1"
  local first_span=true
  local iteration=0
  local start_time duration start_ns end_ns trace_id span_id escaped_id
  local -a starts=()
  local -a durations=()

  log "Exporting metrics in OTLP format"

  # Escape backslashes and double quotes so the task ID cannot break out of
  # its JSON string.
  escaped_id=${task_id//\\/\\\\}
  escaped_id=${escaped_id//\"/\\\"}

  # OTLP Trace/Span structure for CFN Loop
  cat <<EOF
{
  "resourceSpans": [
    {
      "resource": {
        "attributes": [
          {
            "key": "service.name",
            "value": { "stringValue": "cfn-loop-orchestrator" }
          },
          {
            "key": "task.id",
            "value": { "stringValue": "$escaped_id" }
          }
        ]
      },
      "scopeSpans": [
        {
          "scope": {
            "name": "redis-coordination",
            "version": "2.2.0"
          },
          "spans": [
EOF

  # Create spans for each iteration
  if [[ -n "${METRICS[iteration_start]:-}" && -n "${METRICS[iteration_duration]:-}" ]]; then
    mapfile -t starts <<<"${METRICS[iteration_start]}"
    # One jq pass extracts every duration instead of one jq call per span.
    mapfile -t durations < <(
      jq -r '(.duration_ms? // 0)' <<<"${METRICS[iteration_duration]}" 2>/dev/null
    )

    for start_time in "${starts[@]}"; do
      iteration=$((iteration + 1))

      # Validate before arithmetic: Redis content must not be evaluated as
      # an arithmetic expression.
      [[ "$start_time" =~ ^[0-9]+$ ]] || continue
      duration="${durations[iteration - 1]:-0}"
      duration="${duration%%.*}" # drop a fractional part, if any
      [[ "$duration" =~ ^[0-9]+$ ]] || duration=0

      if [[ "$first_span" == true ]]; then
        first_span=false
      else
        echo ","
      fi

      # Convert to nanoseconds for OTLP (10# guards against octal parsing)
      start_ns=$((10#$start_time * 1000000))
      end_ns=$(((10#$start_time + 10#$duration) * 1000000))

      # Generate trace and span IDs
      trace_id=$(_otlp_random_hex 16)
      span_id=$(_otlp_random_hex 8)

      cat <<SPAN
            {
              "traceId": "$trace_id",
              "spanId": "$span_id",
              "name": "cfn_loop_iteration_${iteration}",
              "kind": 1,
              "startTimeUnixNano": "$start_ns",
              "endTimeUnixNano": "$end_ns",
              "attributes": [
                {
                  "key": "iteration",
                  "value": { "intValue": "$iteration" }
                },
                {
                  "key": "duration_ms",
                  "value": { "intValue": "$duration" }
                }
              ]
            }
SPAN
    done
  fi

  cat <<EOF

          ]
        }
      ]
    }
  ]
}
EOF
}
461
-
462
- # Export as CSV
463
#######################################
# Export the collected metrics as CSV.
# Globals:   METRICS_CATEGORIES (read), METRICS (read)
# Arguments: $1 - task ID
# Outputs:   on stdout a header line followed by one 8-column row per entry:
#            timestamp,category,metric_type,value,task_id,iteration,agent,loop
#            ("timestamp" is the export time, not the time of the entry).
#            Categories without collected data are skipped.
#######################################
export_csv() {
  local task_id="$1"
  local category values value export_timestamp row_filter

  log "Exporting metrics in CSV format"

  # CSV header
  echo "timestamp,category,metric_type,value,task_id,iteration,agent,loop"

  export_timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Export each category
  for category in "${METRICS_CATEGORIES[@]}"; do
    values="${METRICS[$category]:-}"
    [[ -n "$values" ]] || continue

    # Shell values reach jq as --arg bindings ($ts, $cat, $task) rather than
    # being spliced into the program text, so quotes in them cannot break it.
    case "$category" in
      iteration_duration)
        row_filter='.[] | "\($ts),\($cat),duration,\(.duration_ms),\($task),\(.iteration),,"'
        ;;
      agent_latency)
        row_filter='.[] | "\($ts),\($cat),latency,\(.latency_ms),\($task),\(.iteration),\(.agent),\(.loop)"'
        ;;
      loop3_consensus | loop2_consensus)
        row_filter='.[] | "\($ts),\($cat),consensus,\(.consensus),\($task),\(.iteration),,"'
        ;;
      *)
        # For scalar counter metrics
        value=$(redis-cli GET "swarm:${task_id}:metrics:${category}" 2>/dev/null) || value=""
        # Only a real, non-zero number is exported; an error reply such as
        # WRONGTYPE text is not a value.
        if [[ "$value" =~ ^[0-9]+(\.[0-9]+)?$ && "$value" != "0" ]]; then
          # Three trailing commas = empty iteration, agent and loop columns,
          # giving the same 8 fields as the header.
          echo "$export_timestamp,$category,count,$value,$task_id,,,"
        fi
        continue
        ;;
    esac

    # -s slurps the newline-separated entries into one array (Redis LRANGE
    # output); a parse failure produces no rows for the category.
    jq -r -s --arg ts "$export_timestamp" --arg cat "$category" --arg task "$task_id" \
      "$row_filter" <<<"$values" 2>/dev/null || true
  done
}
500
-
501
- # ============================================================================
502
- # Push to Prometheus Pushgateway
503
- # ============================================================================
504
-
505
#######################################
# Push Prometheus text-format metrics to a Pushgateway.
# Arguments: $1 - gateway base URL (a trailing slash is tolerated)
#            $2 - task ID, used as the "instance" grouping label
#            $3 - metrics in Prometheus text exposition format
# Outputs:   a confirmation line on stdout on success
# Returns:   0 on any 2xx response; otherwise calls error(), which exits
#######################################
push_to_gateway() {
  local gateway_url="${1%/}"
  local task_id="$2"
  local metrics_data="$3"
  local response http_code body

  log "Pushing metrics to Prometheus Pushgateway: $gateway_url"

  # The payload is streamed on stdin with a final newline: the text format
  # requires every line to end in "\n", and the caller's command
  # substitution has stripped the trailing one. -w appends the status code
  # on its own last line.
  response=$(printf '%s\n' "$metrics_data" \
    | curl -s -w '\n%{http_code}' --data-binary @- \
      "${gateway_url}/metrics/job/cfn-loop/instance/${task_id}" 2>&1) || true

  http_code="${response##*$'\n'}"
  body=""
  if [[ "$response" == *$'\n'* ]]; then
    body="${response%$'\n'*}"
  fi

  # Any 2xx counts as success, not only 200/201.
  if [[ "$http_code" =~ ^2[0-9][0-9]$ ]]; then
    log "Successfully pushed metrics to gateway (HTTP $http_code)"
    echo "Metrics pushed successfully to $gateway_url"
  else
    error "Failed to push metrics (HTTP $http_code)${body:+: $body}"
  fi
}
525
-
526
- # ============================================================================
527
- # TTL Cleanup
528
- # ============================================================================
529
-
530
#######################################
# Remove metric entries older than a number of days from the task's lists.
# Globals:   METRICS_CATEGORIES (read)
# Arguments: $1 - task ID
#            $2 - age limit in whole days
# Outputs:   "Cleanup complete: N entries removed" on stdout
# Returns:   0; calls error() (exit 1) when $2 is not a whole number
#
# Safety rules:
#   * An entry is removed only when it has a numeric "timestamp" (Unix ms)
#     older than the cutoff; entries that cannot be dated are kept.
#   * A list whose content jq cannot parse is left untouched.
#   * A list is rewritten only when at least one entry actually expired.
# NOTE(review): the DEL + RPUSH rewrite is not atomic; a writer appending
# between the two steps could lose or reorder an entry.
#######################################
cleanup_old_metrics() {
  local task_id="$1"
  local ttl_days="$2"
  local category key values kept entry
  local cutoff_ms original_count kept_count
  local deleted_count=0
  local -a kept_entries=()

  [[ "$ttl_days" =~ ^[0-9]+$ ]] \
    || error "Invalid --cleanup-ttl value: $ttl_days (must be a whole number of days)"

  log "Cleaning up metrics older than $ttl_days days"

  # Plain epoch arithmetic instead of `date -d "N days ago"`, which only GNU
  # date understands.
  cutoff_ms=$((($(date +%s) - 10#$ttl_days * 86400) * 1000))

  for category in "${METRICS_CATEGORIES[@]}"; do
    key="swarm:${task_id}:metrics:${category}"
    values=$(redis-cli LRANGE "$key" 0 -1 2>/dev/null) || values=""

    [[ -n "$values" ]] || continue

    # LRANGE output is newline-separated, so jq reads it as a stream of
    # values (-s for the count). Wrapping it in "[...]" is not valid JSON
    # once there is more than one entry.
    if ! original_count=$(jq -s 'length' <<<"$values" 2>/dev/null) \
      || ! kept=$(jq -c --argjson cutoff "$cutoff_ms" \
        'select((.timestamp? // null) as $t | ($t | type) != "number" or $t >= $cutoff)' \
        <<<"$values" 2>/dev/null); then
      log "Skipping $key: entries could not be parsed as JSON"
      continue
    fi

    kept_entries=()
    if [[ -n "$kept" ]]; then
      mapfile -t kept_entries <<<"$kept"
    fi
    kept_count=${#kept_entries[@]}

    # Nothing expired: leave the list exactly as it is.
    ((kept_count < original_count)) || continue

    deleted_count=$((deleted_count + original_count - kept_count))

    # Replace list with filtered values
    redis-cli DEL "$key" >/dev/null
    if ((kept_count > 0)); then
      for entry in "${kept_entries[@]}"; do
        # Passed as an argument, so no trailing newline is stored with it.
        redis-cli RPUSH "$key" "$entry" >/dev/null
      done
    fi
  done

  log "Deleted $deleted_count old metric entries"
  echo "Cleanup complete: $deleted_count entries removed"
}
570
-
571
- # ============================================================================
572
- # Main Execution
573
- # ============================================================================
574
-
575
#######################################
# Entry point: parse options, collect metrics, export them, then optionally
# push to a Pushgateway and clean up old entries.
# Globals:   TASK_ID, FORMAT, SINCE, UNTIL, OUTPUT_FILE, PUSH_GATEWAY,
#            CLEANUP_TTL, VERBOSE (written while parsing, read afterwards)
# Arguments: $@ - command-line arguments (see usage)
# Outputs:   the export on stdout, or a confirmation line when --output is
#            given; diagnostics on stderr
# Returns:   0 on success; exits non-zero through error()/usage() on invalid
#            arguments
#######################################
main() {
  local output=""

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --task-id | --format | --since | --until | --output | --push-gateway | --cleanup-ttl)
        # Check explicitly: under `set -u` a missing value would otherwise
        # abort with a bare "unbound variable" message.
        [[ $# -ge 2 ]] || error "Option $1 requires a value"
        case "$1" in
          --task-id) TASK_ID="$2" ;;
          --format) FORMAT="$2" ;;
          --since) SINCE="$2" ;;
          --until) UNTIL="$2" ;;
          --output) OUTPUT_FILE="$2" ;;
          --push-gateway) PUSH_GATEWAY="$2" ;;
          --cleanup-ttl) CLEANUP_TTL="$2" ;;
        esac
        shift 2
        ;;
      --verbose)
        VERBOSE=true
        shift
        ;;
      --help | -h)
        # Asking for help is not an error.
        usage 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        ;;
    esac
  done

  # Validate everything before touching Redis, so a bad argument cannot
  # surface only after the export has already been written.
  if [[ -z "$TASK_ID" ]]; then
    error "Missing required --task-id argument"
  fi

  case "$FORMAT" in
    json | prometheus | otlp | csv) ;;
    *)
      error "Invalid format: $FORMAT (must be json|prometheus|otlp|csv)"
      ;;
  esac

  if [[ -n "$CLEANUP_TTL" && ! "$CLEANUP_TTL" =~ ^[0-9]+$ ]]; then
    error "Invalid --cleanup-ttl value: $CLEANUP_TTL (must be a whole number of days)"
  fi

  if [[ -n "$PUSH_GATEWAY" && "$FORMAT" != "prometheus" ]]; then
    echo "WARNING: --push-gateway is ignored unless --format prometheus is used" >&2
  fi

  # Collect metrics
  collect_metrics "$TASK_ID"

  # Generate export
  case "$FORMAT" in
    json) output=$(export_json "$TASK_ID") ;;
    prometheus) output=$(export_prometheus "$TASK_ID") ;;
    otlp) output=$(export_otlp "$TASK_ID") ;;
    csv) output=$(export_csv "$TASK_ID") ;;
  esac

  # Output handling
  if [[ -n "$OUTPUT_FILE" ]]; then
    printf '%s\n' "$output" >"$OUTPUT_FILE"
    log "Metrics exported to: $OUTPUT_FILE"
    echo "Exported metrics to: $OUTPUT_FILE"
  else
    printf '%s\n' "$output"
  fi

  # Push to Prometheus Pushgateway if specified
  if [[ -n "$PUSH_GATEWAY" && "$FORMAT" == "prometheus" ]]; then
    push_to_gateway "$PUSH_GATEWAY" "$TASK_ID" "$output"
  fi

  # Cleanup old metrics if specified
  if [[ -n "$CLEANUP_TTL" ]]; then
    cleanup_old_metrics "$TASK_ID" "$CLEANUP_TTL"
  fi
}
672
-
673
# Run main function, forwarding every command-line argument unchanged.
main "$@"