claude-flow-novice 2.14.35 → 2.14.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. package/.claude/commands/CFN_LOOP_TASK_MODE.md +1 -1
  2. package/.claude/commands/cfn-loop-cli.md +491 -456
  3. package/.claude/commands/switch-api.md +1 -1
  4. package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
  5. package/.claude/skills/cfn-loop-validation/config.json +2 -2
  6. package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
  7. package/claude-assets/agents/README-AGENT_LIFECYCLE.md +37 -10
  8. package/claude-assets/agents/README-VALIDATION.md +0 -8
  9. package/claude-assets/agents/cfn-dev-team/README.md +0 -8
  10. package/claude-assets/agents/cfn-dev-team/coordinators/README.md +1 -9
  11. package/claude-assets/agents/cfn-dev-team/developers/README.md +1 -9
  12. package/claude-assets/agents/cfn-dev-team/documentation/README-VALIDATION.md +0 -8
  13. package/claude-assets/agents/cfn-dev-team/documentation/agent-type-guidelines.md +0 -10
  14. package/claude-assets/agents/cfn-dev-team/reviewers/README.md +1 -9
  15. package/claude-assets/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +0 -10
  16. package/claude-assets/agents/cfn-dev-team/test-agent.md +0 -10
  17. package/claude-assets/agents/cfn-dev-team/testers/README.md +1 -9
  18. package/claude-assets/agents/csuite/cto-agent.md +0 -10
  19. package/claude-assets/agents/custom/cfn-system-expert.md +1 -128
  20. package/claude-assets/agents/custom/claude-code-expert.md +151 -2
  21. package/claude-assets/agents/docker-coordinators/cfn-docker-v3-coordinator.md +39 -3
  22. package/claude-assets/agents/docker-team/csuite/c-suite-template.md +1 -5
  23. package/claude-assets/agents/docker-team/infrastructure/team-coordinator-template.md +1 -5
  24. package/claude-assets/agents/marketing_hybrid/cost_tracker.md +0 -10
  25. package/claude-assets/agents/marketing_hybrid/docker_deployer.md +0 -10
  26. package/claude-assets/agents/marketing_hybrid/zai_worker_spawner.md +0 -10
  27. package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +1 -1
  28. package/claude-assets/commands/cfn-loop-cli.md +491 -456
  29. package/claude-assets/commands/switch-api.md +1 -1
  30. package/claude-assets/skills/cfn-error-logging/SKILL.md +339 -0
  31. package/claude-assets/skills/cfn-error-logging/cleanup-error-logs.sh +334 -0
  32. package/claude-assets/skills/cfn-error-logging/integrate-cli.sh +232 -0
  33. package/claude-assets/skills/cfn-error-logging/integrate-docker.sh +294 -0
  34. package/claude-assets/skills/cfn-error-logging/invoke-error-logging.sh +839 -0
  35. package/claude-assets/skills/cfn-error-logging/test-error-logging.sh +475 -0
  36. package/claude-assets/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
  37. package/claude-assets/skills/cfn-loop-validation/config.json +2 -2
  38. package/claude-assets/skills/cfn-process-instrumentation/instrument-process.sh +326 -322
  39. package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
  40. package/claude-assets/skills/cfn-task-config-init/initialize-config.sh +2 -2
  41. package/claude-assets/skills/cfn-task-mode-sanitize/task-mode-env-sanitizer.sh +224 -181
  42. package/claude-assets/skills/cfn-validation-runner-instrumentation/wrapped-executor.sh +235 -271
  43. package/dist/agents/agent-loader.js +467 -133
  44. package/dist/agents/agent-loader.js.map +1 -1
  45. package/dist/cli/config-manager.js +109 -91
  46. package/dist/cli/config-manager.js.map +1 -1
  47. package/dist/hello.js +27 -3
  48. package/dist/hello.js.map +1 -1
  49. package/dist/server.js +194 -0
  50. package/dist/server.js.map +1 -0
  51. package/dist/server.test.js +207 -0
  52. package/dist/server.test.js.map +1 -0
  53. package/package.json +2 -1
  54. package/scripts/docker-build-mcp.sh +155 -0
  55. package/scripts/docker-test-mcp.sh +260 -0
  56. package/scripts/mcp-health-check.sh +123 -0
@@ -1,323 +1,327 @@
1
- #!/usr/bin/env bash
2
-
3
- ##############################################################################
4
- # CFN Process Instrumentation
5
- # Part of ANTI-023 Memory Leak Protection System
6
- #
7
- # Provides process instrumentation, monitoring, and automatic resource limiting
8
- # for CFN Loop agents and orchestration processes.
9
- #
10
- # Usage:
11
- # source ./instrument-process.sh [--agent-id <id>] [--memory-limit <size>]
12
- # ./instrument-process.sh --monitor-pid <pid>
13
- ##############################################################################
14
-
15
- set -euo pipefail
16
-
17
- # Configuration
18
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19
- PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
20
-
21
- # Default limits
22
- DEFAULT_MEMORY_LIMIT="2G"
23
- DEFAULT_CPU_LIMIT="80%"
24
- DEFAULT_TIMEOUT="600"
25
-
26
- # Process tracking
27
- AGENT_ID="${AGENT_ID:-$(hostname)-$$}"
28
- MONITOR_PID=""
29
- MEMORY_LIMIT="${CFN_MEMORY_LIMIT:-$DEFAULT_MEMORY_LIMIT}"
30
- CPU_LIMIT="${CFN_CPU_LIMIT:-$DEFAULT_CPU_LIMIT}"
31
- TIMEOUT="${CFN_TIMEOUT:-$DEFAULT_TIMEOUT}"
32
-
33
- # Telemetry storage
34
- TELEMETRY_DIR="${CFN_TELEMETRY_DIR:-/tmp/cfn-telemetry}"
35
- METRICS_FILE="$TELEMETRY_DIR/metrics_${AGENT_ID}.json"
36
-
37
- # Color coding
38
- readonly RED='\033[0;31m'
39
- readonly GREEN='\033[0;32m'
40
- readonly YELLOW='\033[1;33m'
41
- readonly BLUE='\033[0;34m'
42
- readonly NC='\033[0m'
43
-
44
- # Logging functions
45
- log_info() {
46
- echo -e "${BLUE}[INSTRUMENT]${NC} $1" >&2
47
- }
48
-
49
- log_success() {
50
- echo -e "${GREEN}[INSTRUMENT]${NC} $1" >&2
51
- }
52
-
53
- log_warning() {
54
- echo -e "${YELLOW}[INSTRUMENT]${NC} $1" >&2
55
- }
56
-
57
- log_error() {
58
- echo -e "${RED}[INSTRUMENT]${NC} $1" >&2
59
- }
60
-
61
- # Initialize telemetry directory
62
- init_telemetry() {
63
- mkdir -p "$TELEMETRY_DIR"
64
-
65
- # Create metrics file with initial structure
66
- cat > "$METRICS_FILE" << EOF
67
- {
68
- "agent_id": "$AGENT_ID",
69
- "start_time": "$(date -Iseconds)",
70
- "process_id": "$$",
71
- "memory_limit": "$MEMORY_LIMIT",
72
- "cpu_limit": "$CPU_LIMIT",
73
- "timeout": "$TIMEOUT",
74
- "samples": []
75
- }
76
- EOF
77
- }
78
-
79
- # Collect process metrics
80
- collect_metrics() {
81
- local pid="${1:-$$}"
82
- local timestamp="$(date -Iseconds)"
83
-
84
- # Get process statistics
85
- local mem_usage=""
86
- local cpu_usage=""
87
- local open_files=""
88
- local threads=""
89
-
90
- if command -v ps >/dev/null 2>&1; then
91
- mem_usage=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d ' ' || echo "0")
92
- cpu_usage=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d ' ' || echo "0")
93
- fi
94
-
95
- if command -v lsof >/dev/null 2>&1; then
96
- open_files=$(lsof -p "$pid" 2>/dev/null | wc -l || echo "0")
97
- fi
98
-
99
- if [[ -f "/proc/$pid/status" ]]; then
100
- threads=$(grep "^Threads:" "/proc/$pid/status" | awk '{print $2}' || echo "0")
101
- fi
102
-
103
- # Create metrics entry
104
- local metrics_entry=$(cat << EOF
105
- {
106
- "timestamp": "$timestamp",
107
- "memory_kb": "$mem_usage",
108
- "cpu_percent": "$cpu_usage",
109
- "open_files": "$open_files",
110
- "threads": "$threads"
111
- }
112
- EOF
113
- )
114
-
115
- # Update metrics file
116
- if [[ -f "$METRICS_FILE" ]]; then
117
- # Use jq to safely append to samples array
118
- if command -v jq >/dev/null 2>&1; then
119
- jq --argjson entry "$metrics_entry" '.samples += [$entry]' "$METRICS_FILE" > "$METRICS_FILE.tmp" && \
120
- mv "$METRICS_FILE.tmp" "$METRICS_FILE"
121
- else
122
- # Fallback without jq
123
- echo "Warning: jq not available, using simple append" >&2
124
- echo "$metrics_entry" >> "$METRICS_FILE.raw"
125
- fi
126
- fi
127
- }
128
-
129
- # Check resource limits
130
- check_limits() {
131
- local pid="${1:-$$}"
132
-
133
- # Memory limit check
134
- if command -v ps >/dev/null 2>&1; then
135
- local mem_kb=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d ' ' || echo "0")
136
- local mem_mb=$((mem_kb / 1024))
137
-
138
- case "$MEMORY_LIMIT" in
139
- *G|*g)
140
- local limit_mb=$((${MEMORY_LIMIT%[Gg]*} * 1024))
141
- ;;
142
- *M|*m)
143
- local limit_mb=$((${MEMORY_LIMIT%[Mm]*}))
144
- ;;
145
- *)
146
- local limit_mb=2048 # Default 2GB
147
- ;;
148
- esac
149
-
150
- if [[ $mem_mb -gt $limit_mb ]]; then
151
- log_warning "Memory limit exceeded: ${mem_mb}MB > ${limit_mb}MB"
152
- return 1
153
- fi
154
- fi
155
-
156
- # CPU limit check
157
- if command -v ps >/dev/null 2>&1; then
158
- local cpu_percent=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d ' ' || echo "0")
159
- local cpu_limit_num=$((${CPU_LIMIT%\%}))
160
-
161
- if (( $(echo "$cpu_percent > $cpu_limit_num" | bc -l) )); then
162
- log_warning "CPU limit exceeded: ${cpu_percent}% > ${CPU_LIMIT}"
163
- return 1
164
- fi
165
- fi
166
-
167
- return 0
168
- }
169
-
170
- # Start background monitoring
171
- start_monitoring() {
172
- local pid="${1:-$$}"
173
- local interval="${2:-30}" # Check every 30 seconds
174
-
175
- log_info "Starting process monitoring for PID $pid (interval: ${interval}s)"
176
-
177
- # Start monitoring in background
178
- (
179
- while true; do
180
- if ! kill -0 "$pid" 2>/dev/null; then
181
- log_info "Process $pid no longer exists, stopping monitoring"
182
- break
183
- fi
184
-
185
- collect_metrics "$pid"
186
-
187
- if ! check_limits "$pid"; then
188
- log_error "Resource limits exceeded, terminating process $pid"
189
- kill -TERM "$pid" 2>/dev/null || true
190
- break
191
- fi
192
-
193
- sleep "$interval"
194
- done
195
- ) &
196
-
197
- MONITOR_PID=$!
198
- echo "$MONITOR_PID"
199
- }
200
-
201
- # Stop monitoring
202
- stop_monitoring() {
203
- if [[ -n "$MONITOR_PID" ]] && kill -0 "$MONITOR_PID" 2>/dev/null; then
204
- kill "$MONITOR_PID" 2>/dev/null || true
205
- log_info "Stopped monitoring (PID: $MONITOR_PID)"
206
- fi
207
- }
208
-
209
- # Generate final report
210
- generate_report() {
211
- local exit_code="${1:-0}"
212
-
213
- if [[ -f "$METRICS_FILE" ]]; then
214
- # Update with final information
215
- if command -v jq >/dev/null 2>&1; then
216
- jq --arg end_time "$(date -Iseconds)" \
217
- --arg exit_code "$exit_code" \
218
- '.end_time = $end_time | .exit_code = $exit_code' \
219
- "$METRICS_FILE" > "$METRICS_FILE.tmp" && \
220
- mv "$METRICS_FILE.tmp" "$METRICS_FILE"
221
- fi
222
-
223
- log_success "Process report generated: $METRICS_FILE"
224
-
225
- # Print summary
226
- if command -v jq >/dev/null 2>&1; then
227
- local samples=$(jq '.samples | length' "$METRICS_FILE")
228
- echo "📊 Process Metrics Summary:" >&2
229
- echo " Agent ID: $AGENT_ID" >&2
230
- echo " Samples: $samples" >&2
231
- echo " Exit Code: $exit_code" >&2
232
- fi
233
- fi
234
- }
235
-
236
- # Monitor existing process
237
- monitor_pid() {
238
- local pid="$1"
239
-
240
- log_info "Monitoring existing process: PID $pid"
241
-
242
- if ! kill -0 "$pid" 2>/dev/null; then
243
- log_error "Process $pid does not exist"
244
- return 1
245
- fi
246
-
247
- # Start monitoring
248
- local monitor_pid=$(start_monitoring "$pid")
249
-
250
- # Wait for process to complete
251
- while kill -0 "$pid" 2>/dev/null; do
252
- sleep 5
253
- done
254
-
255
- # Stop monitoring
256
- stop_monitoring
257
-
258
- log_success "Process monitoring completed for PID $pid"
259
- }
260
-
261
- # Main execution
262
- main() {
263
- local action="${1:-"instrument"}"
264
-
265
- case "$action" in
266
- "instrument")
267
- init_telemetry
268
- local monitor_pid=$(start_monitoring)
269
-
270
- # Set up cleanup traps
271
- trap 'stop_monitoring; generate_report $?' EXIT
272
- trap 'stop_monitoring; generate_report 1' INT TERM
273
-
274
- log_success "Process instrumentation started for $AGENT_ID"
275
- ;;
276
- "monitor-pid")
277
- if [[ -z "${2:-}" ]]; then
278
- log_error "PID required for monitor-pid action"
279
- exit 1
280
- fi
281
- monitor_pid "$2"
282
- ;;
283
- "--help"|"-h")
284
- cat << EOF
285
- CFN Process Instrumentation Script
286
-
287
- Usage:
288
- $0 # Instrument current process
289
- $0 monitor-pid <pid> # Monitor existing process
290
- $0 --help # Show this help
291
-
292
- Environment Variables:
293
- AGENT_ID # Agent identifier (default: hostname-PID)
294
- CFN_MEMORY_LIMIT # Memory limit (default: 2G)
295
- CFN_CPU_LIMIT # CPU limit (default: 80%)
296
- CFN_TIMEOUT # Timeout in seconds (default: 600)
297
- CFN_TELEMETRY_DIR # Telemetry storage directory
298
-
299
- This script provides process monitoring and resource limit enforcement
300
- for CFN Loop agents and orchestration processes.
301
- EOF
302
- ;;
303
- *)
304
- log_error "Unknown action: $action"
305
- exit 1
306
- ;;
307
- esac
308
- }
309
-
310
- # Execute main function if run directly
311
- if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
312
- main "$@"
313
- else
314
- # When sourced, automatically instrument current process
315
- init_telemetry
316
- local monitor_pid=$(start_monitoring)
317
-
318
- # Set up cleanup traps
319
- trap 'stop_monitoring; generate_report $?' EXIT
320
- trap 'stop_monitoring; generate_report 1' INT TERM
321
-
322
- log_info "Process instrumentation enabled for $AGENT_ID"
1
#!/usr/bin/env bash

##############################################################################
# CFN Process Instrumentation
# Part of ANTI-023 Memory Leak Protection System
#
# Provides process instrumentation, monitoring, and automatic resource limiting
# for CFN Loop agents and orchestration processes.
#
# Usage:
#   source ./instrument-process.sh            # instrument the sourcing shell
#   ./instrument-process.sh                   # "instrument" action (default)
#   ./instrument-process.sh monitor-pid <pid> # watch an existing process
#   ./instrument-process.sh --help
#
# Settings are read from the environment, not from flags:
#   AGENT_ID, CFN_MEMORY_LIMIT, CFN_CPU_LIMIT, CFN_TIMEOUT, CFN_TELEMETRY_DIR
##############################################################################

# NOTE: when this file is sourced, these options also apply to the sourcing
# shell for the rest of its lifetime.
set -euo pipefail

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Default limits
# Note: DEFAULT_TIMEOUT may be set by wrapped-executor.sh (300)
# Only set if not already defined
: "${DEFAULT_MEMORY_LIMIT:=2G}"
: "${DEFAULT_CPU_LIMIT:=80%}"
: "${DEFAULT_TIMEOUT:=600}"

# Process tracking
# If the `hostname` binary is unavailable, fall back to bash's own $HOSTNAME
# instead of aborting start-up under `set -e`.
AGENT_ID="${AGENT_ID:-$(hostname 2>/dev/null || printf '%s' "${HOSTNAME:-unknown}")-$$}"
MONITOR_PID=""
MEMORY_LIMIT="${CFN_MEMORY_LIMIT:-$DEFAULT_MEMORY_LIMIT}"
CPU_LIMIT="${CFN_CPU_LIMIT:-$DEFAULT_CPU_LIMIT}"
TIMEOUT="${CFN_TIMEOUT:-$DEFAULT_TIMEOUT}"

# Telemetry storage
TELEMETRY_DIR="${CFN_TELEMETRY_DIR:-/tmp/cfn-telemetry}"
METRICS_FILE="$TELEMETRY_DIR/metrics_${AGENT_ID}.json"

# Color coding
# Each constant is declared only when not already set: re-declaring a readonly
# name fails, which would abort a second `source` of this file (or a source
# after another script that defined the same names) under `set -e`.
[[ -n "${RED+set}" ]] || readonly RED='\033[0;31m'
[[ -n "${GREEN+set}" ]] || readonly GREEN='\033[0;32m'
[[ -n "${YELLOW+set}" ]] || readonly YELLOW='\033[1;33m'
[[ -n "${BLUE+set}" ]] || readonly BLUE='\033[0;34m'
[[ -n "${NC+set}" ]] || readonly NC='\033[0m'
45
+
46
+ # Logging functions
47
# Write an informational message to stderr behind a blue [INSTRUMENT] tag.
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT interpreted; only the colour codes are)
log_info() {
  printf '%b[INSTRUMENT]%b %s\n' "${BLUE:-}" "${NC:-}" "${1:-}" >&2
}
50
+
51
# Write a success message to stderr behind a green [INSTRUMENT] tag.
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT interpreted; only the colour codes are)
log_success() {
  printf '%b[INSTRUMENT]%b %s\n' "${GREEN:-}" "${NC:-}" "${1:-}" >&2
}
54
+
55
# Write a warning message to stderr behind a yellow [INSTRUMENT] tag.
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT interpreted; only the colour codes are)
log_warning() {
  printf '%b[INSTRUMENT]%b %s\n' "${YELLOW:-}" "${NC:-}" "${1:-}" >&2
}
58
+
59
# Write an error message to stderr behind a red [INSTRUMENT] tag.
# Arguments: $1 - message text, printed verbatim (backslash sequences in the
#                 message are NOT interpreted; only the colour codes are)
log_error() {
  printf '%b[INSTRUMENT]%b %s\n' "${RED:-}" "${NC:-}" "${1:-}" >&2
}
62
+
63
# Escape a string so it can be embedded between double quotes in JSON.
# Handles backslash, double quote, newline, tab and carriage return.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
_cfn_json_escape() {
  local text="${1-}"
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\t'/\\t}
  text=${text//$'\r'/\\r}
  printf '%s' "$text"
}

# Initialize telemetry directory
#
# Creates $TELEMETRY_DIR if needed and (re)writes $METRICS_FILE with the
# initial JSON document whose "samples" array collect_metrics appends to.
# Globals:  TELEMETRY_DIR, METRICS_FILE, AGENT_ID, MEMORY_LIMIT, CPU_LIMIT,
#           TIMEOUT (all read)
# Outputs:  overwrites $METRICS_FILE
# Returns:  non-zero if the directory or the file cannot be written
init_telemetry() {
  mkdir -p "$TELEMETRY_DIR" || return 1

  local started_at
  started_at="$(date -Iseconds)" || return 1

  # Create metrics file with initial structure. Every value stays a JSON
  # string (as before); caller-controlled values are escaped so that an ID or
  # limit containing a quote or backslash cannot produce an invalid document.
  {
    printf '{\n'
    printf '  "agent_id": "%s",\n' "$(_cfn_json_escape "$AGENT_ID")"
    printf '  "start_time": "%s",\n' "$started_at"
    printf '  "process_id": "%s",\n' "$$"
    printf '  "memory_limit": "%s",\n' "$(_cfn_json_escape "$MEMORY_LIMIT")"
    printf '  "cpu_limit": "%s",\n' "$(_cfn_json_escape "$CPU_LIMIT")"
    printf '  "timeout": "%s",\n' "$(_cfn_json_escape "$TIMEOUT")"
    printf '  "samples": []\n'
    printf '}\n'
  } > "$METRICS_FILE"
}
80
+
81
# Collect process metrics
#
# Takes one sample (RSS, %CPU, open-file count, thread count) for a process
# and appends it to the "samples" array of $METRICS_FILE.
# Arguments: $1 - PID to sample (default: this shell, $$)
# Globals:   METRICS_FILE (read; file is rewritten in place)
# Outputs:   with jq: updates $METRICS_FILE via "$METRICS_FILE.tmp";
#            without jq: appends the raw entry to "$METRICS_FILE.raw"
# Returns:   0 on success or when $METRICS_FILE does not exist (nothing to
#            update); non-zero when jq fails to update the file
collect_metrics() {
  local pid="${1:-$$}"
  local timestamp
  timestamp="$(date -Iseconds)"

  # Every metric defaults to "0" so a missing tool or a vanished process
  # yields a well-formed single-line value.
  local mem_usage="0"
  local cpu_usage="0"
  local open_files="0"
  local threads="0"
  local raw=""

  if command -v ps >/dev/null 2>&1; then
    raw=$(ps -p "$pid" -o rss= 2>/dev/null) || raw=""
    raw=${raw//[[:space:]]/}
    mem_usage=${raw:-0}

    raw=$(ps -p "$pid" -o %cpu= 2>/dev/null) || raw=""
    raw=${raw//[[:space:]]/}
    cpu_usage=${raw:-0}
  fi

  if command -v lsof >/dev/null 2>&1; then
    # lsof may exit non-zero even when it printed rows. Under `pipefail` the
    # former `lsof | wc -l || echo "0"` then captured TWO lines ("N" and "0"),
    # which made the JSON entry invalid. Count in awk and ignore the status.
    # NR > 1 skips the column-header line so only real entries are counted.
    raw=$(lsof -p "$pid" 2>/dev/null | awk 'NR > 1 { n++ } END { print n + 0 }') || true
    raw=${raw//[[:space:]]/}
    open_files=${raw:-0}
  fi

  if [[ -f "/proc/$pid/status" ]]; then
    # The process can exit between the test above and the read; tolerate it.
    raw=$(awk '/^Threads:/ { print $2; exit }' "/proc/$pid/status" 2>/dev/null) || raw=""
    threads=${raw:-0}
  fi

  # Create metrics entry (all values are JSON strings, as before)
  local metrics_entry
  metrics_entry=$(printf '{\n  "timestamp": "%s",\n  "memory_kb": "%s",\n  "cpu_percent": "%s",\n  "open_files": "%s",\n  "threads": "%s"\n}' \
    "$timestamp" "$mem_usage" "$cpu_usage" "$open_files" "$threads")

  # Update metrics file; without one there is nothing to append to.
  if [[ ! -f "$METRICS_FILE" ]]; then
    return 0
  fi

  if command -v jq >/dev/null 2>&1; then
    # Use jq to safely append to samples array; write to a temp file first so
    # a failed run never truncates the existing metrics.
    if jq --argjson entry "$metrics_entry" '.samples += [$entry]' \
      "$METRICS_FILE" > "$METRICS_FILE.tmp"; then
      mv "$METRICS_FILE.tmp" "$METRICS_FILE"
    else
      rm -f -- "$METRICS_FILE.tmp"
      return 1
    fi
  else
    # Fallback without jq
    echo "Warning: jq not available, using simple append" >&2
    echo "$metrics_entry" >> "$METRICS_FILE.raw"
  fi
}
130
+
131
# Check resource limits
#
# Compares a process's resident memory and CPU usage (as reported by ps)
# against $MEMORY_LIMIT and $CPU_LIMIT.
# Arguments: $1 - PID to check (default: this shell, $$)
# Globals:   MEMORY_LIMIT - "<number>G|M" (optionally "GB"/"MB", fractions
#                           allowed); anything else falls back to 2048 MB
#            CPU_LIMIT    - "<number>%" or "<number>" (fractions allowed);
#                           anything else falls back to 80
# Outputs:   a warning via log_warning when a limit is exceeded
# Returns:   0 when within limits (or when ps is unavailable), 1 otherwise
check_limits() {
  local pid="${1:-$$}"

  # Both checks rely on ps; without it nothing can be measured.
  if ! command -v ps >/dev/null 2>&1; then
    return 0
  fi

  local number_re='^[0-9]+(\.[0-9]+)?$'

  # Memory limit check
  local mem_kb
  mem_kb=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d ' ') || true
  [[ "$mem_kb" =~ ^[0-9]+$ ]] || mem_kb=0
  local mem_mb=$(( 10#$mem_kb / 1024 ))

  local limit_mb=2048 # Default 2GB
  local size_re='^([0-9]+(\.[0-9]+)?)([GgMm])[Bb]?$'
  if [[ "${MEMORY_LIMIT:-}" =~ $size_re ]]; then
    local amount="${BASH_REMATCH[1]}"
    local factor=1
    if [[ "${BASH_REMATCH[3]}" == [Gg] ]]; then
      factor=1024
    fi
    # awk handles fractional sizes such as "1.5G", which shell integer
    # arithmetic rejects.
    limit_mb=$(awk -v n="$amount" -v f="$factor" 'BEGIN { printf "%d", n * f }') || true
    # An empty value would make the comparison below always fire.
    [[ "$limit_mb" =~ ^[0-9]+$ ]] || limit_mb=2048
  fi

  if (( mem_mb > limit_mb )); then
    log_warning "Memory limit exceeded: ${mem_mb}MB > ${limit_mb}MB"
    return 1
  fi

  # CPU limit check
  local cpu_percent
  cpu_percent=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d ' ') || true
  cpu_percent=${cpu_percent/,/.} # some locales print a decimal comma
  [[ "$cpu_percent" =~ $number_re ]] || cpu_percent=0

  local cpu_limit_num="${CPU_LIMIT:-}"
  cpu_limit_num=${cpu_limit_num%\%}
  [[ "$cpu_limit_num" =~ $number_re ]] || cpu_limit_num=80 # script default

  # Floating-point comparison in awk (no dependency on bc).
  if awk -v used="$cpu_percent" -v max="$cpu_limit_num" \
    'BEGIN { exit !(used + 0 > max + 0) }'; then
    log_warning "CPU limit exceeded: ${cpu_percent}% > ${CPU_LIMIT}"
    return 1
  fi

  return 0
}
171
+
172
# Start background monitoring
#
# Launches a background loop that samples a process with collect_metrics and
# sends it SIGTERM as soon as check_limits reports a violation. The loop ends
# on its own once the process is gone.
# Arguments: $1 - PID to watch (default: this shell, $$)
#            $2 - seconds between checks (default: 30)
# Globals:   MONITOR_PID (written: PID of the background loop)
# Outputs:   the monitor PID on stdout
#
# NOTE: MONITOR_PID is only visible to the caller when this function is
# called directly. Inside `$(start_monitoring ...)` the assignment happens in
# the substitution's subshell and is lost; use the printed PID there.
start_monitoring() {
  local pid="${1:-$$}"
  local interval="${2:-30}" # Check every 30 seconds

  log_info "Starting process monitoring for PID $pid (interval: ${interval}s)"

  # Start monitoring in background.
  # stdout goes to /dev/null so the loop does not keep a caller's
  # command-substitution pipe open: otherwise `$(start_monitoring)` would not
  # return until the monitor itself exits. Log output uses stderr and is
  # unaffected.
  (
    while true; do
      if ! kill -0 "$pid" 2>/dev/null; then
        log_info "Process $pid no longer exists, stopping monitoring"
        break
      fi

      # A single failed sample must not end limit enforcement.
      collect_metrics "$pid" || log_warning "Metrics collection failed for PID $pid"

      if ! check_limits "$pid"; then
        log_error "Resource limits exceeded, terminating process $pid"
        kill -TERM "$pid" 2>/dev/null || true
        break
      fi

      sleep "$interval"
    done
  ) >/dev/null &

  MONITOR_PID=$!
  echo "$MONITOR_PID"
}
202
+
203
# Stop monitoring
#
# Terminates the background monitor recorded in MONITOR_PID, if it is still
# running, and forgets the PID.
# Globals:  MONITOR_PID (read, then cleared)
# Returns:  0 always
stop_monitoring() {
  local target="${MONITOR_PID:-}"

  if [[ -z "$target" ]]; then
    return 0
  fi

  # Clear the PID before signalling: the cleanup traps can call this function
  # twice (INT/TERM handler, then EXIT), and the second call must not probe or
  # kill whatever process may have inherited the number in the meantime.
  MONITOR_PID=""

  if kill -0 "$target" 2>/dev/null; then
    kill "$target" 2>/dev/null || true
    log_info "Stopped monitoring (PID: $target)"
  fi
}
210
+
211
# Generate final report
#
# Stamps $METRICS_FILE with an end time and exit code (when jq is installed)
# and prints a short summary to stderr.
# Arguments: $1 - exit code to record (default: 0)
# Globals:   METRICS_FILE, AGENT_ID (read)
# Outputs:   rewrites $METRICS_FILE via "$METRICS_FILE.tmp"; summary on stderr
generate_report() {
  local exit_code="${1:-0}"

  # No metrics file means there is nothing to report on.
  [[ -f "$METRICS_FILE" ]] || return 0

  local scratch="$METRICS_FILE.tmp"

  # Update with final information
  if command -v jq >/dev/null 2>&1; then
    jq --arg end_time "$(date -Iseconds)" --arg exit_code "$exit_code" \
      '.end_time = $end_time | .exit_code = $exit_code' \
      "$METRICS_FILE" > "$scratch" \
      && mv "$scratch" "$METRICS_FILE"
  fi

  log_success "Process report generated: $METRICS_FILE"

  # Print summary (needs jq to count the samples)
  command -v jq >/dev/null 2>&1 || return 0

  local samples=$(jq '.samples | length' "$METRICS_FILE")
  {
    printf '%s\n' "📊 Process Metrics Summary:"
    printf '%s\n' " Agent ID: $AGENT_ID"
    printf '%s\n' " Samples: $samples"
    printf '%s\n' " Exit Code: $exit_code"
  } >&2
}
237
+
238
# Monitor existing process
#
# Watches an already-running process until it exits: starts the background
# monitor, polls for the process's disappearance, then stops the monitor.
# Arguments: $1 - PID of the process to watch
#            $2 - seconds between liveness polls (default: 5)
# Returns:   1 if the process does not exist, 0 once it has exited
#
# NOTE(review): this path never calls init_telemetry, and collect_metrics only
# appends when $METRICS_FILE already exists - confirm whether samples are
# expected to be recorded in monitor-pid mode.
monitor_pid() {
  local pid="${1:-}"
  local poll_interval="${2:-5}"

  log_info "Monitoring existing process: PID $pid"

  if ! kill -0 "$pid" 2>/dev/null; then
    log_error "Process $pid does not exist"
    return 1
  fi

  # Start monitoring.
  # Called directly rather than through `$(...)`: a command substitution runs
  # in a subshell, so the MONITOR_PID it sets would be lost (leaving
  # stop_monitoring with nothing to stop), and the substitution would block
  # for as long as the background loop holds its stdout open.
  start_monitoring "$pid" >/dev/null

  # Wait for process to complete
  while kill -0 "$pid" 2>/dev/null; do
    sleep "$poll_interval"
  done

  # Stop monitoring
  stop_monitoring

  log_success "Process monitoring completed for PID $pid"
}
262
+
263
# Main execution
#
# Command-line dispatcher.
# Arguments: $1 - action: "instrument" (default), "monitor-pid" (also accepted
#                 as "--monitor-pid"), or "--help"/"-h"
#            $2 - PID, required for monitor-pid
# Returns:   exits 1 on an unknown action or a missing PID
main() {
  local action="${1:-instrument}"

  case "$action" in
    instrument)
      init_telemetry
      # Called directly, not via `$(...)`: inside a command substitution the
      # MONITOR_PID assignment is lost, and the substitution waits on the
      # background loop - which is itself waiting for this very shell.
      start_monitoring >/dev/null

      # Set up cleanup traps.
      # EXIT: capture $? first; after stop_monitoring it would hold that
      # function's status (always 0) instead of the script's exit code.
      trap '_cfn_exit_code=$?; stop_monitoring; generate_report "$_cfn_exit_code"' EXIT
      # INT/TERM: report and then actually terminate. A handler that merely
      # returns would swallow the signal and let the script keep running.
      # The EXIT trap is cleared first so the report is written once.
      trap 'trap - EXIT; stop_monitoring; generate_report 1; exit 1' INT TERM

      log_success "Process instrumentation started for $AGENT_ID"
      ;;
    monitor-pid | --monitor-pid)
      if [[ -z "${2:-}" ]]; then
        log_error "PID required for monitor-pid action"
        exit 1
      fi
      monitor_pid "$2"
      ;;
    --help | -h)
      cat << EOF
CFN Process Instrumentation Script

Usage:
$0 # Instrument current process
$0 monitor-pid <pid> # Monitor existing process
$0 --help # Show this help

Environment Variables:
AGENT_ID # Agent identifier (default: hostname-PID)
CFN_MEMORY_LIMIT # Memory limit (default: 2G)
CFN_CPU_LIMIT # CPU limit (default: 80%)
CFN_TIMEOUT # Timeout in seconds (default: 600)
CFN_TELEMETRY_DIR # Telemetry storage directory

This script provides process monitoring and resource limit enforcement
for CFN Loop agents and orchestration processes.
EOF
      ;;
    *)
      log_error "Unknown action: $action"
      exit 1
      ;;
  esac
}
311
+
312
# Execute main function if run directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
else
  # When sourced, automatically instrument current process
  init_telemetry
  # BUG #12 FIX: start_monitoring is called directly instead of through
  # `$(...)`. A command substitution waits for the background monitor (which
  # inherits its stdout) and discards the MONITOR_PID assignment; the direct
  # call sets the global MONITOR_PID that the cleanup traps rely on.
  start_monitoring >/dev/null

  # Set up cleanup traps.
  # NOTE: these replace any EXIT/INT/TERM traps the sourcing script installed
  # earlier.
  # EXIT: capture $? first; after stop_monitoring it would hold that
  # function's status (always 0) instead of the shell's exit code.
  trap '_cfn_exit_code=$?; stop_monitoring; generate_report "$_cfn_exit_code"' EXIT
  # INT/TERM: report and then actually terminate. The monitor enforces limits
  # by sending SIGTERM to this shell, so a handler that merely returned would
  # swallow the signal and let the process keep running past its limits. The
  # EXIT trap is cleared first so the report is written once.
  trap 'trap - EXIT; stop_monitoring; generate_report 1; exit 1' INT TERM

  log_info "Process instrumentation enabled for $AGENT_ID"
fi