claude-flow-novice 2.14.35 → 2.14.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/.claude/commands/cfn-loop-cli.md +491 -456
- package/.claude/commands/switch-api.md +1 -1
- package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
- package/.claude/skills/cfn-loop-validation/config.json +2 -2
- package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
- package/claude-assets/agents/README-AGENT_LIFECYCLE.md +37 -10
- package/claude-assets/agents/README-VALIDATION.md +0 -8
- package/claude-assets/agents/cfn-dev-team/README.md +0 -8
- package/claude-assets/agents/cfn-dev-team/coordinators/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/developers/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/documentation/README-VALIDATION.md +0 -8
- package/claude-assets/agents/cfn-dev-team/documentation/agent-type-guidelines.md +0 -10
- package/claude-assets/agents/cfn-dev-team/reviewers/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +0 -10
- package/claude-assets/agents/cfn-dev-team/test-agent.md +0 -10
- package/claude-assets/agents/cfn-dev-team/testers/README.md +1 -9
- package/claude-assets/agents/csuite/cto-agent.md +0 -10
- package/claude-assets/agents/custom/cfn-system-expert.md +1 -128
- package/claude-assets/agents/custom/claude-code-expert.md +151 -2
- package/claude-assets/agents/docker-coordinators/cfn-docker-v3-coordinator.md +39 -3
- package/claude-assets/agents/docker-team/csuite/c-suite-template.md +1 -5
- package/claude-assets/agents/docker-team/infrastructure/team-coordinator-template.md +1 -5
- package/claude-assets/agents/marketing_hybrid/cost_tracker.md +0 -10
- package/claude-assets/agents/marketing_hybrid/docker_deployer.md +0 -10
- package/claude-assets/agents/marketing_hybrid/zai_worker_spawner.md +0 -10
- package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/claude-assets/commands/cfn-loop-cli.md +491 -456
- package/claude-assets/commands/switch-api.md +1 -1
- package/claude-assets/skills/cfn-error-logging/SKILL.md +339 -0
- package/claude-assets/skills/cfn-error-logging/cleanup-error-logs.sh +334 -0
- package/claude-assets/skills/cfn-error-logging/integrate-cli.sh +232 -0
- package/claude-assets/skills/cfn-error-logging/integrate-docker.sh +294 -0
- package/claude-assets/skills/cfn-error-logging/invoke-error-logging.sh +839 -0
- package/claude-assets/skills/cfn-error-logging/test-error-logging.sh +475 -0
- package/claude-assets/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
- package/claude-assets/skills/cfn-loop-validation/config.json +2 -2
- package/claude-assets/skills/cfn-process-instrumentation/instrument-process.sh +326 -322
- package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
- package/claude-assets/skills/cfn-task-config-init/initialize-config.sh +2 -2
- package/claude-assets/skills/cfn-task-mode-sanitize/task-mode-env-sanitizer.sh +224 -181
- package/claude-assets/skills/cfn-validation-runner-instrumentation/wrapped-executor.sh +235 -271
- package/dist/agents/agent-loader.js +467 -133
- package/dist/agents/agent-loader.js.map +1 -1
- package/dist/cli/config-manager.js +109 -91
- package/dist/cli/config-manager.js.map +1 -1
- package/dist/hello.js +27 -3
- package/dist/hello.js.map +1 -1
- package/dist/server.js +194 -0
- package/dist/server.js.map +1 -0
- package/dist/server.test.js +207 -0
- package/dist/server.test.js.map +1 -0
- package/package.json +2 -1
- package/scripts/docker-build-mcp.sh +155 -0
- package/scripts/docker-test-mcp.sh +260 -0
- package/scripts/mcp-health-check.sh +123 -0
|
@@ -1,323 +1,327 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
|
|
3
|
-
##############################################################################
|
|
4
|
-
# CFN Process Instrumentation
|
|
5
|
-
# Part of ANTI-023 Memory Leak Protection System
|
|
6
|
-
#
|
|
7
|
-
# Provides process instrumentation, monitoring, and automatic resource limiting
|
|
8
|
-
# for CFN Loop agents and orchestration processes.
|
|
9
|
-
#
|
|
10
|
-
# Usage:
|
|
11
|
-
# source ./instrument-process.sh [--agent-id <id>] [--memory-limit <size>]
|
|
12
|
-
# ./instrument-process.sh --monitor-pid <pid>
|
|
13
|
-
##############################################################################
|
|
14
|
-
|
|
15
|
-
set -euo pipefail
|
|
16
|
-
|
|
17
|
-
# Configuration
|
|
18
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
19
|
-
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
20
|
-
|
|
21
|
-
# Default limits
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
readonly
|
|
41
|
-
readonly
|
|
42
|
-
readonly
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
local
|
|
88
|
-
local
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
"
|
|
109
|
-
"
|
|
110
|
-
"
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
echo "
|
|
231
|
-
echo "
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
$0
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
#
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
##############################################################################
|
|
4
|
+
# CFN Process Instrumentation
|
|
5
|
+
# Part of ANTI-023 Memory Leak Protection System
|
|
6
|
+
#
|
|
7
|
+
# Provides process instrumentation, monitoring, and automatic resource limiting
|
|
8
|
+
# for CFN Loop agents and orchestration processes.
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# source ./instrument-process.sh   (configure via AGENT_ID, CFN_MEMORY_LIMIT, CFN_CPU_LIMIT, CFN_TIMEOUT env vars)
|
|
12
|
+
# ./instrument-process.sh monitor-pid <pid>
|
|
13
|
+
##############################################################################
|
|
14
|
+
|
|
15
|
+
set -euo pipefail
|
|
16
|
+
|
|
17
|
+
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Default limits
# Each default is assigned only when the variable is unset or empty, so a
# script that sources this file keeps whatever it defined beforehand
# (wrapped-executor.sh may already have set DEFAULT_TIMEOUT to 300).
: "${DEFAULT_MEMORY_LIMIT:=2G}"
: "${DEFAULT_CPU_LIMIT:=80%}"
: "${DEFAULT_TIMEOUT:=600}"

# Process tracking
# AGENT_ID defaults to "<hostname>-<pid>". If the `hostname` binary is missing
# the value falls back to bash's HOSTNAME variable instead of aborting the
# script through a failed command substitution.
AGENT_ID="${AGENT_ID:-$(hostname 2>/dev/null || printf '%s' "${HOSTNAME:-unknown}")-$$}"
MONITOR_PID=""
MEMORY_LIMIT="${CFN_MEMORY_LIMIT:-$DEFAULT_MEMORY_LIMIT}"
CPU_LIMIT="${CFN_CPU_LIMIT:-$DEFAULT_CPU_LIMIT}"
TIMEOUT="${CFN_TIMEOUT:-$DEFAULT_TIMEOUT}"

# Telemetry storage
TELEMETRY_DIR="${CFN_TELEMETRY_DIR:-/tmp/cfn-telemetry}"
METRICS_FILE="$TELEMETRY_DIR/metrics_${AGENT_ID}.json"

# Color coding (escape sequences expanded by the log_* helpers)
# Declared only when not already set: sourcing this file a second time, or
# from a script that already declared the same readonly names, would
# otherwise fail with "readonly variable" and abort under `set -e`.
[[ -n "${RED+x}" ]] || readonly RED='\033[0;31m'
[[ -n "${GREEN+x}" ]] || readonly GREEN='\033[0;32m'
[[ -n "${YELLOW+x}" ]] || readonly YELLOW='\033[1;33m'
[[ -n "${BLUE+x}" ]] || readonly BLUE='\033[0;34m'
[[ -n "${NC+x}" ]] || readonly NC='\033[0m'
|
|
45
|
+
|
|
46
|
+
# Logging functions
|
|
47
|
+
# Print an informational message to stderr behind a blue "[INSTRUMENT]" tag.
# Arguments: $1 - message text, printed verbatim
# Globals:   BLUE, NC (read)
log_info() {
  # %b expands the escape sequences stored in the color variables; the
  # message goes through %s so a backslash inside it (for example in a path)
  # is not reinterpreted the way `echo -e` would.
  printf '%b[INSTRUMENT]%b %s\n' "${BLUE-}" "${NC-}" "${1-}" >&2
}
|
|
50
|
+
|
|
51
|
+
# Print a success message to stderr behind a green "[INSTRUMENT]" tag.
# Arguments: $1 - message text, printed verbatim
# Globals:   GREEN, NC (read)
log_success() {
  # Colors go through %b (escape expansion); the message goes through %s so
  # backslashes in it are not reinterpreted the way `echo -e` would.
  printf '%b[INSTRUMENT]%b %s\n' "${GREEN-}" "${NC-}" "${1-}" >&2
}
|
|
54
|
+
|
|
55
|
+
# Print a warning message to stderr behind a yellow "[INSTRUMENT]" tag.
# Arguments: $1 - message text, printed verbatim
# Globals:   YELLOW, NC (read)
log_warning() {
  # Colors go through %b (escape expansion); the message goes through %s so
  # backslashes in it are not reinterpreted the way `echo -e` would.
  printf '%b[INSTRUMENT]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}" >&2
}
|
|
58
|
+
|
|
59
|
+
# Print an error message to stderr behind a red "[INSTRUMENT]" tag.
# Arguments: $1 - message text, printed verbatim
# Globals:   RED, NC (read)
log_error() {
  # Colors go through %b (escape expansion); the message goes through %s so
  # backslashes in it are not reinterpreted the way `echo -e` would.
  printf '%b[INSTRUMENT]%b %s\n' "${RED-}" "${NC-}" "${1-}" >&2
}
|
|
62
|
+
|
|
63
|
+
# Initialize telemetry directory
|
|
64
|
+
# Create TELEMETRY_DIR if needed and (re)write METRICS_FILE with the initial
# JSON document: agent and process identity, the configured limits, and an
# empty "samples" array.
# Globals: TELEMETRY_DIR, METRICS_FILE, AGENT_ID, MEMORY_LIMIT, CPU_LIMIT,
#          TIMEOUT (all read)
# Returns: non-zero when the directory or the file cannot be created
init_telemetry() {
  mkdir -p "$TELEMETRY_DIR" || return

  # Escape backslashes and double quotes before interpolating, so an unusual
  # AGENT_ID or limit value cannot produce a document that jq later rejects.
  local name value
  local -a escaped=()
  for name in AGENT_ID MEMORY_LIMIT CPU_LIMIT TIMEOUT; do
    value=${!name}
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    escaped+=("$value")
  done

  # Create metrics file with initial structure
  cat > "$METRICS_FILE" << EOF
{
  "agent_id": "${escaped[0]}",
  "start_time": "$(date -Iseconds)",
  "process_id": "$$",
  "memory_limit": "${escaped[1]}",
  "cpu_limit": "${escaped[2]}",
  "timeout": "${escaped[3]}",
  "samples": []
}
EOF
}
|
|
80
|
+
|
|
81
|
+
# Collect process metrics
|
|
82
|
+
# Sample the resource usage of one process and record it.
#
# Arguments: $1 - PID to sample (defaults to the current shell, $$)
# Globals:   METRICS_FILE (read; rewritten when jq is available)
# Outputs:   with jq, appends one sample object to .samples in METRICS_FILE;
#            without jq, appends the raw object to "$METRICS_FILE.raw".
#            Nothing is written when METRICS_FILE does not exist.
# Returns:   0 always. A failed sample is reported on stderr instead of
#            propagating, so it cannot end the monitoring loop under `set -e`.
collect_metrics() {
  local pid="${1:-$$}"
  local timestamp
  timestamp="$(date -Iseconds)" || timestamp=""

  # A value stays empty when the tool that provides it is unavailable and
  # becomes "0" when the tool is present but returns nothing usable.
  local mem_usage="" cpu_usage="" open_files="" threads=""
  local int_re='^[0-9]+$'
  local dec_re='^[0-9]+([.,][0-9]+)?$'

  if command -v ps >/dev/null 2>&1; then
    mem_usage=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d '[:space:]') || true
    [[ "$mem_usage" =~ $int_re ]] || mem_usage="0"
    cpu_usage=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d '[:space:]') || true
    [[ "$cpu_usage" =~ $dec_re ]] || cpu_usage="0"
  fi

  if command -v lsof >/dev/null 2>&1; then
    # wc always prints a count, so there is no "|| echo 0" fallback here: with
    # pipefail a failing lsof used to produce the two-line value "0\n0", which
    # made the JSON below invalid. The count is the number of lsof output
    # lines, exactly as before.
    open_files=$(lsof -p "$pid" 2>/dev/null | wc -l | tr -d '[:space:]') || true
    [[ "$open_files" =~ $int_re ]] || open_files="0"
  fi

  if [[ -f "/proc/$pid/status" ]]; then
    threads=$(awk '/^Threads:/ { print $2; exit }' "/proc/$pid/status" 2>/dev/null) || true
    [[ "$threads" =~ $int_re ]] || threads="0"
  fi

  # Create metrics entry (values are kept as JSON strings, as before)
  local metrics_entry
  metrics_entry=$(cat << EOF
{
  "timestamp": "$timestamp",
  "memory_kb": "$mem_usage",
  "cpu_percent": "$cpu_usage",
  "open_files": "$open_files",
  "threads": "$threads"
}
EOF
)

  # Update metrics file
  [[ -f "$METRICS_FILE" ]] || return 0

  if command -v jq >/dev/null 2>&1; then
    # Per-process temp name: the monitor loop and the final report both
    # rewrite METRICS_FILE and must not share one ".tmp" path.
    local tmp_file="${METRICS_FILE}.tmp.${BASHPID:-$$}"
    if ! { jq --argjson entry "$metrics_entry" '.samples += [$entry]' \
      "$METRICS_FILE" > "$tmp_file" && mv -f "$tmp_file" "$METRICS_FILE"; }; then
      rm -f "$tmp_file"
      echo "Warning: failed to append metrics sample to $METRICS_FILE" >&2
    fi
  else
    # Fallback without jq
    echo "Warning: jq not available, using simple append" >&2
    echo "$metrics_entry" >> "$METRICS_FILE.raw"
  fi

  return 0
}
|
|
130
|
+
|
|
131
|
+
# Check resource limits
|
|
132
|
+
# Compare a process's resident memory and CPU usage with the configured limits.
#
# Arguments: $1 - PID to inspect (defaults to the current shell, $$)
# Globals:   MEMORY_LIMIT (read) - number with a K, M or G suffix in either
#                                  case, optional trailing "B", fractions
#                                  allowed (2G, 512M, 1.5g, 256MB). Any other
#                                  form falls back to 2048 MB.
#            CPU_LIMIT (read)    - percentage, with or without trailing "%".
#                                  A non-numeric value disables the CPU check.
# Returns:   0 when within limits, or when `ps` is unavailable;
#            1 when a limit is exceeded (a warning is logged first)
check_limits() {
  local pid="${1:-$$}"

  # Both checks read from ps; without it there is nothing to compare.
  command -v ps >/dev/null 2>&1 || return 0

  # Memory limit check
  local mem_kb
  mem_kb=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d '[:space:]') || true
  [[ "$mem_kb" =~ ^[0-9]+$ ]] || mem_kb=0 # process gone or unreadable
  local mem_mb=$((mem_kb / 1024))

  local limit_mb=2048 # Default 2GB
  local size_re='^([0-9]+(\.[0-9]+)?)([KkMmGg])[Bb]?$'
  if [[ "$MEMORY_LIMIT" =~ $size_re ]]; then
    local amount="${BASH_REMATCH[1]}"
    local unit_kb=1
    case "${BASH_REMATCH[3]}" in
      [Gg]) unit_kb=1048576 ;;
      [Mm]) unit_kb=1024 ;;
    esac
    # awk handles fractional sizes, which shell arithmetic rejects.
    limit_mb=$(LC_ALL=C awk -v amount="$amount" -v unit_kb="$unit_kb" \
      'BEGIN { printf "%d", amount * unit_kb / 1024 }') || true
    [[ "$limit_mb" =~ ^[0-9]+$ ]] || limit_mb=2048
  fi

  if ((mem_mb > limit_mb)); then
    log_warning "Memory limit exceeded: ${mem_mb}MB > ${limit_mb}MB"
    return 1
  fi

  # CPU limit check
  local cpu_percent
  cpu_percent=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d '[:space:]') || true
  cpu_percent=${cpu_percent/,/.} # some locales print a decimal comma
  local dec_re='^[0-9]+(\.[0-9]+)?$'
  [[ "$cpu_percent" =~ $dec_re ]] || cpu_percent=0
  local cpu_limit_num="${CPU_LIMIT%\%}"

  # The decimal comparison is done in awk so it does not depend on `bc`
  # being installed.
  if [[ "$cpu_limit_num" =~ $dec_re ]] &&
    LC_ALL=C awk -v used="$cpu_percent" -v max="$cpu_limit_num" \
      'BEGIN { exit !((used + 0) > (max + 0)) }'; then
    log_warning "CPU limit exceeded: ${cpu_percent}% > ${CPU_LIMIT}"
    return 1
  fi

  return 0
}
|
|
171
|
+
|
|
172
|
+
# Start background monitoring
|
|
173
|
+
# Launch a background loop that samples a process and enforces its limits.
#
# On every pass the loop calls collect_metrics and check_limits for the
# target, then sleeps. It ends when the target no longer exists, or after
# sending SIGTERM to the target on the first limit violation.
# Arguments: $1 - PID to watch (defaults to the current shell, $$)
#            $2 - seconds between checks (default 30)
# Globals:   MONITOR_PID (written) - PID of the background loop. When this
#            function is invoked through $(...), the assignment happens in
#            the substitution's subshell and is not visible to the caller;
#            call it directly if stop_monitoring must work afterwards.
# Outputs:   PID of the background loop on stdout; log messages on stderr
start_monitoring() {
  local pid="${1:-$$}"
  local interval="${2:-30}" # Check every 30 seconds

  log_info "Starting process monitoring for PID $pid (interval: ${interval}s)"

  # Start monitoring in background. The loop's stdout is detached on purpose:
  # if it inherited the pipe of a caller's $(start_monitoring), that command
  # substitution would not return until the loop and its sleep had exited.
  (
    while true; do
      if ! kill -0 "$pid" 2>/dev/null; then
        log_info "Process $pid no longer exists, stopping monitoring"
        break
      fi

      # A failed sample must not end the loop (it would under `set -e`).
      collect_metrics "$pid" || log_warning "Metrics collection failed for PID $pid"

      if ! check_limits "$pid"; then
        log_error "Resource limits exceeded, terminating process $pid"
        kill -TERM "$pid" 2>/dev/null || true
        break
      fi

      sleep "$interval"
    done
  ) >/dev/null &

  MONITOR_PID=$!
  echo "$MONITOR_PID"
}
|
|
202
|
+
|
|
203
|
+
# Stop monitoring
|
|
204
|
+
# Stop the background monitoring loop recorded in MONITOR_PID.
# Globals: MONITOR_PID (read; cleared once the loop has been stopped)
# Does nothing when no monitor is recorded or it has already exited, so it is
# safe to call from several traps.
stop_monitoring() {
  if [[ -n "${MONITOR_PID:-}" ]] && kill -0 "$MONITOR_PID" 2>/dev/null; then
    kill "$MONITOR_PID" 2>/dev/null || true
    # Reap the loop so it does not linger as a zombie. `wait` fails
    # harmlessly when the PID is not a child of this shell.
    wait "$MONITOR_PID" 2>/dev/null || true
    log_info "Stopped monitoring (PID: $MONITOR_PID)"
    MONITOR_PID=""
  fi
}
|
|
210
|
+
|
|
211
|
+
# Generate final report
|
|
212
|
+
# Finalize the metrics file and print a short summary to stderr.
#
# Arguments: $1 - exit code to record (default 0). It is stored as a JSON
#                 string under .exit_code, next to .end_time.
# Globals:   METRICS_FILE (read; rewritten when jq is available), AGENT_ID
# Outputs:   log lines and the summary on stderr. Does nothing when
#            METRICS_FILE does not exist.
# Returns:   0 always; a failed update is logged as a warning
generate_report() {
  local exit_code="${1:-0}"

  [[ -f "$METRICS_FILE" ]] || return 0

  local have_jq=0
  if command -v jq >/dev/null 2>&1; then
    have_jq=1
  fi

  # Update with final information
  if ((have_jq)); then
    # Per-process temp name: a sample writer from the monitor loop may still
    # be finishing, and must not share this path.
    local tmp_file="${METRICS_FILE}.tmp.${BASHPID:-$$}"
    if ! { jq --arg end_time "$(date -Iseconds)" \
      --arg exit_code "$exit_code" \
      '.end_time = $end_time | .exit_code = $exit_code' \
      "$METRICS_FILE" > "$tmp_file" && mv -f "$tmp_file" "$METRICS_FILE"; }; then
      rm -f "$tmp_file"
      log_warning "Could not update $METRICS_FILE with final status"
    fi
  fi

  log_success "Process report generated: $METRICS_FILE"

  # Print summary
  if ((have_jq)); then
    local samples
    samples=$(jq '.samples | length' "$METRICS_FILE" 2>/dev/null) || samples="unknown"
    echo "📊 Process Metrics Summary:" >&2
    echo " Agent ID: $AGENT_ID" >&2
    echo " Samples: $samples" >&2
    echo " Exit Code: $exit_code" >&2
  fi

  return 0
}
|
|
237
|
+
|
|
238
|
+
# Monitor existing process
|
|
239
|
+
# Monitor an already-running process until it exits.
#
# Arguments: $1 - PID of the process to watch
# Globals:   MONITOR_PID (written by start_monitoring, used by stop_monitoring)
# Returns:   1 when no PID is given or the process cannot be signalled,
#            0 once the process has exited and monitoring was stopped
# NOTE(review): this path never calls init_telemetry, and collect_metrics only
# writes when METRICS_FILE already exists - confirm whether samples are
# expected to be recorded in this mode.
monitor_pid() {
  local pid="${1:-}"

  if [[ -z "$pid" ]]; then
    log_error "PID required for monitor-pid action"
    return 1
  fi

  log_info "Monitoring existing process: PID $pid"

  if ! kill -0 "$pid" 2>/dev/null; then
    log_error "Process $pid does not exist"
    return 1
  fi

  # Start monitoring. Called directly, not through $(...): a command
  # substitution runs the function in a subshell, so this shell's MONITOR_PID
  # would stay empty and stop_monitoring below could never stop the loop.
  start_monitoring "$pid" >/dev/null

  # Wait for process to complete
  while kill -0 "$pid" 2>/dev/null; do
    sleep 5
  done

  # Stop monitoring
  stop_monitoring

  log_success "Process monitoring completed for PID $pid"
}
|
|
262
|
+
|
|
263
|
+
# Main execution
|
|
264
|
+
# Command-line entry point.
#
# Arguments: $1 - action: "instrument" (default), "monitor-pid" (also accepted
#                 as "--monitor-pid"), "--help" or "-h"
#            $2 - PID, required for monitor-pid
# Exits with status 1 on an unknown action or a missing PID.
main() {
  local action="${1:-instrument}"

  case "$action" in
    "instrument")
      init_telemetry
      # Called directly, not through $(...), so MONITOR_PID is set in this
      # shell and the cleanup trap can actually stop the loop.
      start_monitoring >/dev/null

      # Set up cleanup traps. The exit status is captured before anything
      # else runs; reading $? after stop_monitoring would report that
      # function's status instead of the script's.
      trap 'cfn_exit_status=$?; stop_monitoring; generate_report "$cfn_exit_status"' EXIT
      # Exit on INT/TERM so the signal really ends the script; the EXIT trap
      # above then performs the cleanup exactly once. 130 and 143 are the
      # conventional 128+signal statuses.
      trap 'exit 130' INT
      trap 'exit 143' TERM

      log_success "Process instrumentation started for $AGENT_ID"
      ;;
    "monitor-pid" | "--monitor-pid")
      if [[ -z "${2:-}" ]]; then
        log_error "PID required for monitor-pid action"
        exit 1
      fi
      monitor_pid "$2"
      ;;
    "--help" | "-h")
      cat << EOF
CFN Process Instrumentation Script

Usage:
  $0                       # Instrument current process
  $0 monitor-pid <pid>     # Monitor existing process
  $0 --help                # Show this help

Environment Variables:
  AGENT_ID                 # Agent identifier (default: hostname-PID)
  CFN_MEMORY_LIMIT         # Memory limit (default: 2G)
  CFN_CPU_LIMIT            # CPU limit (default: 80%)
  CFN_TIMEOUT              # Timeout in seconds (default: 600)
  CFN_TELEMETRY_DIR        # Telemetry storage directory

This script provides process monitoring and resource limit enforcement
for CFN Loop agents and orchestration processes.
EOF
      ;;
    *)
      log_error "Unknown action: $action"
      exit 1
      ;;
  esac
}
|
|
311
|
+
|
|
312
|
+
# Execute main function if run directly
|
|
313
|
+
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
else
  # When sourced, automatically instrument current process
  init_telemetry

  # start_monitoring is called directly (not captured with $(...)): it sets
  # the global MONITOR_PID used by the cleanup trap, and a command
  # substitution would block on the background loop.
  start_monitoring >/dev/null

  # Set up cleanup traps. The exit status is captured first; reading $? after
  # stop_monitoring would report that function's status, not the script's.
  trap 'cfn_exit_status=$?; stop_monitoring; generate_report "$cfn_exit_status"' EXIT
  # Exit on INT/TERM so the signal really ends the sourcing script; the EXIT
  # trap above then performs the cleanup exactly once. 130 and 143 are the
  # conventional 128+signal statuses.
  trap 'exit 130' INT
  trap 'exit 143' TERM

  log_info "Process instrumentation enabled for $AGENT_ID"
fi
|