claude-flow-novice 2.14.31 → 2.14.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/cfn-data/cfn-loop.db +0 -0
- package/.claude/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/.claude/skills/cfn-agent-discovery/agents-registry.json +10 -9
- package/.claude/skills/cfn-docker-agent-spawning/SKILL.md +394 -0
- package/.claude/skills/cfn-docker-agent-spawning/spawn-agent.sh +521 -0
- package/.claude/skills/cfn-docker-loop-orchestration/SKILL.md +449 -0
- package/.claude/skills/cfn-docker-loop-orchestration/orchestrate.sh +787 -0
- package/.claude/skills/cfn-docker-redis-coordination/SKILL.md +435 -0
- package/.claude/skills/cfn-docker-redis-coordination/coordinate.sh +635 -0
- package/.claude/skills/cfn-docker-skill-mcp-selection/SKILL.md +289 -0
- package/.claude/skills/cfn-docker-skill-mcp-selection/skill-mcp-selector.js +472 -0
- package/.claude/skills/cfn-loop-validation/config.json +2 -2
- package/.claude/skills/pre-edit-backup/backup.sh +107 -0
- package/README.md +95 -0
- package/claude-assets/agents/README-AGENT_LIFECYCLE.md +10 -37
- package/claude-assets/agents/README-VALIDATION.md +8 -0
- package/claude-assets/agents/cfn-dev-team/README.md +8 -0
- package/claude-assets/agents/cfn-dev-team/coordinators/README.md +9 -1
- package/claude-assets/agents/cfn-dev-team/coordinators/cfn-v3-coordinator.md +71 -9
- package/claude-assets/agents/cfn-dev-team/developers/README.md +9 -1
- package/claude-assets/agents/cfn-dev-team/documentation/README-VALIDATION.md +8 -0
- package/claude-assets/agents/cfn-dev-team/documentation/agent-type-guidelines.md +10 -0
- package/claude-assets/agents/cfn-dev-team/reviewers/README.md +9 -1
- package/claude-assets/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +10 -0
- package/claude-assets/agents/cfn-dev-team/test-agent.md +10 -0
- package/claude-assets/agents/cfn-dev-team/testers/README.md +9 -1
- package/claude-assets/agents/csuite/cto-agent.md +10 -0
- package/claude-assets/agents/custom/cfn-system-expert.md +128 -1
- package/claude-assets/agents/docker-coordinators/cfn-docker-v3-coordinator.md +5 -1
- package/claude-assets/agents/docker-team/csuite/c-suite-template.md +5 -1
- package/claude-assets/agents/docker-team/infrastructure/team-coordinator-template.md +5 -1
- package/claude-assets/agents/marketing_hybrid/cost_tracker.md +10 -0
- package/claude-assets/agents/marketing_hybrid/docker_deployer.md +10 -0
- package/claude-assets/agents/marketing_hybrid/zai_worker_spawner.md +10 -0
- package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/claude-assets/hooks/cfn-post-execution/memory-cleanup.sh +20 -0
- package/claude-assets/hooks/cfn-pre-execution/memory-check.sh +20 -0
- package/claude-assets/skills/cfn-agent-discovery/agents-registry.json +10 -9
- package/claude-assets/skills/cfn-docker-agent-spawning/spawn-agent.sh +70 -10
- package/claude-assets/skills/cfn-loop-validation/config.json +2 -2
- package/claude-assets/skills/cfn-memory-management/SKILL.md +271 -0
- package/claude-assets/skills/cfn-memory-management/check-memory.sh +160 -0
- package/claude-assets/skills/cfn-memory-management/cleanup-memory.sh +197 -0
- package/claude-assets/skills/cfn-redis-data-extraction/SKILL.md +442 -0
- package/claude-assets/skills/cfn-redis-data-extraction/extract.sh +306 -0
- package/claude-assets/skills/cfn-task-config-init/initialize-config.sh +2 -2
- package/claude-assets/skills/hook-pipeline/security-scanner.sh +102 -0
- package/claude-assets/skills/pre-edit-backup/backup.sh +107 -0
- package/dist/cli/agent-command.js +44 -2
- package/dist/cli/agent-command.js.map +1 -1
- package/dist/cli/config-manager.js +91 -109
- package/dist/cli/config-manager.js.map +1 -1
- package/dist/cli/index.js +29 -2
- package/dist/cli/index.js.map +1 -1
- package/package.json +22 -5
- package/scripts/deploy-production.sh +356 -0
- package/scripts/memory-leak-prevention.sh +306 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Production CFN Loop Deployment Script
|
|
4
|
+
# Deploys the complete production stack with validation
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
TIMESTAMP=$(date +%s)
LOG_FILE="$PROJECT_ROOT/logs/production-deploy-$TIMESTAMP.log"

# The logging helpers append to LOG_FILE from the very first message, which is
# emitted before the pre-deployment step creates logs/. Create the directory
# up front so the first `tee -a` cannot fail under `set -euo pipefail`.
mkdir -p "$(dirname "$LOG_FILE")" || {
  echo "Cannot create log directory: $(dirname "$LOG_FILE")" >&2
  exit 1
}

# Colors for output (backslash escapes, expanded by the logging helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Environment variables (each may be overridden by the caller's environment)
export BUILD_DATE="${BUILD_DATE:-$(date -u +'%Y-%m-%dT%H:%M:%SZ')}"
export VCS_REF="${VCS_REF:-$(git rev-parse --short HEAD 2>/dev/null || echo 'unknown')}"
export VERSION="${VERSION:-4.0.0}"
export CLAUDE_FLOW_VERSION="${CLAUDE_FLOW_VERSION:-4.0.0}"
export LOG_LEVEL="${LOG_LEVEL:-info}"
export MAX_AGENTS="${MAX_AGENTS:-10}"
export AGENT_TIMEOUT="${AGENT_TIMEOUT:-300000}"
export MEMORY_LIMIT="${MEMORY_LIMIT:-1g}"
export CPU_LIMIT="${CPU_LIMIT:-0.5}"
export AGENT_REPLICAS="${AGENT_REPLICAS:-3}"
export WORKER_POOL_SIZE="${WORKER_POOL_SIZE:-5}"
export GRAFANA_USER="${GRAFANA_USER:-admin}"
# NOTE(review): the fallback password is a well-known default; set
# GRAFANA_PASSWORD explicitly for any real production deployment.
export GRAFANA_PASSWORD="${GRAFANA_PASSWORD:-admin123}"
|
|
35
|
+
|
|
36
|
+
# Logging function
# Prints a timestamped message to stdout and appends it to LOG_FILE.
# Arguments: $1 - message text (optional; defaults to an empty string)
# Globals:   BLUE, NC, LOG_FILE (read)
log() {
  local message="${1:-}"
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message verbatim so backslashes in it are not interpreted.
  printf '%b[%s]%b %s\n' "$BLUE" "$(date '+%Y-%m-%d %H:%M:%S')" "$NC" "$message" \
    | tee -a "$LOG_FILE"
}
|
|
40
|
+
|
|
41
|
+
# Prints an error message to stderr and appends it to LOG_FILE.
# Arguments: $1 - message text (optional; defaults to an empty string)
# Globals:   RED, NC, LOG_FILE (read)
error() {
  local message="${1:-}"
  # Redirect tee's output (not the producer's) to stderr; redirecting before
  # the pipe would leave tee with no input and the log file without the error.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$message" | tee -a "$LOG_FILE" >&2
}
|
|
44
|
+
|
|
45
|
+
# Prints a success message to stdout and appends it to LOG_FILE.
# Arguments: $1 - message text (optional; defaults to an empty string)
# Globals:   GREEN, NC, LOG_FILE (read)
success() {
  local message="${1:-}"
  # %b expands the color escapes; %s keeps the message text verbatim.
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$message" | tee -a "$LOG_FILE"
}
|
|
48
|
+
|
|
49
|
+
# Prints a warning message to stdout and appends it to LOG_FILE.
# Arguments: $1 - message text (optional; defaults to an empty string)
# Globals:   YELLOW, NC, LOG_FILE (read)
warning() {
  local message="${1:-}"
  # %b expands the color escapes; %s keeps the message text verbatim.
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$message" | tee -a "$LOG_FILE"
}
|
|
52
|
+
|
|
53
|
+
# Cleanup function
# Runs once when the script exits, whatever the exit path.
cleanup() {
  log "Cleaning up on exit..."
  # Add any cleanup tasks here
}

# Set up signal handlers
# cleanup is bound to EXIT only. INT/TERM just exit with the conventional
# 128+signal status, which fires the EXIT trap exactly once; binding cleanup
# directly to the signals would run it twice and would not stop the script.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
|
61
|
+
|
|
62
|
+
# Main deployment function
# Announces the deployment, switches to the project root and then runs the
# deployment phases in order, each preceded by its banner message.
main() {
  local -a phase_banners=(
    "🔍 Running pre-deployment checks..."
    "🏗️ Building production Docker images..."
    "🐳 Deploying production infrastructure..."
    "⏳ Waiting for services to be healthy..."
    "🧪 Running deployment validation..."
  )
  local -a phase_steps=(
    run_pre_deployment_checks
    build_production_images
    deploy_infrastructure
    wait_for_healthy_services
    run_validation_tests
  )
  local idx

  log "🚀 Starting Production CFN Loop Deployment"
  log "📋 Deployment ID: $TIMESTAMP"
  log "📁 Project root: $PROJECT_ROOT"
  log "📄 Log file: $LOG_FILE"

  # Change to project directory
  cd "$PROJECT_ROOT"

  # Phases 1-5: banner first, then the phase itself
  for idx in "${!phase_steps[@]}"; do
    log "${phase_banners[idx]}"
    "${phase_steps[idx]}"
  done

  # Phase 6: Display deployment summary
  display_deployment_summary

  success "🎉 Production deployment completed successfully!"
}
|
|
97
|
+
|
|
98
|
+
# Pre-deployment checks
# Exits with status 1 when docker or docker-compose is missing; warns (without
# failing) about a missing Node.js, low disk space or low memory; finally
# creates the working directories relative to the current directory.
run_pre_deployment_checks() {
  local -a required_tools=("docker:Docker" "docker-compose:Docker Compose")
  local tool_entry tool_bin tool_label
  local available_space
  local available_memory

  log " Checking prerequisites..."

  # Mandatory tools: binary name and display label are stored as "bin:label"
  for tool_entry in "${required_tools[@]}"; do
    tool_bin=${tool_entry%%:*}
    tool_label=${tool_entry#*:}
    if ! command -v "$tool_bin" >/dev/null 2>&1; then
      error "$tool_label is not installed or not in PATH"
      exit 1
    fi
    log " ✅ $tool_label available: $("$tool_bin" --version)"
  done

  # Node.js is optional (only needed for local tests)
  if ! command -v node >/dev/null 2>&1; then
    warning "Node.js not available - some tests may be skipped"
  else
    log " ✅ Node.js available: $(node --version)"
  fi

  # Free disk space of the current filesystem, in whole gigabytes
  available_space=$(df -BG . | awk 'NR==2 {sub(/G/, "", $4); print $4}')
  if [[ $available_space -lt 5 ]]; then
    warning "Low disk space: ${available_space}GB available (recommended: 5GB+)"
  else
    log " ✅ Disk space: ${available_space}GB available"
  fi

  # Available memory, only when `free` exists
  if command -v free >/dev/null 2>&1; then
    available_memory=$(free -g | awk 'NR==2{print $7}')
    if [[ $available_memory -lt 4 ]]; then
      warning "Low memory: ${available_memory}GB available (recommended: 4GB+)"
    else
      log " ✅ Memory: ${available_memory}GB available"
    fi
  fi

  # Create necessary directories
  log " Creating directories..."
  mkdir -p \
    logs \
    workspaces \
    monitoring/grafana/provisioning/datasources \
    monitoring/grafana/provisioning/dashboards \
    monitoring/grafana/dashboards \
    nginx

  log " ✅ Pre-deployment checks completed"
}
|
|
149
|
+
|
|
150
|
+
# Build production images
# Builds claude-flow-novice:production from Dockerfile.production, tags it
# with $VERSION and logs the resulting image size.
# Globals: VERSION (read)
# Exits with status 1 when the build or the tag step fails.
build_production_images() {
  local production_image="claude-flow-novice:production"
  local versioned_image="claude-flow-novice:${VERSION}"
  local image_size

  log " Building production Docker image..."

  # Build the production image
  if docker build -f Dockerfile.production -t "$production_image" .; then
    success " ✅ Production image built successfully"
  else
    error " ❌ Failed to build production image"
    exit 1
  fi

  # Tag the image; the reference is quoted so VERSION is passed as one word,
  # and a failure is reported instead of aborting silently under `set -e`.
  if ! docker tag "$production_image" "$versioned_image"; then
    error " ❌ Failed to tag image as $versioned_image"
    exit 1
  fi
  log " ✅ Image tagged as $versioned_image"

  # Show image information
  image_size=$(docker images "$production_image" --format "{{.Size}}")
  log " 📊 Image size: $image_size"
}
|
|
171
|
+
|
|
172
|
+
# Deploy infrastructure
# Writes .env.production from the exported settings and starts the stack
# described by docker-compose.production.yml in detached mode.
# Exits with status 1 when docker-compose fails to bring the stack up.
deploy_infrastructure() {
  local env_file=".env.production"
  local compose_file="docker-compose.production.yml"

  log " Deploying production stack..."

  # Create environment file. It contains GRAFANA_PASSWORD, so it is created
  # with a restrictive umask and forced to 0600 in case it already existed.
  (
    umask 077
    cat > "$env_file" << EOF
# Production CFN Loop Environment Configuration
BUILD_DATE=$BUILD_DATE
VCS_REF=$VCS_REF
VERSION=$VERSION
CLAUDE_FLOW_VERSION=$CLAUDE_FLOW_VERSION
LOG_LEVEL=$LOG_LEVEL
MAX_AGENTS=$MAX_AGENTS
AGENT_TIMEOUT=$AGENT_TIMEOUT
MEMORY_LIMIT=$MEMORY_LIMIT
CPU_LIMIT=$CPU_LIMIT
AGENT_REPLICAS=$AGENT_REPLICAS
WORKER_POOL_SIZE=$WORKER_POOL_SIZE
GRAFANA_USER=$GRAFANA_USER
GRAFANA_PASSWORD=$GRAFANA_PASSWORD
EOF
  )
  chmod 600 "$env_file"

  # Deploy the stack
  if docker-compose -f "$compose_file" --env-file "$env_file" up -d; then
    success " ✅ Production stack deployed"
  else
    error " ❌ Failed to deploy production stack"
    exit 1
  fi

  # Show deployed services
  log " 📊 Deployed services:"
  docker-compose -f "$compose_file" ps
}
|
|
206
|
+
|
|
207
|
+
# Wait for services to be healthy
# Polls `docker-compose ps` every 10 seconds for up to 5 minutes and returns 0
# as soon as every service whose line contains "cfn-" is healthy. On timeout
# it only warns and prints the current status (best-effort, not fatal).
wait_for_healthy_services() {
  log " Waiting for services to become healthy..."
  local max_wait=300 # 5 minutes
  local wait_interval=10
  local waited=0
  local healthy_count total_count line

  while [[ $waited -lt $max_wait ]]; do
    healthy_count=0
    total_count=0

    # Check service health
    while IFS= read -r line; do
      [[ $line == *"cfn-"* ]] || continue
      # Plain assignments instead of ((var++)): the arithmetic command returns
      # status 1 when its value is 0, which aborts the script under `set -e`.
      total_count=$((total_count + 1))
      case "$line" in
        # "unhealthy" contains "healthy" and a starting container is "Up",
        # so both states must be excluded before the positive match.
        *unhealthy* | *"health: starting"*) ;;
        *healthy* | *Up*) healthy_count=$((healthy_count + 1)) ;;
      esac
    done < <(docker-compose -f docker-compose.production.yml ps --format "table {{.Name}}\t{{.Status}}" | tail -n +2)

    log " Progress: $healthy_count/$total_count services healthy (${waited}s elapsed)"

    if [[ $healthy_count -eq $total_count ]] && [[ $total_count -gt 0 ]]; then
      success " ✅ All $total_count services are healthy"
      return 0
    fi

    sleep "$wait_interval"
    waited=$((waited + wait_interval))
  done

  warning " ⚠️ Some services may still be starting up"
  log " 📊 Current status:"
  docker-compose -f docker-compose.production.yml ps
}
|
|
243
|
+
|
|
244
|
+
# Run validation tests
# Probes Redis, the orchestrator, Prometheus and Grafana, optionally runs the
# Node.js deployment test, and writes the results to
# logs/deployment-validation-$TIMESTAMP.json. Failed probes are reported but
# do not abort the deployment.
run_validation_tests() {
  local compose_file="docker-compose.production.yml"
  local report_file="logs/deployment-validation-$TIMESTAMP.json"
  local redis_ok=false
  local prometheus_ok=false
  local grafana_ok=false
  local services_json=""
  local services_total=null
  local services_healthy=null

  log " Running deployment validation tests..."

  # Test basic connectivity (result is reused for the report below)
  log " Testing Redis connectivity..."
  if docker exec cfn-redis-coordinator redis-cli ping | grep -q "PONG"; then
    redis_ok=true
    success " ✅ Redis connectivity verified"
  else
    error " ❌ Redis connectivity failed"
  fi

  # Test orchestrator
  log " Testing orchestrator..."
  if docker exec cfn-orchestrator node -e "console.log('Orchestrator ready')" &>/dev/null; then
    success " ✅ Orchestrator responsive"
  else
    warning " ⚠️ Orchestrator may still be initializing"
  fi

  # Test monitoring endpoints. -f makes curl fail on HTTP errors (without it a
  # 5xx response counts as success); --max-time bounds a hanging endpoint.
  log " Testing monitoring endpoints..."
  if curl -fsS --max-time 10 http://localhost:9090/-/healthy &>/dev/null; then
    prometheus_ok=true
    success " ✅ Prometheus endpoint accessible"
  else
    warning " ⚠️ Prometheus endpoint not accessible"
  fi

  if curl -fsS --max-time 10 http://localhost:3001/api/health &>/dev/null; then
    grafana_ok=true
    success " ✅ Grafana endpoint accessible"
  else
    warning " ⚠️ Grafana endpoint not accessible"
  fi

  # Run comprehensive test if Node.js is available
  if command -v node &> /dev/null; then
    log " Running comprehensive production test..."
    if node tests/docker/production-deployment-test.js &>/dev/null; then
      success " ✅ Comprehensive test passed"
    else
      warning " ⚠️ Comprehensive test encountered issues"
    fi
  else
    log " ⏭️ Skipping comprehensive test (Node.js not available)"
  fi

  # Service counts. `jq -s 'flatten'` accepts both a single JSON array and
  # one-object-per-line output; if jq or the JSON output is unavailable the
  # counts stay `null` so the report is still valid JSON.
  if command -v jq >/dev/null 2>&1 \
    && services_json=$(docker-compose -f "$compose_file" ps --format json 2>/dev/null); then
    services_total=$(jq -s 'flatten | length' <<<"$services_json" 2>/dev/null) \
      || services_total=null
    services_healthy=$(jq -s '[flatten[] | select(.State == "running")] | length' <<<"$services_json" 2>/dev/null) \
      || services_healthy=null
  else
    warning " ⚠️ Service counts unavailable (jq or compose JSON output missing)"
  fi

  # Store test results
  mkdir -p "$(dirname "$report_file")"
  cat > "$report_file" << EOF
{
"timestamp": "$(date -Iseconds)",
"deployment_id": "$TIMESTAMP",
"redis_healthy": $redis_ok,
"prometheus_healthy": $prometheus_ok,
"grafana_healthy": $grafana_ok,
"services_total": $services_total,
"services_healthy": $services_healthy
}
EOF
}
|
|
308
|
+
|
|
309
|
+
# Display deployment summary
# Logs service status, access URLs, current resource usage, useful commands
# and suggested next steps.
# Globals: GRAFANA_USER (read)
display_deployment_summary() {
  local compose_file="docker-compose.production.yml"

  log "📊 Deployment Summary"
  log "===================="

  # Service status
  log ""
  log "🐳 Services Status:"
  docker-compose -f "$compose_file" ps

  # Access URLs. The Grafana user comes from the active configuration; the
  # password is deliberately not echoed into the console or the log file.
  log ""
  log "🔗 Access URLs:"
  log " • Prometheus: http://localhost:9090"
  log " • Grafana: http://localhost:3001 (user: ${GRAFANA_USER:-admin}; password: see GRAFANA_PASSWORD in .env.production)"
  log " • Redis: redis://localhost:6379"
  log " • Orchestrator: http://localhost:3000"

  # Resource usage
  log ""
  log "📈 Resource Usage:"
  docker stats --no-stream --format "table {{.Container}}\t{{.MemUsage}}\t{{.CPUPerc}}" | grep cfn- || log " No container stats available yet"

  # Useful commands
  log ""
  log "🛠️ Useful Commands:"
  log " • View logs: docker-compose -f docker-compose.production.yml logs -f [service-name]"
  log " • Stop stack: docker-compose -f docker-compose.production.yml down"
  log " • Restart service: docker-compose -f docker-compose.production.yml restart [service-name]"
  log " • Scale agents: docker-compose -f docker-compose.production.yml up -d --scale agent-pool=[N]"

  # Next steps
  log ""
  log "📋 Next Steps:"
  log " 1. Configure Grafana dashboards for monitoring"
  log " 2. Set up alerting rules in Prometheus"
  log " 3. Test CFN Loop execution with real tasks"
  log " 4. Configure backup and disaster recovery"
  log " 5. Set up log aggregation and analysis"

  log ""
  success "🎉 Production deployment is ready for use!"
}
|
|
352
|
+
|
|
353
|
+
# Script entry point
# Run main only when this file is executed directly, not when it is sourced.
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
  main "$@"
fi
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Memory Leak Detection and Prevention Script
|
|
4
|
+
# Implements recommendations from memory leak analysis
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
# Configuration
DEFAULT_MEMORY_LIMIT="8192" # 8GB limit instead of 16GB
DEFAULT_NODE_OPTIONS="--max-old-space-size=$DEFAULT_MEMORY_LIMIT"
PROFILING_DIR="/tmp/claude-memory-profiles"
LOG_FILE="/tmp/claude-memory-monitor.log"

# Colors for output
# ANSI-C quoting ($'...') stores the real escape character. This script's
# log() prints with plain `echo`, which would otherwise emit the literal
# text "\033[0;31m" instead of switching colors.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color

# Create profiling directory
mkdir -p "$PROFILING_DIR"
|
|
23
|
+
|
|
24
|
+
# Writes "<timestamp> - <message>" to stdout and appends the same line to
# LOG_FILE.
# Arguments: $1 - message text
log() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}
|
|
27
|
+
|
|
28
|
+
# Prints the usage text (commands, options and examples) to stdout. The script
# name shown in the text is taken from $0.
show_help() {
  local script_name=$0
  local -a help_lines=(
    "Memory Leak Detection and Prevention Tool"
    ""
    "Usage: $script_name [COMMAND] [OPTIONS]"
    ""
    "Commands:"
    "profile Start Claude with heap profiling enabled"
    "monitor Monitor existing Claude process memory usage"
    "limit Set memory limits for Claude sessions"
    "install-tools Install strace and perf for WSL"
    "analyze Analyze existing memory profiles"
    "config Show current memory configuration"
    ""
    "Options:"
    "--limit MB Set memory limit in MB (default: 8192)"
    "--pid PID Monitor specific process ID"
    "--duration SEC Monitoring duration in seconds (default: 300)"
    "--output DIR Output directory for profiles (default: /tmp/claude-memory-profiles)"
    ""
    "Examples:"
    "$script_name profile --limit 6144 # Start Claude with 6GB limit and profiling"
    "$script_name monitor --pid 12345 --duration 600 # Monitor PID 12345 for 10 minutes"
    "$script_name install-tools # Install strace and perf"
    "$script_name analyze --output ./profiles # Analyze existing profiles"
  )

  printf '%s\n' "${help_lines[@]}"
}
|
|
55
|
+
|
|
56
|
+
# Installs strace and perf through apt-get or yum (using sudo) and relaxes the
# kernel's perf restrictions for non-root users on a best-effort basis.
# Returns: 0 on success, 1 when no supported package manager is found or perf
#          could not be installed.
install_debug_tools() {
  log "${BLUE}Installing strace and perf for debugging...${NC}"

  if command -v apt-get &> /dev/null; then
    sudo apt-get update
    sudo apt-get install -y strace
    # The perf package name differs between Debian-family distributions, and
    # apt-get aborts the whole transaction when one requested package is
    # unknown, so the candidates are tried one at a time.
    if ! sudo apt-get install -y linux-tools-generic \
      && ! sudo apt-get install -y linux-perf; then
      log "${RED}Could not install perf (tried linux-tools-generic and linux-perf)${NC}"
      return 1
    fi
  elif command -v yum &> /dev/null; then
    sudo yum install -y strace perf
  else
    log "${RED}Unsupported package manager. Please install strace and perf manually.${NC}"
    return 1
  fi

  # Configure perf for non-root users (best effort: /proc may be read-only)
  echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid >/dev/null 2>&1 || true
  echo 0 | sudo tee /proc/sys/kernel/kptr_restrict >/dev/null 2>&1 || true

  log "${GREEN}Debug tools installed successfully${NC}"
}
|
|
75
|
+
|
|
76
|
+
# Starts the claude-flow-novice CLI with a heap limit, the Node.js inspector
# and heap profiling enabled.
# Arguments: $1  - memory limit in MB (default: DEFAULT_MEMORY_LIMIT)
#            $2… - optional arguments forwarded to the CLI
# Globals:   PROFILING_DIR, DEFAULT_MEMORY_LIMIT (read);
#            CLAUDE_INSPECT_HOST (read, optional inspector bind address);
#            NODE_OPTIONS, CLAUDE_MEMORY_PROFILE_DIR (exported)
# Returns:   1 for a non-numeric limit, otherwise the CLI's exit status.
start_with_profiling() {
  local memory_limit=${1:-$DEFAULT_MEMORY_LIMIT}
  # Drop the limit from the positional parameters so that "$@" below holds
  # only CLI arguments (it used to pass the limit itself to the CLI).
  if [[ $# -gt 0 ]]; then
    shift
  fi
  # The inspector grants full code execution to whoever connects, so it binds
  # to loopback unless the caller explicitly asks for another address.
  local inspect_host=${CLAUDE_INSPECT_HOST:-127.0.0.1}

  if [[ ! "$memory_limit" =~ ^[0-9]+$ ]]; then
    log "${RED}Error: memory limit must be a whole number of MB (got: $memory_limit)${NC}"
    return 1
  fi

  log "${BLUE}Starting Claude with heap profiling and memory limit: ${memory_limit}MB${NC}"

  # Set up environment for profiling. --heap-prof-dir sends the profiles to
  # PROFILING_DIR, which is where the analyze command looks for them.
  export NODE_OPTIONS="--max-old-space-size=$memory_limit --inspect=${inspect_host}:9229 --heap-prof --heap-prof-dir=$PROFILING_DIR"
  export CLAUDE_MEMORY_PROFILE_DIR="$PROFILING_DIR"

  log "${GREEN}Environment variables set:${NC}"
  log " NODE_OPTIONS: $NODE_OPTIONS"
  log " Profile directory: $PROFILING_DIR"
  log "${YELLOW}Claude will start with debugging enabled on port 9229${NC}"
  log "${YELLOW}Use Chrome DevTools or chrome://inspect to connect${NC}"

  # Start Claude CLI with profiling
  log "${BLUE}Starting Claude CLI...${NC}"
  npx claude-flow-novice "$@"
}
|
|
98
|
+
|
|
99
|
+
# Samples a process every 10 seconds and appends one CSV row per sample
# (timestamp, pid, RSS MB, virtual size MB, CPU %, open files, connections) to
# a new file in PROFILING_DIR. Stops after the duration or when the process
# disappears.
# Arguments: $1 - PID to monitor (required, numeric)
#            $2 - duration in seconds (default: 300)
# Globals:   PROFILING_DIR (read); DEFAULT_MEMORY_LIMIT (read, alert threshold
#            in MB, falls back to 8192)
# Returns:   1 on a missing/invalid PID or duration, 0 otherwise.
monitor_memory() {
  local target_pid=${1:-}
  local duration=${2:-300}
  local rss_alert_mb=${DEFAULT_MEMORY_LIMIT:-8192}
  local output_file start_time end_time
  local timestamp stats rss_kb vsz_kb cpu_percent rss_mb vms_mb
  local open_files connections

  if [[ -z "$target_pid" ]]; then
    log "${RED}Error: PID required for monitoring. Use --pid to specify.${NC}"
    return 1
  fi
  if [[ ! "$target_pid" =~ ^[0-9]+$ ]]; then
    log "${RED}Error: PID must be numeric (got: $target_pid)${NC}"
    return 1
  fi
  if [[ ! "$duration" =~ ^[0-9]+$ ]]; then
    log "${RED}Error: duration must be a whole number of seconds (got: $duration)${NC}"
    return 1
  fi

  output_file="${PROFILING_DIR}/memory-monitor-$(date +%Y%m%d-%H%M%S).csv"

  log "${BLUE}Monitoring PID $target_pid for ${duration} seconds...${NC}"
  log "Results will be saved to: $output_file"

  # Create CSV header
  echo "timestamp,pid,rss_mb,vms_mb,cpu_percent,open_files,connections" > "$output_file"

  start_time=$(date +%s)
  end_time=$((start_time + duration))

  while [[ $(date +%s) -lt $end_time ]]; do
    if ! kill -0 "$target_pid" 2>/dev/null; then
      log "${RED}Process $target_pid is no longer running${NC}"
      break
    fi

    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    # `vsz` is the ps keyword for virtual size (`vms` is rejected, which left
    # every sample empty). "name=" suppresses the header portably.
    stats=$(ps -p "$target_pid" -o rss= -o vsz= -o pcpu= 2>/dev/null || true)

    if [[ -n "${stats//[[:space:]]/}" ]]; then
      read -r rss_kb vsz_kb cpu_percent <<<"$stats"
      rss_mb=$((rss_kb / 1024))
      vms_mb=$((vsz_kb / 1024))

      # Count open files and network connections. The producers are wrapped
      # in `|| true` so that, under pipefail, a missing tool or an empty match
      # yields a single "0" instead of a second value from a fallback echo.
      open_files=$({ lsof -p "$target_pid" 2>/dev/null || true; } | wc -l)
      open_files=$((open_files))
      connections=$({ netstat -p 2>/dev/null || true; } \
        | grep -c -E "[[:space:]]${target_pid}/" || true)
      connections=$((connections))

      echo "$timestamp,$target_pid,$rss_mb,$vms_mb,$cpu_percent,$open_files,$connections" >> "$output_file"

      # Alert if memory exceeds threshold
      if [[ $rss_mb -gt $rss_alert_mb ]]; then
        log "${RED}⚠️ High memory usage detected: ${rss_mb}MB RSS${NC}"
      fi

      log "Memory: ${rss_mb}MB RSS, ${vms_mb}MB VMS, CPU: ${cpu_percent}%, Files: $open_files, Connections: $connections"
    fi

    sleep 10
  done

  log "${GREEN}Monitoring completed. Data saved to: $output_file${NC}"
}
|
|
151
|
+
|
|
152
|
+
# Persists a Node.js heap limit by writing an `export NODE_OPTIONS=...` line to
# the user's shell rc file (~/.zshrc when it exists, otherwise ~/.bashrc),
# replacing any line previously written by this function.
# Arguments: $1 - limit in MB (default: 8192); must be a positive integer
# Returns:   1 when the limit is not a positive integer, 0 otherwise.
set_memory_limits() {
  local limit_mb=${1:-8192}
  local shell_rc="$HOME/.bashrc"

  # The value ends up in a file that every new shell sources, so anything
  # other than digits is rejected instead of being written out.
  if [[ ! "$limit_mb" =~ ^[1-9][0-9]*$ ]]; then
    log "${RED}Error: memory limit must be a positive whole number of MB (got: $limit_mb)${NC}"
    return 1
  fi

  log "${BLUE}Setting memory limits to ${limit_mb}MB...${NC}"

  # Update shell configuration
  if [[ -f "$HOME/.zshrc" ]]; then
    shell_rc="$HOME/.zshrc"
  fi

  # Remove existing NODE_OPTIONS lines (best effort; the file may not exist)
  if [[ -f "$shell_rc" ]]; then
    sed -i '/^export NODE_OPTIONS="--max-old-space-size=/d' "$shell_rc" 2>/dev/null || true
  fi

  # Add new limit
  echo "export NODE_OPTIONS=\"--max-old-space-size=$limit_mb\"" >> "$shell_rc"

  log "${GREEN}Memory limit set to ${limit_mb}MB in $shell_rc${NC}"
  log "${YELLOW}Restart your shell or run: source $shell_rc${NC}"
}
|
|
172
|
+
|
|
173
|
+
# Writes a "<profile>.analysis" file with the ten largest samples for every
# *.heapprofile file found under a directory (requires node on PATH).
# Arguments: $1 - directory to scan (default: PROFILING_DIR)
# Returns:   1 when the directory does not exist or holds no heap profiles,
#            0 otherwise (profiles that fail to parse are reported and skipped).
analyze_profiles() {
  local profile_dir=${1:-$PROFILING_DIR}
  local -a heap_profiles=()
  local profile

  log "${BLUE}Analyzing memory profiles in $profile_dir...${NC}"

  if [[ ! -d "$profile_dir" ]]; then
    log "${RED}Profile directory $profile_dir does not exist${NC}"
    return 1
  fi

  # Find heap profile files. NUL-delimited so paths containing spaces or glob
  # characters stay intact.
  while IFS= read -r -d '' profile; do
    heap_profiles+=("$profile")
  done < <(find "$profile_dir" -type f -name "*.heapprofile" -print0 2>/dev/null)

  if [[ ${#heap_profiles[@]} -eq 0 ]]; then
    log "${YELLOW}No heap profiles found in $profile_dir${NC}"
    return 1
  fi

  log "${GREEN}Found ${#heap_profiles[@]} heap profile(s)${NC}"

  for profile in "${heap_profiles[@]}"; do
    log "${BLUE}Analyzing: $(basename "$profile")${NC}"

    # Basic analysis - find top allocations
    if command -v node &> /dev/null; then
      echo "Top 10 allocations in $(basename "$profile"):" > "${profile}.analysis"
      # The path is handed over as an argument (process.argv[1]) rather than
      # pasted into the JavaScript source, so quotes in it cannot break or
      # alter the script. Sample names are looked up through the call tree
      # under "head" when the sample itself carries no functionName.
      if ! node -e '
const fs = require("fs");
const data = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
const allocations = data.heapProfile || data;
const names = new Map();
const pending = allocations && allocations.head ? [allocations.head] : [];
while (pending.length > 0) {
  const node = pending.pop();
  if (node.callFrame) {
    names.set(node.id, node.callFrame.functionName || "(anonymous)");
  }
  (node.children || []).forEach((child) => pending.push(child));
}
if (allocations && Array.isArray(allocations.samples)) {
  allocations.samples
    .slice()
    .sort((a, b) => b.size - a.size)
    .slice(0, 10)
    .forEach((sample, i) => {
      const name = sample.functionName || names.get(sample.nodeId) || "unknown";
      console.log(`${i + 1}. ${name}: ${(sample.size / 1024 / 1024).toFixed(2)}MB`);
    });
}
' -- "$profile" >> "${profile}.analysis"; then
        log "${RED}Failed to analyze: $(basename "$profile")${NC}"
        continue
      fi

      log "${GREEN}Analysis saved to: ${profile}.analysis${NC}"
    fi
  done
}
|
|
217
|
+
|
|
218
|
+
# Logs the current memory configuration: NODE_OPTIONS, the heap limit parsed
# from it, system memory (when `free` is available) and the profile/log paths.
# Globals: NODE_OPTIONS (read, may be unset), PROFILING_DIR, LOG_FILE (read)
show_config() {
  # Read NODE_OPTIONS once with a default so `set -u` cannot trip on it.
  local node_options=${NODE_OPTIONS:-}
  local total_mem="" available_mem=""

  log "${BLUE}Current Memory Configuration:${NC}"

  # Show current NODE_OPTIONS
  if [[ -n "$node_options" ]]; then
    log " NODE_OPTIONS: $node_options"
  else
    log " NODE_OPTIONS: (not set)"
  fi

  # Show current limits; the unit is printed only when a number was found.
  if [[ "$node_options" =~ max-old-space-size=([0-9]+) ]]; then
    log " Memory limit: ${BASH_REMATCH[1]}MB"
  else
    log " Memory limit: (not set - Node.js default heap limit applies)"
  fi

  # Show available memory
  if command -v free &> /dev/null; then
    read -r total_mem available_mem < <(free -m | awk 'NR==2{print $2, $7}') || true
    log " System memory: ${available_mem}MB available / ${total_mem}MB total"
  fi

  # Show profile directory
  log " Profile directory: $PROFILING_DIR"
  log " Log file: $LOG_FILE"
}
|
|
243
|
+
|
|
244
|
+
# Parse command line arguments

# Aborts with a message when an option is the last word on the command line.
# Arguments: $1 - number of words left (including the option itself)
#            $2 - option name, for the message
_require_option_value() {
  if [[ "$1" -lt 2 ]]; then
    log "${RED}Option $2 requires a value${NC}"
    exit 1
  fi
}

# Dispatches the first argument to the matching command. The parsing lives in
# a function because `local` is an error outside one, which made the profile,
# monitor, limit and analyze commands abort under `set -e`.
# Arguments: "$@" - the script's command line
main() {
  local limit=$DEFAULT_MEMORY_LIMIT
  local pid=""
  local duration=300
  local dir=$PROFILING_DIR

  case "${1:-}" in
    "profile")
      shift
      while [[ $# -gt 0 ]]; do
        case $1 in
          --limit)
            _require_option_value "$#" "$1"
            limit="$2"
            shift 2
            ;;
          *) shift ;;
        esac
      done
      start_with_profiling "$limit"
      ;;
    "monitor")
      shift
      while [[ $# -gt 0 ]]; do
        case $1 in
          --pid)
            _require_option_value "$#" "$1"
            pid="$2"
            shift 2
            ;;
          --duration)
            _require_option_value "$#" "$1"
            duration="$2"
            shift 2
            ;;
          *) shift ;;
        esac
      done
      monitor_memory "$pid" "$duration"
      ;;
    "limit")
      shift
      while [[ $# -gt 0 ]]; do
        case $1 in
          --limit)
            _require_option_value "$#" "$1"
            limit="$2"
            shift 2
            ;;
          *) shift ;;
        esac
      done
      set_memory_limits "$limit"
      ;;
    "install-tools")
      install_debug_tools
      ;;
    "analyze")
      shift
      while [[ $# -gt 0 ]]; do
        case $1 in
          --output)
            _require_option_value "$#" "$1"
            dir="$2"
            shift 2
            ;;
          *) shift ;;
        esac
      done
      analyze_profiles "$dir"
      ;;
    "config")
      show_config
      ;;
    "help" | "-h" | "--help")
      show_help
      ;;
    *)
      log "${RED}Unknown command: ${1:-}${NC}"
      show_help
      exit 1
      ;;
  esac
}

main "$@"
|