claude-flow-novice 2.14.35 → 2.14.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/.claude/commands/cfn-loop-cli.md +491 -456
- package/.claude/commands/switch-api.md +1 -1
- package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
- package/.claude/skills/cfn-loop-validation/config.json +2 -2
- package/.claude/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
- package/claude-assets/agents/README-AGENT_LIFECYCLE.md +37 -10
- package/claude-assets/agents/README-VALIDATION.md +0 -8
- package/claude-assets/agents/cfn-dev-team/README.md +0 -8
- package/claude-assets/agents/cfn-dev-team/coordinators/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/developers/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/documentation/README-VALIDATION.md +0 -8
- package/claude-assets/agents/cfn-dev-team/documentation/agent-type-guidelines.md +0 -10
- package/claude-assets/agents/cfn-dev-team/reviewers/README.md +1 -9
- package/claude-assets/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +0 -10
- package/claude-assets/agents/cfn-dev-team/test-agent.md +0 -10
- package/claude-assets/agents/cfn-dev-team/testers/README.md +1 -9
- package/claude-assets/agents/csuite/cto-agent.md +0 -10
- package/claude-assets/agents/custom/cfn-system-expert.md +1 -128
- package/claude-assets/agents/custom/claude-code-expert.md +151 -2
- package/claude-assets/agents/docker-coordinators/cfn-docker-v3-coordinator.md +39 -3
- package/claude-assets/agents/docker-team/csuite/c-suite-template.md +1 -5
- package/claude-assets/agents/docker-team/infrastructure/team-coordinator-template.md +1 -5
- package/claude-assets/agents/marketing_hybrid/cost_tracker.md +0 -10
- package/claude-assets/agents/marketing_hybrid/docker_deployer.md +0 -10
- package/claude-assets/agents/marketing_hybrid/zai_worker_spawner.md +0 -10
- package/claude-assets/commands/CFN_LOOP_TASK_MODE.md +1 -1
- package/claude-assets/commands/cfn-loop-cli.md +491 -456
- package/claude-assets/commands/switch-api.md +1 -1
- package/claude-assets/skills/cfn-error-logging/SKILL.md +339 -0
- package/claude-assets/skills/cfn-error-logging/cleanup-error-logs.sh +334 -0
- package/claude-assets/skills/cfn-error-logging/integrate-cli.sh +232 -0
- package/claude-assets/skills/cfn-error-logging/integrate-docker.sh +294 -0
- package/claude-assets/skills/cfn-error-logging/invoke-error-logging.sh +839 -0
- package/claude-assets/skills/cfn-error-logging/test-error-logging.sh +475 -0
- package/claude-assets/skills/cfn-loop-orchestration/orchestrate.sh +2 -1
- package/claude-assets/skills/cfn-loop-validation/config.json +2 -2
- package/claude-assets/skills/cfn-process-instrumentation/instrument-process.sh +326 -322
- package/claude-assets/skills/cfn-redis-coordination/invoke-waiting-mode.sh +220 -220
- package/claude-assets/skills/cfn-task-config-init/initialize-config.sh +2 -2
- package/claude-assets/skills/cfn-task-mode-sanitize/task-mode-env-sanitizer.sh +224 -181
- package/claude-assets/skills/cfn-validation-runner-instrumentation/wrapped-executor.sh +235 -271
- package/dist/agents/agent-loader.js +467 -133
- package/dist/agents/agent-loader.js.map +1 -1
- package/dist/cli/config-manager.js +109 -91
- package/dist/cli/config-manager.js.map +1 -1
- package/dist/hello.js +27 -3
- package/dist/hello.js.map +1 -1
- package/dist/server.js +194 -0
- package/dist/server.js.map +1 -0
- package/dist/server.test.js +207 -0
- package/dist/server.test.js.map +1 -0
- package/package.json +2 -1
- package/scripts/docker-build-mcp.sh +155 -0
- package/scripts/docker-test-mcp.sh +260 -0
- package/scripts/mcp-health-check.sh +123 -0
|
@@ -15,7 +15,7 @@ Switch Main Chat and Task() tool API provider between Z.ai (cost-optimized) and
|
|
|
15
15
|
**Arguments:**
|
|
16
16
|
- `status` - Show current routing configuration (default)
|
|
17
17
|
- `zai` - Route Main Chat + Task tool to Z.ai for cost savings
|
|
18
|
-
- `max` - Route Main Chat + Task tool to Anthropic for quality
|
|
18
|
+
- `max` or `claude` - Route Main Chat + Task tool to Anthropic for quality
|
|
19
19
|
|
|
20
20
|
**What This Does:**
|
|
21
21
|
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
# CFN Error Logging Skill
|
|
2
|
+
|
|
3
|
+
## Metadata
|
|
4
|
+
- **Skill ID:** cfn-error-logging
|
|
5
|
+
- **Version:** 1.0.0
|
|
6
|
+
- **Category:** Error Management & Debugging
|
|
7
|
+
- **Dependencies:** redis-coordination, system-diagnostics
|
|
8
|
+
- **Maturity:** Production
|
|
9
|
+
- **Last Updated:** 2025-11-10
|
|
10
|
+
|
|
11
|
+
## Purpose
|
|
12
|
+
Comprehensive error logging and diagnostic capture for CFN Loop failures. Creates detailed error reports that users can send for debugging when CFN loops fail in CLI or Docker modes.
|
|
13
|
+
|
|
14
|
+
## Responsibilities
|
|
15
|
+
1. **Error Detection**: Monitor CFN Loop execution for failures and exceptions
|
|
16
|
+
2. **Diagnostic Capture**: Collect system state, logs, and configuration data
|
|
17
|
+
3. **Report Generation**: Create user-friendly error reports with actionable information
|
|
18
|
+
4. **Log Management**: Store, organize, and clean up error logs
|
|
19
|
+
5. **Integration**: Hook into CLI and Docker CFN Loop failure points
|
|
20
|
+
|
|
21
|
+
## Interface
|
|
22
|
+
|
|
23
|
+
### Main Entry Point
|
|
24
|
+
```bash
|
|
25
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
26
|
+
--action <capture|report|cleanup|list> \
|
|
27
|
+
--task-id <unique-id> \
|
|
28
|
+
[--error-type <orchestrator|agent-spawn|timeout|consensus|resource>] \
|
|
29
|
+
[--error-message <description>] \
|
|
30
|
+
[--exit-code <number>] \
|
|
31
|
+
[--context <json>]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Parameters
|
|
35
|
+
- `action`: Operation to perform (capture, report, cleanup, list)
|
|
36
|
+
- `task-id`: Unique CFN Loop task identifier
|
|
37
|
+
- `error-type`: Type of error that occurred
|
|
38
|
+
- `error-message`: Human-readable error description
|
|
39
|
+
- `exit-code`: Process exit code (if available)
|
|
40
|
+
- `context`: Additional context data (JSON format)
|
|
41
|
+
|
|
42
|
+
### Available Actions
|
|
43
|
+
|
|
44
|
+
#### **capture** - Capture Error Data
|
|
45
|
+
```bash
|
|
46
|
+
# Automatic capture on CFN Loop failure
|
|
47
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
48
|
+
--action capture \
|
|
49
|
+
--task-id "cfn-cli-1731234567" \
|
|
50
|
+
--error-type "orchestrator" \
|
|
51
|
+
--error-message "Agent spawning failed" \
|
|
52
|
+
--exit-code 1
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
#### **report** - Generate User Report
|
|
56
|
+
```bash
|
|
57
|
+
# Generate user-friendly error report
|
|
58
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
59
|
+
--action report \
|
|
60
|
+
--task-id "cfn-cli-1731234567" \
|
|
61
|
+
--format "markdown"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
#### **cleanup** - Manage Error Logs
|
|
65
|
+
```bash
|
|
66
|
+
# Clean old error logs (older than 7 days)
|
|
67
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
68
|
+
--action cleanup \
|
|
69
|
+
--retention-days 7
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
#### **list** - List Error Logs
|
|
73
|
+
```bash
|
|
74
|
+
# List all error logs
|
|
75
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
76
|
+
--action list \
|
|
77
|
+
--format "table"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Data Captured
|
|
81
|
+
|
|
82
|
+
### System Diagnostics
|
|
83
|
+
- **Hardware**: CPU, memory, disk space usage
|
|
84
|
+
- **Software**: OS version, Node.js version, npx version
|
|
85
|
+
- **Dependencies**: Redis connectivity, Docker status
|
|
86
|
+
- **Network**: Connection status, latency
|
|
87
|
+
|
|
88
|
+
### CFN Loop State
|
|
89
|
+
- **Configuration**: Task ID, mode, agent lists, thresholds
|
|
90
|
+
- **Execution**: Current iteration, agent PIDs, timeouts
|
|
91
|
+
- **Redis Data**: Task context, agent states, confidence scores
|
|
92
|
+
- **Checkpoints**: Last successful iteration, saved state
|
|
93
|
+
|
|
94
|
+
### Error Context
|
|
95
|
+
- **Error Details**: Type, message, exit code, timestamp
|
|
96
|
+
- **Stack Traces**: Process logs, error messages, debug output
|
|
97
|
+
- **Environment**: Working directory, environment variables
|
|
98
|
+
- **Process Tree**: Parent/child process relationships
|
|
99
|
+
|
|
100
|
+
## Integration Points
|
|
101
|
+
|
|
102
|
+
### CLI Loop Integration
|
|
103
|
+
```bash
|
|
104
|
+
# Add to orchestrate.sh error handling
|
|
105
|
+
if [ $EXIT_CODE -ne 0 ]; then
|
|
106
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
107
|
+
--action capture \
|
|
108
|
+
--task-id "$TASK_ID" \
|
|
109
|
+
--error-type "orchestrator" \
|
|
110
|
+
--error-message "CFN Loop failed at iteration $ITERATION" \
|
|
111
|
+
--exit-code $EXIT_CODE
|
|
112
|
+
fi
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Docker Loop Integration
|
|
116
|
+
```bash
|
|
117
|
+
# Add to cfn-docker-loop-orchestration error handling
|
|
118
|
+
if [ $CONTAINER_EXIT_CODE -ne 0 ]; then
|
|
119
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
120
|
+
--action capture \
|
|
121
|
+
--task-id "$TASK_ID" \
|
|
122
|
+
--error-type "docker" \
|
|
123
|
+
--error-message "Container failed: $CONTAINER_NAME" \
|
|
124
|
+
--exit-code $CONTAINER_EXIT_CODE
|
|
125
|
+
fi
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Agent Spawning Integration
|
|
129
|
+
```bash
|
|
130
|
+
# Add to agent spawning error handling
|
|
131
|
+
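# ($? inside `if ! cmd; then` is the status of the negation, i.e. always 0,
# so the real exit code is captured before testing it)
SPAWN_EXIT_CODE=0; $SPAWN_COMMAND || SPAWN_EXIT_CODE=$?
if [ "$SPAWN_EXIT_CODE" -ne 0 ]; then
|
|
132
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
133
|
+
--action capture \
|
|
134
|
+
--task-id "$TASK_ID" \
|
|
135
|
+
--error-type "agent-spawn" \
|
|
136
|
+
--error-message "Failed to spawn agent: $AGENT_TYPE" \
|
|
137
|
+
--exit-code "$SPAWN_EXIT_CODE"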
|
|
138
|
+
fi
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Error Report Format
|
|
142
|
+
|
|
143
|
+
### Markdown Report (User-Friendly)
|
|
144
|
+
```markdown
|
|
145
|
+
# CFN Loop Error Report
|
|
146
|
+
|
|
147
|
+
## 🚨 Error Summary
|
|
148
|
+
- **Task ID**: cfn-cli-1731234567
|
|
149
|
+
- **Error Type**: orchestrator
|
|
150
|
+
- **Message**: Agent spawning failed
|
|
151
|
+
- **Timestamp**: 2025-11-10 04:30:15 UTC
|
|
152
|
+
- **Exit Code**: 1
|
|
153
|
+
|
|
154
|
+
## ๐ Quick Diagnosis
|
|
155
|
+
**Most Likely Cause**: npx not found or Redis connection failed
|
|
156
|
+
**Recommended Action**: Check dependencies with pre-flight validation
|
|
157
|
+
|
|
158
|
+
## 🔧 Troubleshooting Steps
|
|
159
|
+
1. ✅ Check Node.js installation: `node --version`
|
|
160
|
+
2. ✅ Check npx availability: `npx --version`
|
|
161
|
+
3. ❌ Check Redis connection: `redis-cli ping`
|
|
162
|
+
4. ✅ Check available memory: `free -h`
|
|
163
|
+
|
|
164
|
+
## ๐ System State
|
|
165
|
+
- **Memory Usage**: 65% (2.6GB/4GB)
|
|
166
|
+
- **Disk Space**: 45GB available
|
|
167
|
+
- **CPU Load**: 0.8
|
|
168
|
+
- **Concurrent CFN Loops**: 3
|
|
169
|
+
|
|
170
|
+
## ๐ Send This Report
|
|
171
|
+
**To**: Your Claude assistant
|
|
172
|
+
**Include**:
|
|
173
|
+
- Complete error details above
|
|
174
|
+
- Any recent changes to your setup
|
|
175
|
+
- Steps you were trying to perform
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### JSON Report (Machine-Readable)
|
|
179
|
+
```json
|
|
180
|
+
{
|
|
181
|
+
"task_id": "cfn-cli-1731234567",
|
|
182
|
+
"error_type": "orchestrator",
|
|
183
|
+
"error_message": "Agent spawning failed",
|
|
184
|
+
"timestamp": "2025-11-10T04:30:15Z",
|
|
185
|
+
"exit_code": 1,
|
|
186
|
+
"system_diagnostics": {...},
|
|
187
|
+
"cfn_state": {...},
|
|
188
|
+
"troubleshooting_steps": [...]
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Storage and Management
|
|
193
|
+
|
|
194
|
+
### Log Location
|
|
195
|
+
- **Base Directory**: `/tmp/cfn_error_logs/`
|
|
196
|
+
- **Individual Logs**: `/tmp/cfn_error_logs/cfn-error-<task-id>-<timestamp>.json`
|
|
197
|
+
- **Reports**: `/tmp/cfn_error_logs/reports/cfn-report-<task-id>-<timestamp>.md`
|
|
198
|
+
|
|
199
|
+
### Log Rotation
|
|
200
|
+
- **Retention**: 7 days by default
|
|
201
|
+
- **Cleanup**: Automatic cleanup on skill invocation
|
|
202
|
+
- **Compression**: Compress logs older than 1 day
|
|
203
|
+
- **Size Limit**: Maximum 100MB of error logs total
|
|
204
|
+
|
|
205
|
+
### Privacy Considerations
|
|
206
|
+
- **No Code**: Never captures source code content
|
|
207
|
+
- **No Credentials**: Strips sensitive environment variables
|
|
208
|
+
- **Local Storage**: All logs stored locally, user-controlled
|
|
209
|
+
- **User Consent**: Error capture only on explicit failures
|
|
210
|
+
|
|
211
|
+
## Usage Examples
|
|
212
|
+
|
|
213
|
+
### Capture Error on CLI Loop Failure
|
|
214
|
+
```bash
|
|
215
|
+
# In cfn-loop-cli command
|
|
216
|
+
if ! npx claude-flow-novice agent cfn-v3-coordinator ...; then
|
|
217
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
218
|
+
--action capture \
|
|
219
|
+
--task-id "$TASK_ID" \
|
|
220
|
+
--error-type "cli-coordinator" \
|
|
221
|
+
--error-message "CLI coordinator failed to start"
|
|
222
|
+
fi
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Generate Debug Report
|
|
226
|
+
```bash
|
|
227
|
+
# After CFN Loop failure
|
|
228
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
229
|
+
--action report \
|
|
230
|
+
--task-id "cfn-cli-1731234567" \
|
|
231
|
+
--format markdown > /tmp/cfn_error_report.md
|
|
232
|
+
|
|
233
|
+
echo "๐ Error report saved to: /tmp/cfn_error_report.md"
|
|
234
|
+
echo "๐ค Send this file to your Claude assistant for debugging help"
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### List Recent Errors
|
|
238
|
+
```bash
|
|
239
|
+
# List all recent errors
|
|
240
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
241
|
+
--action list \
|
|
242
|
+
--format table
|
|
243
|
+
|
|
244
|
+
# List errors from last 24 hours
|
|
245
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
246
|
+
--action list \
|
|
247
|
+
--since "24h" \
|
|
248
|
+
--format json
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Error Categories
|
|
252
|
+
|
|
253
|
+
### Orchestrator Errors
|
|
254
|
+
- Configuration validation failures
|
|
255
|
+
- Parameter parsing errors
|
|
256
|
+
- Mode threshold mismatches
|
|
257
|
+
- Resource allocation failures
|
|
258
|
+
|
|
259
|
+
### Agent Spawning Errors
|
|
260
|
+
- npx command failures
|
|
261
|
+
- Node.js environment issues
|
|
262
|
+
- Agent binary not found
|
|
263
|
+
- Container runtime errors
|
|
264
|
+
|
|
265
|
+
### Coordination Errors
|
|
266
|
+
- Redis connection failures
|
|
267
|
+
- Key conflicts and race conditions
|
|
268
|
+
- Timeout errors
|
|
269
|
+
- Consensus calculation failures
|
|
270
|
+
|
|
271
|
+
### Resource Errors
|
|
272
|
+
- Memory exhaustion
|
|
273
|
+
- Disk space shortage
|
|
274
|
+
- Process limit exceeded
|
|
275
|
+
- Network connectivity issues
|
|
276
|
+
|
|
277
|
+
### System Errors
|
|
278
|
+
- Permission denied
|
|
279
|
+
- File system errors
|
|
280
|
+
- Signal termination
|
|
281
|
+
- Unexpected crashes
|
|
282
|
+
|
|
283
|
+
## Troubleshooting Guide
|
|
284
|
+
|
|
285
|
+
### Common Error Patterns
|
|
286
|
+
1. **"npx not found"**: Install Node.js and npx globally
|
|
287
|
+
2. **"Redis connection failed"**: Start Redis server or check configuration
|
|
288
|
+
3. **"Memory exhaustion"**: Close other applications or increase system memory
|
|
289
|
+
4. **"Permission denied"**: Check file permissions and user access
|
|
290
|
+
|
|
291
|
+
### Diagnostic Commands
|
|
292
|
+
```bash
|
|
293
|
+
# System health check
|
|
294
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
295
|
+
--action diagnostics
|
|
296
|
+
|
|
297
|
+
# Dependency validation
|
|
298
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
299
|
+
--action validate-dependencies
|
|
300
|
+
|
|
301
|
+
# Resource monitoring
|
|
302
|
+
./.claude/skills/cfn-error-logging/invoke-error-logging.sh \
|
|
303
|
+
--action monitor-resources
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Best Practices
|
|
307
|
+
|
|
308
|
+
### For Users
|
|
309
|
+
1. **Send Complete Reports**: Include the full error report when asking for help
|
|
310
|
+
2. **Provide Context**: Describe what you were trying to accomplish
|
|
311
|
+
3. **Check Dependencies**: Run pre-flight validation before complex tasks
|
|
312
|
+
4. **Monitor Resources**: Watch memory and disk usage during long-running tasks
|
|
313
|
+
|
|
314
|
+
### For Developers
|
|
315
|
+
1. **Integration Points**: Add error logging to all CFN Loop failure points
|
|
316
|
+
2. **Error Categories**: Use appropriate error types for better classification
|
|
317
|
+
3. **Context Capture**: Include relevant state information for debugging
|
|
318
|
+
4. **Privacy**: Never capture sensitive data or code content
|
|
319
|
+
|
|
320
|
+
### For System Administrators
|
|
321
|
+
1. **Log Management**: Regular cleanup of old error logs
|
|
322
|
+
2. **Monitoring**: Track error frequency and patterns
|
|
323
|
+
3. **Resource Planning**: Ensure adequate memory and disk space
|
|
324
|
+
4. **Dependency Management**: Keep Node.js, Redis, and Docker updated
|
|
325
|
+
|
|
326
|
+
## Troubleshooting
|
|
327
|
+
|
|
328
|
+
### Skill Failures
|
|
329
|
+
If the error logging skill itself fails:
|
|
330
|
+
1. **Check Permissions**: Ensure write access to `/tmp/`
|
|
331
|
+
2. **Disk Space**: Verify available space for log files
|
|
332
|
+
3. **Dependencies**: Check for required system tools (jq, bc, etc.)
|
|
333
|
+
4. **Fallback**: Use manual error reporting with basic system diagnostics
|
|
334
|
+
|
|
335
|
+
### Common Issues
|
|
336
|
+
- **Permission Denied**: Fix directory permissions
|
|
337
|
+
- **Disk Full**: Clean up old error logs
|
|
338
|
+
- **Missing Tools**: Install required dependencies
|
|
339
|
+
- **Timezone Issues**: Use UTC timestamps consistently
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
##############################################################################
|
|
4
|
+
# CFN Error Logging - Cleanup and Management Script
|
|
5
|
+
# Version: 1.0.0
|
|
6
|
+
#
|
|
7
|
+
# Automated cleanup and management for CFN error logs
|
|
8
|
+
# Manages log rotation, compression, and retention policies
|
|
9
|
+
#
|
|
10
|
+
# Usage: ./cleanup-error-logs.sh [--retention-days 7] [--dry-run] [--force]
|
|
11
|
+
##############################################################################
|
|
12
|
+
|
|
13
|
+
set -euo pipefail

# Configuration
LOG_BASE_DIR="/tmp/cfn_error_logs"
DEFAULT_RETENTION_DAYS=7
MAX_TOTAL_SIZE_MB=100
COMPRESS_THRESHOLD_DAYS=1

# Parse arguments
RETENTION_DAYS="${DEFAULT_RETENTION_DAYS:-7}"
DRY_RUN=false
FORCE=false

# Print the usage/help text to stdout.
show_help() {
    cat << EOF
CFN Error Logging - Cleanup Script

Usage: $0 [OPTIONS]

Options:
  --retention-days N    Delete logs older than N days (default: 7)
  --dry-run             Show what would be deleted without actually deleting
  --force               Skip confirmation prompts
  --help, -h            Show this help message

Examples:
  $0                                # Standard cleanup (7-day retention)
  $0 --retention-days 3             # Delete logs older than 3 days
  $0 --dry-run                      # Preview what would be deleted
  $0 --force --retention-days 1     # Force delete logs older than 1 day
EOF
}

# Parse the command line into RETENTION_DAYS, DRY_RUN and FORCE.
#
# Arguments: the script's positional parameters ("$@")
# Globals written: RETENTION_DAYS, DRY_RUN, FORCE
# Exits: 0 after printing help; 1 on an unknown option or on a missing or
#        non-numeric --retention-days value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --retention-days)
                # Check the value explicitly: with `set -u` a bare "$2" would
                # abort with an "unbound variable" message that hides the cause.
                if [[ $# -lt 2 ]]; then
                    echo "❌ Option --retention-days requires a value"
                    echo "Use --help for usage information"
                    exit 1
                fi
                # The value is later fed to `date` and to shell arithmetic, so
                # only plain non-negative integers are accepted.
                if [[ ! "$2" =~ ^[0-9]+$ ]]; then
                    echo "❌ Invalid value for --retention-days: $2 (expected a non-negative integer)"
                    echo "Use --help for usage information"
                    exit 1
                fi
                # Force base 10 so a value such as "08" is not read as octal.
                RETENTION_DAYS=$((10#$2))
                shift 2
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --force)
                FORCE=true
                shift
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "❌ Unknown option: $1"
                echo "Use --help for usage information"
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
|
|
67
|
+
|
|
68
|
+
# Timestamped logger: writes "[YYYY-MM-DD HH:MM:SS] <message>" to stdout,
# where <message> is all arguments joined by spaces.
log() {
    local stamp
    # Fall back to an empty stamp so a failing `date` still prints the message.
    stamp="$(date '+%Y-%m-%d %H:%M:%S')" || stamp=""
    echo "[${stamp}] $*"
}
|
|
72
|
+
|
|
73
|
+
# Ensure base directory exists
# (mkdir -p creates missing parents and succeeds when the directory is already there)
|
|
74
|
+
mkdir -p "$LOG_BASE_DIR"
|
|
75
|
+
|
|
76
|
+
# Get current disk usage of error logs
#
# Globals read: LOG_BASE_DIR
# Outputs: exactly one non-negative integer on stdout - the size of
#          $LOG_BASE_DIR in megabytes as reported by `du -sm`, or "0" when
#          `du` is unavailable or prints nothing usable.
get_disk_usage() {
    local size_mb=""

    if command -v du >/dev/null 2>&1; then
        # Capture first and validate afterwards. Chaining `|| echo "0"` onto
        # the pipeline can print nothing (no pipefail) or print a size AND a
        # trailing "0" (pipefail with a partially failing du); both break the
        # integer comparisons callers run on this value.
        size_mb=$(du -sm "$LOG_BASE_DIR" 2>/dev/null | cut -f1) || true
    fi

    # Keep only the first line of whatever du produced.
    size_mb=${size_mb%%$'\n'*}

    if [[ "$size_mb" =~ ^[0-9]+$ ]]; then
        echo "$size_mb"
    else
        echo "0"
    fi
}
|
|
84
|
+
|
|
85
|
+
# Print the size of directory $1 in megabytes (via `du -sm`), or 0 when the
# directory is missing or du yields nothing numeric.
_cfn_dir_size_mb() {
    local dir="$1" size=""

    if [ -d "$dir" ]; then
        size=$(du -sm "$dir" 2>/dev/null | cut -f1) || true
        size=${size%%$'\n'*}
    fi

    if [[ "$size" =~ ^[0-9]+$ ]]; then
        echo "$size"
    else
        echo "0"
    fi
}

# Get total size in MB with detailed breakdown
#
# Globals read: LOG_BASE_DIR
# Outputs: one line "total,error_logs,reports,compressed,other" on stdout,
#          every field a whole number of megabytes. "total" is the size of
#          $LOG_BASE_DIR itself; "other" is what remains after subtracting
#          the logs/, reports/ and compressed/ sub-directories.
get_size_breakdown() {
    local total_size=0
    local error_logs=0
    local reports=0
    local compressed=0
    local other=0

    if [ -d "$LOG_BASE_DIR" ]; then
        error_logs=$(_cfn_dir_size_mb "$LOG_BASE_DIR/logs")
        reports=$(_cfn_dir_size_mb "$LOG_BASE_DIR/reports")
        compressed=$(_cfn_dir_size_mb "$LOG_BASE_DIR/compressed")

        # du on the base directory already includes every sub-directory, so it
        # IS the total; adding the parts to it again would double-count them.
        total_size=$(_cfn_dir_size_mb "$LOG_BASE_DIR")

        # Each figure is a whole number of megabytes, so the parts may not sum
        # exactly to the total; never report a negative remainder.
        other=$((total_size - error_logs - reports - compressed))
        if [ "$other" -lt 0 ]; then
            other=0
        fi
    fi

    echo "$total_size,$error_logs,$reports,$compressed,$other"
}
|
|
116
|
+
|
|
117
|
+
# List files to be deleted
#
# Globals read: LOG_BASE_DIR, RETENTION_DAYS
# Outputs: one path per line on stdout for every regular file under
#          $LOG_BASE_DIR named *.json, *.md, *.txt or *.json.gz whose
#          modification time is older than RETENTION_DAYS days.
list_files_to_delete() {
    local cutoff_date
    # GNU date first, BSD date (-v) as the fallback.
    cutoff_date=$(date -d "$RETENTION_DAYS days ago" +%s 2>/dev/null || date -v-"$RETENTION_DAYS"d +%s)

    # Nothing to list when the directory is absent.
    [ -d "$LOG_BASE_DIR" ] || return 0

    local file file_date
    # The name tests are grouped so `-type f` applies to all of them; without
    # the parentheses it binds only to "*.json" and directories named *.md or
    # *.txt are matched too. "*.json.gz" is included so archives written to
    # compressed/ are subject to retention as well.
    # Process substitution keeps the loop in the current shell.
    while IFS= read -r file; do
        # GNU stat first, BSD stat as the fallback; a file whose mtime cannot
        # be read is skipped rather than treated as infinitely old.
        file_date=$(stat -c %Y "$file" 2>/dev/null || stat -f %m "$file" 2>/dev/null) || continue
        [[ "$file_date" =~ ^[0-9]+$ ]] || continue

        if [ "$file_date" -lt "$cutoff_date" ]; then
            echo "$file"
        fi
    done < <(find "$LOG_BASE_DIR" -type f \( -name "*.json" -o -name "*.md" -o -name "*.txt" -o -name "*.json.gz" \) 2>/dev/null)
}
|
|
131
|
+
|
|
132
|
+
# Compress old logs
#
# Gzips every *.json file under $LOG_BASE_DIR (outside compressed/) that is
# older than COMPRESS_THRESHOLD_DAYS days into
# $LOG_BASE_DIR/compressed/<name>.json.gz and removes the original.
# A file whose archive already exists is left untouched.
#
# Globals read: LOG_BASE_DIR, COMPRESS_THRESHOLD_DAYS, DRY_RUN
# Outputs: progress lines via log(); with DRY_RUN=true nothing on disk changes.
compress_old_logs() {
    local cutoff_date
    # GNU date first, BSD date (-v) as the fallback.
    cutoff_date=$(date -d "$COMPRESS_THRESHOLD_DAYS days ago" +%s 2>/dev/null || date -v-"$COMPRESS_THRESHOLD_DAYS"d +%s)

    if [ "$DRY_RUN" != true ]; then
        mkdir -p "$LOG_BASE_DIR/compressed"
    fi

    log "🗜️ Compressing logs older than $COMPRESS_THRESHOLD_DAYS days..."

    local compressed_count=0
    local file file_date base compressed_file

    # Process substitution (not `find | while`) keeps the loop in this shell,
    # so compressed_count is still set when the summary below runs.
    while IFS= read -r file; do
        file_date=$(stat -c %Y "$file" 2>/dev/null || stat -f %m "$file" 2>/dev/null || echo "0")
        [ "$file_date" -lt "$cutoff_date" ] || continue

        base=$(basename "$file" .json)
        compressed_file="$LOG_BASE_DIR/compressed/${base}.json.gz"
        [ ! -f "$compressed_file" ] || continue

        if [ "$DRY_RUN" = true ]; then
            # A dry run must not write archives either.
            log "  Would compress: $(basename "$file") → $(basename "$compressed_file")"
            compressed_count=$((compressed_count + 1))
            continue
        fi

        if gzip -c "$file" > "$compressed_file" 2>/dev/null; then
            rm -f "$file"
            compressed_count=$((compressed_count + 1))
            log "  Compressed: $(basename "$file") → $(basename "$compressed_file")"
        else
            # Do not leave a truncated archive behind; it would make the
            # "already compressed" check above skip this file forever.
            rm -f "$compressed_file"
        fi
    done < <(find "$LOG_BASE_DIR" -type f -name "*.json" -not -path "*/compressed/*" 2>/dev/null)

    if [ "$compressed_count" -gt 0 ]; then
        if [ "$DRY_RUN" = true ]; then
            log "ℹ️ DRY RUN - $compressed_count log files would be compressed"
        else
            log "✅ Compressed $compressed_count log files"
        fi
    else
        log "ℹ️ No files needed compression"
    fi
}
|
|
169
|
+
|
|
170
|
+
# Delete empty directories
#
# Removes empty sub-directories of $LOG_BASE_DIR; does nothing on a dry run.
# Globals read: LOG_BASE_DIR, DRY_RUN
cleanup_empty_dirs() {
    if [ "$DRY_RUN" != true ]; then
        # -mindepth 1 keeps $LOG_BASE_DIR itself: the script creates it at
        # start-up, so it should not be removed just because it ended up empty.
        # Best-effort: a find error must not abort the cleanup.
        find "$LOG_BASE_DIR" -mindepth 1 -type d -empty -delete 2>/dev/null || true
    fi
}
|
|
176
|
+
|
|
177
|
+
# Show cleanup summary
#
# Arguments: $1 - size before cleanup (MB)
#            $2 - size after cleanup (MB)
#            $3 - number of files deleted
# Globals read: MAX_TOTAL_SIZE_MB, RETENTION_DAYS
# Outputs: a short human-readable report on stdout, plus a warning when the
#          remaining size is still above MAX_TOTAL_SIZE_MB.
show_summary() {
    local before_size="$1"
    local after_size="$2"
    local files_deleted="$3"

    local size_saved=$((before_size - after_size))

    echo ""
    echo "๐ Cleanup Summary:"
    echo "  Files deleted: $files_deleted"
    echo "  Space saved: ${size_saved}MB"
    echo "  Current usage: ${after_size}MB"

    if [ "$after_size" -gt "$MAX_TOTAL_SIZE_MB" ]; then
        echo "  ⚠️ Warning: Error logs still exceed recommended size (${MAX_TOTAL_SIZE_MB}MB)"
        # Only suggest a shorter retention while there is room to shorten it;
        # otherwise the hint would read "--retention-days 0" or a negative number.
        if [ "$RETENTION_DAYS" -gt 1 ] 2>/dev/null; then
            echo "  Consider running with --retention-days $((RETENTION_DAYS - 1)) to reduce further"
        fi
    fi
}
|
|
196
|
+
|
|
197
|
+
# Log up to $2 entries of the newline-separated path list $3, each line
# prefixed with the label $1 and annotated with the file's size.
_cfn_preview_files() {
    local label="$1" limit="$2" list="$3"
    local shown=0 entry

    while IFS= read -r entry; do
        [ "$shown" -lt "$limit" ] || break
        log "  ${label}: $(basename "$entry") ($(du -sh "$entry" 2>/dev/null | cut -f1 || echo "unknown"))"
        shown=$((shown + 1))
    done <<< "$list"
}

# Main cleanup function
#
# One cleanup pass: report current usage, compress old logs, list files past
# the retention period, then preview them (dry run) or delete them after an
# optional confirmation, prune empty directories and print a summary.
#
# Globals read:    RETENTION_DAYS, DRY_RUN, FORCE
# Globals written: file_count - number of files actually deleted (0 for a dry
#                  run, a cancelled prompt or when nothing was old enough).
#                  Deliberately not local: the top-level `logger` call reads
#                  it after this function returns.
# Returns: 0, including when the user declines the prompt.
run_cleanup() {
    file_count=0

    log "🧹 Starting CFN error log cleanup (retention: $RETENTION_DAYS days)"

    # get_size_breakdown prints five fields, total first; read all five so
    # each label below is paired with its own figure.
    local size_breakdown
    size_breakdown=$(get_size_breakdown)
    local before_size error_logs reports compressed other
    IFS=, read -r before_size error_logs reports compressed other <<< "$size_breakdown"

    log "๐ Current disk usage: ${before_size}MB"

    if [ "$before_size" -eq 0 ]; then
        log "ℹ️ No error logs found to clean up"
        return 0
    fi

    # Show size breakdown
    log "๐ Size breakdown:"
    if [ "$error_logs" -gt 0 ]; then
        log "  Error logs: ${error_logs}MB"
    fi
    if [ "$reports" -gt 0 ]; then
        log "  Reports: ${reports}MB"
    fi
    if [ "$compressed" -gt 0 ]; then
        log "  Compressed: ${compressed}MB"
    fi
    if [ "$other" -gt 0 ]; then
        log "  Other: ${other}MB"
    fi

    # Compress old logs first
    compress_old_logs

    # List files to delete
    local files_to_delete
    files_to_delete=$(list_files_to_delete)

    if [ -z "$files_to_delete" ]; then
        log "ℹ️ No files older than $RETENTION_DAYS days found"

        # Compression alone may have changed the footprint; report it if so.
        local final_size
        final_size=$(get_disk_usage)

        if [ "$before_size" -ne "$final_size" ]; then
            show_summary "$before_size" "$final_size" "0"
        fi

        cleanup_empty_dirs
        return 0
    fi

    local found_count
    found_count=$(printf '%s\n' "$files_to_delete" | wc -l)
    found_count=$((found_count))   # strips the padding some wc builds emit

    log "๐ Found $found_count files older than $RETENTION_DAYS days:"

    local deleted_count=0

    if [ "$DRY_RUN" = true ]; then
        _cfn_preview_files "Would delete" 10 "$files_to_delete"

        if [ "$found_count" -gt 10 ]; then
            log "  ... and $((found_count - 10)) more files"
        fi

        log "๐ DRY RUN - No files were actually deleted"
    else
        # Show sample of files to be deleted
        _cfn_preview_files "Deleting" 5 "$files_to_delete"

        if [ "$found_count" -gt 5 ]; then
            log "  ... and $((found_count - 5)) more files"
        fi

        # Confirmation prompt (unless forced)
        if [ "$FORCE" != true ]; then
            echo ""
            # A failed read (EOF, no terminal) counts as "no" instead of
            # silently killing the script through `set -e`.
            read -p "Delete these $found_count files? [y/N] " -n 1 -r || REPLY=""
            echo ""

            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                log "❌ Cancelled by user"
                return 0
            fi
        fi

        # Delete files. The here-string keeps the loop in this shell, so
        # deleted_count survives it (a `... | while` loop runs in a subshell).
        local file
        while IFS= read -r file; do
            [ -n "$file" ] || continue
            if rm -f -- "$file" 2>/dev/null; then
                deleted_count=$((deleted_count + 1))
            fi
        done <<< "$files_to_delete"

        log "✅ Deleted $deleted_count files"
    fi

    file_count=$deleted_count

    # Clean up empty directories
    cleanup_empty_dirs

    # Get final size
    local after_size
    after_size=$(get_disk_usage)

    # Report what was really removed, not what was merely found.
    show_summary "$before_size" "$after_size" "$deleted_count"

    log "✅ Cleanup completed"
}
|
|
313
|
+
|
|
314
|
+
# Check if running as root (warn about permission issues)
if [ "$EUID" -eq 0 ]; then
    log "⚠️ Running as root - this may affect log ownership"
fi

# Check disk space before cleanup
# Plain assignment on purpose: `local` is only valid inside a function, and at
# top level its failure aborts the script under `set -e` before any cleanup.
# `df -Pk` forces single-line POSIX output in 1024-byte blocks, so column 4 is
# KiB regardless of the platform's default block size.
available_space=$(df -Pk "$LOG_BASE_DIR" 2>/dev/null | awk 'NR==2{print int($4/1024)}') || available_space=""

# Skip confirmation only when free space is known to be low; an unreadable
# value must not silently turn on forced deletion.
if [[ "$available_space" =~ ^[0-9]+$ ]] && [ "$available_space" -lt 50 ]; then
    log "⚠️ Low disk space (${available_space}MB available) - forcing cleanup"
    FORCE=true
fi

# Run cleanup
run_cleanup

# Log cleanup completion to system log if possible
if command -v logger >/dev/null 2>&1; then
    logger -t "cfn-error-logging" "Cleanup completed: retention=${RETENTION_DAYS}d, deleted=${file_count:-0} files"
fi
|