claude-controller 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/bin/autoloop.sh +382 -0
- package/bin/ctl +1189 -0
- package/bin/native-app.py +6 -3
- package/bin/watchdog.sh +357 -0
- package/cognitive/__init__.py +14 -0
- package/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/dispatcher.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/evaluator.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/goal_engine.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/learning.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/orchestrator.cpython-314.pyc +0 -0
- package/cognitive/__pycache__/planner.cpython-314.pyc +0 -0
- package/cognitive/dispatcher.py +192 -0
- package/cognitive/evaluator.py +289 -0
- package/cognitive/goal_engine.py +232 -0
- package/cognitive/learning.py +189 -0
- package/cognitive/orchestrator.py +303 -0
- package/cognitive/planner.py +207 -0
- package/cognitive/prompts/analyst.md +31 -0
- package/cognitive/prompts/coder.md +22 -0
- package/cognitive/prompts/reviewer.md +33 -0
- package/cognitive/prompts/tester.md +21 -0
- package/cognitive/prompts/writer.md +25 -0
- package/config.sh +6 -1
- package/dag/__init__.py +5 -0
- package/dag/__pycache__/__init__.cpython-314.pyc +0 -0
- package/dag/__pycache__/graph.cpython-314.pyc +0 -0
- package/dag/graph.py +222 -0
- package/lib/jobs.sh +12 -1
- package/package.json +11 -5
- package/postinstall.sh +1 -1
- package/service/controller.sh +43 -11
- package/web/audit.py +122 -0
- package/web/checkpoint.py +80 -0
- package/web/config.py +2 -5
- package/web/handler.py +634 -473
- package/web/handler_fs.py +153 -0
- package/web/handler_goals.py +203 -0
- package/web/handler_jobs.py +372 -0
- package/web/handler_memory.py +203 -0
- package/web/handler_sessions.py +132 -0
- package/web/jobs.py +585 -13
- package/web/personas.py +419 -0
- package/web/pipeline.py +981 -0
- package/web/presets.py +506 -0
- package/web/projects.py +246 -0
- package/web/static/api.js +141 -0
- package/web/static/app.js +25 -1937
- package/web/static/attachments.js +144 -0
- package/web/static/base.css +497 -0
- package/web/static/context.js +204 -0
- package/web/static/dirs.js +246 -0
- package/web/static/form.css +763 -0
- package/web/static/goals.css +363 -0
- package/web/static/goals.js +300 -0
- package/web/static/i18n.js +625 -0
- package/web/static/index.html +215 -13
- package/web/static/{styles.css → jobs.css} +746 -1141
- package/web/static/jobs.js +1270 -0
- package/web/static/memoryview.js +117 -0
- package/web/static/personas.js +228 -0
- package/web/static/pipeline.css +338 -0
- package/web/static/pipelines.js +487 -0
- package/web/static/presets.js +244 -0
- package/web/static/send.js +135 -0
- package/web/static/settings-style.css +291 -0
- package/web/static/settings.js +81 -0
- package/web/static/stream.js +534 -0
- package/web/static/utils.js +131 -0
- package/web/webhook.py +210 -0
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ A shell wrapper that runs Claude Code CLI as a headless daemon. Provides FIFO pi
|
|
|
7
7
|
```
|
|
8
8
|
┌─────────────────────────────────────────────────────────────────┐
|
|
9
9
|
│ Web Dashboard (Vanilla JS) │
|
|
10
|
-
│ https://
|
|
10
|
+
│ https://localhost:8420 │
|
|
11
11
|
└────────────────────┬────────────────────────────────────────────┘
|
|
12
12
|
│ REST API (Python http.server)
|
|
13
13
|
┌────────────────────▼────────────────────────────────────────────┐
|
|
@@ -202,7 +202,7 @@ Override settings via `data/settings.json` or environment variables:
|
|
|
202
202
|
|
|
203
203
|
| Setting | Default | Description |
|
|
204
204
|
|---------|---------|-------------|
|
|
205
|
-
| `skip_permissions` | `
|
|
205
|
+
| `skip_permissions` | `false` | Use `--dangerously-skip-permissions` flag (when `false`, `--allowedTools` is used instead) |
|
|
206
206
|
| `model` | `""` | Claude model override (empty = default model) |
|
|
207
207
|
| `max_jobs` | `10` | Max concurrent background jobs |
|
|
208
208
|
| `target_repo` | `""` | Git repository path for Worktree creation |
|
package/bin/autoloop.sh
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# ============================================================
# autoloop.sh — unified monitoring + pipeline tick, run every 5 minutes
#
# Responsibilities:
#   1. Controller service health check (auto-restart if it is dead)
#   2. Pipeline tick-all (advance pipeline stages automatically)
#   3. Stuck job detection (running for more than 30 minutes)
#   4. Orphan process detection
#   5. Disk usage warnings
#   6. Basic log rotation
#
# crontab:
#   */5 * * * * /path/to/autoloop.sh >> logs/autoloop.log 2>&1
# ============================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
CONTROLLER_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
PID_FILE="$CONTROLLER_DIR/service/controller.pid"
FIFO_PATH="$CONTROLLER_DIR/queue/controller.pipe"
LOGS_DIR="$CONTROLLER_DIR/logs"
WORKTREES_DIR="$CONTROLLER_DIR/worktrees"
PYTHON="$(command -v python3 2>/dev/null || echo "/opt/homebrew/opt/python@3.14/bin/python3.14")"
# Kept as a plain string because callers expand it unquoted ($CTL ...).
CTL="$PYTHON $CONTROLLER_DIR/bin/ctl"

# Settings
STUCK_THRESHOLD_MIN=30      # a job "running" longer than this (minutes) is stuck
DISK_WARN_MB=500            # disk warning threshold (MB)
DISK_CRITICAL_MB=2000       # disk critical threshold (MB)
LOG_MAX_SIZE_MB=10          # service.log rotation threshold
LOG_RETENTION_DAYS=30       # retention period for finished job files (days)
MAX_RESTART_ATTEMPTS=3      # maximum number of consecutive restart attempts
RESTART_STATE_FILE="$CONTROLLER_DIR/data/.restart_count"

# settings.json override
_SETTINGS="$CONTROLLER_DIR/data/settings.json"
if [[ -f "$_SETTINGS" ]] && command -v jq &>/dev/null; then
  # A malformed settings.json makes jq exit non-zero; without the fallback the
  # failed assignment would abort the whole script under `set -e`.
  _v=$(jq -r '.log_retention_days // empty' "$_SETTINGS" 2>/dev/null) || _v=""
  if [[ "$_v" =~ ^[0-9]+$ ]]; then
    # Force base 10 so values such as "09" are not parsed as octal later.
    LOG_RETENTION_DAYS=$(( 10#$_v ))
  elif [[ -n "$_v" ]]; then
    # The value is used in arithmetic later, so only plain integers are accepted.
    printf 'autoloop: ignoring non-numeric log_retention_days: %s\n' "$_v" >&2
  fi
fi

# Timestamp captured once; every log line of this run carries the same value.
NOW=$(date '+%Y-%m-%d %H:%M:%S')
# Messages collected by warn/err.
ISSUES=()

# log MESSAGE — print a timestamped line to stdout.
log() { echo "[$NOW] $1"; }
# warn MESSAGE — print a WARN line to stdout and record the message in ISSUES.
warn() { echo "[$NOW] WARN: $1"; ISSUES+=("$1"); }
# err MESSAGE — print an ERROR line to stdout and record the message in ISSUES.
err() { echo "[$NOW] ERROR: $1"; ISSUES+=("$1"); }
|
|
50
|
+
|
|
51
|
+
# ── 1. Service health check ─────────────────────────────────
|
|
52
|
+
|
|
53
|
+
#######################################
# Report whether the controller service looks healthy.
# Globals:   PID_FILE, FIFO_PATH, RESTART_STATE_FILE (read; state file removed
#            when the service is healthy)
# Outputs:   one status line via log/warn
# Returns:   0 if the recorded PID is alive and the FIFO exists, 1 otherwise
#######################################
check_service() {
  local service_pid=""
  if [[ -f "$PID_FILE" ]]; then
    service_pid=$(cat "$PID_FILE" 2>/dev/null)
  fi

  # No PID on record, or the recorded process no longer accepts signals.
  if [[ -z "$service_pid" ]] || ! kill -0 "$service_pid" 2>/dev/null; then
    warn "Service DOWN — PID ${service_pid:-unknown} 응답 없음"
    return 1
  fi

  # The process is alive; it is only usable if its named pipe exists as well.
  if [[ ! -p "$FIFO_PATH" ]]; then
    warn "FIFO missing — pipe가 없음, 서비스 재시작 필요"
    return 1
  fi

  log "Service OK (PID $service_pid)"
  # Healthy again: reset the restart counter.
  rm -f "$RESTART_STATE_FILE"
  return 0
}
|
|
75
|
+
|
|
76
|
+
#######################################
# Try to restart the controller, giving up after MAX_RESTART_ATTEMPTS
# consecutive attempts (the counter is persisted in RESTART_STATE_FILE).
# Globals:   RESTART_STATE_FILE, MAX_RESTART_ATTEMPTS, CONTROLLER_DIR
# Outputs:   progress via log/err; best-effort macOS notifications
# Returns:   0 if check_service succeeds after the restart, 1 otherwise
#######################################
restart_service() {
  # Check the attempt counter first (prevents an endless restart loop).
  local count=0
  if [[ -f "$RESTART_STATE_FILE" ]]; then
    count=$(cat "$RESTART_STATE_FILE" 2>/dev/null || echo 0)
  fi
  # An empty or corrupted state file must not break the arithmetic below
  # (a non-numeric value would be read as an unset variable name).
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  count=$(( 10#$count ))

  if (( count >= MAX_RESTART_ATTEMPTS )); then
    err "재시작 ${MAX_RESTART_ATTEMPTS}회 초과 — cooldown. 수동 확인 필요"
    # macOS notification; the limit comes from the setting, not a literal.
    osascript -e "display notification \"Controller 재시작 실패 (${MAX_RESTART_ATTEMPTS}회 초과). 수동 확인 필요.\" with title \"AutoLoop\" sound name \"Basso\"" 2>/dev/null || true
    return 1
  fi

  local attempt=$(( count + 1 ))
  # The state directory may not exist yet on a fresh install.
  mkdir -p "$(dirname "$RESTART_STATE_FILE")"
  echo "$attempt" > "$RESTART_STATE_FILE"
  log "서비스 재시작 시도 (#${attempt}/$MAX_RESTART_ATTEMPTS)..."

  # Report orphan processes, then start the controller.
  cleanup_zombies
  # The start command's own status is ignored on purpose: check_service below
  # decides whether the restart worked.
  "$CONTROLLER_DIR/bin/controller" start 2>/dev/null || true
  sleep 2

  if check_service 2>/dev/null; then
    log "서비스 재시작 성공"
    osascript -e 'display notification "Controller 서비스 자동 재시작 완료" with title "AutoLoop"' 2>/dev/null || true
    rm -f "$RESTART_STATE_FILE"
    return 0
  fi

  err "서비스 재시작 실패"
  return 1
}
|
|
108
|
+
|
|
109
|
+
# ── 2. Pipeline tick ────────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
# Post a best-effort macOS notification.
# Arguments: $1 - message, $2 - title, $3 - optional sound name
# The message is derived from pipeline data, so backslashes and double quotes
# are escaped before it is embedded in the AppleScript string literal.
_tick_notify() {
  local message=$1 title=$2 sound=${3:-}
  message=${message//\\/\\\\}
  message=${message//\"/\\\"}
  local script="display notification \"${message}\" with title \"${title}\""
  if [[ -n "$sound" ]]; then
    script+=" sound name \"${sound}\""
  fi
  osascript -e "$script" 2>/dev/null || true
}

# Print "<name>: <reason>" for every tick entry whose result action is $1.
# Arguments: $1 - action name, $2 - tick-all JSON output
_tick_action_reasons() {
  local action=$1 json=$2
  $PYTHON -c '
import sys, json
action = sys.argv[1]
for x in json.load(sys.stdin):
    res = x.get("result", {})
    if res.get("action") == action:
        print("{}: {}".format(x.get("name", "?"), res.get("reason", "?")))
' "$action" <<<"$json"
}

#######################################
# Run `ctl pipeline tick-all` and report what happened.
# Globals:   CTL, PYTHON (read; both expanded unquoted as command words)
# Outputs:   log lines; warn on a reported error; macOS notifications for
#            dispatched / auto-paused / completed pipelines
# Returns:   0
#######################################
tick_pipelines() {
  local result
  result=$($CTL pipeline tick-all 2>&1) || true

  if [[ -z "$result" || "$result" == "[]" ]]; then
    log "Pipeline tick: 활성 파이프라인 없음"
    return 0
  fi
  log "Pipeline tick: $result"

  # Substring tests are done in-shell: `echo | grep -q` can fail under
  # pipefail when grep exits before echo has written a large result.

  # Notify when a new stage was dispatched.
  if [[ "$result" == *'"action": "dispatched"'* ]]; then
    local name
    name=$($PYTHON -c 'import sys, json
r = json.load(sys.stdin)
print(next((x.get("name", "?") for x in r if x.get("result", {}).get("action") == "dispatched"), "?"))' <<<"$result" 2>/dev/null || echo "?")
    _tick_notify "파이프라인 시작: $name" "AutoLoop"
  fi

  # Log skipped pipelines.
  if [[ "$result" == *'"action": "skipped"'* ]]; then
    local skip_info
    skip_info=$(_tick_action_reasons skipped "$result" 2>/dev/null || echo "skipped")
    log "Pipeline skipped (비용 절감): $skip_info"
  fi

  # Notify about automatic pauses.
  if [[ "$result" == *'"action": "auto_paused"'* ]]; then
    local pause_info
    pause_info=$(_tick_action_reasons auto_paused "$result" 2>/dev/null || echo "paused")
    log "Pipeline AUTO-PAUSED: $pause_info"
    _tick_notify "$pause_info" "AutoLoop: 자동 일시정지" "Submarine"
  fi

  # Notify about completed pipelines (including the classification result).
  if [[ "$result" == *'"action": "completed"'* ]]; then
    local completed_info
    completed_info=$($PYTHON -c '
import sys, json
for x in json.load(sys.stdin):
    res = x.get("result", {})
    if res.get("action") == "completed":
        cls = res.get("classification", "?")
        name = x.get("name", "?")
        adapt = res.get("interval_adapted")
        chain = res.get("chain")
        msg = "{}: {}".format(name, cls)
        if adapt:
            msg += " (인터벌 {})".format(adapt["change"])
        if chain:
            msg += " → chain triggered"
        print(msg)
' <<<"$result" 2>/dev/null || echo "완료")
    log "Pipeline completed: $completed_info"
    _tick_notify "$completed_info" "AutoLoop" "Glass"
  fi

  # Detect pipeline failures: warn if ANY "error" field is not null. Each
  # field is checked on its own, so one healthy entry with "error": null
  # cannot hide a real error in another entry.
  if grep -oE '"error":[[:space:]]*[^,}]*' <<<"$result" \
    | grep -vE '^"error":[[:space:]]*null[[:space:]]*$' >/dev/null; then
    warn "파이프라인 tick 에러 발생"
  fi
}
|
|
183
|
+
|
|
184
|
+
# ── 2b. Evolution summary logging (every 10th tick) ─────────
|
|
185
|
+
#######################################
# Count ticks in data/.tick_count and, on every 10th tick, log the output of
# `ctl pipeline evolution --pretty` line by line.
# Globals:   CONTROLLER_DIR, CTL (read)
# Outputs:   log lines on every 10th tick
# Returns:   0
#######################################
log_evolution_summary() {
  local tick_count_file="$CONTROLLER_DIR/data/.tick_count"
  local count=0
  if [[ -f "$tick_count_file" ]]; then
    count=$(cat "$tick_count_file" 2>/dev/null || echo 0)
  fi
  # A corrupted counter file must not abort the run: a non-numeric value in
  # the arithmetic below would be read as an unset variable under `set -u`.
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  count=$(( 10#$count + 1 ))

  mkdir -p "${tick_count_file%/*}"
  echo "$count" > "$tick_count_file"

  # Evolution summary only on every 10th tick.
  if (( count % 10 != 0 )); then
    return 0
  fi

  local evo line
  evo=$($CTL pipeline evolution --pretty 2>&1) || true
  if [[ -n "$evo" ]]; then
    log "=== Evolution Summary (tick #$count) ==="
    while IFS= read -r line; do
      log "  $line"
    done <<< "$evo"
  fi
}
|
|
204
|
+
|
|
205
|
+
# ── 3. Stuck job detection ──────────────────────────────────
|
|
206
|
+
|
|
207
|
+
#######################################
# Warn about jobs whose meta file says STATUS=running and whose CREATED_AT is
# more than STUCK_THRESHOLD_MIN minutes in the past.
# Globals:   LOGS_DIR, STUCK_THRESHOLD_MIN (read)
# Outputs:   one warn per stuck job; one log line per unparseable timestamp
# Returns:   0
#######################################
check_stuck_jobs() {
  local now_epoch
  now_epoch=$(date +%s)

  local meta status created_at job_id job_epoch elapsed_min
  for meta in "$LOGS_DIR"/job_*.meta; do
    [[ -f "$meta" ]] || continue

    # Values may be written quoted (CREATED_AT is single-quoted), so quotes
    # are stripped the same way cleanup_old_jobs does for STATUS.
    status=$(grep '^STATUS=' "$meta" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || true)
    [[ "$status" == "running" ]] || continue

    job_id=$(grep '^JOB_ID=' "$meta" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || true)
    created_at=$(grep '^CREATED_AT=' "$meta" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || true)
    [[ -n "$created_at" ]] || continue

    # BSD date first (macOS), GNU date as fallback.
    job_epoch=$(date -j -f "%Y-%m-%d %H:%M:%S" "$created_at" +%s 2>/dev/null \
      || date -d "$created_at" +%s 2>/dev/null \
      || true)
    # An unparseable timestamp must not be treated as epoch 0, which would
    # report every such job as stuck for millions of minutes.
    if [[ ! "$job_epoch" =~ ^[0-9]+$ ]]; then
      log "Job ${job_id:-?}: cannot parse CREATED_AT '$created_at' — skipping stuck check"
      continue
    fi

    elapsed_min=$(( (now_epoch - job_epoch) / 60 ))
    if (( elapsed_min > STUCK_THRESHOLD_MIN )); then
      warn "Job $job_id stuck: ${elapsed_min}분째 running"
    fi
  done
}
|
|
232
|
+
|
|
233
|
+
# ── 4. Orphan process detection/cleanup ─────────────────────
|
|
234
|
+
|
|
235
|
+
#######################################
# Report claude processes that are still around although the controller
# recorded in PID_FILE is dead. Detection only — nothing is killed.
# Globals:   PID_FILE (read)
# Outputs:   one warn listing the matching PIDs on a single line
# Returns:   0
#######################################
cleanup_zombies() {
  local controller_pid=""
  if [[ -f "$PID_FILE" ]]; then
    # The file can vanish between the test and the read; a failed read must
    # not abort the whole tick under `set -e`.
    controller_pid=$(cat "$PID_FILE" 2>/dev/null) || controller_pid=""
  fi

  # Nothing on record, or the controller is still alive: nothing to report.
  [[ -n "$controller_pid" ]] || return 0
  if kill -0 "$controller_pid" 2>/dev/null; then
    return 0
  fi

  # The controller is dead; look for claude processes that remain.
  local orphans
  orphans=$(pgrep -f "claude.*-p" 2>/dev/null || true)
  # pgrep prints one PID per line; join them so the warning and the issue
  # summary stay on a single log line.
  orphans=${orphans//$'\n'/ }

  if [[ -n "$orphans" ]]; then
    warn "고아 claude 프로세스 감지: $orphans"
    # Detection only; processes are deliberately not killed here (safety).
    # Uncomment the line below if an actual kill is required.
    # echo "$orphans" | xargs kill -TERM 2>/dev/null || true
  fi
}
|
|
253
|
+
|
|
254
|
+
# ── 5. Disk usage check ─────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
#######################################
# Compare the combined size of the logs and worktrees directories with the
# warning and critical thresholds.
# Globals:   LOGS_DIR, WORKTREES_DIR, DISK_WARN_MB, DISK_CRITICAL_MB (read)
# Outputs:   one log/warn/err line; macOS notification at critical level
# Returns:   0
#######################################
check_disk() {
  local logs_kb=0 wt_kb=0

  # du exits non-zero when it cannot read part of the tree but still prints a
  # total; with pipefail + `set -e` an unguarded failure would abort the tick.
  if [[ -d "$LOGS_DIR" ]]; then
    logs_kb=$(du -sk "$LOGS_DIR" 2>/dev/null | awk '{print $1}') || true
  fi
  if [[ -d "$WORKTREES_DIR" ]]; then
    wt_kb=$(du -sk "$WORKTREES_DIR" 2>/dev/null | awk '{print $1}') || true
  fi
  # Fall back to 0 when du produced nothing usable.
  [[ "$logs_kb" =~ ^[0-9]+$ ]] || logs_kb=0
  [[ "$wt_kb" =~ ^[0-9]+$ ]] || wt_kb=0

  local total_mb=$(( (logs_kb + wt_kb) / 1024 ))

  if (( total_mb > DISK_CRITICAL_MB )); then
    err "디스크 위험: ${total_mb}MB (logs+worktrees). 긴급 정리 필요"
    osascript -e "display notification \"디스크 ${total_mb}MB — 긴급 정리 필요\" with title \"AutoLoop\" sound name \"Basso\"" 2>/dev/null || true
  elif (( total_mb > DISK_WARN_MB )); then
    warn "디스크 경고: ${total_mb}MB (logs+worktrees)"
  else
    log "Disk OK: ${total_mb}MB"
  fi
}
|
|
277
|
+
|
|
278
|
+
# ── 6. Log rotation ─────────────────────────────────────────
|
|
279
|
+
|
|
280
|
+
# Print the size of file $1 in whole megabytes (0 if it cannot be determined).
# Tries BSD stat first, then GNU stat, matching the fallback order used for
# mtime lookups in cleanup_old_jobs.
_rotate_file_size_mb() {
  local bytes
  bytes=$(stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null || echo 0)
  [[ "$bytes" =~ ^[0-9]+$ ]] || bytes=0
  echo $(( bytes / 1048576 ))
}

#######################################
# Rotate service.log (three generations: .0, .1.gz, .2.gz) and autoloop.log
# (single .old copy) once they exceed LOG_MAX_SIZE_MB.
# Globals:   LOGS_DIR, LOG_MAX_SIZE_MB (read)
# Outputs:   one log line per rotated file
# Returns:   0 unless a file operation fails
# NOTE(review): rotation renames the file and creates a new empty one; a
# writer that keeps the old file open would keep writing to the renamed
# file — confirm how the service opens its log.
#######################################
rotate_logs() {
  local size_mb

  local service_log="$LOGS_DIR/service.log"
  if [[ -f "$service_log" ]]; then
    size_mb=$(_rotate_file_size_mb "$service_log")
    if (( size_mb > LOG_MAX_SIZE_MB )); then
      log "service.log 로테이션: ${size_mb}MB > ${LOG_MAX_SIZE_MB}MB"
      # Three-generation rotation: drop .2.gz, shift .1.gz, compress .0.
      rm -f "${service_log}.2.gz"
      if [[ -f "${service_log}.1.gz" ]]; then
        mv "${service_log}.1.gz" "${service_log}.2.gz"
      fi
      if [[ -f "${service_log}.0" ]]; then
        # -f: a stale .0.gz left by an interrupted run must not block gzip.
        gzip -f "${service_log}.0" && mv "${service_log}.0.gz" "${service_log}.1.gz"
      fi
      mv "$service_log" "${service_log}.0"
      touch "$service_log"
    fi
  fi

  # autoloop.log itself is rotated as well.
  local autoloop_log="$LOGS_DIR/autoloop.log"
  if [[ -f "$autoloop_log" ]]; then
    size_mb=$(_rotate_file_size_mb "$autoloop_log")
    if (( size_mb > LOG_MAX_SIZE_MB )); then
      log "autoloop.log 로테이션"
      mv "$autoloop_log" "${autoloop_log}.old"
      touch "$autoloop_log"
    fi
  fi
}
|
|
308
|
+
|
|
309
|
+
# ── 7. Old job file cleanup ─────────────────────────────────
|
|
310
|
+
|
|
311
|
+
#######################################
# Delete the .meta/.out/.ext_id files of non-running jobs whose meta file was
# last modified more than LOG_RETENTION_DAYS days ago.
# Globals:   LOGS_DIR, LOG_RETENTION_DAYS (read)
# Outputs:   one log line when at least one job was cleaned up
# Returns:   0
#######################################
cleanup_old_jobs() {
  local retention_days="${LOG_RETENTION_DAYS:-30}"
  # The value may come from settings.json; only a plain integer is safe to
  # use in the arithmetic below.
  [[ "$retention_days" =~ ^[0-9]+$ ]] || retention_days=30
  retention_days=$(( 10#$retention_days ))

  local now_ts
  now_ts=$(date +%s)
  local cutoff_ts=$(( now_ts - retention_days * 86400 ))
  local cleaned=0

  [[ -d "$LOGS_DIR" ]] || return 0

  local meta_file status mtime base
  for meta_file in "$LOGS_DIR"/job_*.meta; do
    [[ -f "$meta_file" ]] || continue

    # Never touch jobs that are still running. `|| true`: a meta file without
    # a STATUS line makes grep fail, which under pipefail + `set -e` would
    # otherwise abort the whole tick.
    status=$(grep '^STATUS=' "$meta_file" 2>/dev/null | head -1 | cut -d= -f2 | tr -d "'" | tr -d '"' || true)
    if [[ "$status" == "running" ]]; then
      continue
    fi

    # Retention is judged by the meta file's modification time.
    mtime=$(stat -f%m "$meta_file" 2>/dev/null || stat -c%Y "$meta_file" 2>/dev/null || echo 0)
    # Unknown age (stat failed): keep the files rather than delete blindly.
    if [[ ! "$mtime" =~ ^[0-9]+$ ]] || (( mtime == 0 )); then
      continue
    fi
    if (( mtime >= cutoff_ts )); then
      continue
    fi

    # Derive the job base name (e.g. job_123) and delete its related files.
    base=$(basename "$meta_file" .meta)
    rm -f "$LOGS_DIR/${base}.meta" "$LOGS_DIR/${base}.out" "$LOGS_DIR/${base}.ext_id"
    cleaned=$(( cleaned + 1 ))
  done

  if (( cleaned > 0 )); then
    log "오래된 작업 파일 정리: ${cleaned}개 (보존 기간: ${retention_days}일)"
  fi
}
|
|
342
|
+
|
|
343
|
+
# ── Main execution ──────────────────────────────────────────
|
|
344
|
+
|
|
345
|
+
#######################################
# Run one autoloop tick: housekeeping, service health check (with automatic
# restart), pipeline tick, stuck-job / orphan / disk checks, and a summary.
# Globals:   ISSUES (read; filled by warn/err during the tick)
# Outputs:   log lines on stdout
# Returns:   0 unless a step aborts under `set -e`
#######################################
main() {
  log "========== autoloop tick =========="

  # 1. Log rotation + old job file cleanup (first, to protect the disk).
  rotate_logs
  cleanup_old_jobs

  # 2. Service health check + automatic restart.
  # restart_service only returns 0 after its own check_service call passed,
  # so the outcome is remembered instead of probing again: a further probe
  # would only append duplicate "Service DOWN" entries to ISSUES.
  local service_up=0
  if check_service || restart_service; then
    service_up=1
  fi

  # 3. Pipeline tick (only meaningful while the service is alive).
  if (( service_up )); then
    tick_pipelines
    log_evolution_summary
  fi

  # 4. Stuck job detection.
  check_stuck_jobs

  # 5. Orphan process check.
  cleanup_zombies

  # 6. Disk usage.
  check_disk

  # Summary.
  if (( ${#ISSUES[@]} > 0 )); then
    log "Issues (${#ISSUES[@]}): ${ISSUES[*]}"
  else
    log "All OK"
  fi

  log "========== done =========="
}
|
|
381
|
+
|
|
382
|
+
# Entry point: run a single tick, forwarding the script's arguments to main.
main "$@"
|