@walwal-harness/cli 4.0.0-beta.12 → 4.0.0-beta.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/harness-monitor.sh +110 -62
- package/scripts/harness-tmux-v4.sh +21 -9
- package/skills/team-action/SKILL.md +49 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@walwal-harness/cli",
|
|
3
|
-
"version": "4.0.0-beta.12",
|
|
3
|
+
"version": "4.0.0-beta.13",
|
|
4
4
|
"description": "Production harness for AI agent engineering — Planner, Generator(BE/FE), Evaluator(Func/Visual), optional Brainstormer (requirements refinement). Supports React and Flutter FE stacks.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"walwal-harness": "bin/init.js"
|
|
@@ -10,7 +10,19 @@ set -uo pipefail
|
|
|
10
10
|
|
|
11
11
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
# ── Args ──
|
|
14
|
+
# Usage: harness-monitor.sh [project-root] [--team N]
|
|
15
|
+
# --team N → 단일 팀만 렌더 (tmux per-team pane 용도)
|
|
16
|
+
PROJECT_ROOT=""
|
|
17
|
+
TEAM_FILTER=""
|
|
18
|
+
while [ $# -gt 0 ]; do
|
|
19
|
+
case "$1" in
|
|
20
|
+
--team) TEAM_FILTER="$2"; shift 2 ;;
|
|
21
|
+
--team=*) TEAM_FILTER="${1#--team=}"; shift ;;
|
|
22
|
+
*) [ -z "$PROJECT_ROOT" ] && PROJECT_ROOT="$1"; shift ;;
|
|
23
|
+
esac
|
|
24
|
+
done
|
|
25
|
+
|
|
14
26
|
if [ -z "$PROJECT_ROOT" ]; then
|
|
15
27
|
source "$SCRIPT_DIR/lib/harness-render-progress.sh"
|
|
16
28
|
PROJECT_ROOT="$(resolve_harness_root ".")" || {
|
|
@@ -58,8 +70,18 @@ render_v4_header() {
|
|
|
58
70
|
echo ""
|
|
59
71
|
}
|
|
60
72
|
|
|
73
|
+
# 고정폭 배너 구분선 (터미널 너비에 맞춰 채움)
|
|
74
|
+
banner_line() {
|
|
75
|
+
local cols
|
|
76
|
+
cols=$(tput cols 2>/dev/null || echo 78)
|
|
77
|
+
local line=""
|
|
78
|
+
for ((i=0; i<cols; i++)); do line+="━"; done
|
|
79
|
+
echo "$line"
|
|
80
|
+
}
|
|
81
|
+
|
|
61
82
|
render_team_section() {
|
|
62
83
|
local team_num="$1"
|
|
84
|
+
local log_lines="${2:-10}"
|
|
63
85
|
local color
|
|
64
86
|
color=$(team_color "$team_num")
|
|
65
87
|
|
|
@@ -90,89 +112,115 @@ render_team_section() {
|
|
|
90
112
|
*) phase_str="" ;;
|
|
91
113
|
esac
|
|
92
114
|
|
|
93
|
-
#
|
|
94
|
-
|
|
115
|
+
# ── 큰 배너 ──
|
|
116
|
+
local bar
|
|
117
|
+
bar=$(banner_line)
|
|
118
|
+
printf "%b%s%b\n" "$color" "$bar" "$RESET"
|
|
119
|
+
|
|
120
|
+
printf "%b%b TEAM %s%b" "$color$BOLD" "$icon" "$team_num" "$RESET"
|
|
95
121
|
if [ "$t_feature" != "—" ] && [ "$t_feature" != "null" ]; then
|
|
96
|
-
printf "%s " "$t_feature"
|
|
122
|
+
printf " %b%s%b" "$BOLD" "$t_feature" "$RESET"
|
|
97
123
|
if [ -n "$phase_str" ]; then
|
|
98
|
-
printf "%b%s%b" "$color" "$phase_str" "$RESET"
|
|
124
|
+
printf " %b[%s]%b" "$color" "$phase_str" "$RESET"
|
|
99
125
|
fi
|
|
100
126
|
if [ -n "$t_attempt" ] && [ "$t_attempt" != "—" ] && [ "$t_attempt" != "1" ]; then
|
|
101
|
-
printf " %
|
|
127
|
+
printf " %battempt #%s%b" "$YELLOW" "$t_attempt" "$RESET"
|
|
102
128
|
fi
|
|
103
129
|
else
|
|
104
|
-
printf "%bidle%b" "$DIM" "$RESET"
|
|
130
|
+
printf " %bidle%b" "$DIM" "$RESET"
|
|
105
131
|
fi
|
|
106
132
|
echo ""
|
|
133
|
+
printf "%b%s%b\n" "$color" "$bar" "$RESET"
|
|
107
134
|
|
|
108
|
-
# 팀 로그
|
|
135
|
+
# ── 팀 로그 ──
|
|
136
|
+
local have_logs=0
|
|
109
137
|
if [ -f "$PROGRESS_LOG" ]; then
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
+
local matched
|
|
139
|
+
matched=$(grep -E "team-${team_num}\b|team_${team_num}\b" "$PROGRESS_LOG" 2>/dev/null | tail -"$log_lines")
|
|
140
|
+
if [ -n "$matched" ]; then
|
|
141
|
+
have_logs=1
|
|
142
|
+
local cols
|
|
143
|
+
cols=$(tput cols 2>/dev/null || echo 78)
|
|
144
|
+
# 포맷: [ts] [icon agent] detail (너무 길면 터미널 너비에서 줄바꿈)
|
|
145
|
+
local prefix_w=18 # " HH:MM ✦ eval "
|
|
146
|
+
local detail_w=$(( cols - prefix_w ))
|
|
147
|
+
[ "$detail_w" -lt 20 ] && detail_w=20
|
|
148
|
+
|
|
149
|
+
echo "$matched" | while IFS= read -r line; do
|
|
150
|
+
local ts action detail
|
|
151
|
+
ts=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$1); print $1}')
|
|
152
|
+
action=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$3); print $3}')
|
|
153
|
+
detail=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$4); print $4}')
|
|
154
|
+
|
|
155
|
+
local short_ts
|
|
156
|
+
short_ts=$(echo "$ts" | grep -oE '[0-9]{2}:[0-9]{2}' | tail -1 || echo "$ts")
|
|
157
|
+
|
|
158
|
+
local a_icon="·" a_color="$DIM" a_label="$action"
|
|
159
|
+
case "$action" in
|
|
160
|
+
gen|gen-start|gen-read|gen-write|gen-test|gen-done)
|
|
161
|
+
a_icon="▶"; a_color="$GREEN"; a_label="Gen" ;;
|
|
162
|
+
eval|eval-start|eval-check|eval-done)
|
|
163
|
+
a_icon="✦"; a_color="$BLUE"; a_label="Eval" ;;
|
|
164
|
+
result|pass)
|
|
165
|
+
a_icon="✓"; a_color="$GREEN"; a_label="Result" ;;
|
|
166
|
+
fail)
|
|
167
|
+
a_icon="✗"; a_color="$RED"; a_label="Result" ;;
|
|
168
|
+
dequeue) a_icon="→"; a_color="$CYAN"; a_label="Queue" ;;
|
|
169
|
+
gate) a_icon="◆"; a_color="$YELLOW"; a_label="Gate" ;;
|
|
170
|
+
*) a_icon="·"; a_color="$DIM"; a_label="$action" ;;
|
|
171
|
+
esac
|
|
172
|
+
|
|
173
|
+
# 첫 줄
|
|
174
|
+
local first="${detail:0:$detail_w}"
|
|
175
|
+
printf " %b%s%b %b%s%b %-6s %s\n" \
|
|
176
|
+
"$DIM" "$short_ts" "$RESET" \
|
|
177
|
+
"$a_color" "$a_icon" "$RESET" \
|
|
178
|
+
"$a_label" "$first"
|
|
179
|
+
# 이어질 줄 (긴 detail 은 접지 않고 들여쓰기로 이어서 출력)
|
|
180
|
+
local rest="${detail:$detail_w}"
|
|
181
|
+
while [ -n "$rest" ]; do
|
|
182
|
+
local chunk="${rest:0:$detail_w}"
|
|
183
|
+
rest="${rest:$detail_w}"
|
|
184
|
+
printf " %*s%s\n" "$prefix_w" "" "$chunk"
|
|
185
|
+
done
|
|
186
|
+
done
|
|
187
|
+
fi
|
|
188
|
+
fi
|
|
189
|
+
if [ "$have_logs" -eq 0 ]; then
|
|
190
|
+
printf " %b(no activity)%b\n" "$DIM" "$RESET"
|
|
138
191
|
fi
|
|
139
192
|
}
|
|
140
193
|
|
|
141
194
|
render_v4() {
|
|
195
|
+
# 단일 팀 모드 (tmux per-team pane)
|
|
196
|
+
if [ -n "$TEAM_FILTER" ]; then
|
|
197
|
+
local rows log_lines
|
|
198
|
+
rows=$(tput lines 2>/dev/null || echo 20)
|
|
199
|
+
log_lines=$(( rows - 5 ))
|
|
200
|
+
[ "$log_lines" -lt 3 ] && log_lines=3
|
|
201
|
+
render_team_section "$TEAM_FILTER" "$log_lines"
|
|
202
|
+
return
|
|
203
|
+
fi
|
|
204
|
+
|
|
142
205
|
render_v4_header
|
|
143
206
|
|
|
144
|
-
# 팀 수 확인
|
|
145
207
|
local team_count=3
|
|
146
208
|
if [ -f "$QUEUE" ]; then
|
|
147
209
|
team_count=$(jq '.teams | length' "$QUEUE" 2>/dev/null || echo 3)
|
|
148
210
|
fi
|
|
211
|
+
[ "$team_count" -lt 1 ] && team_count=3
|
|
212
|
+
|
|
213
|
+
local rows per_team log_lines
|
|
214
|
+
rows=$(tput lines 2>/dev/null || echo 40)
|
|
215
|
+
per_team=$(( (rows - 2) / team_count ))
|
|
216
|
+
log_lines=$(( per_team - 5 ))
|
|
217
|
+
if [ "$log_lines" -lt 3 ]; then log_lines=3; fi
|
|
218
|
+
if [ "$log_lines" -gt 15 ]; then log_lines=15; fi
|
|
149
219
|
|
|
150
220
|
for i in $(seq 1 "$team_count"); do
|
|
151
|
-
render_team_section "$i"
|
|
221
|
+
render_team_section "$i" "$log_lines"
|
|
152
222
|
echo ""
|
|
153
223
|
done
|
|
154
|
-
|
|
155
|
-
# Lead/시스템 이벤트 (team이 아닌 엔트리)
|
|
156
|
-
echo -e "${BOLD}SYSTEM${RESET}"
|
|
157
|
-
if [ -f "$PROGRESS_LOG" ]; then
|
|
158
|
-
grep -v 'team-[0-9]' "$PROGRESS_LOG" 2>/dev/null | grep -v '^#' | grep -v '^$' | tail -5 | while IFS= read -r line; do
|
|
159
|
-
local ts agent action detail
|
|
160
|
-
ts=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$1); print $1}')
|
|
161
|
-
agent=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$2); print $2}')
|
|
162
|
-
action=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$3); print $3}')
|
|
163
|
-
detail=$(echo "$line" | awk -F'|' '{gsub(/^ +| +$/,"",$4); print $4}')
|
|
164
|
-
|
|
165
|
-
local short_ts
|
|
166
|
-
short_ts=$(echo "$ts" | grep -oE '[0-9]{2}:[0-9]{2}' | tail -1 || echo "$ts")
|
|
167
|
-
|
|
168
|
-
if [ ${#detail} -gt 35 ]; then detail="${detail:0:33}.."; fi
|
|
169
|
-
|
|
170
|
-
printf " %b%s%b %s %b%s%b\n" \
|
|
171
|
-
"$DIM" "$short_ts" "$RESET" \
|
|
172
|
-
"$agent" \
|
|
173
|
-
"$DIM" "$detail" "$RESET"
|
|
174
|
-
done
|
|
175
|
-
fi
|
|
176
224
|
}
|
|
177
225
|
|
|
178
226
|
# ══════════════════════════════════════════
|
|
@@ -292,8 +340,8 @@ clear
|
|
|
292
340
|
CURRENT_MODE=""
|
|
293
341
|
|
|
294
342
|
while true; do
|
|
295
|
-
# 동적 모드 감지
|
|
296
|
-
if [ -f "$QUEUE" ]; then
|
|
343
|
+
# 동적 모드 감지 (--team 지정 시 항상 v4)
|
|
344
|
+
if [ -n "$TEAM_FILTER" ] || [ -f "$QUEUE" ]; then
|
|
297
345
|
NEW_MODE="v4"
|
|
298
346
|
else
|
|
299
347
|
NEW_MODE="v3"
|
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
# harness-tmux-v4.sh — v4 Agent Teams: 원커맨드 실행
|
|
3
3
|
#
|
|
4
4
|
# ┌──────────────┬──────────────┬──────────────┐
|
|
5
|
-
# │ │ Dashboard │
|
|
6
|
-
# │ Main Claude │ (v4 queue)
|
|
7
|
-
# │ (Lead) ├──────────────┤
|
|
8
|
-
# │ │ Prompt
|
|
9
|
-
# │ │ History │
|
|
5
|
+
# │ │ Dashboard │ TEAM 1 │
|
|
6
|
+
# │ Main Claude │ (v4 queue) ├──────────────┤
|
|
7
|
+
# │ (Lead) ├──────────────┤ TEAM 2 │
|
|
8
|
+
# │ │ Prompt ├──────────────┤
|
|
9
|
+
# │ │ History │ TEAM 3 │
|
|
10
10
|
# └──────────────┴──────────────┴──────────────┘
|
|
11
11
|
#
|
|
12
12
|
# Usage:
|
|
@@ -80,10 +80,20 @@ PANE_MAIN=$(tmux new-session -d -s "$SESSION_NAME" -c "$PROJECT_ROOT" -x 220 -y
|
|
|
80
80
|
PANE_MID=$(tmux split-window -h -p 65 -t "$PANE_MAIN" -c "$PROJECT_ROOT" \
|
|
81
81
|
-P -F '#{pane_id}')
|
|
82
82
|
|
|
83
|
-
# 3. Split right section: Middle 45% | Right 55%
|
|
84
|
-
|
|
83
|
+
# 3. Split right section: Middle 45% | Right 55% — Right는 TEAM 1 으로 시작
|
|
84
|
+
PANE_T1=$(tmux split-window -h -p 55 -t "$PANE_MID" -c "$PROJECT_ROOT" \
|
|
85
85
|
-P -F '#{pane_id}' \
|
|
86
|
-
"bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-monitor.sh\" \"${PROJECT_ROOT}\"'")
|
|
86
|
+
"bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-monitor.sh\" \"${PROJECT_ROOT}\" --team 1'")
|
|
87
|
+
|
|
88
|
+
# 3b. Split TEAM 1 세로로 → TEAM 2 (하단 66%)
|
|
89
|
+
PANE_T2=$(tmux split-window -v -p 66 -t "$PANE_T1" -c "$PROJECT_ROOT" \
|
|
90
|
+
-P -F '#{pane_id}' \
|
|
91
|
+
"bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-monitor.sh\" \"${PROJECT_ROOT}\" --team 2'")
|
|
92
|
+
|
|
93
|
+
# 3c. Split TEAM 2 세로로 → TEAM 3 (하단 50%)
|
|
94
|
+
PANE_T3=$(tmux split-window -v -p 50 -t "$PANE_T2" -c "$PROJECT_ROOT" \
|
|
95
|
+
-P -F '#{pane_id}' \
|
|
96
|
+
"bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-monitor.sh\" \"${PROJECT_ROOT}\" --team 3'")
|
|
87
97
|
|
|
88
98
|
# 4. Split middle pane vertically: Dashboard (top 45%) | Prompt History (bottom 55%)
|
|
89
99
|
PANE_HISTORY=$(tmux split-window -v -p 55 -t "$PANE_MID" -c "$PROJECT_ROOT" \
|
|
@@ -109,7 +119,9 @@ fi
|
|
|
109
119
|
tmux select-pane -t "$PANE_MAIN" -T "Lead (Main Claude)"
|
|
110
120
|
tmux select-pane -t "$PANE_MID" -T "Dashboard"
|
|
111
121
|
tmux select-pane -t "$PANE_HISTORY" -T "Prompt History"
|
|
112
|
-
tmux select-pane -t "$
|
|
122
|
+
tmux select-pane -t "$PANE_T1" -T "TEAM 1"
|
|
123
|
+
tmux select-pane -t "$PANE_T2" -T "TEAM 2"
|
|
124
|
+
tmux select-pane -t "$PANE_T3" -T "TEAM 3"
|
|
113
125
|
|
|
114
126
|
tmux set-option -t "$SESSION_NAME" pane-border-status top 2>/dev/null || true
|
|
115
127
|
tmux set-option -t "$SESSION_NAME" pane-border-format " #{pane_title} " 2>/dev/null || true
|
|
@@ -66,7 +66,27 @@ Agent({
|
|
|
66
66
|
|
|
67
67
|
## 실시간 로깅 (필수)
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
Monitor 패널에서 **각 에이전트(Gen / Eval / Result)가 지금 무엇을 하고 있는지**가 보여야 합니다.
|
|
70
|
+
Phase 전환뿐 아니라 **내부 하위 단계**(파일 읽기, 파일 쓰기, 테스트 실행, AC 검증 등)까지
|
|
71
|
+
progress.log에 한 줄씩 남기세요. 대시보드는 3초마다 tail합니다.
|
|
72
|
+
|
|
73
|
+
**로깅 원칙**
|
|
74
|
+
- 의미 있는 동작마다 **한 줄씩** 즉시 기록 (파일 단위가 아니라 행위 단위)
|
|
75
|
+
- ACTION 토큰은 아래 표에서 선택 (Monitor가 아이콘/색을 매핑)
|
|
76
|
+
- DETAIL은 구체적으로 — 파일명, AC 번호, 에러 메시지 요약, 결정 사유
|
|
77
|
+
|
|
78
|
+
| ACTION | 사용 시점 | DETAIL 예시 |
|
|
79
|
+
|--------|-----------|-------------|
|
|
80
|
+
| `gen-start` | Gen Phase 시작 | `F-001 start — 6 AC` |
|
|
81
|
+
| `gen-read` | 소스/계약 읽기 | `read api-contract.json (POST /users)` |
|
|
82
|
+
| `gen-write` | 파일 생성/수정 | `write apps/service-user/src/user.controller.ts` |
|
|
83
|
+
| `gen-test` | 자체 게이트(tsc/eslint/jest) | `tsc OK · eslint 0 · jest 12/12` |
|
|
84
|
+
| `gen-done` | Gen Phase 종료 | `F-001 done — 5 files, 142 LOC` |
|
|
85
|
+
| `eval-start` | Evaluator Agent 호출 시작 | `F-001 spawning evaluator` |
|
|
86
|
+
| `eval-check` | AC 개별 검증 진행 | `AC-3 — verify POST /users returns 201` |
|
|
87
|
+
| `eval-done` | Eval 결과 수신 | `verdict=PASS score=2.95` |
|
|
88
|
+
| `result` / `pass` | PASS 확정 | `F-001 PASS — queue.pass` |
|
|
89
|
+
| `fail` | FAIL 확정(재시도/최종) | `FAIL #1 — AC-2 missing` |
|
|
70
90
|
|
|
71
91
|
**progress.log 기록** (하네스 루트의 progress.log에 append):
|
|
72
92
|
```bash
|
|
@@ -86,35 +106,53 @@ bash {HARNESS_ROOT}/scripts/harness-queue-manager.sh update_phase {FEATURE_ID} {
|
|
|
86
106
|
**시작 시 로깅:**
|
|
87
107
|
```bash
|
|
88
108
|
HARNESS_ROOT=$(git worktree list | head -1 | awk '{print $1}')
|
|
89
|
-
|
|
109
|
+
LOG="$HARNESS_ROOT/.harness/progress.log"
|
|
110
|
+
logev() { echo "$(date +'%Y-%m-%d %H:%M') | team-{N} | $1 | $2" >> "$LOG"; }
|
|
111
|
+
|
|
112
|
+
logev gen-start "{FEATURE_ID} start"
|
|
90
113
|
bash "$HARNESS_ROOT/scripts/harness-queue-manager.sh" update_phase {FEATURE_ID} gen "$HARNESS_ROOT"
|
|
91
114
|
```
|
|
92
115
|
|
|
93
|
-
1. Feature 정보 확인:
|
|
116
|
+
1. Feature 정보 확인 — **각 읽기마다 로그**:
|
|
117
|
+
```bash
|
|
118
|
+
logev gen-read "feature-list.json → {FEATURE_ID}"
|
|
119
|
+
logev gen-read "api-contract.json → {관련 엔드포인트}"
|
|
120
|
+
```
|
|
94
121
|
- `jq '.features[] | select(.id == "{FEATURE_ID}")' .harness/actions/feature-list.json`
|
|
95
122
|
- `.harness/actions/api-contract.json`에서 관련 엔드포인트 확인
|
|
96
123
|
- AC(Acceptance Criteria) 목록을 정확히 파악
|
|
97
124
|
|
|
98
|
-
2. 코드 생성:
|
|
125
|
+
2. 코드 생성 — **파일 쓰기마다 로그**:
|
|
126
|
+
```bash
|
|
127
|
+
logev gen-write "apps/service-user/src/user.controller.ts"
|
|
128
|
+
logev gen-write "libs/shared-dto/src/user.dto.ts"
|
|
129
|
+
```
|
|
99
130
|
- AGENTS.md의 IA-MAP에 따라 올바른 디렉토리에 코드 작성
|
|
100
131
|
- AC의 모든 항목을 충족하도록 구현
|
|
101
132
|
|
|
102
|
-
3. Pre-eval 게이트 (자체)
|
|
133
|
+
3. Pre-eval 게이트 (자체) — **결과 로그**:
|
|
134
|
+
```bash
|
|
135
|
+
logev gen-test "tsc OK · eslint 0w 0e · jest 12/12"
|
|
136
|
+
```
|
|
103
137
|
- tsc (타입 체크) 실행
|
|
104
138
|
- eslint (린트) 실행
|
|
105
139
|
- 컴파일 에러가 있으면 직접 수정 (Eval에 넘기지 않음)
|
|
106
140
|
|
|
107
141
|
**Gen 완료 로깅:**
|
|
108
142
|
```bash
|
|
109
|
-
|
|
143
|
+
logev gen-done "{FEATURE_ID} done — {변경파일수} files, {LOC} LOC"
|
|
110
144
|
```
|
|
111
145
|
|
|
112
146
|
## Phase 2: Evaluator (독립 평가 — Agent 도구 사용)
|
|
113
147
|
|
|
114
|
-
**Eval 시작 로깅:**
|
|
148
|
+
**Eval 시작 로깅 (하위 단계 포함):**
|
|
115
149
|
```bash
|
|
116
|
-
|
|
150
|
+
logev eval-start "{FEATURE_ID} spawning evaluator"
|
|
117
151
|
bash "$HARNESS_ROOT/scripts/harness-queue-manager.sh" update_phase {FEATURE_ID} eval "$HARNESS_ROOT"
|
|
152
|
+
# Evaluator가 AC를 하나씩 검증할 때마다:
|
|
153
|
+
# logev eval-check "AC-3 — POST /users returns 201"
|
|
154
|
+
# 최종:
|
|
155
|
+
# logev eval-done "verdict=PASS score=2.95"
|
|
118
156
|
```
|
|
119
157
|
|
|
120
158
|
코드 생성이 완료되면 **별도 Agent를 생성하여 평가**합니다.
|
|
@@ -163,20 +201,20 @@ Evaluator Agent 결과를 확인합니다:
|
|
|
163
201
|
|
|
164
202
|
### PASS인 경우 (VERDICT: PASS, SCORE ≥ 2.80):
|
|
165
203
|
```bash
|
|
166
|
-
|
|
204
|
+
logev result "{FEATURE_ID} PASS score={SCORE} — merging"
|
|
167
205
|
bash "$HARNESS_ROOT/scripts/harness-queue-manager.sh" pass {FEATURE_ID} "$HARNESS_ROOT"
|
|
168
206
|
```
|
|
169
207
|
변경 파일 목록과 AC 충족 요약을 Lead에게 반환.
|
|
170
208
|
|
|
171
209
|
### FAIL인 경우:
|
|
172
210
|
```bash
|
|
173
|
-
|
|
211
|
+
logev fail "{FEATURE_ID} FAIL #{ATTEMPT} — {사유요약}"
|
|
174
212
|
```
|
|
175
213
|
1. Evaluator의 FEEDBACK을 읽고 코드를 수정 (Phase 1로 돌아감)
|
|
176
214
|
2. 수정 후 다시 Phase 2 (새 Evaluator Agent 생성 — 이전 Eval 컨텍스트 없음)
|
|
177
215
|
3. 최대 3회 시도. 3회 모두 FAIL이면:
|
|
178
216
|
```bash
|
|
179
|
-
|
|
217
|
+
logev fail "{FEATURE_ID} FINAL FAIL after 3 attempts"
|
|
180
218
|
bash "$HARNESS_ROOT/scripts/harness-queue-manager.sh" fail {FEATURE_ID} "$HARNESS_ROOT"
|
|
181
219
|
```
|
|
182
220
|
실패 사유와 마지막 Eval 결과를 Lead에게 반환.
|