@walwal-harness/cli 4.0.0-alpha.1 → 4.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -209,6 +209,17 @@
209
209
  "frontend_cwd": "",
210
210
  "timeout_seconds": 120,
211
211
  "on_fail": "reroute_to_generator"
212
+ },
213
+ "parallel": {
214
+ "comment": "v4 Parallel Agent Teams 설정. npx walwal-harness v4 로 실행.",
215
+ "enabled": false,
216
+ "concurrency": 3,
217
+ "max_attempts_per_feature": 3,
218
+ "gen_model": "sonnet",
219
+ "eval_model": "opus",
220
+ "branch_strategy": "feature-branch",
221
+ "merge_on_pass": true,
222
+ "rebase_on_conflict": true
212
223
  }
213
224
  },
214
225
  "evaluation": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@walwal-harness/cli",
3
- "version": "4.0.0-alpha.1",
3
+ "version": "4.0.0-alpha.3",
4
4
  "description": "Production harness for AI agent engineering — Planner, Generator(BE/FE), Evaluator(Func/Visual), optional Brainstormer (requirements refinement). Supports React and Flutter FE stacks.",
5
5
  "bin": {
6
6
  "walwal-harness": "bin/init.js"
@@ -1,14 +1,17 @@
1
1
  #!/bin/bash
2
- # harness-studio-v4.sh — Harness Studio v4: Parallel Agent Teams
2
+ # harness-studio-v4.sh — Harness Studio v4: 3-Column Layout
3
3
  #
4
- # ┌──────────────────────┬─────────────────────────┐
5
- # │ Dashboard │ Team 1 (worker log)
6
- # │ (Queue + Teams + ├─────────────────────────┤
7
- # │ Feature status) Team 2 (worker log)
8
- # ├──────────────────────┤
9
- # │ Control ├─────────────────────────┤
10
- # │ harness> _ │ Team 3 (worker log) │
11
- # └──────────────────────┴─────────────────────────┘
4
+ # ┌──────────────┬──────────────┬──────────────┐
5
+ # │ │ │ Team 1 │
6
+ # │ │ ├──────────────┤
7
+ # │ Main │ Dashboard │ Team 2 │
8
+ # │ (Claude) │ (read-only) ├──────────────┤
9
+ # │ │ │ Team 3 │
10
+ # └──────────────┴──────────────┴──────────────┘
11
+ #
12
+ # Main: 사용자가 직접 Claude Code를 실행하는 대화형 세션
13
+ # Dashboard: feature-queue + team status 자동 갱신 (입력 불가)
14
+ # Team 1~3: claude -p headless worker (입력 불가, 로그만 표시)
12
15
  #
13
16
  # Usage:
14
17
  # bash scripts/harness-studio-v4.sh [project-root]
@@ -20,7 +23,6 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
20
23
  SESSION_NAME="harness-v4"
21
24
 
22
25
  PROJECT_ROOT=""
23
- KILL=false
24
26
 
25
27
  for arg in "$@"; do
26
28
  case "$arg" in
@@ -60,37 +62,39 @@ if [ ! -f "$QUEUE" ]; then
60
62
  fi
61
63
 
62
64
  # ══════════════════════════════════════════
63
- # Build 5-pane layout using explicit pane IDs
65
+ # 3-Column Layout (Main | Dashboard | Teams)
64
66
  # ══════════════════════════════════════════
65
67
 
66
- # 1. Dashboard (top-left)
67
- PANE_DASH=$(tmux new-session -d -s "$SESSION_NAME" -c "$PROJECT_ROOT" -x 200 -y 50 \
68
+ # Column 1: Main (interactive shell — user runs claude here)
69
+ PANE_MAIN=$(tmux new-session -d -s "$SESSION_NAME" -c "$PROJECT_ROOT" -x 220 -y 55 \
70
+ -P -F '#{pane_id}')
71
+
72
+ # Column 2: Dashboard (split right from Main, 66% remaining → 33% each of 3 cols)
73
+ PANE_DASH=$(tmux split-window -h -p 66 -t "$PANE_MAIN" -c "$PROJECT_ROOT" \
68
74
  -P -F '#{pane_id}' \
69
75
  "bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-dashboard-v4.sh\" \"${PROJECT_ROOT}\"'")
70
76
 
71
- # 2. Team 1 (top-right)
77
+ # Column 3: Team 1 (split right from Dashboard, 50% of remaining = 33% total)
72
78
  PANE_T1=$(tmux split-window -h -p 50 -t "$PANE_DASH" -c "$PROJECT_ROOT" \
73
79
  -P -F '#{pane_id}' \
74
80
  "bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-team-worker.sh\" 1 \"${PROJECT_ROOT}\"'")
75
81
 
76
- # 3. Control (bottom-left, 25% of left)
77
- PANE_CTRL=$(tmux split-window -v -p 25 -t "$PANE_DASH" -c "$PROJECT_ROOT" \
78
- -P -F '#{pane_id}' \
79
- "bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-control-v4.sh\" \"${PROJECT_ROOT}\"'")
80
-
81
- # 4. Team 2 (middle-right, split from Team 1)
82
+ # Team 2 (split below Team 1)
82
83
  PANE_T2=$(tmux split-window -v -p 66 -t "$PANE_T1" -c "$PROJECT_ROOT" \
83
84
  -P -F '#{pane_id}' \
84
85
  "bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-team-worker.sh\" 2 \"${PROJECT_ROOT}\"'")
85
86
 
86
- # 5. Team 3 (bottom-right, split from Team 2)
87
+ # Team 3 (split below Team 2)
87
88
  PANE_T3=$(tmux split-window -v -p 50 -t "$PANE_T2" -c "$PROJECT_ROOT" \
88
89
  -P -F '#{pane_id}' \
89
90
  "bash --norc --noprofile -c 'exec bash \"${SCRIPT_DIR}/harness-team-worker.sh\" 3 \"${PROJECT_ROOT}\"'")
90
91
 
92
+ # ── Prepare Main pane (unset nvm noise, clear) ──
93
+ tmux send-keys -t "$PANE_MAIN" "unset npm_config_prefix 2>/dev/null; clear" Enter
94
+
91
95
  # ── Pane titles ──
96
+ tmux select-pane -t "$PANE_MAIN" -T "Main"
92
97
  tmux select-pane -t "$PANE_DASH" -T "Dashboard"
93
- tmux select-pane -t "$PANE_CTRL" -T "Control"
94
98
  tmux select-pane -t "$PANE_T1" -T "Team 1"
95
99
  tmux select-pane -t "$PANE_T2" -T "Team 2"
96
100
  tmux select-pane -t "$PANE_T3" -T "Team 3"
@@ -98,18 +102,18 @@ tmux select-pane -t "$PANE_T3" -T "Team 3"
98
102
  tmux set-option -t "$SESSION_NAME" pane-border-status top 2>/dev/null || true
99
103
  tmux set-option -t "$SESSION_NAME" pane-border-format " #{pane_title} " 2>/dev/null || true
100
104
 
101
- # Focus Control
102
- tmux select-pane -t "$PANE_CTRL"
105
+ # ── Focus Main pane ──
106
+ tmux select-pane -t "$PANE_MAIN"
103
107
 
104
- # Attach
108
+ # ── Attach ──
105
109
  if [ -n "${TMUX:-}" ]; then
106
110
  tmux switch-client -t "$SESSION_NAME"
107
111
  else
108
112
  echo ""
109
113
  echo "Launching Harness Studio v4..."
110
- echo " Dashboard (left) : Feature Queue + Team status"
111
- echo " Control (left↓) : start/pause/assign/requeue"
112
- echo " Team 1-3 (right) : Parallel worker logs"
114
+ echo " Main (left) : Interactive run 'claude' here"
115
+ echo " Dashboard (mid) : Feature Queue + Team status (auto-refresh)"
116
+ echo " Team 1-3 (right) : Parallel workers (headless, log only)"
113
117
  echo ""
114
118
  tmux attach -t "$SESSION_NAME"
115
119
  fi
@@ -40,11 +40,13 @@ CONFIG="$PROJECT_ROOT/.harness/config.json"
40
40
  PROGRESS_LOG="$PROJECT_ROOT/.harness/progress.log"
41
41
  QUEUE_MGR="$SCRIPT_DIR/harness-queue-manager.sh"
42
42
 
43
+ # ── Lock file for git operations (prevent race conditions between teams) ──
44
+ GIT_LOCK="$PROJECT_ROOT/.harness/.git-lock"
45
+
43
46
  MAX_ATTEMPTS="${MAX_ATTEMPTS:-3}"
44
47
  GEN_MODEL="${GEN_MODEL:-sonnet}"
45
48
  EVAL_MODEL="${EVAL_MODEL:-opus}"
46
49
 
47
- # Read models from config if available
48
50
  if [ -f "$CONFIG" ]; then
49
51
  _gm=$(jq -r '.agents["generator-frontend"].model // empty' "$CONFIG" 2>/dev/null)
50
52
  _em=$(jq -r '.agents["evaluator-functional"].model // empty' "$CONFIG" 2>/dev/null)
@@ -68,20 +70,33 @@ log() {
68
70
  }
69
71
 
70
72
  log_progress() {
71
- local action="$1" detail="$2"
72
- local now
73
- now=$(date +"%Y-%m-%d")
74
- echo "${now} | team-${TEAM_ID} | ${action} | ${detail}" >> "$PROGRESS_LOG"
73
+ echo "$(date +"%Y-%m-%d") | team-${TEAM_ID} | ${1} | ${2}" >> "$PROGRESS_LOG"
74
+ }
75
+
76
+ # ── Git lock: serialize git checkout/merge across teams ──
77
#######################################
# Take the cross-team git lock, waiting for the current holder if needed.
#
# The lock is the regular file $GIT_LOCK whose content is "T<TEAM_ID>".
# It is created with noclobber so that "does it exist?" and "create it"
# are one atomic step; a separate [ -f ] test followed by a write lets
# two teams pass the test together and both believe they own the lock.
#
# Globals:   GIT_LOCK (path, written), TEAM_ID (read), RED/RESET (read)
# Arguments: none
# Outputs:   a log line when a stale lock is taken over
# Returns:   0 once the lock file has been written by this team;
#            non-zero only if the forced write after a timeout fails
#######################################
acquire_git_lock() {
  local max_wait=60 waited=0

  # With noclobber, '>' refuses to overwrite an existing file, so the
  # redirection itself is the atomic test-and-set. It runs in a subshell
  # so the option never leaks into the rest of the script.
  until ( set -o noclobber; echo "T${TEAM_ID}" > "$GIT_LOCK" ) 2>/dev/null; do
    if [ "$waited" -ge "$max_wait" ]; then
      # Holder is presumed dead: take the lock over unconditionally so a
      # crashed worker (or an unwritable lock path) cannot block forever.
      log "${RED}Git lock timeout (${max_wait}s). Removing stale lock.${RESET}"
      rm -f "$GIT_LOCK"
      echo "T${TEAM_ID}" > "$GIT_LOCK"
      return
    fi
    sleep 1
    waited=$((waited + 1))
  done
}
90
+
91
#######################################
# Release the cross-team git lock, but only if this team still owns it.
#
# acquire_git_lock removes a lock it considers stale after a timeout, so
# by the time a slow holder calls this function the file may already
# belong to another team. Deleting it unconditionally would let a third
# team start git operations while the new owner is still running.
#
# Globals:   GIT_LOCK (path, removed), TEAM_ID (read)
# Arguments: none
# Returns:   0 always (a missing or foreign lock is not an error)
#######################################
release_git_lock() {
  local owner
  owner=$(cat "$GIT_LOCK" 2>/dev/null) || owner=""
  if [ "$owner" = "T${TEAM_ID}" ]; then
    rm -f "$GIT_LOCK"
  fi
  return 0
}
76
94
 
77
95
  # ── Pre-eval gate ──
78
96
  run_pre_eval_gate() {
79
- local feature_id="$1"
80
97
  local cwd="$PROJECT_ROOT"
81
98
 
82
- # Read frontend_cwd from config
83
99
  if [ -f "$CONFIG" ]; then
84
- local _cwd
85
100
  _cwd=$(jq -r '.flow.pre_eval_gate.frontend_cwd // empty' "$CONFIG" 2>/dev/null)
86
101
  if [ -n "$_cwd" ] && [ "$_cwd" != "null" ]; then
87
102
  cwd="$PROJECT_ROOT/$_cwd"
@@ -97,85 +112,86 @@ run_pre_eval_gate() {
97
112
  checks=("npx tsc --noEmit" "npx eslint src/")
98
113
  fi
99
114
 
100
- local all_pass=true
115
+ local all_pass=true fail_cmds=""
101
116
  for cmd in "${checks[@]}"; do
102
- if ! (cd "$cwd" && timeout 120s bash -c "$cmd" >/dev/null 2>&1); then
103
- log "${RED}Gate FAIL:${RESET} $cmd"
117
+ if (cd "$cwd" && timeout 120s bash -c "$cmd" >/dev/null 2>&1); then
118
+ log " ${GREEN}✓${RESET} $cmd"
119
+ else
120
+ log " ${RED}✗${RESET} $cmd"
104
121
  all_pass=false
122
+ fail_cmds+="$cmd; "
105
123
  fi
106
124
  done
107
125
 
108
- if [ "$all_pass" = true ]; then
109
- log "${GREEN}Gate: tsc ✓ eslint ✓${RESET}"
110
- return 0
111
- else
112
- return 1
113
- fi
126
+ [ "$all_pass" = true ]
114
127
  }
115
128
 
116
129
  # ── Build generator prompt ──
117
130
  build_gen_prompt() {
118
- local feature_id="$1"
119
- local attempt="$2"
120
- local eval_feedback="${3:-}"
131
+ local fid="$1" attempt="$2" feedback="${3:-}"
121
132
 
122
- local feature_json
123
- feature_json=$(jq --arg fid "$feature_id" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
133
+ local fobj
134
+ fobj=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
135
+ local fname fdesc ac_json deps_json
136
+ fname=$(echo "$fobj" | jq -r '.name // .description // ""')
137
+ fdesc=$(echo "$fobj" | jq -r '.description // ""')
138
+ ac_json=$(echo "$fobj" | jq -c '.ac // []')
139
+ deps_json=$(echo "$fobj" | jq -c '.depends_on // []')
124
140
 
125
- local feature_name feature_desc ac_json
126
- feature_name=$(echo "$feature_json" | jq -r '.name // .description // ""')
127
- feature_desc=$(echo "$feature_json" | jq -r '.description // ""')
128
- ac_json=$(echo "$feature_json" | jq -c '.ac // []')
141
+ local project_name
142
+ project_name=$(jq -r '.project_name // ""' "$PROJECT_ROOT/.harness/progress.json" 2>/dev/null)
129
143
 
130
- local prompt="You are Generator-Frontend for a harness engineering project.
144
+ cat <<PROMPT
145
+ You are Generator-Frontend for a harness engineering project.
131
146
 
132
- PROJECT: $(jq -r '.project_name // ""' "$PROJECT_ROOT/.harness/progress.json" 2>/dev/null)
147
+ PROJECT: ${project_name}
133
148
  CONVENTIONS: Read CONVENTIONS.md if it exists.
134
149
 
135
- YOUR TASK: Implement ONLY feature ${feature_id}: ${feature_name}
136
- Description: ${feature_desc}
150
+ YOUR TASK: Implement ONLY feature ${fid}: ${fname}
151
+ Description: ${fdesc}
152
+ Dependencies (already implemented): ${deps_json}
137
153
  Acceptance Criteria: ${ac_json}
138
154
 
139
155
  Read these files for context:
140
- - .harness/actions/feature-list.json (filter to ${feature_id})
156
+ - .harness/actions/feature-list.json (filter to ${fid})
141
157
  - .harness/actions/api-contract.json (relevant endpoints)
142
158
  - .harness/actions/plan.md (overall design)
143
159
 
144
160
  RULES:
145
- - Implement ONLY this feature, do not touch other features' code
161
+ - Implement ONLY this single feature
162
+ - Do NOT modify code belonging to other features
146
163
  - Follow existing code patterns and CONVENTIONS.md
147
- - Commit your changes with message: 'feat(${feature_id}): ${feature_name}'
148
- - Do NOT create tests (evaluator will handle that)"
164
+ - When done, stage and commit with: git add -A && git commit -m 'feat(${fid}): ${fname}'
165
+ PROMPT
149
166
 
150
- if [ "$attempt" -gt 1 ] && [ -n "$eval_feedback" ]; then
151
- prompt="$prompt
167
+ if [ "$attempt" -gt 1 ] && [ -n "$feedback" ]; then
168
+ cat <<RETRY
152
169
 
153
170
  PREVIOUS EVAL FEEDBACK (attempt ${attempt}):
154
- ${eval_feedback}
171
+ ${feedback}
155
172
 
156
- Fix the issues from the feedback above. Focus specifically on the failed criteria."
173
+ Fix the issues above. Focus specifically on the failed criteria.
174
+ RETRY
157
175
  fi
158
-
159
- echo "$prompt"
160
176
  }
161
177
 
162
178
  # ── Build evaluator prompt ──
163
179
  build_eval_prompt() {
164
- local feature_id="$1"
165
-
166
- local feature_json
167
- feature_json=$(jq --arg fid "$feature_id" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
180
+ local fid="$1"
168
181
 
169
- local feature_name ac_json
170
- feature_name=$(echo "$feature_json" | jq -r '.name // .description // ""')
171
- ac_json=$(echo "$feature_json" | jq -c '.ac // []')
182
+ local fobj
183
+ fobj=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
184
+ local fname ac_json
185
+ fname=$(echo "$fobj" | jq -r '.name // .description // ""')
186
+ ac_json=$(echo "$fobj" | jq -c '.ac // []')
172
187
 
173
188
  local passed_list
174
189
  passed_list=$(jq -r '.queue.passed // [] | join(", ")' "$QUEUE" 2>/dev/null)
175
190
 
176
- echo "You are Evaluator-Functional for a harness engineering project.
191
+ cat <<PROMPT
192
+ You are Evaluator-Functional for a harness engineering project.
177
193
 
178
- TASK: Evaluate feature ${feature_id}: ${feature_name}
194
+ TASK: Evaluate feature ${fid}: ${fname}
179
195
 
180
196
  Acceptance Criteria to verify:
181
197
  ${ac_json}
@@ -192,23 +208,24 @@ R5: Error handling & edge cases (15%)
192
208
  PASS threshold: 2.80 / 3.00
193
209
  FAIL: any AC not met, any regression failure
194
210
 
195
- OUTPUT FORMAT (must be parseable):
211
+ You MUST output this exact block (parseable by automation):
196
212
  ---EVAL-RESULT---
197
- FEATURE: ${feature_id}
213
+ FEATURE: ${fid}
198
214
  VERDICT: PASS or FAIL
199
215
  SCORE: X.XX
200
- FEEDBACK: <one paragraph summary of issues or confirmation>
201
- ---END-EVAL-RESULT---"
216
+ FEEDBACK: one paragraph summary
217
+ ---END-EVAL-RESULT---
218
+ PROMPT
202
219
  }
203
220
 
204
- # ── Parse eval result ──
221
+ # ── Parse eval result (macOS-compatible, no grep -P) ──
205
222
#######################################
# Extract verdict, score and feedback from an evaluator transcript.
#
# Looks for the first "VERDICT:", "SCORE:" and "FEEDBACK:" lines (leading
# whitespace tolerated). Only the verdict word and the leading numeric part
# of the score are kept, so trailing spaces, a carriage return, or extras
# such as "PASS (all criteria met)" / "2.95/3.00" do not leak into the
# fields that the caller compares and logs. Uses POSIX sed only (no
# grep -P), so it works with BSD tools on macOS.
#
# Arguments: $1 - full evaluator output text
# Outputs:   one line "VERDICT|SCORE|FEEDBACK" on stdout; missing fields
#            become "UNKNOWN", "0.00" and "no feedback" respectively
#######################################
parse_eval_result() {
  local output="$1"

  local verdict score feedback
  verdict=$(printf '%s\n' "$output" \
    | sed -n 's/^[[:space:]]*VERDICT:[[:space:]]*\([[:alnum:]_]\{1,\}\).*/\1/p' \
    | head -1)
  score=$(printf '%s\n' "$output" \
    | sed -n 's/^[[:space:]]*SCORE:[[:space:]]*\([0-9.]\{1,\}\).*/\1/p' \
    | head -1)
  # Feedback is free text: keep the whole remainder of the line.
  feedback=$(printf '%s\n' "$output" \
    | sed -n 's/^[[:space:]]*FEEDBACK:[[:space:]]*//p' \
    | head -1)

  echo "${verdict:-UNKNOWN}|${score:-0.00}|${feedback:-no feedback}"
}
@@ -223,14 +240,13 @@ while true; do
223
240
  # ── Dequeue next feature ──
224
241
  feature_id=$(bash "$QUEUE_MGR" dequeue "$TEAM_ID" "$PROJECT_ROOT" 2>/dev/null)
225
242
 
226
- if [ -z "$feature_id" ] || [[ "$feature_id" == "[queue]"* ]]; then
227
- log "${DIM}No features in queue. Waiting 10s...${RESET}"
243
+ if [ -z "$feature_id" ] || [[ "$feature_id" == "["* ]]; then
244
+ log "${DIM}No features ready. Waiting 10s...${RESET}"
228
245
  sleep 10
229
246
 
230
247
  # Check if completely done
231
- local remaining
232
- remaining=$(jq '(.queue.ready | length) + (.queue.blocked | length) + (.queue.in_progress | length)' "$QUEUE" 2>/dev/null)
233
- if [ "${remaining:-1}" -eq 0 ]; then
248
+ remaining=$(jq '(.queue.ready | length) + (.queue.blocked | length) + (.queue.in_progress | length)' "$QUEUE" 2>/dev/null || echo "1")
249
+ if [ "${remaining}" -eq 0 ] 2>/dev/null; then
234
250
  log "${GREEN}${BOLD}ALL FEATURES COMPLETE. Team ${TEAM_ID} exiting.${RESET}"
235
251
  log_progress "complete" "All features done"
236
252
  exit 0
@@ -241,16 +257,18 @@ while true; do
241
257
  log "${CYAN}▶ Dequeued ${feature_id}${RESET}"
242
258
  log_progress "dequeue" "${feature_id}"
243
259
 
244
- # ── Create feature branch ──
245
- local branch="feature/${feature_id}"
246
- (cd "$PROJECT_ROOT" && git checkout -b "$branch" main 2>/dev/null) || \
260
+ # ── Create feature branch (with lock) ──
261
+ branch="feature/${feature_id}"
262
+ acquire_git_lock
263
+ (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git checkout -b "$branch" 2>/dev/null) || \
247
264
  (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
265
+ release_git_lock
248
266
  log "Branch: ${branch}"
249
267
 
250
268
  # ── Gen→Eval Loop ──
251
- local attempt=1
252
- local eval_feedback=""
253
- local passed=false
269
+ attempt=1
270
+ eval_feedback=""
271
+ passed=false
254
272
 
255
273
  while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
256
274
  log "${BOLD}── Attempt ${attempt}/${MAX_ATTEMPTS} ──${RESET}"
@@ -259,18 +277,17 @@ while true; do
259
277
  log "Gen ${feature_id} (${GEN_MODEL})..."
260
278
  bash "$QUEUE_MGR" update_phase "$feature_id" "gen" "$attempt" "$PROJECT_ROOT" 2>/dev/null
261
279
 
262
- local gen_prompt
263
280
  gen_prompt=$(build_gen_prompt "$feature_id" "$attempt" "$eval_feedback")
264
281
 
265
- local gen_output
282
+ gen_start=$(date +%s)
266
283
  gen_output=$(cd "$PROJECT_ROOT" && claude -p "$gen_prompt" --model "$GEN_MODEL" --output-format text 2>&1) || true
284
+ gen_elapsed=$(( $(date +%s) - gen_start ))
267
285
 
268
- local files_changed
269
- files_changed=$(cd "$PROJECT_ROOT" && git diff --name-only | wc -l | tr -d ' ')
270
- log "Gen complete ${files_changed} files changed"
271
- log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files"
286
+ files_changed=$(cd "$PROJECT_ROOT" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ')
287
+ log "Gen done (${gen_elapsed}s) ${files_changed} files"
288
+ log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files, ${gen_elapsed}s"
272
289
 
273
- # Auto-commit gen output
290
+ # Auto-commit
274
291
  (cd "$PROJECT_ROOT" && git add -A && git commit -m "feat(${feature_id}): gen attempt ${attempt}" --no-verify 2>/dev/null) || true
275
292
 
276
293
  # ── Pre-eval gate ──
@@ -288,72 +305,103 @@ while true; do
288
305
  log "Eval ${feature_id} (${EVAL_MODEL})..."
289
306
  bash "$QUEUE_MGR" update_phase "$feature_id" "eval" "$attempt" "$PROJECT_ROOT" 2>/dev/null
290
307
 
291
- local eval_prompt
292
308
  eval_prompt=$(build_eval_prompt "$feature_id")
293
309
 
294
- local eval_output
310
+ eval_start=$(date +%s)
295
311
  eval_output=$(cd "$PROJECT_ROOT" && claude -p "$eval_prompt" --model "$EVAL_MODEL" --output-format text 2>&1) || true
312
+ eval_elapsed=$(( $(date +%s) - eval_start ))
296
313
 
297
314
  # Parse result
298
- local result_line verdict score feedback
299
315
  result_line=$(parse_eval_result "$eval_output")
300
316
  verdict=$(echo "$result_line" | cut -d'|' -f1)
301
317
  score=$(echo "$result_line" | cut -d'|' -f2)
302
318
  feedback=$(echo "$result_line" | cut -d'|' -f3-)
303
319
 
304
- log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score})"
320
+ log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score}) ${eval_elapsed}s"
305
321
 
306
322
  if [ "$verdict" = "PASS" ]; then
307
- log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00"
323
+ log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
308
324
  passed=true
309
325
  break
310
326
  else
311
- log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00"
327
+ log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
312
328
  log "${DIM} ${feedback}${RESET}"
313
329
  eval_feedback="$feedback"
314
330
  attempt=$((attempt + 1))
315
331
  fi
316
332
  done
317
333
 
318
- # ── Result processing ──
334
+ # ══════════════════════════════════════════
335
+ # Phase 3: Branch merge with conflict handling
336
+ # ══════════════════════════════════════════
319
337
  if [ "$passed" = true ]; then
320
- # Merge to main
321
338
  log "Merging ${branch} → main..."
322
- (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null) || {
323
- log "${YELLOW}Merge conflict — rebasing...${RESET}"
324
- (cd "$PROJECT_ROOT" && git checkout "$branch" && git rebase main 2>/dev/null && \
325
- git checkout main && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null) || {
326
- log "${RED}Merge failed after rebase. Manual intervention needed.${RESET}"
327
- bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
328
- log_progress "merge-fail" "${feature_id}"
329
- continue
330
- }
331
- }
332
-
333
- # Mark passed in queue
334
- bash "$QUEUE_MGR" pass "$feature_id" "$PROJECT_ROOT" 2>/dev/null
335
- log_progress "pass" "${feature_id} merged to main"
336
-
337
- # Update feature-list.json passes
338
- if [ -f "$FEATURES" ]; then
339
- jq --arg fid "$feature_id" '
340
- .features |= map(
341
- if .id == $fid then
342
- .passes = ((.passes // []) + ["generator-frontend", "evaluator-functional"] | unique)
343
- else . end
344
- )
345
- ' "$FEATURES" > "${FEATURES}.tmp" && mv "${FEATURES}.tmp" "$FEATURES"
339
+ acquire_git_lock
340
+
341
+ merge_ok=false
342
+
343
+ # Attempt 1: straight merge
344
+ if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null); then
345
+ merge_ok=true
346
+ else
347
+ # Attempt 2: abort failed merge, rebase, re-eval gate, then merge
348
+ log "${YELLOW}Conflict detected — rebasing ${branch} onto main...${RESET}"
349
+ (cd "$PROJECT_ROOT" && git merge --abort 2>/dev/null) || true
350
+ (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
351
+
352
+ if (cd "$PROJECT_ROOT" && git rebase main 2>/dev/null); then
353
+ log "Rebase OK. Re-running gate..."
354
+
355
+ if run_pre_eval_gate "$feature_id"; then
356
+ log "Gate still PASS after rebase."
357
+ if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null); then
358
+ merge_ok=true
359
+ fi
360
+ else
361
+ log "${RED}Gate FAIL after rebase — needs re-gen${RESET}"
362
+ fi
363
+ else
364
+ log "${RED}Rebase failed — conflicts too complex${RESET}"
365
+ (cd "$PROJECT_ROOT" && git rebase --abort 2>/dev/null) || true
366
+ fi
367
+ fi
368
+
369
+ release_git_lock
370
+
371
+ if [ "$merge_ok" = true ]; then
372
+ # Clean up feature branch
373
+ (cd "$PROJECT_ROOT" && git branch -d "$branch" 2>/dev/null) || true
374
+
375
+ bash "$QUEUE_MGR" pass "$feature_id" "$PROJECT_ROOT" 2>/dev/null
376
+ log_progress "pass" "${feature_id} merged to main"
377
+
378
+ # Update feature-list.json passes
379
+ if [ -f "$FEATURES" ]; then
380
+ jq --arg fid "$feature_id" '
381
+ .features |= map(
382
+ if .id == $fid then
383
+ .passes = ((.passes // []) + ["generator-frontend", "evaluator-functional"] | unique)
384
+ else . end
385
+ )
386
+ ' "$FEATURES" > "${FEATURES}.tmp" && mv "${FEATURES}.tmp" "$FEATURES"
387
+ fi
388
+
389
+ log "${GREEN}${BOLD}✓ ${feature_id} DONE${RESET}"
390
+ else
391
+ log "${RED}${BOLD}Merge failed — ${feature_id} marked as failed${RESET}"
392
+ (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
393
+ bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
394
+ log_progress "merge-fail" "${feature_id}"
346
395
  fi
347
396
 
348
397
  else
349
398
  log "${RED}${BOLD}✗ ${feature_id} FAILED after ${MAX_ATTEMPTS} attempts${RESET}"
399
+ acquire_git_lock
400
+ (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
401
+ release_git_lock
350
402
  bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
351
403
  log_progress "fail" "${feature_id} after ${MAX_ATTEMPTS} attempts"
352
-
353
- # Return to main
354
- (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
355
404
  fi
356
405
 
357
- # Brief pause before next feature
358
406
  sleep 2
359
407
  done
@@ -60,6 +60,32 @@ disable-model-invocation: true
60
60
  5. `actions/api-contract.json` — 기대 API 동작
61
61
  6. `.harness/progress.json`
62
62
 
63
+ ## v4 Feature-Level Mode (Parallel Agent Teams)
64
+
65
+ v4에서 Team Worker가 `claude -p`로 호출할 때, 프롬프트에 `FEATURE_ID`가 지정된다.
66
+
67
+ ### Feature-Level Rules
68
+ - `feature-list.json`에서 **지정된 FEATURE_ID의 AC만** 검증
69
+ - Regression: `feature-queue.json`의 `passed` 목록에 있는 Feature들의 AC 재검증
70
+ - Cross-Validation: Feature 단위에서는 skip (Sprint-End에서 수행)
71
+ - Visual Evaluation: Feature 단위에서는 skip (Sprint-End에서 수행)
72
+ - 출력 형식: `---EVAL-RESULT---` 블록 (Worker가 파싱 가능)
73
+
74
+ ### Feature-Level Scoring
75
+ - 동일한 R1-R5 루브릭 적용
76
+ - PASS 기준: 2.80/3.00 (변경 없음)
77
+ - 1건이라도 Regression 실패 시 FAIL (변경 없음)
78
+
79
+ ### Output Format (Machine-Parseable)
80
+ ```
81
+ ---EVAL-RESULT---
82
+ FEATURE: F-XXX
83
+ VERDICT: PASS or FAIL
84
+ SCORE: X.XX
85
+ FEEDBACK: one paragraph summary
86
+ ---END-EVAL-RESULT---
87
+ ```
88
+
63
89
  ## Evaluation Steps
64
90
 
65
91
  ### Step 0: IA Structure Compliance (GATE)
@@ -43,6 +43,23 @@ disable-model-invocation: true
43
43
 
44
44
  **Backend 통합 러너가 동작 중이어야 함.** Gateway 미응답 시 → STOP.
45
45
 
46
+ ## v4 Feature-Level Mode (Parallel Agent Teams)
47
+
48
+ v4에서 Team Worker가 `claude -p`로 호출할 때, 프롬프트에 `FEATURE_ID`가 지정된다.
49
+
50
+ ### Feature-Level Rules
51
+ - `feature-list.json`에서 **지정된 FEATURE_ID만** 필터하여 구현
52
+ - 다른 Feature의 코드를 수정하지 않음
53
+ - `depends_on`에 명시된 Feature는 이미 구현/머지 완료된 상태
54
+ - Feature branch (`feature/F-XXX`)에서 작업, 완료 시 commit
55
+ - Sprint Contract는 작성하지 않음 (v4에서는 Feature 단위로 관리)
56
+
57
+ ### Feature-Level Prompt Template
58
+ Worker가 전달하는 프롬프트에는 다음이 포함됨:
59
+ - `FEATURE_ID`, `feature_name`, `description`, `ac` (Acceptance Criteria)
60
+ - `depends_on` (이미 완료된 의존 Feature 목록)
61
+ - Eval 재시도 시: 이전 Eval의 피드백
62
+
46
63
  ## Sprint Workflow
47
64
 
48
65
  1. **Sprint Contract FE 섹션 추가** — 컴포넌트, API 연동, 성공 기준