@walwal-harness/cli 4.0.0-alpha.11 → 4.0.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -133,62 +133,15 @@
133
133
  ]
134
134
  }
135
135
  },
136
- "generator-frontend-flutter": {
137
- "role": "Flutter 앱 개발 — Riverpod, integrated_data_layer(Retrofit), i18n(ARB), build_runner",
138
- "skill": "harness-generator-frontend-flutter",
139
- "inputs": ["actions/plan.md", "actions/feature-list.json", "actions/api-contract.json", "actions/sprint-contract.md"],
140
- "outputs": ["code:flutter", "actions/sprint-contract.md"],
141
- "order": 2,
142
- "fe_stack": "flutter",
143
- "model": "sonnet",
144
- "thinking_mode": null
145
- },
146
- "evaluator-functional-flutter": {
147
- "role": "Flutter 앱 검증 — flutter analyze, flutter test, build_runner 일관성, 안티패턴 정적 검증",
148
- "skill": "harness-evaluator-functional-flutter",
149
- "tools": ["bash:flutter", "bash:dart"],
150
- "inputs": ["actions/sprint-contract.md"],
151
- "outputs": ["actions/evaluation-functional.md"],
152
- "fe_stack": "flutter",
153
- "model": "opus",
154
- "thinking_mode": "ultrathink"
155
- }
156
136
  },
157
137
  "flow": {
158
138
  "sequence": ["dispatcher", "planner", "generator-backend", "generator-frontend", "evaluator-functional", "evaluator-visual"],
159
139
  "pipeline_selection": {
160
- "comment": "Dispatcher가 pipeline.json으로 활성 에이전트를 결정. fe_stack 필드로 React/Flutter를 구분. harness-next.sh가 pipeline.json.fe_stack 을 읽어 generator-frontend/evaluator-functional 을 Flutter 변형으로 치환한다.",
140
+ "comment": "Dispatcher가 pipeline.json으로 활성 에이전트를 결정.",
161
141
  "pipelines": {
162
142
  "FULLSTACK": ["planner", "generator-backend", "generator-frontend", "evaluator-functional", "evaluator-visual"],
163
143
  "FE-ONLY": ["planner:light", "generator-frontend", "evaluator-functional", "evaluator-visual"],
164
144
  "BE-ONLY": ["planner", "generator-backend", "evaluator-functional:api-only"]
165
- },
166
- "fe_stack_substitution": {
167
- "comment": "pipeline.json.fe_stack + fe_target 에 따라 FE 에이전트 치환. Flutter Web 은 React 와 동일한 Playwright 기반 evaluator 를 사용한다.",
168
- "flutter": {
169
- "_doc": "fe_target = web | mobile | desktop. by_target 으로 분기.",
170
- "by_target": {
171
- "web": {
172
- "_doc": "Flutter Web — 컴파일 결과가 HTML+JS+CSS 이므로 Playwright evaluator 사용 가능",
173
- "generator-frontend": "generator-frontend-flutter",
174
- "evaluator-functional": "evaluator-functional",
175
- "evaluator-visual": "evaluator-visual"
176
- },
177
- "mobile": {
178
- "_doc": "Flutter Mobile (Android/iOS) — 브라우저 없음, 정적 분석 evaluator 사용",
179
- "generator-frontend": "generator-frontend-flutter",
180
- "evaluator-functional": "evaluator-functional-flutter",
181
- "evaluator-visual": "__skip__"
182
- },
183
- "desktop": {
184
- "_doc": "Flutter Desktop (macOS/Windows/Linux) — 브라우저 없음, 정적 분석 evaluator 사용",
185
- "generator-frontend": "generator-frontend-flutter",
186
- "evaluator-functional": "evaluator-functional-flutter",
187
- "evaluator-visual": "__skip__"
188
- }
189
- },
190
- "_default_target": "mobile"
191
- }
192
145
  }
193
146
  },
194
147
  "sprint_execution": {
@@ -0,0 +1 @@
1
+ # For consumer projects, add .worktrees/ to .gitignore
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@walwal-harness/cli",
3
- "version": "4.0.0-alpha.11",
3
+ "version": "4.0.0-alpha.13",
4
4
  "description": "Production harness for AI agent engineering — Planner, Generator(BE/FE), Evaluator(Func/Visual), optional Brainstormer (requirements refinement). Supports React and Flutter FE stacks.",
5
5
  "bin": {
6
6
  "walwal-harness": "bin/init.js"
@@ -59,21 +59,10 @@ if [ -f "$PIPELINE_JSON" ]; then
59
59
  fi
60
60
 
61
61
  # ─────────────────────────────────────────
62
- # fe_stack + fe_target 치환 헬퍼
63
- # pipeline_selection.pipelines 에서 읽은 에이전트명을 fe_stack/fe_target 에 따라 치환
64
- # - react: 그대로
65
- # - flutter+web: generator-frontend → generator-frontend-flutter 만 치환, eval 은 그대로 (Playwright 사용 가능)
66
- # - flutter+mobile/desktop: eval 도 정적 분석용으로 치환, evaluator-visual 은 __skip__
62
+ # fe_stack 치환 (no-op — Flutter 지원 제거됨, 하위 호환용 stub)
67
63
  # ─────────────────────────────────────────
68
64
  substitute_fe_stack() {
69
- local agent="$1"
70
- if [ "$fe_stack" != "flutter" ]; then
71
- echo "$agent"
72
- return
73
- fi
74
- local sub
75
- sub=$(jq -r ".flow.pipeline_selection.fe_stack_substitution.${fe_stack}.by_target[\"${fe_target}\"][\"${agent}\"] // \"${agent}\"" "$CONFIG" 2>/dev/null)
76
- echo "$sub"
65
+ echo "$1"
77
66
  }
78
67
 
79
68
  # ─────────────────────────────────────────
@@ -105,7 +94,7 @@ run_pre_eval_gate() {
105
94
  # current_agent가 generator가 아닌 경우 (예: dispatcher로 리라우팅된 상태),
106
95
  # completed_agents에서 마지막 generator를 찾는다
107
96
  case "$source_agent" in
108
- generator-frontend|generator-frontend-flutter)
97
+ generator-frontend)
109
98
  location="frontend"
110
99
  checks_key="frontend_checks"
111
100
  ;;
@@ -118,7 +107,7 @@ run_pre_eval_gate() {
118
107
  local last_gen
119
108
  last_gen=$(jq -r '.completed_agents // [] | map(select(startswith("generator-"))) | last // empty' "$PROGRESS" 2>/dev/null)
120
109
  case "$last_gen" in
121
- generator-frontend|generator-frontend-flutter)
110
+ generator-frontend)
122
111
  location="frontend"
123
112
  checks_key="frontend_checks"
124
113
  ;;
@@ -1,16 +1,10 @@
1
1
  #!/bin/bash
2
- # harness-team-worker.sh — Team Worker: Feature-level Gen→Eval loop (v4.0)
2
+ # harness-team-worker.sh — Team Worker v4: git worktree 격리 실행
3
3
  #
4
- # 1 Team = 1 프로세스. Feature Queue에서 feature를 꺼내
5
- # GenGate→Eval 루프를 claude -p 헤드리스로 자율 실행한다.
4
+ # Team 독립 worktree에서 작업하여 git 충돌 없이 병렬 실행.
5
+ # Feature PASS → main merge → worktree 정리.
6
6
  #
7
- # Usage:
8
- # bash scripts/harness-team-worker.sh <team_id> [project-root]
9
- #
10
- # Environment:
11
- # MAX_ATTEMPTS=3 Feature당 최대 Gen→Eval 시도 횟수
12
- # GEN_MODEL=sonnet Generator 모델
13
- # EVAL_MODEL=opus Evaluator 모델
7
+ # Usage: bash scripts/harness-team-worker.sh <team_id> [project-root]
14
8
 
15
9
  set -uo pipefail
16
10
 
@@ -39,10 +33,11 @@ FEATURES="$PROJECT_ROOT/.harness/actions/feature-list.json"
39
33
  CONFIG="$PROJECT_ROOT/.harness/config.json"
40
34
  PROGRESS_LOG="$PROJECT_ROOT/.harness/progress.log"
41
35
  QUEUE_MGR="$SCRIPT_DIR/harness-queue-manager.sh"
42
-
43
- # ── Lock file for git operations (prevent race conditions between teams) ──
44
36
  GIT_LOCK="$PROJECT_ROOT/.harness/.git-lock"
45
37
 
38
+ # Worktree base directory
39
+ WORKTREE_DIR="$PROJECT_ROOT/.worktrees/team-${TEAM_ID}"
40
+
46
41
  MAX_ATTEMPTS="${MAX_ATTEMPTS:-3}"
47
42
  GEN_MODEL="${GEN_MODEL:-sonnet}"
48
43
  EVAL_MODEL="${EVAL_MODEL:-opus}"
@@ -54,7 +49,6 @@ if [ -f "$CONFIG" ]; then
54
49
  if [ -n "$_em" ]; then EVAL_MODEL="$_em"; fi
55
50
  fi
56
51
 
57
- # ── ANSI helpers ──
58
52
  BOLD="\033[1m"
59
53
  DIM="\033[2m"
60
54
  GREEN="\033[32m"
@@ -64,42 +58,101 @@ CYAN="\033[36m"
64
58
  RESET="\033[0m"
65
59
 
66
60
  ts() { date +"%H:%M:%S"; }
61
+ log() { echo -e "[$(ts)] ${BOLD}T${TEAM_ID}${RESET} $*"; }
62
+ log_progress() { echo "$(date +"%Y-%m-%d %H:%M") | team-${TEAM_ID} | ${1} | ${2}" >> "$PROGRESS_LOG"; }
67
63
 
68
- log() {
69
- echo -e "[$(ts)] ${BOLD}T${TEAM_ID}${RESET} $*"
64
+ # ── Git lock ──
65
+ acquire_git_lock() {
66
+ local waited=0
67
+ while ! mkdir "$GIT_LOCK" 2>/dev/null; do
68
+ sleep 0.2
69
+ waited=$((waited + 1))
70
+ if [ "$waited" -ge 150 ]; then rm -rf "$GIT_LOCK"; mkdir "$GIT_LOCK" 2>/dev/null || true; break; fi
71
+ done
70
72
  }
73
+ release_git_lock() { rm -rf "$GIT_LOCK" 2>/dev/null || true; }
74
+
75
+ # ── Worktree management ──
76
+ setup_worktree() {
77
+ local branch="$1"
78
+
79
+ acquire_git_lock
80
+
81
+ # Clean previous worktree if exists
82
+ if [ -d "$WORKTREE_DIR" ]; then
83
+ (cd "$PROJECT_ROOT" && git worktree remove "$WORKTREE_DIR" --force 2>/dev/null) || rm -rf "$WORKTREE_DIR"
84
+ fi
85
+
86
+ # Create fresh worktree from main
87
+ (cd "$PROJECT_ROOT" && git worktree add "$WORKTREE_DIR" -b "$branch" main 2>/dev/null) || \
88
+ (cd "$PROJECT_ROOT" && git worktree add "$WORKTREE_DIR" "$branch" 2>/dev/null) || {
89
+ release_git_lock
90
+ log "${RED}Failed to create worktree${RESET}"
91
+ return 1
92
+ }
93
+
94
+ release_git_lock
71
95
 
72
- log_progress() {
73
- echo "$(date +"%Y-%m-%d") | team-${TEAM_ID} | ${1} | ${2}" >> "$PROGRESS_LOG"
96
+ # Expose .harness inside the worktree via a symlink (shared state, not a copy)
97
+ ln -sf "$PROJECT_ROOT/.harness" "$WORKTREE_DIR/.harness" 2>/dev/null || true
98
+
99
+ log "Worktree: ${WORKTREE_DIR}"
100
+ return 0
74
101
  }
75
102
 
76
- # ── Git lock — serialize git checkout/merge across teams ──
77
- acquire_git_lock() {
78
- local max_wait=60 waited=0
79
- while [ -f "$GIT_LOCK" ]; do
80
- sleep 1
81
- waited=$((waited + 1))
82
- if [ "$waited" -ge "$max_wait" ]; then
83
- log "${RED}Git lock timeout (${max_wait}s). Removing stale lock.${RESET}"
84
- rm -f "$GIT_LOCK"
85
- break
86
- fi
87
- done
88
- echo "T${TEAM_ID}" > "$GIT_LOCK"
103
+ cleanup_worktree() {
104
+ acquire_git_lock
105
+ if [ -d "$WORKTREE_DIR" ]; then
106
+ (cd "$PROJECT_ROOT" && git worktree remove "$WORKTREE_DIR" --force 2>/dev/null) || rm -rf "$WORKTREE_DIR"
107
+ fi
108
+ release_git_lock
89
109
  }
90
110
 
91
- release_git_lock() {
92
- rm -f "$GIT_LOCK"
111
+ merge_to_main() {
112
+ local branch="$1"
113
+
114
+ acquire_git_lock
115
+
116
+ local merge_ok=false
117
+
118
+ # Try merge
119
+ if (cd "$PROJECT_ROOT" && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null); then
120
+ merge_ok=true
121
+ else
122
+ # Conflict → abort, then try rebase in worktree
123
+ (cd "$PROJECT_ROOT" && git merge --abort 2>/dev/null) || true
124
+
125
+ log "${YELLOW}Merge conflict — rebasing in worktree...${RESET}"
126
+ if (cd "$WORKTREE_DIR" && git rebase main 2>/dev/null); then
127
+ # Retry merge after rebase
128
+ if (cd "$PROJECT_ROOT" && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null); then
129
+ merge_ok=true
130
+ fi
131
+ else
132
+ (cd "$WORKTREE_DIR" && git rebase --abort 2>/dev/null) || true
133
+ log "${RED}Rebase failed${RESET}"
134
+ fi
135
+ fi
136
+
137
+ # Clean up branch after merge
138
+ if [ "$merge_ok" = true ]; then
139
+ (cd "$PROJECT_ROOT" && git branch -d "$branch" 2>/dev/null) || true
140
+ fi
141
+
142
+ release_git_lock
143
+
144
+ [ "$merge_ok" = true ]
93
145
  }
94
146
 
95
- # ── Pre-eval gate ──
147
+ # ── Pre-eval gate (runs in worktree) ──
96
148
  run_pre_eval_gate() {
97
- local cwd="$PROJECT_ROOT"
149
+ local work_dir="$WORKTREE_DIR"
98
150
 
151
+ # Resolve cwd within worktree
99
152
  if [ -f "$CONFIG" ]; then
100
153
  _cwd=$(jq -r '.flow.pre_eval_gate.frontend_cwd // empty' "$CONFIG" 2>/dev/null)
101
154
  if [ -n "$_cwd" ] && [ "$_cwd" != "null" ]; then
102
- cwd="$PROJECT_ROOT/$_cwd"
155
+ work_dir="$WORKTREE_DIR/$_cwd"
103
156
  fi
104
157
  fi
105
158
 
@@ -107,26 +160,24 @@ run_pre_eval_gate() {
107
160
  if [ -f "$CONFIG" ]; then
108
161
  mapfile -t checks < <(jq -r '.flow.pre_eval_gate.frontend_checks[]' "$CONFIG" 2>/dev/null)
109
162
  fi
110
-
111
163
  if [ ${#checks[@]} -eq 0 ]; then
112
164
  checks=("npx tsc --noEmit" "npx eslint src/")
113
165
  fi
114
166
 
115
- local all_pass=true fail_cmds=""
167
+ local all_pass=true
116
168
  for cmd in "${checks[@]}"; do
117
- if (cd "$cwd" && timeout 120s bash -c "$cmd" >/dev/null 2>&1); then
169
+ if (cd "$work_dir" && timeout 120s bash -c "$cmd" >/dev/null 2>&1); then
118
170
  log " ${GREEN}✓${RESET} $cmd"
119
171
  else
120
172
  log " ${RED}✗${RESET} $cmd"
121
173
  all_pass=false
122
- fail_cmds+="$cmd; "
123
174
  fi
124
175
  done
125
176
 
126
177
  [ "$all_pass" = true ]
127
178
  }
128
179
 
129
- # ── Build generator prompt ──
180
+ # ── Build prompts ──
130
181
  build_gen_prompt() {
131
182
  local fid="$1" attempt="$2" feedback="${3:-}"
132
183
 
@@ -161,7 +212,7 @@ RULES:
161
212
  - Implement ONLY this single feature
162
213
  - Do NOT modify code belonging to other features
163
214
  - Follow existing code patterns and CONVENTIONS.md
164
- - When done, stage and commit with: git add -A && git commit -m 'feat(${fid}): ${fname}'
215
+ - When done, stage and commit: git add -A && git commit -m 'feat(${fid}): ${fname}'
165
216
  PROMPT
166
217
 
167
218
  if [ "$attempt" -gt 1 ] && [ -n "$feedback" ]; then
@@ -175,7 +226,6 @@ RETRY
175
226
  fi
176
227
  }
177
228
 
178
- # ── Build evaluator prompt ──
179
229
  build_eval_prompt() {
180
230
  local fid="$1"
181
231
 
@@ -218,15 +268,12 @@ FEEDBACK: one paragraph summary
218
268
  PROMPT
219
269
  }
220
270
 
221
- # ── Parse eval result (macOS-compatible, no grep -P) ──
222
271
  parse_eval_result() {
223
272
  local output="$1"
224
-
225
273
  local verdict score feedback
226
274
  verdict=$(echo "$output" | grep -E '^VERDICT:' | sed 's/VERDICT:[[:space:]]*//' | head -1)
227
275
  score=$(echo "$output" | grep -E '^SCORE:' | sed 's/SCORE:[[:space:]]*//' | head -1)
228
276
  feedback=$(echo "$output" | grep -E '^FEEDBACK:' | sed 's/FEEDBACK:[[:space:]]*//' | head -1)
229
-
230
277
  echo "${verdict:-UNKNOWN}|${score:-0.00}|${feedback:-no feedback}"
231
278
  }
232
279
 
@@ -237,14 +284,12 @@ log "${CYAN}Team ${TEAM_ID} started${RESET} (gen=${GEN_MODEL}, eval=${EVAL_MODEL
237
284
  log_progress "start" "Team ${TEAM_ID} worker started"
238
285
 
239
286
  while true; do
240
- # ── Dequeue next feature ──
287
+ # ── Dequeue ──
241
288
  feature_id=$(bash "$QUEUE_MGR" dequeue "$TEAM_ID" "$PROJECT_ROOT" 2>/dev/null)
242
289
 
243
290
  if [ -z "$feature_id" ] || [[ "$feature_id" == "["* ]]; then
244
291
  log "${DIM}No features ready. Waiting 10s...${RESET}"
245
292
  sleep 10
246
-
247
- # Check if completely done
248
293
  remaining=$(jq '(.queue.ready | length) + (.queue.blocked | length) + (.queue.in_progress | length)' "$QUEUE" 2>/dev/null || echo "1")
249
294
  if [ "${remaining}" -eq 0 ] 2>/dev/null; then
250
295
  log "${GREEN}${BOLD}ALL FEATURES COMPLETE. Team ${TEAM_ID} exiting.${RESET}"
@@ -254,16 +299,16 @@ while true; do
254
299
  continue
255
300
  fi
256
301
 
257
- log "${CYAN}▶ Dequeued ${feature_id}${RESET}"
302
+ log "${CYAN}▶ ${feature_id}${RESET}"
258
303
  log_progress "dequeue" "${feature_id}"
259
304
 
260
- # ── Create feature branch (with lock) ──
305
+ # ── Setup worktree ──
261
306
  branch="feature/${feature_id}"
262
- acquire_git_lock
263
- (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git checkout -b "$branch" 2>/dev/null) || \
264
- (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
265
- release_git_lock
266
- log "Branch: ${branch}"
307
+ if ! setup_worktree "$branch"; then
308
+ bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
309
+ log_progress "fail" "${feature_id} worktree setup failed"
310
+ continue
311
+ fi
267
312
 
268
313
  # ── Gen→Eval Loop ──
269
314
  attempt=1
@@ -271,119 +316,82 @@ while true; do
271
316
  passed=false
272
317
 
273
318
  while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
274
- log "${BOLD}── Attempt ${attempt}/${MAX_ATTEMPTS} ──${RESET}"
319
+ log "${BOLD}── ${feature_id} attempt ${attempt}/${MAX_ATTEMPTS} ──${RESET}"
275
320
 
276
- # ── Generate ──
277
- log "Gen ${feature_id} (${GEN_MODEL})..."
321
+ # ── Generate (in worktree) ──
322
+ log "Gen (${GEN_MODEL})..."
278
323
  bash "$QUEUE_MGR" update_phase "$feature_id" "gen" "$attempt" "$PROJECT_ROOT" 2>/dev/null
279
324
 
280
325
  gen_prompt=$(build_gen_prompt "$feature_id" "$attempt" "$eval_feedback")
281
326
 
282
327
  gen_start=$(date +%s)
283
- log "${DIM} claude -p --dangerously-skip-permissions --model ${GEN_MODEL}${RESET}"
284
- gen_output=$(cd "$PROJECT_ROOT" && claude -p "$gen_prompt" \
328
+ gen_output=$(cd "$WORKTREE_DIR" && claude -p "$gen_prompt" \
285
329
  --dangerously-skip-permissions \
286
330
  --model "$GEN_MODEL" \
287
331
  --output-format text 2>&1 | tee /dev/stderr) 2>&1 || true
288
332
  gen_elapsed=$(( $(date +%s) - gen_start ))
289
333
 
290
- files_changed=$(cd "$PROJECT_ROOT" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ')
334
+ files_changed=$(cd "$WORKTREE_DIR" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ')
291
335
  log "Gen done (${gen_elapsed}s) — ${files_changed} files"
292
- log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files, ${gen_elapsed}s"
336
+ log_progress "gen" "${feature_id} #${attempt}: ${files_changed} files, ${gen_elapsed}s"
293
337
 
294
- # Auto-commit
295
- (cd "$PROJECT_ROOT" && git add -A && git commit -m "feat(${feature_id}): gen attempt ${attempt}" --no-verify 2>/dev/null) || true
338
+ # Auto-commit in worktree
339
+ (cd "$WORKTREE_DIR" && git add -A && git commit -m "feat(${feature_id}): attempt ${attempt}" --no-verify 2>/dev/null) || true
296
340
 
297
- # ── Pre-eval gate ──
298
- log "Pre-eval gate..."
341
+ # ── Pre-eval gate (in worktree) ──
342
+ log "Gate..."
299
343
  bash "$QUEUE_MGR" update_phase "$feature_id" "gate" "$attempt" "$PROJECT_ROOT" 2>/dev/null
300
344
 
301
- if ! run_pre_eval_gate "$feature_id"; then
302
- log "${RED}Gate FAIL — retrying gen${RESET}"
303
- eval_feedback="Pre-eval gate failed: type check or lint errors. Fix compilation and lint issues."
345
+ if ! run_pre_eval_gate; then
346
+ log "${RED}Gate FAIL${RESET}"
347
+ eval_feedback="Pre-eval gate failed: type check or lint errors."
304
348
  attempt=$((attempt + 1))
305
349
  continue
306
350
  fi
307
351
 
308
- # ── Evaluate ──
309
- log "Eval ${feature_id} (${EVAL_MODEL})..."
352
+ # ── Evaluate (in worktree) ──
353
+ log "Eval (${EVAL_MODEL})..."
310
354
  bash "$QUEUE_MGR" update_phase "$feature_id" "eval" "$attempt" "$PROJECT_ROOT" 2>/dev/null
311
355
 
312
356
  eval_prompt=$(build_eval_prompt "$feature_id")
313
357
 
314
358
  eval_start=$(date +%s)
315
- log "${DIM} claude -p --dangerously-skip-permissions --model ${EVAL_MODEL}${RESET}"
316
- eval_output=$(cd "$PROJECT_ROOT" && claude -p "$eval_prompt" \
359
+ eval_output=$(cd "$WORKTREE_DIR" && claude -p "$eval_prompt" \
317
360
  --dangerously-skip-permissions \
318
361
  --model "$EVAL_MODEL" \
319
362
  --output-format text 2>&1 | tee /dev/stderr) 2>&1 || true
320
363
  eval_elapsed=$(( $(date +%s) - eval_start ))
321
364
 
322
- # Parse result
323
365
  result_line=$(parse_eval_result "$eval_output")
324
366
  verdict=$(echo "$result_line" | cut -d'|' -f1)
325
367
  score=$(echo "$result_line" | cut -d'|' -f2)
326
368
  feedback=$(echo "$result_line" | cut -d'|' -f3-)
327
369
 
328
- log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score}) ${eval_elapsed}s"
370
+ log_progress "eval" "${feature_id} #${attempt}: ${verdict} (${score}) ${eval_elapsed}s"
329
371
 
330
372
  if [ "$verdict" = "PASS" ]; then
331
- log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
373
+ log "${GREEN}${BOLD}✓ PASS ${score}/3.00${RESET} (${eval_elapsed}s)"
332
374
  passed=true
333
375
  break
334
376
  else
335
- log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
377
+ log "${RED}✗ FAIL ${score}/3.00${RESET} (${eval_elapsed}s)"
336
378
  log "${DIM} ${feedback}${RESET}"
337
379
  eval_feedback="$feedback"
338
380
  attempt=$((attempt + 1))
339
381
  fi
340
382
  done
341
383
 
342
- # ══════════════════════════════════════════
343
- # Phase 3: Branch merge with conflict handling
344
- # ══════════════════════════════════════════
384
+ # ── Result ──
345
385
  if [ "$passed" = true ]; then
346
- log "Merging ${branch} → main..."
347
- acquire_git_lock
348
-
349
- merge_ok=false
350
-
351
- # Attempt 1: straight merge
352
- if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null); then
353
- merge_ok=true
354
- else
355
- # Attempt 2: abort failed merge, rebase, re-eval gate, then merge
356
- log "${YELLOW}Conflict detected — rebasing ${branch} onto main...${RESET}"
357
- (cd "$PROJECT_ROOT" && git merge --abort 2>/dev/null) || true
358
- (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
359
-
360
- if (cd "$PROJECT_ROOT" && git rebase main 2>/dev/null); then
361
- log "Rebase OK. Re-running gate..."
362
-
363
- if run_pre_eval_gate "$feature_id"; then
364
- log "Gate still PASS after rebase."
365
- if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null); then
366
- merge_ok=true
367
- fi
368
- else
369
- log "${RED}Gate FAIL after rebase — needs re-gen${RESET}"
370
- fi
371
- else
372
- log "${RED}Rebase failed — conflicts too complex${RESET}"
373
- (cd "$PROJECT_ROOT" && git rebase --abort 2>/dev/null) || true
374
- fi
375
- fi
376
-
377
- release_git_lock
378
-
379
- if [ "$merge_ok" = true ]; then
380
- # Clean up feature branch
381
- (cd "$PROJECT_ROOT" && git branch -d "$branch" 2>/dev/null) || true
386
+ log "Merging → main..."
382
387
 
388
+ if merge_to_main "$branch"; then
389
+ # Cleanup worktree after successful merge
390
+ cleanup_worktree
383
391
  bash "$QUEUE_MGR" pass "$feature_id" "$PROJECT_ROOT" 2>/dev/null
384
- log_progress "pass" "${feature_id} merged to main"
392
+ log_progress "pass" "${feature_id} merged & cleaned"
385
393
 
386
- # Update feature-list.json passes
394
+ # Update feature-list.json
387
395
  if [ -f "$FEATURES" ]; then
388
396
  jq --arg fid "$feature_id" '
389
397
  .features |= map(
@@ -396,18 +404,15 @@ while true; do
396
404
 
397
405
  log "${GREEN}${BOLD}✓ ${feature_id} DONE${RESET}"
398
406
  else
399
- log "${RED}${BOLD}Merge failed — ${feature_id} marked as failed${RESET}"
400
- (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
407
+ cleanup_worktree
401
408
  bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
409
+ log "${RED}Merge failed → ${feature_id} FAILED${RESET}"
402
410
  log_progress "merge-fail" "${feature_id}"
403
411
  fi
404
-
405
412
  else
406
- log "${RED}${BOLD}✗ ${feature_id} FAILED after ${MAX_ATTEMPTS} attempts${RESET}"
407
- acquire_git_lock
408
- (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
409
- release_git_lock
413
+ cleanup_worktree
410
414
  bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
415
+ log "${RED}${BOLD}✗ ${feature_id} FAILED (${MAX_ATTEMPTS} attempts)${RESET}"
411
416
  log_progress "fail" "${feature_id} after ${MAX_ATTEMPTS} attempts"
412
417
  fi
413
418