@walwal-harness/cli 4.0.0-alpha.9 → 4.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -273
- package/assets/templates/config.json +1 -48
- package/assets/templates/gitignore-append.txt +1 -0
- package/bin/init.js +42 -18
- package/package.json +1 -1
- package/scripts/harness-dashboard-v4.sh +27 -88
- package/scripts/harness-next.sh +4 -15
- package/scripts/harness-user-prompt-submit.sh +10 -0
- package/skills/dispatcher/SKILL.md +7 -2
- package/skills/team-action/SKILL.md +26 -0
- package/skills/team-stop/SKILL.md +19 -0
- package/scripts/harness-control-v4.sh +0 -97
- package/scripts/harness-studio-v4.sh +0 -122
- package/scripts/harness-team-worker.sh +0 -415
- package/skills/evaluator-functional-flutter/SKILL.md +0 -206
- package/skills/evaluator-functional-flutter/references/ia-compliance.md +0 -77
- package/skills/evaluator-functional-flutter/references/scoring-rubric.md +0 -132
- package/skills/evaluator-functional-flutter/references/static-check-rules.md +0 -99
- package/skills/generator-frontend-flutter/SKILL.md +0 -173
- package/skills/generator-frontend-flutter/references/anti-patterns.md +0 -320
- package/skills/generator-frontend-flutter/references/api-layer-pattern.md +0 -233
- package/skills/generator-frontend-flutter/references/flutter-web-pattern.md +0 -273
- package/skills/generator-frontend-flutter/references/i18n-pattern.md +0 -102
- package/skills/generator-frontend-flutter/references/riverpod-pattern.md +0 -199
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# harness-studio-v4.sh — Harness Studio v4: 3-Column Layout
|
|
3
|
-
#
|
|
4
|
-
# ┌──────────────┬──────────────┬──────────────┐
|
|
5
|
-
# │ │ │ Team 1 │
|
|
6
|
-
# │ │ ├──────────────┤
|
|
7
|
-
# │ Main │ Dashboard │ Team 2 │
|
|
8
|
-
# │ (Claude) │ (read-only) ├──────────────┤
|
|
9
|
-
# │ │ │ Team 3 │
|
|
10
|
-
# └──────────────┴──────────────┴──────────────┘
|
|
11
|
-
#
|
|
12
|
-
# Main: 사용자가 직접 Claude Code를 실행하는 대화형 세션
|
|
13
|
-
# Dashboard: feature-queue + team status 자동 갱신 (입력 불가)
|
|
14
|
-
# Team 1~3: claude -p headless worker (입력 불가, 로그만 표시)
|
|
15
|
-
#
|
|
16
|
-
# Usage:
|
|
17
|
-
# bash scripts/harness-studio-v4.sh [project-root]
|
|
18
|
-
# bash scripts/harness-studio-v4.sh --kill
|
|
19
|
-
|
|
20
|
-
set -euo pipefail

# Directory containing this script; sibling harness scripts are resolved from it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SESSION_NAME="harness-v4"

PROJECT_ROOT=""

# Arguments:
#   --kill   kill the tmux session and exit 0
#   <dir>    any argument that is an existing directory becomes the project root
#            (the last one wins); anything else is ignored
for arg in "$@"; do
  case "$arg" in
    --kill)
      # if/else rather than `a && b || c`, so "No session." cannot also be
      # printed when the first echo itself fails.
      if tmux kill-session -t "$SESSION_NAME" 2>/dev/null; then
        echo "Killed."
      else
        echo "No session."
      fi
      exit 0
      ;;
    *)
      if [ -d "$arg" ]; then PROJECT_ROOT="$arg"; fi
      ;;
  esac
done

# No explicit root: walk up from the current directory until a .harness/
# directory is found (the filesystem root itself is not inspected).
if [ -z "$PROJECT_ROOT" ]; then
  dir="$(pwd)"
  while [ "$dir" != "/" ]; do
    if [ -d "$dir/.harness" ]; then PROJECT_ROOT="$dir"; break; fi
    dir="$(dirname "$dir")"
  done
fi

if [ -z "$PROJECT_ROOT" ] || [ ! -d "$PROJECT_ROOT/.harness" ]; then
  echo "Error: .harness/ not found." >&2
  exit 1
fi

# Normalise to an absolute path. The panes created later start with their
# working directory set to the project root and receive PROJECT_ROOT as an
# argument; a relative value (e.g. "./app") would then be resolved against
# the wrong directory and the child scripts would not find .harness/.
PROJECT_ROOT="$(cd "$PROJECT_ROOT" && pwd)"

echo "Project: $PROJECT_ROOT"
echo "Session: $SESSION_NAME"
|
|
54
|
-
|
|
55
|
-
# Always start from a fresh session; ignore the error when none exists.
tmux kill-session -t "$SESSION_NAME" 2>/dev/null || true

# ── Initialize or recover queue ──
QUEUE="$PROJECT_ROOT/.harness/actions/feature-queue.json"
if [ ! -f "$QUEUE" ]; then
  echo "Initializing feature queue..."
  bash "$SCRIPT_DIR/harness-queue-manager.sh" init "$PROJECT_ROOT"
else
  echo "Recovering stale queue state..."
  bash "$SCRIPT_DIR/harness-queue-manager.sh" recover "$PROJECT_ROOT"
fi

#######################################
# Build the shell-command string handed to tmux for a non-interactive pane.
# Every argument is quoted with printf %q (twice: once for the inner
# `exec bash ...` line, once for the outer `bash -c` argument) so that paths
# containing spaces, quotes, `$` or backticks reach the child script intact
# instead of breaking the command or being re-interpreted by a shell.
# Arguments: $1 - script path, $2.. - arguments for that script
# Outputs:   the command string on stdout
#######################################
pane_command() {
  local inner
  inner="exec bash$(printf ' %q' "$@")"
  printf 'bash --norc --noprofile -c %q' "$inner"
}

# ══════════════════════════════════════════
# 3-Column Layout (Main | Dashboard | Teams)
# ══════════════════════════════════════════

# Column 1: Main (interactive shell — user runs claude here)
PANE_MAIN=$(tmux new-session -d -s "$SESSION_NAME" -c "$PROJECT_ROOT" -x 220 -y 55 \
  -P -F '#{pane_id}')

# Column 2: Dashboard (split right from Main, 66% remaining → 33% each of 3 cols)
PANE_DASH=$(tmux split-window -h -p 66 -t "$PANE_MAIN" -c "$PROJECT_ROOT" \
  -P -F '#{pane_id}' \
  "$(pane_command "${SCRIPT_DIR}/harness-dashboard-v4.sh" "${PROJECT_ROOT}")")

# Column 3: Team 1 (split right from Dashboard, 50% of remaining = 33% total)
PANE_T1=$(tmux split-window -h -p 50 -t "$PANE_DASH" -c "$PROJECT_ROOT" \
  -P -F '#{pane_id}' \
  "$(pane_command "${SCRIPT_DIR}/harness-team-worker.sh" 1 "${PROJECT_ROOT}")")

# Team 2 (split below Team 1)
PANE_T2=$(tmux split-window -v -p 66 -t "$PANE_T1" -c "$PROJECT_ROOT" \
  -P -F '#{pane_id}' \
  "$(pane_command "${SCRIPT_DIR}/harness-team-worker.sh" 2 "${PROJECT_ROOT}")")

# Team 3 (split below Team 2)
PANE_T3=$(tmux split-window -v -p 50 -t "$PANE_T2" -c "$PROJECT_ROOT" \
  -P -F '#{pane_id}' \
  "$(pane_command "${SCRIPT_DIR}/harness-team-worker.sh" 3 "${PROJECT_ROOT}")")

# ── Launch Claude in Main pane ──
# Typed into the interactive shell of the Main pane rather than run as the
# pane command, so the pane keeps a shell after claude exits.
tmux send-keys -t "$PANE_MAIN" "unset npm_config_prefix 2>/dev/null; clear && claude --dangerously-skip-permissions" Enter

# ── Pane titles ──
tmux select-pane -t "$PANE_MAIN" -T "Main"
tmux select-pane -t "$PANE_DASH" -T "Dashboard"
tmux select-pane -t "$PANE_T1" -T "Team 1"
tmux select-pane -t "$PANE_T2" -T "Team 2"
tmux select-pane -t "$PANE_T3" -T "Team 3"

# Show pane titles in the border; best-effort, errors are ignored.
tmux set-option -t "$SESSION_NAME" pane-border-status top 2>/dev/null || true
tmux set-option -t "$SESSION_NAME" pane-border-format " #{pane_title} " 2>/dev/null || true

# ── Focus Main pane ──
tmux select-pane -t "$PANE_MAIN"

# ── Attach ──
# Inside an existing tmux client, switch to the new session; otherwise print
# a short legend and attach.
if [ -n "${TMUX:-}" ]; then
  tmux switch-client -t "$SESSION_NAME"
else
  echo ""
  echo "Launching Harness Studio v4..."
  echo "  Main (left)       : Interactive — run 'claude' here"
  echo "  Dashboard (mid)   : Feature Queue + Team status (auto-refresh)"
  echo "  Team 1-3 (right)  : Parallel workers (headless, log only)"
  echo ""
  tmux attach -t "$SESSION_NAME"
fi
|
|
@@ -1,415 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# harness-team-worker.sh — Team Worker: Feature-level Gen→Eval loop (v4.0)
|
|
3
|
-
#
|
|
4
|
-
# 1 Team = 1 프로세스. Feature Queue에서 feature를 꺼내
|
|
5
|
-
# Gen→Gate→Eval 루프를 claude -p 헤드리스로 자율 실행한다.
|
|
6
|
-
#
|
|
7
|
-
# Usage:
|
|
8
|
-
# bash scripts/harness-team-worker.sh <team_id> [project-root]
|
|
9
|
-
#
|
|
10
|
-
# Environment:
|
|
11
|
-
# MAX_ATTEMPTS=3 Feature당 최대 Gen→Eval 시도 횟수
|
|
12
|
-
# GEN_MODEL=sonnet Generator 모델
|
|
13
|
-
# EVAL_MODEL=opus Evaluator 모델
|
|
14
|
-
|
|
15
|
-
set -uo pipefail

# The team id is mandatory; the parameter expansion aborts with the usage
# text when it is missing. After the shift, $1 is the optional project root.
TEAM_ID="${1:?Usage: harness-team-worker.sh <team_id> [project-root]}"
shift || true

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ── Resolve project root ──
# Use the explicit argument if given; otherwise climb from the current
# directory until a directory containing .harness/ is found.
PROJECT_ROOT="${1:-}"
if [ -z "$PROJECT_ROOT" ]; then
  dir="$(pwd)"
  until [ "$dir" = "/" ]; do
    if [ -d "$dir/.harness" ]; then
      PROJECT_ROOT="$dir"
      break
    fi
    dir="$(dirname "$dir")"
  done
fi

if [ -n "$PROJECT_ROOT" ] && [ -d "$PROJECT_ROOT/.harness" ]; then
  :
else
  echo "[T${TEAM_ID}] .harness/ not found."
  exit 1
fi

# Files under .harness/ that this worker reads or writes.
QUEUE="$PROJECT_ROOT/.harness/actions/feature-queue.json"
FEATURES="$PROJECT_ROOT/.harness/actions/feature-list.json"
CONFIG="$PROJECT_ROOT/.harness/config.json"
PROGRESS_LOG="$PROJECT_ROOT/.harness/progress.log"
QUEUE_MGR="$SCRIPT_DIR/harness-queue-manager.sh"

# ── Lock file for git operations (prevent race conditions between teams) ──
GIT_LOCK="$PROJECT_ROOT/.harness/.git-lock"

# Defaults, overridable from the environment.
MAX_ATTEMPTS="${MAX_ATTEMPTS:-3}"
GEN_MODEL="${GEN_MODEL:-sonnet}"
EVAL_MODEL="${EVAL_MODEL:-opus}"

# Model names present in config.json take precedence over the values above.
if [ -f "$CONFIG" ]; then
  _gm=$(jq -r '.agents["generator-frontend"].model // empty' "$CONFIG" 2>/dev/null)
  _em=$(jq -r '.agents["evaluator-functional"].model // empty' "$CONFIG" 2>/dev/null)
  GEN_MODEL="${_gm:-$GEN_MODEL}"
  EVAL_MODEL="${_em:-$EVAL_MODEL}"
fi
|
|
56
|
-
|
|
57
|
-
# ── ANSI helpers ──
|
|
58
|
-
# ANSI colour codes, stored as real ESC sequences ($'...') so they can be
# printed verbatim. This lets log() print message text with '%s' instead of
# `echo -e`, which would also interpret backslash escapes inside the message
# itself (e.g. "\c" or "\n" in evaluator feedback would truncate or split the
# log line).
BOLD=$'\033[1m'
DIM=$'\033[2m'
GREEN=$'\033[32m'
YELLOW=$'\033[33m'
RED=$'\033[31m'
CYAN=$'\033[36m'
RESET=$'\033[0m'

# Current wall-clock time as HH:MM:SS.
ts() { date +"%H:%M:%S"; }

#######################################
# Print one timestamped, team-tagged log line to stdout.
# Globals:   TEAM_ID, BOLD, RESET (read)
# Arguments: message words; joined with spaces and printed literally
#######################################
log() {
  printf '[%s] %sT%s%s %s\n' "$(ts)" "$BOLD" "$TEAM_ID" "$RESET" "$*"
}

#######################################
# Append "<date> | team-<id> | <event> | <detail>" to the progress log.
# Globals:   TEAM_ID, PROGRESS_LOG (read)
# Arguments: $1 - event name, $2 - detail text
#######################################
log_progress() {
  printf '%s\n' "$(date +"%Y-%m-%d") | team-${TEAM_ID} | ${1} | ${2}" >> "$PROGRESS_LOG"
}
|
|
75
|
-
|
|
76
|
-
# ── Git lock — serialize git checkout/merge across teams ──
|
|
77
|
-
#######################################
# Acquire the cross-team git lock, waiting for the current holder.
#
# The lock file is created with noclobber inside a subshell, so the
# "does it exist?" test and the creation are a single atomic step; two
# teams can no longer both see "no lock" and both proceed. The subshell
# keeps noclobber from leaking into the caller.
#
# After the wait budget is exhausted the lock is treated as stale: it is
# removed and taken over unconditionally.
#
# Globals:   GIT_LOCK, TEAM_ID, RED, RESET (read)
#            GIT_LOCK_MAX_WAIT (read, optional; seconds, default 60)
# Returns:   0 once the lock file holds "T<team id>"; non-zero only if the
#            lock file cannot be written during stale-lock takeover.
#######################################
acquire_git_lock() {
  local max_wait="${GIT_LOCK_MAX_WAIT:-60}" waited=0

  until (set -o noclobber; echo "T${TEAM_ID}" > "$GIT_LOCK") 2>/dev/null; do
    if [ "$waited" -ge "$max_wait" ]; then
      log "${RED}Git lock timeout (${max_wait}s). Removing stale lock.${RESET}"
      rm -f "$GIT_LOCK"
      echo "T${TEAM_ID}" > "$GIT_LOCK"
      return
    fi
    sleep 1
    waited=$((waited + 1))
  done
}
|
|
90
|
-
|
|
91
|
-
#######################################
# Release the cross-team git lock, but only if this team still owns it.
#
# acquire_git_lock removes locks it considers stale after a timeout, so a
# slow holder may find that another team has taken the lock in the
# meantime. Deleting the file unconditionally would then drop the other
# team's lock; the first line of the file ("T<team id>") is checked first.
#
# Globals:   GIT_LOCK, TEAM_ID (read)
# Returns:   0 when the lock is absent, owned by someone else, or removed.
#######################################
release_git_lock() {
  local owner=""

  if [ -f "$GIT_LOCK" ]; then
    IFS= read -r owner < "$GIT_LOCK" || true
    if [ "$owner" != "T${TEAM_ID}" ]; then
      return 0
    fi
  fi
  rm -f "$GIT_LOCK"
}
|
|
94
|
-
|
|
95
|
-
# ── Pre-eval gate ──
|
|
96
|
-
#######################################
# Run the pre-evaluation gate: a list of shell commands that must all
# succeed before a feature is handed to the evaluator.
#
# The commands come from .flow.pre_eval_gate.frontend_checks in CONFIG and
# run in PROJECT_ROOT, or in PROJECT_ROOT/<frontend_cwd> when that key is
# set. Without configured checks, "npx tsc --noEmit" and "npx eslint src/"
# are used. Each command gets a 120 s limit when a `timeout` (or
# `gtimeout`) binary is available and runs unbounded otherwise, so a
# platform without coreutils does not fail every check.
#
# The check list is read with a while/read loop rather than `mapfile`,
# which does not exist in bash 3.2 and would silently drop the configured
# checks there.
#
# Globals:   PROJECT_ROOT, CONFIG, GREEN, RED, RESET (read)
# Arguments: none used (callers pass the feature id; it is ignored)
# Outputs:   one ✓/✗ log line per check
# Returns:   0 if every check passed, 1 otherwise
#######################################
run_pre_eval_gate() {
  local cwd="$PROJECT_ROOT"
  local cfg_cwd="" line="" cmd="" timeout_bin=""
  local all_pass=true
  local checks=()

  if [ -f "$CONFIG" ]; then
    cfg_cwd=$(jq -r '.flow.pre_eval_gate.frontend_cwd // empty' "$CONFIG" 2>/dev/null)
    if [ -n "$cfg_cwd" ] && [ "$cfg_cwd" != "null" ]; then
      cwd="$PROJECT_ROOT/$cfg_cwd"
    fi

    while IFS= read -r line; do
      if [ -n "$line" ]; then checks+=("$line"); fi
    done < <(jq -r '.flow.pre_eval_gate.frontend_checks[]' "$CONFIG" 2>/dev/null)
  fi

  if [ ${#checks[@]} -eq 0 ]; then
    checks=("npx tsc --noEmit" "npx eslint src/")
  fi

  if command -v timeout >/dev/null 2>&1; then
    timeout_bin="timeout"
  elif command -v gtimeout >/dev/null 2>&1; then
    timeout_bin="gtimeout"
  fi

  for cmd in "${checks[@]}"; do
    # Subshell: the cd must not change the worker's own directory.
    if (
      cd "$cwd" || exit 1
      if [ -n "$timeout_bin" ]; then
        "$timeout_bin" 120s bash -c "$cmd"
      else
        bash -c "$cmd"
      fi
    ) >/dev/null 2>&1; then
      log "  ${GREEN}✓${RESET} $cmd"
    else
      log "  ${RED}✗${RESET} $cmd"
      all_pass=false
    fi
  done

  [ "$all_pass" = true ]
}
|
|
128
|
-
|
|
129
|
-
# ── Build generator prompt ──
|
|
130
|
-
#######################################
# Print the generator prompt for one feature to stdout.
#
# The feature object is looked up by id in FEATURES and its name,
# description, acceptance criteria and dependencies are interpolated into
# a fixed template. From the second attempt on, when feedback is supplied,
# a retry section carrying that feedback is appended.
#
# Globals:   FEATURES, PROJECT_ROOT (read)
# Arguments: $1 - feature id
#            $2 - attempt number
#            $3 - feedback from the previous attempt (optional)
#######################################
build_gen_prompt() {
  local fid="$1"
  local attempt="$2"
  local feedback="${3:-}"

  local feature_json
  feature_json=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)

  local fname fdesc ac_json deps_json project_name
  fname=$(jq -r '.name // .description // ""' <<<"$feature_json")
  fdesc=$(jq -r '.description // ""' <<<"$feature_json")
  ac_json=$(jq -c '.ac // []' <<<"$feature_json")
  deps_json=$(jq -c '.depends_on // []' <<<"$feature_json")
  project_name=$(jq -r '.project_name // ""' "$PROJECT_ROOT/.harness/progress.json" 2>/dev/null)

  cat <<GEN_TEMPLATE
You are Generator-Frontend for a harness engineering project.

PROJECT: ${project_name}
CONVENTIONS: Read CONVENTIONS.md if it exists.

YOUR TASK: Implement ONLY feature ${fid}: ${fname}
Description: ${fdesc}
Dependencies (already implemented): ${deps_json}
Acceptance Criteria: ${ac_json}

Read these files for context:
- .harness/actions/feature-list.json (filter to ${fid})
- .harness/actions/api-contract.json (relevant endpoints)
- .harness/actions/plan.md (overall design)

RULES:
- Implement ONLY this single feature
- Do NOT modify code belonging to other features
- Follow existing code patterns and CONVENTIONS.md
- When done, stage and commit with: git add -A && git commit -m 'feat(${fid}): ${fname}'
GEN_TEMPLATE

  # Retry section: only on a later attempt that actually has feedback.
  if [ "$attempt" -gt 1 ] && [ -n "$feedback" ]; then
    cat <<RETRY_TEMPLATE

PREVIOUS EVAL FEEDBACK (attempt ${attempt}):
${feedback}

Fix the issues above. Focus specifically on the failed criteria.
RETRY_TEMPLATE
  fi
}
|
|
177
|
-
|
|
178
|
-
# ── Build evaluator prompt ──
|
|
179
|
-
#######################################
# Print the evaluator prompt for one feature to stdout.
#
# The template contains the feature's name and acceptance criteria (from
# FEATURES), the ids listed under .queue.passed in QUEUE for regression
# checking, the scoring rubric, and the exact result block the evaluator
# is asked to emit.
#
# Globals:   FEATURES, QUEUE (read)
# Arguments: $1 - feature id
#######################################
build_eval_prompt() {
  local fid="$1"

  local feature_json
  feature_json=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)

  local fname ac_json passed_list
  fname=$(jq -r '.name // .description // ""' <<<"$feature_json")
  ac_json=$(jq -c '.ac // []' <<<"$feature_json")
  passed_list=$(jq -r '.queue.passed // [] | join(", ")' "$QUEUE" 2>/dev/null)

  cat <<EVAL_TEMPLATE
You are Evaluator-Functional for a harness engineering project.

TASK: Evaluate feature ${fid}: ${fname}

Acceptance Criteria to verify:
${ac_json}

Previously passed features (regression check): [${passed_list}]

SCORING RUBRIC (R1-R5):
R1: API Contract compliance (25%)
R2: Acceptance Criteria full pass (25%)
R3: Negative tests (20%)
R4: E2E scenario (15%)
R5: Error handling & edge cases (15%)

PASS threshold: 2.80 / 3.00
FAIL: any AC not met, any regression failure

You MUST output this exact block (parseable by automation):
---EVAL-RESULT---
FEATURE: ${fid}
VERDICT: PASS or FAIL
SCORE: X.XX
FEEDBACK: one paragraph summary
---END-EVAL-RESULT---
EVAL_TEMPLATE
}
|
|
220
|
-
|
|
221
|
-
# ── Parse eval result (macOS-compatible, no grep -P) ──
|
|
222
|
-
#######################################
# Extract verdict, score and feedback from evaluator output
# (macOS-compatible: no grep -P, no bash 4 features).
#
# Fields are read from the last complete
# ---EVAL-RESULT--- / ---END-EVAL-RESULT--- block, so a "VERDICT:" line
# elsewhere in the text (for instance the evaluator repeating the
# "PASS or FAIL" template) is not picked up instead of the real result.
# If no block marker is present, the whole output is searched.
# Carriage returns and surrounding whitespace are stripped, and a verdict
# of pass/fail in any letter case is normalised to PASS/FAIL, so that
# "PASS " or "Pass" is not treated as a failure by the caller's exact
# string comparison.
#
# Arguments: $1 - raw evaluator output
# Outputs:   "<verdict>|<score>|<feedback>" on stdout; missing fields
#            become UNKNOWN, 0.00 and "no feedback" respectively
#######################################
parse_eval_result() {
  local output="$1"
  local block verdict score feedback

  block=$(printf '%s\n' "$output" | tr -d '\r' | awk '
    /^[[:space:]]*---EVAL-RESULT---/ {
      buf = ""
      inside = 1
      next
    }
    /^[[:space:]]*---END-EVAL-RESULT---/ {
      if (inside) {
        last = buf
        inside = 0
      }
      next
    }
    inside { buf = buf $0 "\n" }
    END {
      if (last != "") printf "%s", last
      else if (inside) printf "%s", buf
    }
  ')
  if [ -z "$block" ]; then
    block=$(printf '%s\n' "$output" | tr -d '\r')
  fi

  # awk program: print the value of the first "<key>:" line, trimmed.
  local extract='
    { line = $0; sub(/^[[:space:]]+/, "", line) }
    index(line, key ":") == 1 {
      sub(/^[^:]*:[[:space:]]*/, "", line)
      sub(/[[:space:]]+$/, "", line)
      print line
      exit
    }'

  verdict=$(printf '%s\n' "$block" | awk -v key="VERDICT" "$extract")
  score=$(printf '%s\n' "$block" | awk -v key="SCORE" "$extract")
  feedback=$(printf '%s\n' "$block" | awk -v key="FEEDBACK" "$extract")

  case "$(printf '%s' "$verdict" | tr '[:lower:]' '[:upper:]')" in
    PASS) verdict="PASS" ;;
    FAIL) verdict="FAIL" ;;
  esac

  echo "${verdict:-UNKNOWN}|${score:-0.00}|${feedback:-no feedback}"
}
|
|
232
|
-
|
|
233
|
-
# ══════════════════════════════════════════
|
|
234
|
-
# Main Worker Loop
|
|
235
|
-
# ══════════════════════════════════════════
|
|
236
|
-
# Main worker loop: repeatedly take a feature from the queue, run up to
# MAX_ATTEMPTS rounds of generate → gate → evaluate on a feature branch,
# and merge the branch into main when the evaluator reports PASS.
log "${CYAN}Team ${TEAM_ID} started${RESET} (gen=${GEN_MODEL}, eval=${EVAL_MODEL}, max=${MAX_ATTEMPTS})"
log_progress "start" "Team ${TEAM_ID} worker started"

while true; do
  # ── Dequeue next feature ──
  feature_id=$(bash "$QUEUE_MGR" dequeue "$TEAM_ID" "$PROJECT_ROOT" 2>/dev/null)

  # Empty output, or output starting with "[", means no feature was handed out.
  if [ -z "$feature_id" ] || [[ "$feature_id" == "["* ]]; then
    log "${DIM}No features ready. Waiting 10s...${RESET}"
    sleep 10

    # Check if completely done. If jq fails, assume work remains.
    remaining=$(jq '(.queue.ready | length) + (.queue.blocked | length) + (.queue.in_progress | length)' "$QUEUE" 2>/dev/null || echo "1")
    if [ "${remaining}" -eq 0 ] 2>/dev/null; then
      log "${GREEN}${BOLD}ALL FEATURES COMPLETE. Team ${TEAM_ID} exiting.${RESET}"
      log_progress "complete" "All features done"
      exit 0
    fi
    continue
  fi

  log "${CYAN}▶ Dequeued ${feature_id}${RESET}"
  log_progress "dequeue" "${feature_id}"

  # ── Create feature branch (with lock) ──
  # Try a fresh branch off main first, then fall back to an existing branch.
  branch="feature/${feature_id}"
  acquire_git_lock
  (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git checkout -b "$branch" 2>/dev/null) || \
    (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
  current_branch=$(cd "$PROJECT_ROOT" && git rev-parse --abbrev-ref HEAD 2>/dev/null) || current_branch=""
  release_git_lock

  # Both checkouts may have failed (no "main" branch, invalid branch name,
  # dirty tree). Generating anyway would commit to whatever branch happens
  # to be checked out, so fail the feature now instead of after generation.
  if [ "$current_branch" != "$branch" ]; then
    log "${RED}${BOLD}Could not switch to ${branch} (on '${current_branch:-unknown}') — ${feature_id} marked as failed${RESET}"
    bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
    log_progress "branch-fail" "${feature_id}"
    sleep 2
    continue
  fi
  log "Branch: ${branch}"

  # ── Gen→Eval Loop ──
  attempt=1
  eval_feedback=""
  passed=false

  while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
    log "${BOLD}── Attempt ${attempt}/${MAX_ATTEMPTS} ──${RESET}"

    # ── Generate ──
    log "Gen ${feature_id} (${GEN_MODEL})..."
    bash "$QUEUE_MGR" update_phase "$feature_id" "gen" "$attempt" "$PROJECT_ROOT" 2>/dev/null

    gen_prompt=$(build_gen_prompt "$feature_id" "$attempt" "$eval_feedback")

    # HEAD before generation: the reference point for counting changed files.
    gen_base=$(cd "$PROJECT_ROOT" && git rev-parse HEAD 2>/dev/null) || gen_base=""

    gen_start=$(date +%s)
    log "${DIM}  claude -p --dangerously-skip-permissions --model ${GEN_MODEL}${RESET}"
    # tee /dev/stderr mirrors the generator output while it runs; the
    # captured copy is not used afterwards.
    gen_output=$(cd "$PROJECT_ROOT" && claude -p "$gen_prompt" \
      --dangerously-skip-permissions \
      --model "$GEN_MODEL" \
      --output-format text 2>&1 | tee /dev/stderr) 2>&1 || true
    gen_elapsed=$(( $(date +%s) - gen_start ))

    # Auto-commit whatever the generator left uncommitted.
    (cd "$PROJECT_ROOT" && git add -A && git commit -m "feat(${feature_id}): gen attempt ${attempt}" --no-verify 2>/dev/null) || true

    # Count files changed by this attempt relative to the pre-generation
    # HEAD. A plain `git diff --name-only` ignores untracked files and
    # reports 0 when the generator has already committed its work.
    if [ -n "$gen_base" ]; then
      files_changed=$(cd "$PROJECT_ROOT" && git diff --name-only "$gen_base" HEAD 2>/dev/null | wc -l | tr -d ' ')
    else
      files_changed=0
    fi
    log "Gen done (${gen_elapsed}s) — ${files_changed} files"
    log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files, ${gen_elapsed}s"

    # ── Pre-eval gate ──
    log "Pre-eval gate..."
    bash "$QUEUE_MGR" update_phase "$feature_id" "gate" "$attempt" "$PROJECT_ROOT" 2>/dev/null

    if ! run_pre_eval_gate "$feature_id"; then
      log "${RED}Gate FAIL — retrying gen${RESET}"
      eval_feedback="Pre-eval gate failed: type check or lint errors. Fix compilation and lint issues."
      attempt=$((attempt + 1))
      continue
    fi

    # ── Evaluate ──
    log "Eval ${feature_id} (${EVAL_MODEL})..."
    bash "$QUEUE_MGR" update_phase "$feature_id" "eval" "$attempt" "$PROJECT_ROOT" 2>/dev/null

    eval_prompt=$(build_eval_prompt "$feature_id")

    eval_start=$(date +%s)
    log "${DIM}  claude -p --dangerously-skip-permissions --model ${EVAL_MODEL}${RESET}"
    eval_output=$(cd "$PROJECT_ROOT" && claude -p "$eval_prompt" \
      --dangerously-skip-permissions \
      --model "$EVAL_MODEL" \
      --output-format text 2>&1 | tee /dev/stderr) 2>&1 || true
    eval_elapsed=$(( $(date +%s) - eval_start ))

    # Parse result: "verdict|score|feedback" (feedback may itself contain "|").
    result_line=$(parse_eval_result "$eval_output")
    verdict=$(echo "$result_line" | cut -d'|' -f1)
    score=$(echo "$result_line" | cut -d'|' -f2)
    feedback=$(echo "$result_line" | cut -d'|' -f3-)

    log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score}) ${eval_elapsed}s"

    if [ "$verdict" = "PASS" ]; then
      log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
      passed=true
      break
    else
      log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
      log "${DIM}  ${feedback}${RESET}"
      eval_feedback="$feedback"
      attempt=$((attempt + 1))
    fi
  done

  # ══════════════════════════════════════════
  # Phase 3: Branch merge with conflict handling
  # ══════════════════════════════════════════
  if [ "$passed" = true ]; then
    log "Merging ${branch} → main..."
    acquire_git_lock

    merge_ok=false

    # Attempt 1: straight merge
    if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null); then
      merge_ok=true
    else
      # Attempt 2: abort failed merge, rebase, re-eval gate, then merge
      log "${YELLOW}Conflict detected — rebasing ${branch} onto main...${RESET}"
      (cd "$PROJECT_ROOT" && git merge --abort 2>/dev/null) || true
      (cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true

      if (cd "$PROJECT_ROOT" && git rebase main 2>/dev/null); then
        log "Rebase OK. Re-running gate..."

        if run_pre_eval_gate "$feature_id"; then
          log "Gate still PASS after rebase."
          if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null); then
            merge_ok=true
          fi
        else
          log "${RED}Gate FAIL after rebase — needs re-gen${RESET}"
        fi
      else
        log "${RED}Rebase failed — conflicts too complex${RESET}"
        (cd "$PROJECT_ROOT" && git rebase --abort 2>/dev/null) || true
      fi
    fi

    release_git_lock

    if [ "$merge_ok" = true ]; then
      # Clean up feature branch
      (cd "$PROJECT_ROOT" && git branch -d "$branch" 2>/dev/null) || true

      bash "$QUEUE_MGR" pass "$feature_id" "$PROJECT_ROOT" 2>/dev/null
      log_progress "pass" "${feature_id} merged to main"

      # Update feature-list.json passes: add both agent names to the
      # feature's "passes" list (de-duplicated), via a temp file + mv.
      if [ -f "$FEATURES" ]; then
        jq --arg fid "$feature_id" '
          .features |= map(
            if .id == $fid then
              .passes = ((.passes // []) + ["generator-frontend", "evaluator-functional"] | unique)
            else . end
          )
        ' "$FEATURES" > "${FEATURES}.tmp" && mv "${FEATURES}.tmp" "$FEATURES"
      fi

      log "${GREEN}${BOLD}✓ ${feature_id} DONE${RESET}"
    else
      log "${RED}${BOLD}Merge failed — ${feature_id} marked as failed${RESET}"
      (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
      bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
      log_progress "merge-fail" "${feature_id}"
    fi

  else
    log "${RED}${BOLD}✗ ${feature_id} FAILED after ${MAX_ATTEMPTS} attempts${RESET}"
    acquire_git_lock
    (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
    release_git_lock
    bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
    log_progress "fail" "${feature_id} after ${MAX_ATTEMPTS} attempts"
  fi

  sleep 2
done
|