@walwal-harness/cli 4.0.0-alpha.1 → 4.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -209,6 +209,17 @@
|
|
|
209
209
|
"frontend_cwd": "",
|
|
210
210
|
"timeout_seconds": 120,
|
|
211
211
|
"on_fail": "reroute_to_generator"
|
|
212
|
+
},
|
|
213
|
+
"parallel": {
|
|
214
|
+
"comment": "v4 Parallel Agent Teams 설정. npx walwal-harness v4 로 실행.",
|
|
215
|
+
"enabled": false,
|
|
216
|
+
"concurrency": 3,
|
|
217
|
+
"max_attempts_per_feature": 3,
|
|
218
|
+
"gen_model": "sonnet",
|
|
219
|
+
"eval_model": "opus",
|
|
220
|
+
"branch_strategy": "feature-branch",
|
|
221
|
+
"merge_on_pass": true,
|
|
222
|
+
"rebase_on_conflict": true
|
|
212
223
|
}
|
|
213
224
|
},
|
|
214
225
|
"evaluation": {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@walwal-harness/cli",
|
|
3
|
-
"version": "4.0.0-alpha.1",
|
|
3
|
+
"version": "4.0.0-alpha.2",
|
|
4
4
|
"description": "Production harness for AI agent engineering — Planner, Generator(BE/FE), Evaluator(Func/Visual), optional Brainstormer (requirements refinement). Supports React and Flutter FE stacks.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"walwal-harness": "bin/init.js"
|
|
@@ -40,11 +40,13 @@ CONFIG="$PROJECT_ROOT/.harness/config.json"
|
|
|
40
40
|
PROGRESS_LOG="$PROJECT_ROOT/.harness/progress.log"
|
|
41
41
|
QUEUE_MGR="$SCRIPT_DIR/harness-queue-manager.sh"
|
|
42
42
|
|
|
43
|
+
# ── Lock file for git operations (prevent race conditions between teams) ──
|
|
44
|
+
GIT_LOCK="$PROJECT_ROOT/.harness/.git-lock"
|
|
45
|
+
|
|
43
46
|
MAX_ATTEMPTS="${MAX_ATTEMPTS:-3}"
|
|
44
47
|
GEN_MODEL="${GEN_MODEL:-sonnet}"
|
|
45
48
|
EVAL_MODEL="${EVAL_MODEL:-opus}"
|
|
46
49
|
|
|
47
|
-
# Read models from config if available
|
|
48
50
|
if [ -f "$CONFIG" ]; then
|
|
49
51
|
_gm=$(jq -r '.agents["generator-frontend"].model // empty' "$CONFIG" 2>/dev/null)
|
|
50
52
|
_em=$(jq -r '.agents["evaluator-functional"].model // empty' "$CONFIG" 2>/dev/null)
|
|
@@ -68,20 +70,33 @@ log() {
|
|
|
68
70
|
}
|
|
69
71
|
|
|
70
72
|
log_progress() {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
echo "$(date +"%Y-%m-%d") | team-${TEAM_ID} | ${1} | ${2}" >> "$PROGRESS_LOG"
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# ── Git lock — serialize git checkout/merge across teams ──
|
|
77
|
+
acquire_git_lock() {
|
|
78
|
+
local max_wait=60 waited=0
|
|
79
|
+
while [ -f "$GIT_LOCK" ]; do
|
|
80
|
+
sleep 1
|
|
81
|
+
waited=$((waited + 1))
|
|
82
|
+
if [ "$waited" -ge "$max_wait" ]; then
|
|
83
|
+
log "${RED}Git lock timeout (${max_wait}s). Removing stale lock.${RESET}"
|
|
84
|
+
rm -f "$GIT_LOCK"
|
|
85
|
+
break
|
|
86
|
+
fi
|
|
87
|
+
done
|
|
88
|
+
echo "T${TEAM_ID}" > "$GIT_LOCK"
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
release_git_lock() {
|
|
92
|
+
rm -f "$GIT_LOCK"
|
|
75
93
|
}
|
|
76
94
|
|
|
77
95
|
# ── Pre-eval gate ──
|
|
78
96
|
run_pre_eval_gate() {
|
|
79
|
-
local feature_id="$1"
|
|
80
97
|
local cwd="$PROJECT_ROOT"
|
|
81
98
|
|
|
82
|
-
# Read frontend_cwd from config
|
|
83
99
|
if [ -f "$CONFIG" ]; then
|
|
84
|
-
local _cwd
|
|
85
100
|
_cwd=$(jq -r '.flow.pre_eval_gate.frontend_cwd // empty' "$CONFIG" 2>/dev/null)
|
|
86
101
|
if [ -n "$_cwd" ] && [ "$_cwd" != "null" ]; then
|
|
87
102
|
cwd="$PROJECT_ROOT/$_cwd"
|
|
@@ -97,85 +112,86 @@ run_pre_eval_gate() {
|
|
|
97
112
|
checks=("npx tsc --noEmit" "npx eslint src/")
|
|
98
113
|
fi
|
|
99
114
|
|
|
100
|
-
local all_pass=true
|
|
115
|
+
local all_pass=true fail_cmds=""
|
|
101
116
|
for cmd in "${checks[@]}"; do
|
|
102
|
-
if
|
|
103
|
-
log "${
|
|
117
|
+
if (cd "$cwd" && timeout 120s bash -c "$cmd" >/dev/null 2>&1); then
|
|
118
|
+
log " ${GREEN}✓${RESET} $cmd"
|
|
119
|
+
else
|
|
120
|
+
log " ${RED}✗${RESET} $cmd"
|
|
104
121
|
all_pass=false
|
|
122
|
+
fail_cmds+="$cmd; "
|
|
105
123
|
fi
|
|
106
124
|
done
|
|
107
125
|
|
|
108
|
-
|
|
109
|
-
log "${GREEN}Gate: tsc ✓ eslint ✓${RESET}"
|
|
110
|
-
return 0
|
|
111
|
-
else
|
|
112
|
-
return 1
|
|
113
|
-
fi
|
|
126
|
+
[ "$all_pass" = true ]
|
|
114
127
|
}
|
|
115
128
|
|
|
116
129
|
# ── Build generator prompt ──
|
|
117
130
|
build_gen_prompt() {
|
|
118
|
-
local
|
|
119
|
-
local attempt="$2"
|
|
120
|
-
local eval_feedback="${3:-}"
|
|
131
|
+
local fid="$1" attempt="$2" feedback="${3:-}"
|
|
121
132
|
|
|
122
|
-
local
|
|
123
|
-
|
|
133
|
+
local fobj
|
|
134
|
+
fobj=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
|
|
135
|
+
local fname fdesc ac_json deps_json
|
|
136
|
+
fname=$(echo "$fobj" | jq -r '.name // .description // ""')
|
|
137
|
+
fdesc=$(echo "$fobj" | jq -r '.description // ""')
|
|
138
|
+
ac_json=$(echo "$fobj" | jq -c '.ac // []')
|
|
139
|
+
deps_json=$(echo "$fobj" | jq -c '.depends_on // []')
|
|
124
140
|
|
|
125
|
-
local
|
|
126
|
-
|
|
127
|
-
feature_desc=$(echo "$feature_json" | jq -r '.description // ""')
|
|
128
|
-
ac_json=$(echo "$feature_json" | jq -c '.ac // []')
|
|
141
|
+
local project_name
|
|
142
|
+
project_name=$(jq -r '.project_name // ""' "$PROJECT_ROOT/.harness/progress.json" 2>/dev/null)
|
|
129
143
|
|
|
130
|
-
|
|
144
|
+
cat <<PROMPT
|
|
145
|
+
You are Generator-Frontend for a harness engineering project.
|
|
131
146
|
|
|
132
|
-
PROJECT: $
|
|
147
|
+
PROJECT: ${project_name}
|
|
133
148
|
CONVENTIONS: Read CONVENTIONS.md if it exists.
|
|
134
149
|
|
|
135
|
-
YOUR TASK: Implement ONLY feature ${
|
|
136
|
-
Description: ${
|
|
150
|
+
YOUR TASK: Implement ONLY feature ${fid}: ${fname}
|
|
151
|
+
Description: ${fdesc}
|
|
152
|
+
Dependencies (already implemented): ${deps_json}
|
|
137
153
|
Acceptance Criteria: ${ac_json}
|
|
138
154
|
|
|
139
155
|
Read these files for context:
|
|
140
|
-
- .harness/actions/feature-list.json (filter to ${
|
|
156
|
+
- .harness/actions/feature-list.json (filter to ${fid})
|
|
141
157
|
- .harness/actions/api-contract.json (relevant endpoints)
|
|
142
158
|
- .harness/actions/plan.md (overall design)
|
|
143
159
|
|
|
144
160
|
RULES:
|
|
145
|
-
- Implement ONLY this feature
|
|
161
|
+
- Implement ONLY this single feature
|
|
162
|
+
- Do NOT modify code belonging to other features
|
|
146
163
|
- Follow existing code patterns and CONVENTIONS.md
|
|
147
|
-
-
|
|
148
|
-
|
|
164
|
+
- When done, stage and commit with: git add -A && git commit -m 'feat(${fid}): ${fname}'
|
|
165
|
+
PROMPT
|
|
149
166
|
|
|
150
|
-
if [ "$attempt" -gt 1 ] && [ -n "$
|
|
151
|
-
|
|
167
|
+
if [ "$attempt" -gt 1 ] && [ -n "$feedback" ]; then
|
|
168
|
+
cat <<RETRY
|
|
152
169
|
|
|
153
170
|
PREVIOUS EVAL FEEDBACK (attempt ${attempt}):
|
|
154
|
-
${
|
|
171
|
+
${feedback}
|
|
155
172
|
|
|
156
|
-
Fix the issues
|
|
173
|
+
Fix the issues above. Focus specifically on the failed criteria.
|
|
174
|
+
RETRY
|
|
157
175
|
fi
|
|
158
|
-
|
|
159
|
-
echo "$prompt"
|
|
160
176
|
}
|
|
161
177
|
|
|
162
178
|
# ── Build evaluator prompt ──
|
|
163
179
|
build_eval_prompt() {
|
|
164
|
-
local
|
|
165
|
-
|
|
166
|
-
local feature_json
|
|
167
|
-
feature_json=$(jq --arg fid "$feature_id" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
|
|
180
|
+
local fid="$1"
|
|
168
181
|
|
|
169
|
-
local
|
|
170
|
-
|
|
171
|
-
|
|
182
|
+
local fobj
|
|
183
|
+
fobj=$(jq --arg fid "$fid" '.features[] | select(.id == $fid)' "$FEATURES" 2>/dev/null)
|
|
184
|
+
local fname ac_json
|
|
185
|
+
fname=$(echo "$fobj" | jq -r '.name // .description // ""')
|
|
186
|
+
ac_json=$(echo "$fobj" | jq -c '.ac // []')
|
|
172
187
|
|
|
173
188
|
local passed_list
|
|
174
189
|
passed_list=$(jq -r '.queue.passed // [] | join(", ")' "$QUEUE" 2>/dev/null)
|
|
175
190
|
|
|
176
|
-
|
|
191
|
+
cat <<PROMPT
|
|
192
|
+
You are Evaluator-Functional for a harness engineering project.
|
|
177
193
|
|
|
178
|
-
TASK: Evaluate feature ${
|
|
194
|
+
TASK: Evaluate feature ${fid}: ${fname}
|
|
179
195
|
|
|
180
196
|
Acceptance Criteria to verify:
|
|
181
197
|
${ac_json}
|
|
@@ -192,23 +208,24 @@ R5: Error handling & edge cases (15%)
|
|
|
192
208
|
PASS threshold: 2.80 / 3.00
|
|
193
209
|
FAIL: any AC not met, any regression failure
|
|
194
210
|
|
|
195
|
-
|
|
211
|
+
You MUST output this exact block (parseable by automation):
|
|
196
212
|
---EVAL-RESULT---
|
|
197
|
-
FEATURE: ${
|
|
213
|
+
FEATURE: ${fid}
|
|
198
214
|
VERDICT: PASS or FAIL
|
|
199
215
|
SCORE: X.XX
|
|
200
|
-
FEEDBACK:
|
|
201
|
-
---END-EVAL-RESULT---
|
|
216
|
+
FEEDBACK: one paragraph summary
|
|
217
|
+
---END-EVAL-RESULT---
|
|
218
|
+
PROMPT
|
|
202
219
|
}
|
|
203
220
|
|
|
204
|
-
# ── Parse eval result ──
|
|
221
|
+
# ── Parse eval result (macOS-compatible, no grep -P) ──
|
|
205
222
|
parse_eval_result() {
|
|
206
223
|
local output="$1"
|
|
207
224
|
|
|
208
225
|
local verdict score feedback
|
|
209
|
-
verdict=$(echo "$output" | grep -
|
|
210
|
-
score=$(echo "$output" | grep -
|
|
211
|
-
feedback=$(echo "$output" |
|
|
226
|
+
verdict=$(echo "$output" | grep -E '^VERDICT:' | sed 's/VERDICT:[[:space:]]*//' | head -1)
|
|
227
|
+
score=$(echo "$output" | grep -E '^SCORE:' | sed 's/SCORE:[[:space:]]*//' | head -1)
|
|
228
|
+
feedback=$(echo "$output" | grep -E '^FEEDBACK:' | sed 's/FEEDBACK:[[:space:]]*//' | head -1)
|
|
212
229
|
|
|
213
230
|
echo "${verdict:-UNKNOWN}|${score:-0.00}|${feedback:-no feedback}"
|
|
214
231
|
}
|
|
@@ -223,14 +240,13 @@ while true; do
|
|
|
223
240
|
# ── Dequeue next feature ──
|
|
224
241
|
feature_id=$(bash "$QUEUE_MGR" dequeue "$TEAM_ID" "$PROJECT_ROOT" 2>/dev/null)
|
|
225
242
|
|
|
226
|
-
if [ -z "$feature_id" ] || [[ "$feature_id" == "[
|
|
227
|
-
log "${DIM}No features
|
|
243
|
+
if [ -z "$feature_id" ] || [[ "$feature_id" == "["* ]]; then
|
|
244
|
+
log "${DIM}No features ready. Waiting 10s...${RESET}"
|
|
228
245
|
sleep 10
|
|
229
246
|
|
|
230
247
|
# Check if completely done
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
if [ "${remaining:-1}" -eq 0 ]; then
|
|
248
|
+
remaining=$(jq '(.queue.ready | length) + (.queue.blocked | length) + (.queue.in_progress | length)' "$QUEUE" 2>/dev/null || echo "1")
|
|
249
|
+
if [ "${remaining}" -eq 0 ] 2>/dev/null; then
|
|
234
250
|
log "${GREEN}${BOLD}ALL FEATURES COMPLETE. Team ${TEAM_ID} exiting.${RESET}"
|
|
235
251
|
log_progress "complete" "All features done"
|
|
236
252
|
exit 0
|
|
@@ -241,16 +257,18 @@ while true; do
|
|
|
241
257
|
log "${CYAN}▶ Dequeued ${feature_id}${RESET}"
|
|
242
258
|
log_progress "dequeue" "${feature_id}"
|
|
243
259
|
|
|
244
|
-
# ── Create feature branch ──
|
|
245
|
-
|
|
246
|
-
|
|
260
|
+
# ── Create feature branch (with lock) ──
|
|
261
|
+
branch="feature/${feature_id}"
|
|
262
|
+
acquire_git_lock
|
|
263
|
+
(cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git checkout -b "$branch" 2>/dev/null) || \
|
|
247
264
|
(cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
|
|
265
|
+
release_git_lock
|
|
248
266
|
log "Branch: ${branch}"
|
|
249
267
|
|
|
250
268
|
# ── Gen→Eval Loop ──
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
269
|
+
attempt=1
|
|
270
|
+
eval_feedback=""
|
|
271
|
+
passed=false
|
|
254
272
|
|
|
255
273
|
while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
|
|
256
274
|
log "${BOLD}── Attempt ${attempt}/${MAX_ATTEMPTS} ──${RESET}"
|
|
@@ -259,18 +277,17 @@ while true; do
|
|
|
259
277
|
log "Gen ${feature_id} (${GEN_MODEL})..."
|
|
260
278
|
bash "$QUEUE_MGR" update_phase "$feature_id" "gen" "$attempt" "$PROJECT_ROOT" 2>/dev/null
|
|
261
279
|
|
|
262
|
-
local gen_prompt
|
|
263
280
|
gen_prompt=$(build_gen_prompt "$feature_id" "$attempt" "$eval_feedback")
|
|
264
281
|
|
|
265
|
-
|
|
282
|
+
gen_start=$(date +%s)
|
|
266
283
|
gen_output=$(cd "$PROJECT_ROOT" && claude -p "$gen_prompt" --model "$GEN_MODEL" --output-format text 2>&1) || true
|
|
284
|
+
gen_elapsed=$(( $(date +%s) - gen_start ))
|
|
267
285
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files"
|
|
286
|
+
files_changed=$(cd "$PROJECT_ROOT" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ')
|
|
287
|
+
log "Gen done (${gen_elapsed}s) — ${files_changed} files"
|
|
288
|
+
log_progress "gen" "${feature_id} attempt ${attempt}: ${files_changed} files, ${gen_elapsed}s"
|
|
272
289
|
|
|
273
|
-
# Auto-commit
|
|
290
|
+
# Auto-commit
|
|
274
291
|
(cd "$PROJECT_ROOT" && git add -A && git commit -m "feat(${feature_id}): gen attempt ${attempt}" --no-verify 2>/dev/null) || true
|
|
275
292
|
|
|
276
293
|
# ── Pre-eval gate ──
|
|
@@ -288,72 +305,103 @@ while true; do
|
|
|
288
305
|
log "Eval ${feature_id} (${EVAL_MODEL})..."
|
|
289
306
|
bash "$QUEUE_MGR" update_phase "$feature_id" "eval" "$attempt" "$PROJECT_ROOT" 2>/dev/null
|
|
290
307
|
|
|
291
|
-
local eval_prompt
|
|
292
308
|
eval_prompt=$(build_eval_prompt "$feature_id")
|
|
293
309
|
|
|
294
|
-
|
|
310
|
+
eval_start=$(date +%s)
|
|
295
311
|
eval_output=$(cd "$PROJECT_ROOT" && claude -p "$eval_prompt" --model "$EVAL_MODEL" --output-format text 2>&1) || true
|
|
312
|
+
eval_elapsed=$(( $(date +%s) - eval_start ))
|
|
296
313
|
|
|
297
314
|
# Parse result
|
|
298
|
-
local result_line verdict score feedback
|
|
299
315
|
result_line=$(parse_eval_result "$eval_output")
|
|
300
316
|
verdict=$(echo "$result_line" | cut -d'|' -f1)
|
|
301
317
|
score=$(echo "$result_line" | cut -d'|' -f2)
|
|
302
318
|
feedback=$(echo "$result_line" | cut -d'|' -f3-)
|
|
303
319
|
|
|
304
|
-
log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score})"
|
|
320
|
+
log_progress "eval" "${feature_id} attempt ${attempt}: ${verdict} (${score}) ${eval_elapsed}s"
|
|
305
321
|
|
|
306
322
|
if [ "$verdict" = "PASS" ]; then
|
|
307
|
-
log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00"
|
|
323
|
+
log "${GREEN}${BOLD}✓ PASS${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
|
|
308
324
|
passed=true
|
|
309
325
|
break
|
|
310
326
|
else
|
|
311
|
-
log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00"
|
|
327
|
+
log "${RED}✗ FAIL${RESET} ${feature_id} — ${score}/3.00 (${eval_elapsed}s)"
|
|
312
328
|
log "${DIM} ${feedback}${RESET}"
|
|
313
329
|
eval_feedback="$feedback"
|
|
314
330
|
attempt=$((attempt + 1))
|
|
315
331
|
fi
|
|
316
332
|
done
|
|
317
333
|
|
|
318
|
-
#
|
|
334
|
+
# ══════════════════════════════════════════
|
|
335
|
+
# Phase 3: Branch merge with conflict handling
|
|
336
|
+
# ══════════════════════════════════════════
|
|
319
337
|
if [ "$passed" = true ]; then
|
|
320
|
-
# Merge to main
|
|
321
338
|
log "Merging ${branch} → main..."
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
339
|
+
acquire_git_lock
|
|
340
|
+
|
|
341
|
+
merge_ok=false
|
|
342
|
+
|
|
343
|
+
# Attempt 1: straight merge
|
|
344
|
+
if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS" 2>/dev/null); then
|
|
345
|
+
merge_ok=true
|
|
346
|
+
else
|
|
347
|
+
# Attempt 2: abort failed merge, rebase, re-eval gate, then merge
|
|
348
|
+
log "${YELLOW}Conflict detected — rebasing ${branch} onto main...${RESET}"
|
|
349
|
+
(cd "$PROJECT_ROOT" && git merge --abort 2>/dev/null) || true
|
|
350
|
+
(cd "$PROJECT_ROOT" && git checkout "$branch" 2>/dev/null) || true
|
|
351
|
+
|
|
352
|
+
if (cd "$PROJECT_ROOT" && git rebase main 2>/dev/null); then
|
|
353
|
+
log "Rebase OK. Re-running gate..."
|
|
354
|
+
|
|
355
|
+
if run_pre_eval_gate "$feature_id"; then
|
|
356
|
+
log "Gate still PASS after rebase."
|
|
357
|
+
if (cd "$PROJECT_ROOT" && git checkout main 2>/dev/null && git merge --no-ff "$branch" -m "merge: ${feature_id} PASS (rebased)" 2>/dev/null); then
|
|
358
|
+
merge_ok=true
|
|
359
|
+
fi
|
|
360
|
+
else
|
|
361
|
+
log "${RED}Gate FAIL after rebase — needs re-gen${RESET}"
|
|
362
|
+
fi
|
|
363
|
+
else
|
|
364
|
+
log "${RED}Rebase failed — conflicts too complex${RESET}"
|
|
365
|
+
(cd "$PROJECT_ROOT" && git rebase --abort 2>/dev/null) || true
|
|
366
|
+
fi
|
|
367
|
+
fi
|
|
368
|
+
|
|
369
|
+
release_git_lock
|
|
370
|
+
|
|
371
|
+
if [ "$merge_ok" = true ]; then
|
|
372
|
+
# Clean up feature branch
|
|
373
|
+
(cd "$PROJECT_ROOT" && git branch -d "$branch" 2>/dev/null) || true
|
|
374
|
+
|
|
375
|
+
bash "$QUEUE_MGR" pass "$feature_id" "$PROJECT_ROOT" 2>/dev/null
|
|
376
|
+
log_progress "pass" "${feature_id} merged to main"
|
|
377
|
+
|
|
378
|
+
# Update feature-list.json passes
|
|
379
|
+
if [ -f "$FEATURES" ]; then
|
|
380
|
+
jq --arg fid "$feature_id" '
|
|
381
|
+
.features |= map(
|
|
382
|
+
if .id == $fid then
|
|
383
|
+
.passes = ((.passes // []) + ["generator-frontend", "evaluator-functional"] | unique)
|
|
384
|
+
else . end
|
|
385
|
+
)
|
|
386
|
+
' "$FEATURES" > "${FEATURES}.tmp" && mv "${FEATURES}.tmp" "$FEATURES"
|
|
387
|
+
fi
|
|
388
|
+
|
|
389
|
+
log "${GREEN}${BOLD}✓ ${feature_id} DONE${RESET}"
|
|
390
|
+
else
|
|
391
|
+
log "${RED}${BOLD}Merge failed — ${feature_id} marked as failed${RESET}"
|
|
392
|
+
(cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
|
|
393
|
+
bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
|
|
394
|
+
log_progress "merge-fail" "${feature_id}"
|
|
346
395
|
fi
|
|
347
396
|
|
|
348
397
|
else
|
|
349
398
|
log "${RED}${BOLD}✗ ${feature_id} FAILED after ${MAX_ATTEMPTS} attempts${RESET}"
|
|
399
|
+
acquire_git_lock
|
|
400
|
+
(cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
|
|
401
|
+
release_git_lock
|
|
350
402
|
bash "$QUEUE_MGR" fail "$feature_id" "$PROJECT_ROOT" 2>/dev/null
|
|
351
403
|
log_progress "fail" "${feature_id} after ${MAX_ATTEMPTS} attempts"
|
|
352
|
-
|
|
353
|
-
# Return to main
|
|
354
|
-
(cd "$PROJECT_ROOT" && git checkout main 2>/dev/null) || true
|
|
355
404
|
fi
|
|
356
405
|
|
|
357
|
-
# Brief pause before next feature
|
|
358
406
|
sleep 2
|
|
359
407
|
done
|
|
@@ -60,6 +60,32 @@ disable-model-invocation: true
|
|
|
60
60
|
5. `actions/api-contract.json` — 기대 API 동작
|
|
61
61
|
6. `.harness/progress.json`
|
|
62
62
|
|
|
63
|
+
## v4 Feature-Level Mode (Parallel Agent Teams)
|
|
64
|
+
|
|
65
|
+
v4에서 Team Worker가 `claude -p`로 호출할 때, 프롬프트에 `FEATURE_ID`가 지정된다.
|
|
66
|
+
|
|
67
|
+
### Feature-Level Rules
|
|
68
|
+
- `feature-list.json`에서 **지정된 FEATURE_ID의 AC만** 검증
|
|
69
|
+
- Regression: `feature-queue.json`의 `passed` 목록에 있는 Feature들의 AC 재검증
|
|
70
|
+
- Cross-Validation: Feature 단위에서는 skip (Sprint-End에서 수행)
|
|
71
|
+
- Visual Evaluation: Feature 단위에서는 skip (Sprint-End에서 수행)
|
|
72
|
+
- 출력 형식: `---EVAL-RESULT---` 블록 (Worker가 파싱 가능)
|
|
73
|
+
|
|
74
|
+
### Feature-Level Scoring
|
|
75
|
+
- 동일한 R1-R5 루브릭 적용
|
|
76
|
+
- PASS 기준: 2.80/3.00 (변경 없음)
|
|
77
|
+
- 1건이라도 Regression 실패 시 FAIL (변경 없음)
|
|
78
|
+
|
|
79
|
+
### Output Format (Machine-Parseable)
|
|
80
|
+
```
|
|
81
|
+
---EVAL-RESULT---
|
|
82
|
+
FEATURE: F-XXX
|
|
83
|
+
VERDICT: PASS or FAIL
|
|
84
|
+
SCORE: X.XX
|
|
85
|
+
FEEDBACK: one paragraph summary
|
|
86
|
+
---END-EVAL-RESULT---
|
|
87
|
+
```
|
|
88
|
+
|
|
63
89
|
## Evaluation Steps
|
|
64
90
|
|
|
65
91
|
### Step 0: IA Structure Compliance (GATE)
|
|
@@ -43,6 +43,23 @@ disable-model-invocation: true
|
|
|
43
43
|
|
|
44
44
|
**Backend 통합 러너가 동작 중이어야 함.** Gateway 미응답 시 → STOP.
|
|
45
45
|
|
|
46
|
+
## v4 Feature-Level Mode (Parallel Agent Teams)
|
|
47
|
+
|
|
48
|
+
v4에서 Team Worker가 `claude -p`로 호출할 때, 프롬프트에 `FEATURE_ID`가 지정된다.
|
|
49
|
+
|
|
50
|
+
### Feature-Level Rules
|
|
51
|
+
- `feature-list.json`에서 **지정된 FEATURE_ID만** 필터하여 구현
|
|
52
|
+
- 다른 Feature의 코드를 수정하지 않음
|
|
53
|
+
- `depends_on`에 명시된 Feature는 이미 구현/머지 완료된 상태
|
|
54
|
+
- Feature branch (`feature/F-XXX`)에서 작업, 완료 시 commit
|
|
55
|
+
- Sprint Contract는 작성하지 않음 (v4에서는 Feature 단위로 관리)
|
|
56
|
+
|
|
57
|
+
### Feature-Level Prompt Template
|
|
58
|
+
Worker가 전달하는 프롬프트에는 다음이 포함됨:
|
|
59
|
+
- `FEATURE_ID`, `feature_name`, `description`, `ac` (Acceptance Criteria)
|
|
60
|
+
- `depends_on` (이미 완료된 의존 Feature 목록)
|
|
61
|
+
- Eval 재시도 시: 이전 Eval의 피드백
|
|
62
|
+
|
|
46
63
|
## Sprint Workflow
|
|
47
64
|
|
|
48
65
|
1. **Sprint Contract FE 섹션 추가** — 컴포넌트, API 연동, 성공 기준
|