autodevloop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
autodevloop/engine.py ADDED
@@ -0,0 +1,750 @@
1
+ """The AutoDevLoop orchestration engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import concurrent.futures
6
+ import filecmp
7
+ import json
8
+ import shutil
9
+ import sys
10
+ import threading
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any, Callable
14
+
15
+ from . import llm, prompts, registry, reporting, testing, vcs
16
+ from .config import deep_get, load_config, provider_invocation, resolved_steps
17
+ from .util import (
18
+ APP_DIR, INTERNAL_DIRS, DOC_SUFFIXES, PROGRESS_FILE, STATE_FILE, STOP_FILE,
19
+ collect_context, copy_tree_contents, diff_file_lists, extract_json,
20
+ list_generated_files, load_json, now_text, read_text, restore_working_dir,
21
+ safe_rmtree, save_json, slugify, ts, write_text,
22
+ )
23
+
24
# Fallback developer roster: a single general-purpose agent with no file
# ownership restrictions. Used wherever a plan carries no ``dev_agents``.
DEV_DEFAULT = [
    {
        "name": "AgentDEV",
        "role": "general",
        "task": "Implement the next useful version.",
        "owns": [],
    },
]
25
+
26
+
27
def _log(message: str) -> None:
    """Print *message* to stdout prefixed with ``[<ts()>]`` and flush.

    When stdout cannot encode the line, it is re-encoded with the ``replace``
    error handler (using stdout's encoding, or UTF-8 if none is reported) and
    written directly instead of raising.
    """
    stamped = f"[{ts()}] {message}"
    try:
        print(stamped, flush=True)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        printable = stamped.encode(encoding, "replace").decode(encoding)
        sys.stdout.write(printable + "\n")
        sys.stdout.flush()
35
+
36
+
37
+ def _coerce_pct(value: Any, default: int = 0) -> int:
38
+ """Normalise a 0-100 percentage/score.
39
+
40
+ Models are inconsistent: some return a 0-1 fraction (e.g. 0.72), some a
41
+ 0-100 integer (e.g. 72). We treat any value in (0, 1] as a fraction and
42
+ scale it, then clamp to 0-100. This is why a version could show 0% goal
43
+ progress while a later one showed 95%: ``int(0.72)`` is ``0``.
44
+ """
45
+ try:
46
+ n = float(value)
47
+ except (TypeError, ValueError):
48
+ return default
49
+ if 0 < n <= 1:
50
+ n *= 100
51
+ return max(0, min(100, int(round(n))))
52
+
53
+
54
+ def _file_differs(a: Path, b: Path) -> bool:
55
+ if not b.exists():
56
+ return True
57
+ if not a.exists():
58
+ return False
59
+ try:
60
+ return not filecmp.cmp(a, b, shallow=False)
61
+ except OSError:
62
+ return True
63
+
64
+
65
+ class AutoDevLoop:
66
def __init__(self, root: Path, config: dict[str, Any], overrides: dict[str, Any] | None = None) -> None:
    """Bind the engine to a project *root* and a loaded *config*.

    Only computes paths and in-memory bookkeeping; no directory is created
    here (see ``ensure_dirs``).
    """
    self.root = root.resolve()
    self.config = config
    self.overrides = overrides or {}

    # Engine-private data lives under <root>/<APP_DIR>.
    app_dir = self.root / APP_DIR
    self.app_dir = app_dir
    self.state_path = app_dir / STATE_FILE
    self.progress_path = app_dir / PROGRESS_FILE
    self.stop_path = app_dir / STOP_FILE
    self.logs_dir = app_dir / "logs"
    self.prompts_dir = app_dir / "prompts"
    self.plans_dir = app_dir / "plans"
    self.reviews_dir = app_dir / "reviews"
    self.tests_dir = app_dir / "tests"
    self.work_dir = app_dir / "work"

    # Project-visible outputs live directly under <root>.
    self.current_dir = self.root / "current"
    self.versions_dir = self.root / "versions"
    self.architecture_path = app_dir / "architecture.md"
    self.changelog_path = self.root / "CHANGELOG.md"
    self.features_path = self.root / "FEATURES.md"
    self.report_path = app_dir / "final_report.md"

    self.steps = resolved_steps(config)
    self.provider = provider_invocation(config)

    # Running usage totals, accumulated per provider call.
    self.cost = {"cost_usd_total": 0.0, "input_tokens": 0, "output_tokens": 0, "calls": 0}
    # Progress document, currently running agents, and the lock guarding both.
    self._progress: dict[str, Any] = {}
    self._active: dict[str, dict[str, Any]] = {}
    self._lock = threading.Lock()
    self._last_progress_write = 0.0
93
+
94
+ # ----- settings helpers ------------------------------------------------
95
@property
def agent_timeout(self) -> int:
    """Config value ``agents.timeout`` as an int (default 1800)."""
    configured = deep_get(self.config, "agents.timeout", 1800)
    return int(configured)
98
+
99
@property
def retries(self) -> int:
    """Config value ``agents.retries`` as an int (default 3)."""
    configured = deep_get(self.config, "agents.retries", 3)
    return int(configured)
102
+
103
@property
def backoff(self) -> float:
    """Config value ``agents.backoff_seconds`` as a float (default 5)."""
    configured = deep_get(self.config, "agents.backoff_seconds", 5)
    return float(configured)
106
+
107
@property
def review_threshold(self) -> int:
    """Config value ``review.threshold`` as an int (default 80).

    Review scores below this value trigger a fix attempt (see ``_needs_fix``).
    """
    configured = deep_get(self.config, "review.threshold", 80)
    return int(configured)
110
+
111
@property
def value_threshold(self) -> int:
    """Config value ``value.threshold`` as an int (default 65).

    Passed to the evaluate prompt as its ``threshold``.
    """
    configured = deep_get(self.config, "value.threshold", 65)
    return int(configured)
114
+
115
@property
def fix_retries(self) -> int:
    """Config value ``fix.retries`` as an int (default 2)."""
    configured = deep_get(self.config, "fix.retries", 2)
    return int(configured)
118
+
119
@property
def test_timeout(self) -> int:
    """Config value ``tests.timeout`` as an int (default 120)."""
    configured = deep_get(self.config, "tests.timeout", 120)
    return int(configured)
122
+
123
@property
def allow_parallel(self) -> bool:
    """Truthiness of config value ``agents.allow_parallel`` (default ``True``)."""
    configured = deep_get(self.config, "agents.allow_parallel", True)
    return bool(configured)
126
+
127
@property
def max_parallel(self) -> int:
    """Config value ``agents.max_parallel`` as an int (default 3), never below 1.

    The value is used as ``max_workers`` for a ``ThreadPoolExecutor``, which
    raises ``ValueError`` for values <= 0; a misconfigured 0 or negative
    setting is therefore clamped to 1 instead of aborting the run mid-version.
    """
    configured = int(deep_get(self.config, "agents.max_parallel", 3))
    return max(1, configured)
130
+
131
@property
def use_git(self) -> bool:
    """Truthiness of config value ``vcs.git`` (default ``True``)."""
    configured = deep_get(self.config, "vcs.git", True)
    return bool(configured)
134
+
135
+ # ----- lifecycle -------------------------------------------------------
136
def ensure_dirs(self) -> None:
    """Create every directory the engine writes to, then install prompt templates."""
    required = (
        self.app_dir,
        self.logs_dir,
        self.prompts_dir,
        self.plans_dir,
        self.reviews_dir,
        self.tests_dir,
        self.work_dir,
        self.current_dir,
        self.versions_dir,
    )
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
    prompts.ensure_templates(self.app_dir)
142
+
143
def run(self, *, reset: bool, goal: str, project_name: str, max_versions: int) -> None:
    """Run (or resume) the iteration loop until a stop condition is reached.

    Steps, in order:
      1. With *reset*, delete the engine directory, ``versions`` and ``current``.
      2. Create directories, clear any leftover STOP file, reload prior progress.
      3. Load or create the state, mark it ``running`` and persist it.
      4. Ensure the architecture exists, then build versions one at a time
         until ``current_version`` reaches ``max_versions`` or a STOP file appears.
      5. Always (``finally``): persist state and cost, write the final report
         and features overview, and emit a ``DONE`` event.

    ``KeyboardInterrupt`` is absorbed and recorded as ``stopped_by_keyboard``;
    any other exception is recorded as ``failed`` and re-raised.
    """
    if reset:
        for stale in (APP_DIR, "versions", "current"):
            safe_rmtree(self.root / stale, self.root)

    self.ensure_dirs()
    if self.stop_path.exists():
        self.stop_path.unlink()

    # Preserve event history across resumes / server restarts.
    previous_progress = load_json(self.progress_path, {})
    if isinstance(previous_progress, dict):
        self._progress = previous_progress
    self._progress.setdefault("events", [])

    state = self._load_or_create_state(goal, project_name, max_versions, reset)
    state["status"] = "running"
    state["settings"] = self._settings_snapshot()
    registry.register(self.root, state.get("project_name", ""))
    if self.use_git:
        vcs.ensure_repo(self.current_dir)
    save_json(self.state_path, state)
    with self._lock:
        self._progress.update({
            "run_started_at": now_text(),
            "run_started_ts": time.time(),
            "run_ended_at": None,
            "run_ended_ts": None,
        })
    # _emit takes the lock itself, so it must be called after releasing it.
    self._emit(state, step="START", agent="", message="run started")

    _log("AutoDevLoop - autonomous AI iteration engine")
    _log(f"Project : {state.get('project_name')} | mode: {deep_get(self.config, 'pipeline.mode')}")
    _log(f"Goal : {state.get('goal', '')[:100]}")
    _log(f"Versions: {state.get('current_version')} -> {state.get('max_versions')}")
    _log(f"Provider: {self.provider.get('command')} ({self.provider.get('name')})")

    try:
        self._ensure_architecture(state)
        while int(state["current_version"]) < int(state["max_versions"]):
            if self.stop_path.exists():
                state["status"] = "stopped"
                state["stop_reason"] = "User requested stop (STOP file)"
                break
            next_version = int(state["current_version"]) + 1
            state = self._run_version(next_version, state)
            save_json(self.state_path, state)

        # Still "running" here means the loop ended by exhausting the version budget.
        if state.get("status") == "running":
            state["status"] = "completed"
            state["stop_reason"] = f"Reached max versions ({state.get('max_versions')})"
    except KeyboardInterrupt:
        # Continue from the last persisted state rather than a half-updated one.
        state = load_json(self.state_path, state)
        state["status"] = "stopped_by_keyboard"
        state["stop_reason"] = "Ctrl+C"
        _log("Stopped by keyboard interrupt.")
    except Exception as exc:  # noqa: BLE001
        state = load_json(self.state_path, state)
        state["status"] = "failed"
        state["last_error"] = str(exc)
        save_json(self.state_path, state)
        self._emit(state, step="ERROR", agent="", message=str(exc)[:200])
        _log(f"Failed: {exc}")
        raise
    finally:
        state["cost"] = self.cost
        save_json(self.state_path, state)
        reporting.write_final_report(self.report_path, state)
        reporting.write_features_overview(self.features_path, state)
        with self._lock:
            self._active.clear()
            self._progress["run_ended_at"] = now_text()
            self._progress["run_ended_ts"] = time.time()
        self._emit(state, step="DONE", agent="", message=state.get("status", ""))
        _log(f"Status: {state.get('status')} | Reason: {state.get('stop_reason', 'N/A')} | "
             f"Calls: {self.cost['calls']} | Tokens in/out: "
             f"{self.cost['input_tokens']}/{self.cost['output_tokens']}")
218
+
219
def _settings_snapshot(self) -> dict[str, Any]:
    """Return the effective settings that ``run`` stores under ``state["settings"]``."""
    provider_summary = {key: self.provider.get(key) for key in ("name", "command", "model")}
    snapshot: dict[str, Any] = {}
    snapshot["mode"] = deep_get(self.config, "pipeline.mode")
    snapshot["steps"] = self.steps
    snapshot["provider"] = provider_summary
    snapshot["review_threshold"] = self.review_threshold
    snapshot["value_threshold"] = self.value_threshold
    snapshot["fix_retries"] = self.fix_retries
    snapshot["max_versions_default"] = int(deep_get(self.config, "project.max_versions", 5))
    return snapshot
229
+
230
def _load_or_create_state(self, goal: str, project_name: str, max_versions: int, reset: bool) -> dict[str, Any]:
    """Return the persisted run state, or a fresh one when none exists or *reset* is set.

    On resume, a non-empty *goal* replaces the stored goal, a truthy
    *max_versions* replaces the stored limit, and ``self.cost`` is rebound to
    the stored cost totals (when present).

    Raises:
        SystemExit: a fresh state is needed but *goal* is empty.
    """
    persisted = load_json(self.state_path, {})

    if not persisted or reset:
        if not goal:
            raise SystemExit("A goal is required (--goal or interactive setup).")
        created = now_text()
        name = project_name or deep_get(self.config, "project.name", "") or self.root.name
        return {
            "project_name": name,
            "goal": goal,
            "arch_hint": deep_get(self.config, "project.arch_hint", ""),
            "current_version": 0,
            "max_versions": max_versions,
            "status": "initialized",
            "phase": "build",
            "goal_met": False,
            "goal_progress": 0,
            "goal_completed_version": None,
            "stop_reason": None,
            "created_at": created,
            "updated_at": created,
            "architecture_created": False,
            "versions": [],
            "backlog": [],
            "last_review": {},
            "last_test_result": {"success": None, "command": ""},
            "cost": self.cost,
        }

    # Resume path: apply caller overrides on top of what was saved.
    if goal:
        persisted["goal"] = goal
    persisted["max_versions"] = max_versions or persisted.get("max_versions")
    self.cost = persisted.get("cost", self.cost)
    # Fill in keys that a stored state may not carry.
    persisted.setdefault("phase", "build")
    persisted.setdefault("backlog", [])
    return persisted
264
+
265
+ # ----- progress / events ----------------------------------------------
266
def _snapshot_locked(self, state: dict[str, Any]) -> None:
    """Refresh the progress document from *state* and write it to disk.

    Callers in this class invoke it while holding ``self._lock``.
    """
    token_totals = {
        "input": self.cost.get("input_tokens", 0),
        "output": self.cost.get("output_tokens", 0),
    }
    fields = {
        "status": state.get("status"),
        "phase": state.get("phase"),
        "project_name": state.get("project_name"),
        "goal": state.get("goal"),
        "current_version": state.get("current_version"),
        "max_versions": state.get("max_versions"),
        "goal_progress": state.get("goal_progress"),
        "goal_met": state.get("goal_met"),
        "goal_completed_version": state.get("goal_completed_version"),
        "calls": self.cost.get("calls", 0),
        "tokens": token_totals,
        # Copy each entry so the written document is detached from live records.
        "active": [dict(entry) for entry in self._active.values()],
        "updated_at": now_text(),
    }
    progress = self._progress
    progress.update(fields)
    progress.setdefault("events", [])
    progress["versions"] = state.get("versions", [])
    save_json(self.progress_path, progress, stamp=False)
286
+
287
+ def _snapshot(self, state: dict[str, Any], *, throttle: bool = False) -> None:
288
+ now = time.time()
289
+ with self._lock:
290
+ if throttle and now - self._last_progress_write < 0.6:
291
+ return
292
+ self._last_progress_write = now
293
+ self._snapshot_locked(state)
294
+
295
def _emit(self, state: dict[str, Any], *, step: str, agent: str, message: str = "",
          extra: dict[str, Any] | None = None) -> None:
    """Append an event to the progress log and write the progress document.

    The event carries time, version, *step*, *agent* and *message*; keys from
    *extra* are merged on top. Only the most recent 400 events are kept.
    Acquires ``self._lock``, so it must not be called while holding it.
    """
    with self._lock:
        events = self._progress.setdefault("events", [])
        entry = {
            "time": now_text(),
            "ts": time.time(),
            "version": state.get("current_version"),
            "step": step,
            "agent": agent,
            "message": message,
        }
        if extra:
            entry.update(extra)
        events.append(entry)
        self._progress["events"] = events[-400:]
        self._last_progress_write = time.time()
        self._snapshot_locked(state)
308
+
309
+ # ----- LLM helper ------------------------------------------------------
310
def _call(self, state: dict[str, Any], label: str, prompt: str, cwd: Path,
          step: str, agent: str) -> str:
    """Send *prompt* to the configured provider and return its text output.

    Around the call this method:
      * saves the prompt under ``prompts_dir`` and the response under
        ``logs_dir`` (a debug-log path is also handed to ``llm.call``);
      * registers *agent* in ``self._active`` for the duration of the call;
      * emits ``start`` / ``done`` events;
      * adds the call's cost and token counts to ``self.cost``.

    File names are tagged ``v<N>`` where N is ``current_version`` for the
    ARCH/DONE/START steps and ``current_version + 1`` for every other step.
    """
    slug = slugify(label.lower())
    offset = 0 if step in {"ARCH", "DONE", "START"} else 1
    version = int(state.get("current_version", 0)) + offset
    write_text(self.prompts_dir / f"v{version}_{slug}.prompt.txt", prompt)
    debug_path = self.logs_dir / f"v{version}_{slug}_debug.log"

    active_record = {
        "agent": agent, "step": step, "label": label,
        "started_at": now_text(), "started_ts": time.time(), "message": "calling provider",
    }
    with self._lock:
        self._active[agent] = active_record
    self._emit(state, step=step, agent=agent, message="started", extra={"kind": "start"})
    _log(f"[v{version}] [{label}] calling {self.provider.get('command')} in {cwd.name}...")

    def on_status(msg: str) -> None:
        # Provider status callback: update the live message, then write a
        # throttled snapshot (taken after the lock is released).
        with self._lock:
            if agent in self._active:
                self._active[agent]["message"] = msg
        self._snapshot(state, throttle=True)

    try:
        result = llm.call(
            self.provider, prompt, cwd,
            label=label, timeout=self.agent_timeout,
            retries=self.retries, backoff_seconds=self.backoff,
            debug_file=debug_path, on_status=on_status,
        )
    finally:
        # The agent is no longer running, whether the call succeeded or raised.
        with self._lock:
            self._active.pop(agent, None)

    with self._lock:
        totals = self.cost
        totals["cost_usd_total"] += result.cost_usd
        totals["input_tokens"] += result.input_tokens
        totals["output_tokens"] += result.output_tokens
        totals["calls"] += 1

    log_name = f"v{version}_{slug}.log"
    write_text(self.logs_dir / log_name, result.text)
    duration = round(result.duration_s, 1)
    done_details = {
        "kind": "done", "output_tokens": result.output_tokens,
        "duration_s": duration, "log": log_name, "snippet": result.text.strip()[:500],
    }
    self._emit(state, step=step, agent=agent,
               message=f"done in {duration}s ({result.output_tokens} out tokens)",
               extra=done_details)
    _log(f"[v{version}] [{label}] done in {duration}s | out {result.output_tokens} tok")
    return result.text
357
+
358
+ # ----- pipeline stages -------------------------------------------------
359
def _ensure_architecture(self, state: dict[str, Any]) -> None:
    """Generate ``architecture.md`` once per project.

    Does nothing when the state says the architecture was created and the
    file exists. Otherwise asks the ARCH agent, writes its output, sets
    ``architecture_created`` in the state, optionally commits, and persists
    the state.
    """
    already_done = state.get("architecture_created") and self.architecture_path.exists()
    if already_done:
        return

    _log("[ARCH] Designing architecture, stack, and test strategy...")
    variables = {
        "goal": state.get("goal", ""),
        "arch_hint": state.get("arch_hint", "") or "(none)",
    }
    prompt = prompts.render_template(self.app_dir, "arch", variables)
    design = self._call(state, "ARCH", prompt, self.current_dir, step="ARCH", agent="AgentARCH")
    write_text(self.architecture_path, design.strip() + "\n")
    state["architecture_created"] = True
    if self.use_git:
        vcs.commit_all(self.current_dir, "chore: initial architecture")
    save_json(self.state_path, state)
373
+
374
def _run_version(self, version: int, state: dict[str, Any]) -> dict[str, Any]:
    """Produce one version end-to-end and return the updated *state*.

    Pipeline: snapshot ``current/`` -> plan -> develop -> (doc) -> test ->
    review -> up to ``fix_retries`` fix/test/review rounds -> goal assessment.
    If any of those stages raises, ``current/`` is restored from the
    snapshot and the exception propagates.

    Afterwards: switch to the ``expand`` phase the first time the goal is
    met, optionally scout new backlog items, copy the result to
    ``versions/v<N>``, commit/tag, write the changelog and features
    overview, and append the version record to the state.
    """
    starting_phase = state.get("phase")
    _log("=" * 60)
    _log(f"[v{version}] Starting (phase: {starting_phase}) of {state.get('max_versions')}")
    self._emit(state, step="VERSION_START", agent="",
               message=f"v{version} · {starting_phase} phase",
               extra={"kind": "version_start", "vno": version, "phase": starting_phase})

    # Pristine copy of current/: rollback source and diff baseline.
    before_dir = self.work_dir / f"v{version}" / "_before"
    safe_rmtree(before_dir, self.root)
    copy_tree_contents(self.current_dir, before_dir)

    try:
        plan = self._plan(version, state)
        dev_outputs = self._develop(version, state, plan, before_dir)
        if self.steps.get("doc"):
            dev_outputs.append(self._doc(version, state, plan))
        test_result = self._test(version, state, plan)
        review = self._review(version, state, plan, test_result, dev_outputs)

        if self._needs_fix(test_result, review):
            for attempt in range(1, self.fix_retries + 1):
                _log(f"[v{version}] Fix attempt {attempt}/{self.fix_retries}")
                round_tag = f"fix{attempt}"
                self._fix(version, state, plan, test_result, review, attempt)
                test_result = self._test(version, state, plan, suffix=round_tag)
                review = self._review(version, state, plan, test_result, dev_outputs, suffix=round_tag)
                if not self._needs_fix(test_result, review):
                    break

        goal_met, goal_progress = self._assess_goal(version, state, review)
        review["goal_met"] = goal_met
        review["goal_progress"] = goal_progress
    except Exception:
        # Roll back the working copy so a resume re-runs this version cleanly.
        # restore_working_dir keeps current/.git intact.
        _log(f"[v{version}] Error during version; rolling back current/ from snapshot.")
        restore_working_dir(before_dir, self.current_dir)
        raise

    state["goal_met"] = goal_met
    state["goal_progress"] = goal_progress
    newly_completed = goal_met and state.get("phase") == "build"
    if newly_completed:
        state["phase"] = "expand"
        state["goal_completed_version"] = version
        _log(f"[v{version}] 🎯 Core goal met. Switching to EXPAND phase.")

    if state.get("phase") == "expand" and self.steps.get("scout"):
        self._scout_and_evaluate(version, state, review)

    # snapshot + vcs
    diff = diff_file_lists(before_dir, self.current_dir)
    version_dir = self.versions_dir / f"v{version}"
    safe_rmtree(version_dir, self.root)
    version_dir.mkdir(parents=True, exist_ok=True)
    copy_tree_contents(self.current_dir, version_dir)

    commit = None
    if self.use_git:
        headline = f"v{version}: {plan.get('version_goal', '')}".strip()[:200]
        commit = vcs.commit_all(self.current_dir, headline)
        vcs.tag(self.current_dir, f"v{version}", plan.get("version_goal", ""))
        if newly_completed:
            vcs.tag(self.current_dir, vcs.GOAL_TAG, f"Core user goal met at v{version}")

    # reports
    reporting.write_version_changelog(self.changelog_path, version, plan, diff, test_result, review, state.get("phase"))

    record = {
        "version": version,
        "phase": state.get("phase"),
        "completed_at": now_text(),
        "plan": plan,
        "diff": diff,
        "review_score": review.get("score", 0),
        "review_issues": review.get("issues", []),
        "feature_summary": review.get("feature_summary", ""),
        "whats_new": review.get("whats_new", []),
        "test_result": test_result,
        "goal_met": goal_met,
        "goal_progress": goal_progress,
        "commit": commit,
        "snapshot": str(version_dir.relative_to(self.root)),
    }
    state["current_version"] = version
    state["last_review"] = review
    state["last_test_result"] = test_result
    state.setdefault("versions", []).append(record)
    state["cost"] = self.cost
    reporting.write_features_overview(self.features_path, state)

    score = review.get("score", "?")
    verdict = "PASS" if test_result.get("success") else "FAIL"
    _log(f"[v{version}] complete | score {score}/100 | tests "
         f"{verdict} | goal {goal_progress}% | "
         f"+{len(diff['added'])}/~{len(diff['changed'])} files")
    self._emit(state, step="VERSION_DONE", agent="", message=f"v{version} score {score}")
    return state
467
+
468
def _plan(self, version: int, state: dict[str, Any]) -> dict[str, Any]:
    """Ask the PLAN agent for this version's plan, save it, and return it.

    The prompt includes phase-specific guidance, the architecture text, the
    accepted backlog, a digest of the previous review/test/versions, and the
    current code context. If the reply yields no ``dev_agents``, the
    ``DEV_DEFAULT`` roster is used.
    """
    phase = state.get("phase", "build")
    build_guidance = ("Phase: BUILD. The original goal is NOT fully met yet. Drive directly toward "
                      "completing the user's requested product. Fix real bugs first, then add the "
                      "core features the user asked for.")
    expand_guidance = ("Phase: EXPAND. The core goal is already met. Keep it working, then build the "
                       "most valuable accepted backlog item(s) that extend the product into useful "
                       "adjacent features. Every addition must be genuinely useful to this product.")
    guidance = build_guidance if phase == "build" else expand_guidance

    context = collect_context(self.current_dir)

    # Digest of what happened so far (last three versions only).
    review_digest = {
        key: state.get("last_review", {}).get(key)
        for key in ("score", "issues", "suggestions_for_next_version")
    }
    recent = [
        {"version": item.get("version"), "summary": item.get("feature_summary")}
        for item in state.get("versions", [])[-3:]
    ]
    digest = {
        "last_review": review_digest,
        "last_test_result": {"success": state.get("last_test_result", {}).get("success")},
        "recent_versions": recent,
    }
    previous = json.dumps(digest, ensure_ascii=False, indent=2)

    backlog = self._backlog_text(state)
    variables = {
        "version": version, "goal": state.get("goal", ""), "phase": phase,
        "architecture": read_text(self.architecture_path, "(architecture missing)"),
        "phase_guidance": guidance, "backlog": backlog, "previous": previous, "context": context,
    }
    prompt = prompts.render_template(self.app_dir, "plan", variables)
    raw = self._call(state, "PLAN", prompt, self.current_dir, step="PLAN", agent="AgentPLAN")

    fallback = {
        "version_goal": f"Improve version {version}", "acceptance_criteria": [],
        "dev_agents": DEV_DEFAULT, "test_focus": [], "risks": [],
    }
    plan = extract_json(raw, fallback)
    if not plan.get("dev_agents"):
        plan["dev_agents"] = DEV_DEFAULT
    save_json(self.plans_dir / f"v{version}.json", plan)
    return plan
500
+
501
def _develop(self, version: int, state: dict[str, Any], plan: dict[str, Any], before_dir: Path) -> list[dict[str, Any]]:
    """Run the plan's dev agent(s) and return one result dict per agent.

    * One agent: it edits ``current/`` directly, so changes are visible live.
    * Several agents: each gets an isolated copy of ``current/`` under
      ``work/v<N>/<name>``; they run in a thread pool when ``allow_parallel``
      is set (sequentially otherwise) and their changes are merged back with
      ``_merge_dev_outputs``.

    The roster comes from model output, so it is sanitised first: entries
    that are not dicts are dropped, and agent names are made unique. A
    colliding name would otherwise make two agents share one workspace
    directory, one set of prompt/log file names and one ``_active`` entry.

    Each result has the keys ``name``, ``role``, ``workspace``, ``output``
    and ``files``.
    """
    roster = plan.get("dev_agents")
    dev_agents = [a for a in roster if isinstance(a, dict)] if isinstance(roster, list) else []
    if not dev_agents:
        dev_agents = DEV_DEFAULT

    # Single dev agent: edit current/ directly so file changes are visible
    # live (no isolated workspace). Multiple agents still use isolated
    # workspaces + conflict-aware merge for safety.
    if len(dev_agents) == 1:
        agent = dev_agents[0]
        name = slugify(str(agent.get("name") or "AgentDEV"))
        owns = agent.get("owns") or []
        prompt = prompts.render_template(self.app_dir, "dev", {
            "agent_name": name, "version": version, "goal": state.get("goal", ""),
            "architecture": read_text(self.architecture_path, ""),
            "plan": json.dumps(plan, ensure_ascii=False, indent=2),
            "task": agent.get("task", ""),
            "owns": ", ".join(owns) if owns else "(not restricted)",
        })
        text = self._call(state, name, prompt, self.current_dir, step="DEV", agent=name)
        return [{"name": name, "role": agent.get("role", "dev"),
                 "workspace": str(self.current_dir), "output": text,
                 "files": list_generated_files(self.current_dir)}]

    specs = []
    taken: set[str] = set()
    for idx, agent in enumerate(dev_agents, start=1):
        name = slugify(str(agent.get("name") or f"AgentDEV_{idx}"))
        # Disambiguate duplicate names with the agent's position in the roster.
        while name in taken:
            name = f"{name}_{idx}"
        taken.add(name)
        workspace = self.work_dir / f"v{version}" / name
        safe_rmtree(workspace, self.root)
        copy_tree_contents(self.current_dir, workspace)
        owns = agent.get("owns") or []
        prompt = prompts.render_template(self.app_dir, "dev", {
            "agent_name": name, "version": version, "goal": state.get("goal", ""),
            "architecture": read_text(self.architecture_path, ""),
            "plan": json.dumps(plan, ensure_ascii=False, indent=2),
            "task": agent.get("task", ""),
            "owns": ", ".join(owns) if owns else "(not restricted; avoid touching peers' files)",
        })
        specs.append({"name": name, "role": agent.get("role", "dev"), "workspace": workspace, "prompt": prompt})

    outputs: list[dict[str, Any]] = []
    if self.allow_parallel and len(specs) > 1:
        _log(f"[v{version}] Running {len(specs)} dev agents in parallel...")
        # ThreadPoolExecutor rejects max_workers <= 0, so never pass less than 1.
        workers = max(1, self.max_parallel)
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
            futures = {pool.submit(self._run_dev_spec, version, state, spec): spec for spec in specs}
            for future in concurrent.futures.as_completed(futures):
                outputs.append(future.result())
    else:
        for spec in specs:
            outputs.append(self._run_dev_spec(version, state, spec))

    # Order outputs by original spec order for deterministic conflict resolution.
    order = {spec["name"]: i for i, spec in enumerate(specs)}
    outputs.sort(key=lambda o: order.get(o["name"], 0))
    self._merge_dev_outputs(version, state, before_dir, outputs)
    return outputs
555
+
556
def _run_dev_spec(self, version: int, state: dict[str, Any], spec: dict[str, Any]) -> dict[str, Any]:
    """Run one prepared dev-agent *spec* in its workspace and package the result."""
    name = spec["name"]
    workspace = spec["workspace"]
    text = self._call(state, name, spec["prompt"], workspace, step="DEV", agent=name)
    return {
        "name": name,
        "role": spec["role"],
        "workspace": str(workspace),
        "output": text,
        "files": list_generated_files(workspace),
    }
561
+
562
def _merge_dev_outputs(self, version: int, state: dict[str, Any], before_dir: Path,
                       outputs: list[dict[str, Any]]) -> None:
    """Copy each agent's changed files into ``current/``; first writer wins.

    A workspace file is merged only if it differs from (or is absent in) the
    pre-version snapshot *before_dir*. When a later agent changed a file an
    earlier agent already claimed, the later copy is skipped and the conflict
    is logged and emitted as a ``MERGE`` event. Paths containing an
    ``INTERNAL_DIRS`` component are ignored.
    """
    owner_of: dict[str, str] = {}
    skipped: list[str] = []

    for result in outputs:
        workspace = Path(result["workspace"])
        for candidate in sorted(workspace.rglob("*")):
            if candidate.is_dir():
                continue
            rel = candidate.relative_to(workspace)
            if any(part in INTERNAL_DIRS for part in rel.parts):
                continue
            key = rel.as_posix()
            if not _file_differs(candidate, before_dir / rel):
                continue  # agent left this file unchanged; do not clobber peers
            if key in owner_of:
                skipped.append(f"{key} (kept {owner_of[key]}, skipped {result['name']})")
                continue
            owner_of[key] = result["name"]
            destination = self.current_dir / rel
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(candidate, destination)

    if skipped:
        _log(f"[v{version}] ⚠ merge conflicts (first writer kept): {skipped}")
        self._emit(state, step="MERGE", agent="", message=f"conflicts: {skipped}")
588
+
589
def _doc(self, version: int, state: dict[str, Any], plan: dict[str, Any]) -> dict[str, Any]:
    """Run the documentation agent in an isolated copy of ``current/``.

    After the agent finishes, only files whose suffix is in ``DOC_SUFFIXES``
    (and that are not under an ``INTERNAL_DIRS`` component) are copied back
    into ``current/``. Returns a result dict shaped like a dev-agent output.
    """
    workspace = self.work_dir / f"v{version}" / "AgentDOC"
    safe_rmtree(workspace, self.root)
    copy_tree_contents(self.current_dir, workspace)

    variables = {
        "version": version, "goal": state.get("goal", ""),
        "plan": json.dumps(plan, ensure_ascii=False, indent=2),
    }
    prompt = prompts.render_template(self.app_dir, "doc", variables)
    text = self._call(state, "AgentDOC", prompt, workspace, step="DOC", agent="AgentDOC")

    for produced in sorted(workspace.rglob("*")):
        if produced.is_dir():
            continue
        rel = produced.relative_to(workspace)
        internal = any(part in INTERNAL_DIRS for part in rel.parts)
        if internal or produced.suffix.lower() not in DOC_SUFFIXES:
            continue
        destination = self.current_dir / rel
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(produced, destination)

    return {
        "name": "AgentDOC",
        "role": "documentation",
        "workspace": str(workspace),
        "output": text,
        "files": list_generated_files(workspace),
    }
609
+
610
def _test(self, version: int, state: dict[str, Any], plan: dict[str, Any], suffix: str = "") -> dict[str, Any]:
    """Choose and run test commands for this version; save and return the outcome.

    Command selection:
      1. Detected candidates, with the configured ``tests.command`` (if any)
         placed first.
      2. If the ``test_agent`` step is enabled, the TEST agent picks the
         commands from those candidates.
      3. With no commands chosen, fall back to the first candidate, or to
         ``__builtin_file_smoke__`` when there are none.

    The agent's reply is model output, so ``commands`` is normalised: a bare
    string becomes a one-element list (iterating it would otherwise run every
    character as its own command), and any other non-list value is ignored.

    Returns:
        ``{"success", "decision", "commands", "results"}``; ``success`` is
        true only when every command succeeded.
    """
    # NOTE(review): commands may originate from model output; confirm that
    # testing.run_command validates or sandboxes what it executes.
    label_suffix = f"_{suffix}" if suffix else ""
    candidates = testing.detect_candidates(self.current_dir)
    override = deep_get(self.config, "tests.command", "")
    if override:
        candidates.insert(0, {"name": "configured", "command": override, "kind": "configured"})

    if self.steps.get("test_agent"):
        prompt = prompts.render_template(self.app_dir, "test", {
            "version": version, "goal": state.get("goal", ""),
            "plan": json.dumps(plan, ensure_ascii=False, indent=2),
            "candidates": json.dumps(candidates, ensure_ascii=False, indent=2),
            "context": collect_context(self.current_dir, max_bytes=40_000),
        })
        raw = self._call(state, f"TEST{label_suffix}", prompt, self.current_dir, step="TEST", agent="AgentTEST")
        decision = extract_json(raw, {"commands": [], "reason": "fallback"})
        if not isinstance(decision, dict):
            decision = {"commands": [], "reason": "fallback"}
        proposed = decision.get("commands") or []
        if isinstance(proposed, str):
            proposed = [proposed]
        elif not isinstance(proposed, (list, tuple)):
            proposed = []
        commands = [str(c) for c in proposed if c is not None and str(c).strip()]
    else:
        decision = {"commands": [], "reason": "built-in detection (simple mode)"}
        commands = []
        self._emit(state, step="TEST", agent="builtin", message="running built-in tests")

    if not commands:
        commands = [candidates[0]["command"]] if candidates else ["__builtin_file_smoke__"]

    results = []
    for command in commands:
        _log(f"[v{version}] [TEST] {command}")
        log_path = self.logs_dir / f"v{version}{label_suffix}_test_{slugify(command)[:30]}.log"
        results.append(testing.run_command(self.current_dir, command, self.test_timeout, log_path))
    success = all(r.get("success") for r in results) if results else False
    result = {"success": success, "decision": decision, "commands": commands, "results": results}
    save_json(self.tests_dir / f"v{version}{label_suffix}.json", result)
    return result
644
+
645
def _review(self, version: int, state: dict[str, Any], plan: dict[str, Any],
            test_result: dict[str, Any], dev_outputs: list[dict[str, Any]], suffix: str = "") -> dict[str, Any]:
    """Ask the REVIEW agent to score this version; save and return the review.

    Agent outputs are summarised without their raw ``output`` text. The
    returned ``score`` and ``goal_progress`` are normalised to 0-100 ints.
    *suffix* distinguishes follow-up reviews (e.g. after a fix attempt) in
    the call label and the saved file name.
    """
    summaries = []
    for produced in dev_outputs:
        summaries.append({key: val for key, val in produced.items() if key != "output"})

    variables = {
        "version": version, "goal": state.get("goal", ""), "phase": state.get("phase", "build"),
        "plan": json.dumps(plan, ensure_ascii=False, indent=2),
        "test_result": json.dumps(test_result, ensure_ascii=False, indent=2),
        "dev_summaries": json.dumps(summaries, ensure_ascii=False, indent=2),
        "context": collect_context(self.current_dir, max_bytes=50_000),
    }
    prompt = prompts.render_template(self.app_dir, "review", variables)

    if suffix:
        label = f"REVIEW_{suffix}"
        file_tail = "_" + suffix
    else:
        label = "REVIEW"
        file_tail = ""
    raw = self._call(state, label, prompt, self.current_dir, step="REVIEW", agent="AgentREVIEW")

    fallback = {
        "score": 70, "blocking": False, "goal_met": False, "goal_progress": 0,
        "issues": [], "good_points": [], "feature_summary": plan.get("version_goal", ""),
        "whats_new": [], "suggestions_for_next_version": [],
    }
    review = extract_json(raw, fallback)
    # Pin the numeric scale: tolerate models that return 0-1 fractions.
    for field in ("score", "goal_progress"):
        review[field] = _coerce_pct(review.get(field), 0)
    save_json(self.reviews_dir / f"v{version}{file_tail}.json", review)
    return review
667
+
668
+ def _needs_fix(self, test_result: dict[str, Any], review: dict[str, Any]) -> bool:
669
+ if not test_result.get("success"):
670
+ return True
671
+ if review.get("blocking"):
672
+ return True
673
+ score = review.get("score", 0)
674
+ return isinstance(score, int) and score < self.review_threshold
675
+
676
def _fix(self, version: int, state: dict[str, Any], plan: dict[str, Any],
         test_result: dict[str, Any], review: dict[str, Any], attempt: int) -> None:
    """Run the FIX agent in ``current/`` with the plan, test result and review as input.

    The agent's text reply is not used here.
    """
    def as_json(payload: Any) -> str:
        return json.dumps(payload, ensure_ascii=False, indent=2)

    variables = {
        "version": version, "attempt": attempt, "goal": state.get("goal", ""),
        "plan": as_json(plan),
        "test_result": as_json(test_result),
        "review": as_json(review),
    }
    prompt = prompts.render_template(self.app_dir, "fix", variables)
    self._call(state, f"FIX{attempt}", prompt, self.current_dir, step="FIX", agent="AgentFIX")
685
+
686
def _assess_goal(self, version: int, state: dict[str, Any], review: dict[str, Any]) -> tuple[bool, int]:
    """Return ``(goal_met, goal_progress)`` for this version.

    Starts from the review's own verdict. When the ``goal_check`` step is
    enabled, a dedicated GOALCHECK agent is consulted, its decision is saved
    to ``reviews/v<N>_goalcheck.json``, and its values take precedence (the
    review's values remain the fallback for missing keys).
    """
    review_met = bool(review.get("goal_met"))
    review_progress = _coerce_pct(review.get("goal_progress"), 0)
    if not self.steps.get("goal_check"):
        return review_met, review_progress

    variables = {
        "goal": state.get("goal", ""),
        "review": json.dumps(review, ensure_ascii=False, indent=2),
        "context": collect_context(self.current_dir, max_bytes=40_000),
    }
    prompt = prompts.render_template(self.app_dir, "goal_check", variables)
    raw = self._call(state, "GOALCHECK", prompt, self.current_dir, step="GOAL_CHECK", agent="AgentGOALCHECK")
    decision = extract_json(raw, {"goal_met": review_met, "goal_progress": review_progress})
    save_json(self.reviews_dir / f"v{version}_goalcheck.json", decision)

    final_met = bool(decision.get("goal_met", review_met))
    final_progress = _coerce_pct(decision.get("goal_progress", review_progress), review_progress)
    return final_met, final_progress
700
+
701
def _scout_and_evaluate(self, version: int, state: dict[str, Any], review: dict[str, Any]) -> None:
    """Collect new feature ideas from the SCOUT agent and add them to the backlog.

    When the ``evaluate`` step is enabled, an EVALUATE agent judges the
    candidates and each one is stored as ``accepted`` or ``rejected``;
    otherwise every candidate is stored as ``accepted``. Candidates whose
    title is empty or already in the backlog are skipped. The backlog is
    then written to ``backlog.json``.

    Both agents' replies are model output, so this method is defensive:
      * non-dict candidates/evaluations are ignored;
      * a non-numeric ``value`` is stored as 0 instead of raising;
      * a title repeated within the same batch is added only once.
    """
    scout_prompt = prompts.render_template(self.app_dir, "scout", {
        "version": version, "goal": state.get("goal", ""),
        "review": json.dumps(review, ensure_ascii=False, indent=2),
        "backlog": self._backlog_text(state),
        "context": collect_context(self.current_dir, max_bytes=35_000),
    })
    raw = self._call(state, "SCOUT", scout_prompt, self.current_dir, step="SCOUT", agent="AgentSCOUT")
    scouted = extract_json(raw, {"candidates": []})
    proposed = scouted.get("candidates", []) if isinstance(scouted, dict) else []
    candidates = [c for c in proposed if isinstance(c, dict)] if isinstance(proposed, list) else []
    if not candidates:
        return

    evaluate = bool(self.steps.get("evaluate"))
    by_title: dict[str, dict[str, Any]] = {}
    if evaluate:
        eval_prompt = prompts.render_template(self.app_dir, "evaluate", {
            "goal": state.get("goal", ""),
            "candidates": json.dumps(candidates, ensure_ascii=False, indent=2),
            "threshold": self.value_threshold,
        })
        eraw = self._call(state, "EVALUATE", eval_prompt, self.current_dir, step="EVALUATE", agent="AgentEVALUATE")
        judged = extract_json(eraw, {"evaluations": []})
        evals = judged.get("evaluations", []) if isinstance(judged, dict) else []
        if isinstance(evals, list):
            by_title = {str(e.get("title", "")).strip().lower(): e
                        for e in evals if isinstance(e, dict)}

    existing = {str(b.get("title", "")).strip().lower() for b in state.get("backlog", [])}
    for cand in candidates:
        title = str(cand.get("title", "")).strip()
        key = title.lower()
        if not title or key in existing:
            continue
        existing.add(key)  # also de-duplicates repeats inside this batch
        ev = by_title.get(key, {})
        try:
            value = int(ev.get("value", 0) or 0)
        except (TypeError, ValueError, OverflowError):
            value = 0  # e.g. "high" or "85%" from the model
        accepted = bool(ev.get("accepted")) if evaluate else True
        status = "accepted" if accepted else "rejected"
        state.setdefault("backlog", []).append({
            "title": title, "description": cand.get("description", ""),
            "value": value, "effort": ev.get("effort", ""), "status": status,
            "reason": ev.get("reason", cand.get("rationale", "")),
            "proposed_in_version": version,
        })
    save_json(self.app_dir / "backlog.json", {"backlog": state.get("backlog", [])}, stamp=False)
743
+
744
+ def _backlog_text(self, state: dict[str, Any]) -> str:
745
+ accepted = [b for b in state.get("backlog", []) if b.get("status") == "accepted"]
746
+ if not accepted:
747
+ return "(empty)"
748
+ accepted.sort(key=lambda b: b.get("value", 0), reverse=True)
749
+ return "\n".join(f"- [{b.get('value', 0)}] {b.get('title')}: {b.get('description', '')[:120]}"
750
+ for b in accepted[:12])