autoforge-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/.claude/commands/check-code.md +32 -0
  2. package/.claude/commands/checkpoint.md +40 -0
  3. package/.claude/commands/create-spec.md +613 -0
  4. package/.claude/commands/expand-project.md +234 -0
  5. package/.claude/commands/gsd-to-autoforge-spec.md +10 -0
  6. package/.claude/commands/review-pr.md +75 -0
  7. package/.claude/templates/app_spec.template.txt +331 -0
  8. package/.claude/templates/coding_prompt.template.md +265 -0
  9. package/.claude/templates/initializer_prompt.template.md +354 -0
  10. package/.claude/templates/testing_prompt.template.md +146 -0
  11. package/.env.example +64 -0
  12. package/LICENSE.md +676 -0
  13. package/README.md +423 -0
  14. package/agent.py +444 -0
  15. package/api/__init__.py +10 -0
  16. package/api/database.py +536 -0
  17. package/api/dependency_resolver.py +449 -0
  18. package/api/migration.py +156 -0
  19. package/auth.py +83 -0
  20. package/autoforge_paths.py +315 -0
  21. package/autonomous_agent_demo.py +293 -0
  22. package/bin/autoforge.js +3 -0
  23. package/client.py +607 -0
  24. package/env_constants.py +27 -0
  25. package/examples/OPTIMIZE_CONFIG.md +230 -0
  26. package/examples/README.md +531 -0
  27. package/examples/org_config.yaml +172 -0
  28. package/examples/project_allowed_commands.yaml +139 -0
  29. package/lib/cli.js +791 -0
  30. package/mcp_server/__init__.py +1 -0
  31. package/mcp_server/feature_mcp.py +988 -0
  32. package/package.json +53 -0
  33. package/parallel_orchestrator.py +1800 -0
  34. package/progress.py +247 -0
  35. package/prompts.py +427 -0
  36. package/pyproject.toml +17 -0
  37. package/rate_limit_utils.py +132 -0
  38. package/registry.py +614 -0
  39. package/requirements-prod.txt +14 -0
  40. package/security.py +959 -0
  41. package/server/__init__.py +17 -0
  42. package/server/main.py +261 -0
  43. package/server/routers/__init__.py +32 -0
  44. package/server/routers/agent.py +177 -0
  45. package/server/routers/assistant_chat.py +327 -0
  46. package/server/routers/devserver.py +309 -0
  47. package/server/routers/expand_project.py +239 -0
  48. package/server/routers/features.py +746 -0
  49. package/server/routers/filesystem.py +514 -0
  50. package/server/routers/projects.py +524 -0
  51. package/server/routers/schedules.py +356 -0
  52. package/server/routers/settings.py +127 -0
  53. package/server/routers/spec_creation.py +357 -0
  54. package/server/routers/terminal.py +453 -0
  55. package/server/schemas.py +593 -0
  56. package/server/services/__init__.py +36 -0
  57. package/server/services/assistant_chat_session.py +496 -0
  58. package/server/services/assistant_database.py +304 -0
  59. package/server/services/chat_constants.py +57 -0
  60. package/server/services/dev_server_manager.py +557 -0
  61. package/server/services/expand_chat_session.py +399 -0
  62. package/server/services/process_manager.py +657 -0
  63. package/server/services/project_config.py +475 -0
  64. package/server/services/scheduler_service.py +683 -0
  65. package/server/services/spec_chat_session.py +502 -0
  66. package/server/services/terminal_manager.py +756 -0
  67. package/server/utils/__init__.py +1 -0
  68. package/server/utils/process_utils.py +134 -0
  69. package/server/utils/project_helpers.py +32 -0
  70. package/server/utils/validation.py +54 -0
  71. package/server/websocket.py +903 -0
  72. package/start.py +456 -0
  73. package/ui/dist/assets/index-8W_wmZzz.js +168 -0
  74. package/ui/dist/assets/index-B47Ubhox.css +1 -0
  75. package/ui/dist/assets/vendor-flow-CVNK-_lx.js +7 -0
  76. package/ui/dist/assets/vendor-query-BUABzP5o.js +1 -0
  77. package/ui/dist/assets/vendor-radix-DTNNCg2d.js +45 -0
  78. package/ui/dist/assets/vendor-react-qkC6yhPU.js +1 -0
  79. package/ui/dist/assets/vendor-utils-COeKbHgx.js +2 -0
  80. package/ui/dist/assets/vendor-xterm-DP_gxef0.js +16 -0
  81. package/ui/dist/index.html +23 -0
  82. package/ui/dist/ollama.png +0 -0
  83. package/ui/dist/vite.svg +6 -0
  84. package/ui/package.json +57 -0
@@ -0,0 +1,1800 @@
1
+ """
2
+ Parallel Orchestrator
3
+ =====================
4
+
5
+ Unified orchestrator that handles all agent lifecycle:
6
+ - Initialization: Creates features from app_spec if needed
7
+ - Coding agents: Implement features one at a time
8
+ - Testing agents: Regression test passing features (optional)
9
+
10
+ Uses dependency-aware scheduling to ensure features are only started when their
11
+ dependencies are satisfied.
12
+
13
+ Usage:
14
+ # Entry point (always uses orchestrator)
15
+ python autonomous_agent_demo.py --project-dir my-app --concurrency 3
16
+
17
+ # Direct orchestrator usage
18
+ python parallel_orchestrator.py --project-dir my-app --max-concurrency 3
19
+ """
20
+
21
+ import asyncio
22
+ import atexit
23
+ import logging
24
+ import os
25
+ import re
26
+ import signal
27
+ import subprocess
28
+ import sys
29
+ import threading
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+ from typing import Any, Callable, Literal
33
+
34
+ from sqlalchemy import text
35
+
36
+ from api.database import Feature, create_database
37
+ from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores
38
+ from progress import has_features
39
+ from server.utils.process_utils import kill_process_tree
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ # Root directory of autoforge (where this script and autonomous_agent_demo.py live)
44
+ AUTOFORGE_ROOT = Path(__file__).parent.resolve()
45
+
46
+ # Debug log file path
47
+ DEBUG_LOG_FILE = AUTOFORGE_ROOT / "orchestrator_debug.log"
48
+
49
+
50
class DebugLogger:
    """Thread-safe debug logger that appends timestamped entries to a file.

    All writes are serialized through a lock so entries from concurrent
    threads never interleave. The log file is only truncated when
    start_session() is called — never at import time.
    """

    def __init__(self, log_file: Path = DEBUG_LOG_FILE):
        """Create a logger targeting *log_file* (defaults to DEBUG_LOG_FILE)."""
        self.log_file = log_file
        self._lock = threading.Lock()
        self._session_started = False
        # DON'T clear on import - only mark session start when run_loop begins

    def start_session(self) -> None:
        """Mark the start of a new orchestrator session. Clears previous logs."""
        with self._lock:
            self._session_started = True
            # Fix: pass an explicit encoding. The default text encoding is
            # locale-dependent (e.g. cp1252 on Windows) and can raise on
            # non-ASCII content written by later log() calls.
            with open(self.log_file, "w", encoding="utf-8") as f:
                f.write(f"=== Orchestrator Debug Log Started: {datetime.now().isoformat()} ===\n")
                f.write(f"=== PID: {os.getpid()} ===\n\n")

    def log(self, category: str, message: str, **kwargs):
        """Write a timestamped log entry; kwargs are rendered one per line."""
        # Millisecond precision: strftime yields microseconds, trim last 3 digits.
        timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
        with self._lock:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(f"[{timestamp}] [{category}] {message}\n")
                for key, value in kwargs.items():
                    f.write(f" {key}: {value}\n")
                f.write("\n")

    def section(self, title: str):
        """Write a visually separated section header."""
        with self._lock:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(f"\n{'='*60}\n")
                f.write(f" {title}\n")
                f.write(f"{'='*60}\n\n")
84
+
85
+
86
# Global debug logger instance shared by this module. Note the log file is
# not cleared here; DebugLogger.start_session() truncates it when a run begins.
debug_log = DebugLogger()
88
+
89
+
90
def _dump_database_state(feature_dicts: list[dict], label: str = ""):
    """Write a snapshot of all feature states to the debug log.

    Args:
        feature_dicts: Pre-fetched list of feature dicts.
        label: Optional label for the dump entry.
    """
    # Partition features into three mutually exclusive buckets in one pass:
    # passing, in-progress (and not passing), and everything else (pending).
    passing_ids: list[int] = []
    in_progress_ids: list[int] = []
    pending_ids: list[int] = []
    for fd in feature_dicts:
        if fd.get("passes"):
            passing_ids.append(fd["id"])
        elif fd.get("in_progress"):
            in_progress_ids.append(fd["id"])
        else:
            pending_ids.append(fd["id"])

    debug_log.log("DB_DUMP", f"Full database state {label}",
                  total_features=len(feature_dicts),
                  passing_count=len(passing_ids),
                  passing_ids=passing_ids,
                  in_progress_count=len(in_progress_ids),
                  in_progress_ids=in_progress_ids,
                  pending_count=len(pending_ids),
                  pending_ids=pending_ids[:10])  # First 10 pending only
109
+
110
# =============================================================================
# Process Limits
# =============================================================================
# These constants bound the number of concurrent agent processes to prevent
# resource exhaustion (memory, CPU, API rate limits).
#
# MAX_PARALLEL_AGENTS: Max concurrent coding agents (each is a Claude session)
# MAX_TOTAL_AGENTS: Hard limit on total child processes (coding + testing)
#
# Expected process count during normal operation:
#   - 1 orchestrator process (this script)
#   - Up to MAX_PARALLEL_AGENTS coding agents
#   - Up to max_concurrency testing agents
#   - Total never exceeds MAX_TOTAL_AGENTS + 1 (including orchestrator)
#
# Stress test verification (Windows commands shown):
#   1. Note baseline: tasklist | findstr python | find /c /v ""
#   2. Run: python autonomous_agent_demo.py --project-dir test --parallel --max-concurrency 5
#   3. During run: count should never exceed baseline + 11 (1 orchestrator + 10 agents)
#   4. After stop: should return to baseline
# =============================================================================
MAX_PARALLEL_AGENTS = 5  # Upper clamp for coding-agent concurrency
MAX_TOTAL_AGENTS = 10  # Hard ceiling on coding + testing child processes
DEFAULT_CONCURRENCY = 3  # Coding-agent concurrency when none is specified
DEFAULT_TESTING_BATCH_SIZE = 3  # Number of features per testing batch (1-5)
POLL_INTERVAL = 5  # seconds between checking for ready features
MAX_FEATURE_RETRIES = 3  # Maximum times to retry a failed feature
INITIALIZER_TIMEOUT = 1800  # 30 minutes timeout for initializer
138
+
139
+
140
+ class ParallelOrchestrator:
141
+ """Orchestrates parallel execution of independent features.
142
+
143
+ Process bounds:
144
+ - Up to MAX_PARALLEL_AGENTS (5) coding agents concurrently
145
+ - Up to max_concurrency testing agents concurrently
146
+ - Hard limit of MAX_TOTAL_AGENTS (10) total child processes
147
+ """
148
+
149
+ def __init__(
150
+ self,
151
+ project_dir: Path,
152
+ max_concurrency: int = DEFAULT_CONCURRENCY,
153
+ model: str | None = None,
154
+ yolo_mode: bool = False,
155
+ testing_agent_ratio: int = 1,
156
+ testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
157
+ batch_size: int = 3,
158
+ on_output: Callable[[int, str], None] | None = None,
159
+ on_status: Callable[[int, str], None] | None = None,
160
+ ):
161
+ """Initialize the orchestrator.
162
+
163
+ Args:
164
+ project_dir: Path to the project directory
165
+ max_concurrency: Maximum number of concurrent coding agents (1-5).
166
+ Also caps testing agents at the same limit.
167
+ model: Claude model to use (or None for default)
168
+ yolo_mode: Whether to run in YOLO mode (skip testing agents entirely)
169
+ testing_agent_ratio: Number of regression testing agents to maintain (0-3).
170
+ 0 = disabled, 1-3 = maintain that many testing agents running independently.
171
+ testing_batch_size: Number of features to include per testing session (1-5).
172
+ Each testing agent receives this many features to regression test.
173
+ on_output: Callback for agent output (feature_id, line)
174
+ on_status: Callback for agent status changes (feature_id, status)
175
+ """
176
+ self.project_dir = project_dir
177
+ self.max_concurrency = min(max(max_concurrency, 1), MAX_PARALLEL_AGENTS)
178
+ self.model = model
179
+ self.yolo_mode = yolo_mode
180
+ self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3
181
+ self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5
182
+ self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3
183
+ self.on_output = on_output
184
+ self.on_status = on_status
185
+
186
+ # Thread-safe state
187
+ self._lock = threading.Lock()
188
+ # Coding agents: feature_id -> process
189
+ # Safe to key by feature_id because start_feature() checks for duplicates before spawning
190
+ self.running_coding_agents: dict[int, subprocess.Popen] = {}
191
+ # Testing agents: pid -> (feature_id, process)
192
+ # Keyed by PID (not feature_id) because multiple agents can test the same feature
193
+ self.running_testing_agents: dict[int, tuple[int, subprocess.Popen]] = {}
194
+ # Legacy alias for backward compatibility
195
+ self.running_agents = self.running_coding_agents
196
+ self.abort_events: dict[int, threading.Event] = {}
197
+ self.is_running = False
198
+
199
+ # Track feature failures to prevent infinite retry loops
200
+ self._failure_counts: dict[int, int] = {}
201
+
202
+ # Track recently tested feature IDs to avoid redundant re-testing.
203
+ # Cleared when all passing features have been covered at least once.
204
+ self._recently_tested: set[int] = set()
205
+
206
+ # Batch tracking: primary feature_id -> all feature IDs in batch
207
+ self._batch_features: dict[int, list[int]] = {}
208
+ # Reverse mapping: any feature_id -> primary feature_id
209
+ self._feature_to_primary: dict[int, int] = {}
210
+
211
+ # Shutdown flag for async-safe signal handling
212
+ # Signal handlers only set this flag; cleanup happens in the main loop
213
+ self._shutdown_requested = False
214
+
215
+ # Session tracking for logging/debugging
216
+ self.session_start_time: datetime | None = None
217
+
218
+ # Event signaled when any agent completes, allowing the main loop to wake
219
+ # immediately instead of waiting for the full POLL_INTERVAL timeout.
220
+ # This reduces latency when spawning the next feature after completion.
221
+ self._agent_completed_event: asyncio.Event | None = None # Created in run_loop
222
+ self._event_loop: asyncio.AbstractEventLoop | None = None # Stored for thread-safe signaling
223
+
224
+ # Database session for this orchestrator
225
+ self._engine, self._session_maker = create_database(project_dir)
226
+
227
+ def get_session(self):
228
+ """Get a new database session."""
229
+ return self._session_maker()
230
+
231
+ def _get_random_passing_feature(self) -> int | None:
232
+ """Get a random passing feature for regression testing (no claim needed).
233
+
234
+ Testing agents can test the same feature concurrently - it doesn't matter.
235
+ This simplifies the architecture by removing unnecessary coordination.
236
+
237
+ Returns the feature ID if available, None if no passing features exist.
238
+
239
+ Note: Prefer _get_test_batch() for batch testing mode. This method is
240
+ retained for backward compatibility.
241
+ """
242
+ from sqlalchemy.sql.expression import func
243
+
244
+ session = self.get_session()
245
+ try:
246
+ # Find a passing feature that's not currently being coded
247
+ # Multiple testing agents can test the same feature - that's fine
248
+ feature = (
249
+ session.query(Feature)
250
+ .filter(Feature.passes == True)
251
+ .filter(Feature.in_progress == False) # Don't test while coding
252
+ .order_by(func.random())
253
+ .first()
254
+ )
255
+ return feature.id if feature else None
256
+ finally:
257
+ session.close()
258
+
259
+ def _get_test_batch(self, batch_size: int = 3) -> list[int]:
260
+ """Select a prioritized batch of passing features for regression testing.
261
+
262
+ Uses weighted scoring to prioritize features that:
263
+ 1. Haven't been tested recently in this orchestrator session
264
+ 2. Are depended on by many other features (higher impact if broken)
265
+ 3. Have more dependencies themselves (complex integration points)
266
+
267
+ When all passing features have been recently tested, the tracking set
268
+ is cleared so the cycle starts fresh.
269
+
270
+ Args:
271
+ batch_size: Maximum number of feature IDs to return (1-5).
272
+
273
+ Returns:
274
+ List of feature IDs to test, may be shorter than batch_size if
275
+ fewer passing features are available. Empty list if none available.
276
+ """
277
+ session = self.get_session()
278
+ try:
279
+ session.expire_all()
280
+ passing = (
281
+ session.query(Feature)
282
+ .filter(Feature.passes == True)
283
+ .filter(Feature.in_progress == False) # Don't test while coding
284
+ .all()
285
+ )
286
+
287
+ # Extract data from ORM objects before closing the session to avoid
288
+ # DetachedInstanceError when accessing attributes after session.close().
289
+ passing_data: list[dict] = []
290
+ for f in passing:
291
+ passing_data.append({
292
+ 'id': f.id,
293
+ 'dependencies': f.get_dependencies_safe() if hasattr(f, 'get_dependencies_safe') else [],
294
+ })
295
+ finally:
296
+ session.close()
297
+
298
+ if not passing_data:
299
+ return []
300
+
301
+ # Build a reverse dependency map: feature_id -> count of features that depend on it.
302
+ # The Feature model stores dependencies (what I depend ON), so we invert to find
303
+ # dependents (what depends ON me).
304
+ dependent_counts: dict[int, int] = {}
305
+ for fd in passing_data:
306
+ for dep_id in fd['dependencies']:
307
+ dependent_counts[dep_id] = dependent_counts.get(dep_id, 0) + 1
308
+
309
+ # Exclude features that are already being tested by running testing agents
310
+ # to avoid redundant concurrent testing of the same features.
311
+ # running_testing_agents is dict[pid, (primary_feature_id, process)]
312
+ with self._lock:
313
+ currently_testing_ids: set[int] = set()
314
+ for _pid, (feat_id, _proc) in self.running_testing_agents.items():
315
+ currently_testing_ids.add(feat_id)
316
+
317
+ # If all passing features have been recently tested, reset the tracker
318
+ # so we cycle through them again rather than returning empty batches.
319
+ passing_ids = {fd['id'] for fd in passing_data}
320
+ if passing_ids.issubset(self._recently_tested):
321
+ self._recently_tested.clear()
322
+
323
+ # Score each feature by testing priority
324
+ scored: list[tuple[int, int]] = []
325
+ for fd in passing_data:
326
+ f_id = fd['id']
327
+
328
+ # Skip features already being tested by a running testing agent
329
+ if f_id in currently_testing_ids:
330
+ continue
331
+
332
+ score = 0
333
+
334
+ # Weight 1: Features depended on by many others are higher impact
335
+ # if they regress, so test them more often
336
+ score += dependent_counts.get(f_id, 0) * 2
337
+
338
+ # Weight 2: Strongly prefer features not tested recently
339
+ if f_id not in self._recently_tested:
340
+ score += 5
341
+
342
+ # Weight 3: Features with more dependencies are integration points
343
+ # that are more likely to regress when other code changes
344
+ dep_count = len(fd['dependencies'])
345
+ score += min(dep_count, 3) # Cap at 3 to avoid over-weighting
346
+
347
+ scored.append((f_id, score))
348
+
349
+ # Sort by score descending (highest priority first)
350
+ scored.sort(key=lambda x: x[1], reverse=True)
351
+ selected = [fid for fid, _ in scored[:batch_size]]
352
+
353
+ # Track what we've tested to avoid re-testing the same features next batch
354
+ self._recently_tested.update(selected)
355
+
356
+ debug_log.log("TEST_BATCH", f"Selected {len(selected)} features for testing batch",
357
+ selected_ids=selected,
358
+ recently_tested_count=len(self._recently_tested),
359
+ total_passing=len(passing_data))
360
+
361
+ return selected
362
+
363
+ def build_feature_batches(
364
+ self,
365
+ ready: list[dict],
366
+ all_features: list[dict],
367
+ scheduling_scores: dict[int, float],
368
+ ) -> list[list[dict]]:
369
+ """Build dependency-aware feature batches for coding agents.
370
+
371
+ Each batch contains up to `batch_size` features. The algorithm:
372
+ 1. Start with a ready feature (sorted by scheduling score)
373
+ 2. Chain extension: find dependents whose deps are satisfied if earlier batch features pass
374
+ 3. Same-category fill: fill remaining slots with ready features from the same category
375
+
376
+ Args:
377
+ ready: Ready features (sorted by scheduling score)
378
+ all_features: All features for dependency checking
379
+ scheduling_scores: Pre-computed scheduling scores
380
+
381
+ Returns:
382
+ List of batches, each batch is a list of feature dicts
383
+ """
384
+ if self.batch_size <= 1:
385
+ # No batching - return each feature as a single-item batch
386
+ return [[f] for f in ready]
387
+
388
+ # Build children adjacency: parent_id -> [child_ids]
389
+ children: dict[int, list[int]] = {f["id"]: [] for f in all_features}
390
+ feature_map: dict[int, dict] = {f["id"]: f for f in all_features}
391
+ for f in all_features:
392
+ for dep_id in (f.get("dependencies") or []):
393
+ if dep_id in children:
394
+ children[dep_id].append(f["id"])
395
+
396
+ # Pre-compute passing IDs
397
+ passing_ids = {f["id"] for f in all_features if f.get("passes")}
398
+
399
+ used_ids: set[int] = set() # Features already assigned to a batch
400
+ batches: list[list[dict]] = []
401
+
402
+ for feature in ready:
403
+ if feature["id"] in used_ids:
404
+ continue
405
+
406
+ batch = [feature]
407
+ used_ids.add(feature["id"])
408
+ # Simulate passing set = real passing + batch features
409
+ simulated_passing = passing_ids | {feature["id"]}
410
+
411
+ # Phase 1: Chain extension - find dependents whose deps are met
412
+ for _ in range(self.batch_size - 1):
413
+ best_candidate = None
414
+ best_score = -1.0
415
+ # Check children of all features currently in the batch
416
+ candidate_ids: set[int] = set()
417
+ for bf in batch:
418
+ for child_id in children.get(bf["id"], []):
419
+ if child_id not in used_ids and child_id not in simulated_passing:
420
+ candidate_ids.add(child_id)
421
+
422
+ for cid in candidate_ids:
423
+ cf = feature_map.get(cid)
424
+ if not cf or cf.get("passes") or cf.get("in_progress"):
425
+ continue
426
+ # Check if ALL deps are satisfied by simulated passing set
427
+ deps = cf.get("dependencies") or []
428
+ if all(d in simulated_passing for d in deps):
429
+ score = scheduling_scores.get(cid, 0)
430
+ if score > best_score:
431
+ best_score = score
432
+ best_candidate = cf
433
+
434
+ if best_candidate:
435
+ batch.append(best_candidate)
436
+ used_ids.add(best_candidate["id"])
437
+ simulated_passing.add(best_candidate["id"])
438
+ else:
439
+ break
440
+
441
+ # Phase 2: Same-category fill
442
+ if len(batch) < self.batch_size:
443
+ category = feature.get("category", "")
444
+ for rf in ready:
445
+ if len(batch) >= self.batch_size:
446
+ break
447
+ if rf["id"] in used_ids:
448
+ continue
449
+ if rf.get("category", "") == category:
450
+ batch.append(rf)
451
+ used_ids.add(rf["id"])
452
+
453
+ batches.append(batch)
454
+
455
+ debug_log.log("BATCH", f"Built {len(batches)} batches from {len(ready)} ready features",
456
+ batch_sizes=[len(b) for b in batches],
457
+ batch_ids=[[f['id'] for f in b] for b in batches[:5]])
458
+
459
+ return batches
460
+
461
+ def get_resumable_features(
462
+ self,
463
+ feature_dicts: list[dict] | None = None,
464
+ scheduling_scores: dict[int, float] | None = None,
465
+ ) -> list[dict]:
466
+ """Get features that were left in_progress from a previous session.
467
+
468
+ These are features where in_progress=True but passes=False, and they're
469
+ not currently being worked on by this orchestrator. This handles the case
470
+ where a previous session was interrupted before completing the feature.
471
+
472
+ Args:
473
+ feature_dicts: Pre-fetched list of feature dicts. If None, queries the database.
474
+ scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts.
475
+ """
476
+ if feature_dicts is None:
477
+ session = self.get_session()
478
+ try:
479
+ session.expire_all()
480
+ all_features = session.query(Feature).all()
481
+ feature_dicts = [f.to_dict() for f in all_features]
482
+ finally:
483
+ session.close()
484
+
485
+ # Snapshot running IDs once (include all batch feature IDs)
486
+ with self._lock:
487
+ running_ids = set(self.running_coding_agents.keys())
488
+ for batch_ids in self._batch_features.values():
489
+ running_ids.update(batch_ids)
490
+
491
+ resumable = []
492
+ for fd in feature_dicts:
493
+ if not fd.get("in_progress") or fd.get("passes"):
494
+ continue
495
+ # Skip if already running in this orchestrator instance
496
+ if fd["id"] in running_ids:
497
+ continue
498
+ # Skip if feature has failed too many times
499
+ if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES:
500
+ continue
501
+ resumable.append(fd)
502
+
503
+ # Sort by scheduling score (higher = first), then priority, then id
504
+ if scheduling_scores is None:
505
+ scheduling_scores = compute_scheduling_scores(feature_dicts)
506
+ resumable.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"]))
507
+ return resumable
508
+
509
+ def get_ready_features(
510
+ self,
511
+ feature_dicts: list[dict] | None = None,
512
+ scheduling_scores: dict[int, float] | None = None,
513
+ ) -> list[dict]:
514
+ """Get features with satisfied dependencies, not already running.
515
+
516
+ Args:
517
+ feature_dicts: Pre-fetched list of feature dicts. If None, queries the database.
518
+ scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts.
519
+ """
520
+ if feature_dicts is None:
521
+ session = self.get_session()
522
+ try:
523
+ session.expire_all()
524
+ all_features = session.query(Feature).all()
525
+ feature_dicts = [f.to_dict() for f in all_features]
526
+ finally:
527
+ session.close()
528
+
529
+ # Pre-compute passing_ids once to avoid O(n^2) in the loop
530
+ passing_ids = {fd["id"] for fd in feature_dicts if fd.get("passes")}
531
+
532
+ # Snapshot running IDs once (include all batch feature IDs)
533
+ with self._lock:
534
+ running_ids = set(self.running_coding_agents.keys())
535
+ for batch_ids in self._batch_features.values():
536
+ running_ids.update(batch_ids)
537
+
538
+ ready = []
539
+ skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0}
540
+ for fd in feature_dicts:
541
+ if fd.get("passes"):
542
+ skipped_reasons["passes"] += 1
543
+ continue
544
+ if fd.get("in_progress"):
545
+ skipped_reasons["in_progress"] += 1
546
+ continue
547
+ # Skip if already running in this orchestrator
548
+ if fd["id"] in running_ids:
549
+ skipped_reasons["running"] += 1
550
+ continue
551
+ # Skip if feature has failed too many times
552
+ if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES:
553
+ skipped_reasons["failed"] += 1
554
+ continue
555
+ # Check dependencies (pass pre-computed passing_ids)
556
+ if are_dependencies_satisfied(fd, feature_dicts, passing_ids):
557
+ ready.append(fd)
558
+ else:
559
+ skipped_reasons["deps"] += 1
560
+
561
+ # Sort by scheduling score (higher = first), then priority, then id
562
+ if scheduling_scores is None:
563
+ scheduling_scores = compute_scheduling_scores(feature_dicts)
564
+ ready.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"]))
565
+
566
+ # Summary counts for logging
567
+ passing = skipped_reasons["passes"]
568
+ in_progress = skipped_reasons["in_progress"]
569
+ total = len(feature_dicts)
570
+
571
+ debug_log.log("READY", "get_ready_features() called",
572
+ ready_count=len(ready),
573
+ ready_ids=[f['id'] for f in ready[:5]], # First 5 only
574
+ passing=passing,
575
+ in_progress=in_progress,
576
+ total=total,
577
+ skipped=skipped_reasons)
578
+
579
+ return ready
580
+
581
+ def get_all_complete(self, feature_dicts: list[dict] | None = None) -> bool:
582
+ """Check if all features are complete or permanently failed.
583
+
584
+ Returns False if there are no features (initialization needed).
585
+
586
+ Args:
587
+ feature_dicts: Pre-fetched list of feature dicts. If None, queries the database.
588
+ """
589
+ if feature_dicts is None:
590
+ session = self.get_session()
591
+ try:
592
+ session.expire_all()
593
+ all_features = session.query(Feature).all()
594
+ feature_dicts = [f.to_dict() for f in all_features]
595
+ finally:
596
+ session.close()
597
+
598
+ # No features = NOT complete, need initialization
599
+ if len(feature_dicts) == 0:
600
+ return False
601
+
602
+ passing_count = 0
603
+ failed_count = 0
604
+ pending_count = 0
605
+ for fd in feature_dicts:
606
+ if fd.get("passes"):
607
+ passing_count += 1
608
+ continue # Completed successfully
609
+ if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES:
610
+ failed_count += 1
611
+ continue # Permanently failed, count as "done"
612
+ pending_count += 1
613
+
614
+ total = len(feature_dicts)
615
+ is_complete = pending_count == 0
616
+ debug_log.log("COMPLETE_CHECK", f"get_all_complete: {passing_count}/{total} passing, "
617
+ f"{failed_count} failed, {pending_count} pending -> {is_complete}")
618
+ return is_complete
619
+
620
+ def get_passing_count(self, feature_dicts: list[dict] | None = None) -> int:
621
+ """Get the number of passing features.
622
+
623
+ Args:
624
+ feature_dicts: Pre-fetched list of feature dicts. If None, queries the database.
625
+ """
626
+ if feature_dicts is None:
627
+ session = self.get_session()
628
+ try:
629
+ session.expire_all()
630
+ count: int = session.query(Feature).filter(Feature.passes == True).count()
631
+ return count
632
+ finally:
633
+ session.close()
634
+ return sum(1 for fd in feature_dicts if fd.get("passes"))
635
+
636
+ def _maintain_testing_agents(self, feature_dicts: list[dict] | None = None) -> None:
637
+ """Maintain the desired count of testing agents independently.
638
+
639
+ This runs every loop iteration and spawns testing agents as needed to maintain
640
+ the configured testing_agent_ratio. Testing agents run independently from
641
+ coding agents and continuously re-test passing features to catch regressions.
642
+
643
+ Multiple testing agents can test the same feature concurrently - this is
644
+ intentional and simplifies the architecture by removing claim coordination.
645
+
646
+ Stops spawning when:
647
+ - YOLO mode is enabled
648
+ - testing_agent_ratio is 0
649
+ - No passing features exist yet
650
+
651
+ Args:
652
+ feature_dicts: Pre-fetched list of feature dicts. If None, queries the database.
653
+ """
654
+ # Skip if testing is disabled
655
+ if self.yolo_mode or self.testing_agent_ratio == 0:
656
+ return
657
+
658
+ # No testing until there are passing features
659
+ passing_count = self.get_passing_count(feature_dicts)
660
+ if passing_count == 0:
661
+ return
662
+
663
+ # Don't spawn testing agents if all features are already complete
664
+ if self.get_all_complete(feature_dicts):
665
+ return
666
+
667
+ # Spawn testing agents one at a time, re-checking limits each time
668
+ # This avoids TOCTOU race by holding lock during the decision
669
+ while True:
670
+ # Check limits and decide whether to spawn (atomically)
671
+ with self._lock:
672
+ current_testing = len(self.running_testing_agents)
673
+ desired = self.testing_agent_ratio
674
+ total_agents = len(self.running_coding_agents) + current_testing
675
+
676
+ # Check if we need more testing agents
677
+ if current_testing >= desired:
678
+ return # Already at desired count
679
+
680
+ # Check hard limit on total agents
681
+ if total_agents >= MAX_TOTAL_AGENTS:
682
+ return # At max total agents
683
+
684
+ # We're going to spawn - log while still holding lock
685
+ spawn_index = current_testing + 1
686
+ debug_log.log("TESTING", f"Spawning testing agent ({spawn_index}/{desired})",
687
+ passing_count=passing_count)
688
+
689
+ # Spawn outside lock (I/O bound operation)
690
+ logger.debug("Spawning testing agent (%d/%d)", spawn_index, desired)
691
+ success, msg = self._spawn_testing_agent()
692
+ if not success:
693
+ debug_log.log("TESTING", f"Spawn failed, stopping: {msg}")
694
+ return
695
+
696
    def start_feature(self, feature_id: int, resume: bool = False) -> tuple[bool, str]:
        """Start a single coding agent for a feature.

        Marks the feature as in_progress in the database (unless resuming, in
        which case it must already be in_progress) and then spawns a coding
        agent subprocess for it via _spawn_coding_agent().

        Args:
            feature_id: ID of the feature to start
            resume: If True, resume a feature that's already in_progress from a previous session

        Returns:
            Tuple of (success, message)
        """
        # Capacity checks are done atomically under the lock; the lock is
        # released before the DB work and the spawn (which re-acquires it).
        with self._lock:
            if feature_id in self.running_coding_agents:
                return False, "Feature already running"
            if len(self.running_coding_agents) >= self.max_concurrency:
                return False, "At max concurrency"
            # Enforce hard limit on total agents (coding + testing)
            total_agents = len(self.running_coding_agents) + len(self.running_testing_agents)
            if total_agents >= MAX_TOTAL_AGENTS:
                return False, f"At max total agents ({total_agents}/{MAX_TOTAL_AGENTS})"

        # Mark as in_progress in database (or verify it's resumable)
        session = self.get_session()
        try:
            feature = session.query(Feature).filter(Feature.id == feature_id).first()
            if not feature:
                return False, "Feature not found"
            if feature.passes:
                return False, "Feature already complete"

            if resume:
                # Resuming: feature should already be in_progress
                if not feature.in_progress:
                    return False, "Feature not in progress, cannot resume"
            else:
                # Starting fresh: feature should not be in_progress
                if feature.in_progress:
                    return False, "Feature already in progress"
                feature.in_progress = True
                session.commit()
        finally:
            session.close()

        # Start coding agent subprocess
        # (on spawn failure _spawn_coding_agent resets in_progress itself)
        success, message = self._spawn_coding_agent(feature_id)
        if not success:
            return False, message

        # NOTE: Testing agents are now maintained independently via _maintain_testing_agents()
        # called in the main loop, rather than being spawned when coding agents start.

        return True, f"Started feature {feature_id}"
747
+
748
    def start_feature_batch(self, feature_ids: list[int], resume: bool = False) -> tuple[bool, str]:
        """Start a coding agent for a batch of features.

        All features are validated and marked in_progress in a single
        transaction, then one coding-agent subprocess is spawned for the
        whole batch. If the spawn fails, the in_progress flags set here are
        rolled back (resumed features keep their pre-existing flag).

        Args:
            feature_ids: List of feature IDs to implement in batch
            resume: If True, resume features already in_progress

        Returns:
            Tuple of (success, message)
        """
        if not feature_ids:
            return False, "No features to start"

        # Single feature falls back to start_feature
        if len(feature_ids) == 1:
            return self.start_feature(feature_ids[0], resume=resume)

        with self._lock:
            # Check if any feature in batch is already running
            # (_feature_to_primary also covers members of other running batches)
            for fid in feature_ids:
                if fid in self.running_coding_agents or fid in self._feature_to_primary:
                    return False, f"Feature {fid} already running"
            if len(self.running_coding_agents) >= self.max_concurrency:
                return False, "At max concurrency"
            total_agents = len(self.running_coding_agents) + len(self.running_testing_agents)
            if total_agents >= MAX_TOTAL_AGENTS:
                return False, f"At max total agents ({total_agents}/{MAX_TOTAL_AGENTS})"

        # Mark all features as in_progress in a single transaction
        session = self.get_session()
        try:
            features_to_mark = []
            for fid in feature_ids:
                feature = session.query(Feature).filter(Feature.id == fid).first()
                if not feature:
                    return False, f"Feature {fid} not found"
                if feature.passes:
                    return False, f"Feature {fid} already complete"
                if not resume:
                    if feature.in_progress:
                        return False, f"Feature {fid} already in progress"
                    features_to_mark.append(feature)
                else:
                    if not feature.in_progress:
                        return False, f"Feature {fid} not in progress, cannot resume"

            # Single commit so the whole batch becomes visible atomically;
            # early returns above leave the DB untouched (nothing committed).
            for feature in features_to_mark:
                feature.in_progress = True
            session.commit()
        finally:
            session.close()

        # Spawn batch coding agent
        success, message = self._spawn_coding_agent_batch(feature_ids)
        if not success:
            # Clear in_progress on failure (only for fresh starts; resumed
            # features were already in_progress before this call)
            session = self.get_session()
            try:
                for fid in feature_ids:
                    feature = session.query(Feature).filter(Feature.id == fid).first()
                    if feature and not resume:
                        feature.in_progress = False
                session.commit()
            finally:
                session.close()
            return False, message

        return True, f"Started batch [{', '.join(str(fid) for fid in feature_ids)}]"
816
+
817
    def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
        """Spawn a coding agent subprocess for a specific feature.

        Args:
            feature_id: Feature the agent will implement.

        Returns:
            Tuple of (success, message). On Popen failure the feature's
            in_progress flag is reset so the scheduler can retry it.
        """
        # Create abort event (polled by the output-reader thread to stop early)
        abort_event = threading.Event()

        # Start subprocess for this feature
        cmd = [
            sys.executable,
            "-u",  # Force unbuffered stdout/stderr
            str(AUTOFORGE_ROOT / "autonomous_agent_demo.py"),
            "--project-dir", str(self.project_dir),
            "--max-iterations", "1",
            "--agent-type", "coding",
            "--feature-id", str(feature_id),
        ]
        if self.model:
            cmd.extend(["--model", self.model])
        if self.yolo_mode:
            cmd.append("--yolo")

        try:
            # CREATE_NO_WINDOW on Windows prevents console window pop-ups
            # stdin=DEVNULL prevents blocking on stdin reads
            # encoding="utf-8" and errors="replace" fix Windows CP1252 issues
            popen_kwargs: dict[str, Any] = {
                "stdin": subprocess.DEVNULL,
                "stdout": subprocess.PIPE,
                "stderr": subprocess.STDOUT,
                "text": True,
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW

            proc = subprocess.Popen(cmd, **popen_kwargs)
        except Exception as e:
            # Reset in_progress on failure so the feature isn't stranded
            session = self.get_session()
            try:
                feature = session.query(Feature).filter(Feature.id == feature_id).first()
                if feature:
                    feature.in_progress = False
                session.commit()
            finally:
                session.close()
            return False, f"Failed to start agent: {e}"

        # Register the process and its abort event atomically
        with self._lock:
            self.running_coding_agents[feature_id] = proc
            self.abort_events[feature_id] = abort_event

        # Start output reader thread (daemon: must not block interpreter exit;
        # it also reports completion via _on_agent_complete when proc exits)
        threading.Thread(
            target=self._read_output,
            args=(feature_id, proc, abort_event, "coding"),
            daemon=True
        ).start()

        if self.on_status is not None:
            self.on_status(feature_id, "running")

        print(f"Started coding agent for feature #{feature_id}", flush=True)
        return True, f"Started feature {feature_id}"
883
+
884
    def _spawn_coding_agent_batch(self, feature_ids: list[int]) -> tuple[bool, str]:
        """Spawn a coding agent subprocess for a batch of features.

        The first id in the batch is the "primary": the subprocess, abort
        event, and output-reader thread are keyed by it, and
        _feature_to_primary maps every batch member back to it so
        stop_feature()/_on_agent_complete() can resolve the batch.

        Args:
            feature_ids: Feature IDs implemented by a single batch agent.

        Returns:
            Tuple of (success, message). On Popen failure every feature's
            in_progress flag is reset.
        """
        primary_id = feature_ids[0]
        abort_event = threading.Event()

        cmd = [
            sys.executable,
            "-u",
            str(AUTOFORGE_ROOT / "autonomous_agent_demo.py"),
            "--project-dir", str(self.project_dir),
            "--max-iterations", "1",
            "--agent-type", "coding",
            "--feature-ids", ",".join(str(fid) for fid in feature_ids),
        ]
        if self.model:
            cmd.extend(["--model", self.model])
        if self.yolo_mode:
            cmd.append("--yolo")

        try:
            # Same Popen setup as _spawn_coding_agent: no console window on
            # Windows, devnull stdin, UTF-8 with replacement for CP1252 issues
            popen_kwargs: dict[str, Any] = {
                "stdin": subprocess.DEVNULL,
                "stdout": subprocess.PIPE,
                "stderr": subprocess.STDOUT,
                "text": True,
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW

            proc = subprocess.Popen(cmd, **popen_kwargs)
        except Exception as e:
            # Reset in_progress on failure for the whole batch
            session = self.get_session()
            try:
                for fid in feature_ids:
                    feature = session.query(Feature).filter(Feature.id == fid).first()
                    if feature:
                        feature.in_progress = False
                session.commit()
            finally:
                session.close()
            return False, f"Failed to start batch agent: {e}"

        # Register the process under the primary id and record batch membership
        with self._lock:
            self.running_coding_agents[primary_id] = proc
            self.abort_events[primary_id] = abort_event
            self._batch_features[primary_id] = list(feature_ids)
            for fid in feature_ids:
                self._feature_to_primary[fid] = primary_id

        # Start output reader thread
        threading.Thread(
            target=self._read_output,
            args=(primary_id, proc, abort_event, "coding"),
            daemon=True
        ).start()

        if self.on_status is not None:
            for fid in feature_ids:
                self.on_status(fid, "running")

        ids_str = ", ".join(f"#{fid}" for fid in feature_ids)
        print(f"Started coding agent for features {ids_str}", flush=True)
        return True, f"Started batch [{ids_str}]"
952
+
953
+ def _spawn_testing_agent(self) -> tuple[bool, str]:
954
+ """Spawn a testing agent subprocess for batch regression testing.
955
+
956
+ Selects a prioritized batch of passing features using weighted scoring
957
+ (via _get_test_batch) and passes them as --testing-feature-ids to the
958
+ subprocess. Falls back to single --testing-feature-id for batches of one.
959
+
960
+ Multiple testing agents can test the same feature concurrently - this is
961
+ intentional and simplifies the architecture by removing claim coordination.
962
+ """
963
+ # Check limits first (under lock)
964
+ with self._lock:
965
+ current_testing_count = len(self.running_testing_agents)
966
+ if current_testing_count >= self.max_concurrency:
967
+ debug_log.log("TESTING", f"Skipped spawn - at max testing agents ({current_testing_count}/{self.max_concurrency})")
968
+ return False, f"At max testing agents ({current_testing_count})"
969
+ total_agents = len(self.running_coding_agents) + len(self.running_testing_agents)
970
+ if total_agents >= MAX_TOTAL_AGENTS:
971
+ debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})")
972
+ return False, f"At max total agents ({total_agents})"
973
+
974
+ # Select a weighted batch of passing features for regression testing
975
+ batch = self._get_test_batch(self.testing_batch_size)
976
+ if not batch:
977
+ debug_log.log("TESTING", "No features available for testing")
978
+ return False, "No features available for testing"
979
+
980
+ # Use the first feature ID as the representative for logging/tracking
981
+ primary_feature_id = batch[0]
982
+ batch_str = ",".join(str(fid) for fid in batch)
983
+ debug_log.log("TESTING", f"Selected batch for testing: [{batch_str}]")
984
+
985
+ # Spawn the testing agent
986
+ with self._lock:
987
+ # Re-check limits in case another thread spawned while we were selecting
988
+ current_testing_count = len(self.running_testing_agents)
989
+ if current_testing_count >= self.max_concurrency:
990
+ return False, f"At max testing agents ({current_testing_count})"
991
+
992
+ cmd = [
993
+ sys.executable,
994
+ "-u",
995
+ str(AUTOFORGE_ROOT / "autonomous_agent_demo.py"),
996
+ "--project-dir", str(self.project_dir),
997
+ "--max-iterations", "1",
998
+ "--agent-type", "testing",
999
+ "--testing-feature-ids", batch_str,
1000
+ ]
1001
+ if self.model:
1002
+ cmd.extend(["--model", self.model])
1003
+
1004
+ try:
1005
+ # CREATE_NO_WINDOW on Windows prevents console window pop-ups
1006
+ # stdin=DEVNULL prevents blocking on stdin reads
1007
+ # encoding="utf-8" and errors="replace" fix Windows CP1252 issues
1008
+ popen_kwargs: dict[str, Any] = {
1009
+ "stdin": subprocess.DEVNULL,
1010
+ "stdout": subprocess.PIPE,
1011
+ "stderr": subprocess.STDOUT,
1012
+ "text": True,
1013
+ "encoding": "utf-8",
1014
+ "errors": "replace",
1015
+ "cwd": str(self.project_dir), # Run from project dir so CLI creates .claude/ in project
1016
+ "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
1017
+ }
1018
+ if sys.platform == "win32":
1019
+ popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
1020
+
1021
+ proc = subprocess.Popen(cmd, **popen_kwargs)
1022
+ except Exception as e:
1023
+ debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}")
1024
+ return False, f"Failed to start testing agent: {e}"
1025
+
1026
+ # Register process by PID (not feature_id) to avoid overwrites
1027
+ # when multiple agents test the same feature
1028
+ self.running_testing_agents[proc.pid] = (primary_feature_id, proc)
1029
+ testing_count = len(self.running_testing_agents)
1030
+
1031
+ # Start output reader thread with primary feature ID for log attribution
1032
+ threading.Thread(
1033
+ target=self._read_output,
1034
+ args=(primary_feature_id, proc, threading.Event(), "testing"),
1035
+ daemon=True
1036
+ ).start()
1037
+
1038
+ print(f"Started testing agent for features [{batch_str}] (PID {proc.pid})", flush=True)
1039
+ debug_log.log("TESTING", f"Successfully spawned testing agent for batch [{batch_str}]",
1040
+ pid=proc.pid,
1041
+ feature_ids=batch,
1042
+ total_testing_agents=testing_count)
1043
+ return True, f"Started testing agent for features [{batch_str}]"
1044
+
1045
    async def _run_initializer(self) -> bool:
        """Run initializer agent as blocking subprocess.

        Streams the subprocess's stdout to the console (and to self.on_output
        with feature_id 0) until it exits or INITIALIZER_TIMEOUT elapses; on
        timeout the whole process tree is killed.

        Returns True if initialization succeeded (features were created).
        """
        debug_log.section("INITIALIZER PHASE")
        debug_log.log("INIT", "Starting initializer subprocess",
                      project_dir=str(self.project_dir))

        cmd = [
            sys.executable, "-u",
            str(AUTOFORGE_ROOT / "autonomous_agent_demo.py"),
            "--project-dir", str(self.project_dir),
            "--agent-type", "initializer",
            "--max-iterations", "1",
        ]
        if self.model:
            cmd.extend(["--model", self.model])

        print("Running initializer agent...", flush=True)

        # CREATE_NO_WINDOW on Windows prevents console window pop-ups
        # stdin=DEVNULL prevents blocking on stdin reads
        # encoding="utf-8" and errors="replace" fix Windows CP1252 issues
        popen_kwargs: dict[str, Any] = {
            "stdin": subprocess.DEVNULL,
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
            "text": True,
            "encoding": "utf-8",
            "errors": "replace",
            # NOTE(review): runs from AUTOFORGE_ROOT, unlike coding/testing
            # agents which run from self.project_dir - confirm intentional.
            "cwd": str(AUTOFORGE_ROOT),
            "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
        }
        if sys.platform == "win32":
            popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW

        proc = subprocess.Popen(cmd, **popen_kwargs)

        debug_log.log("INIT", "Initializer subprocess started", pid=proc.pid)

        # Stream output with timeout
        loop = asyncio.get_running_loop()
        try:
            async def stream_output():
                # readline runs in the default executor so the event loop
                # stays responsive while blocking on subprocess output
                while True:
                    line = await loop.run_in_executor(None, proc.stdout.readline)
                    if not line:
                        break
                    print(line.rstrip(), flush=True)
                    if self.on_output is not None:
                        self.on_output(0, line.rstrip())  # Use 0 as feature_id for initializer
                proc.wait()

            await asyncio.wait_for(stream_output(), timeout=INITIALIZER_TIMEOUT)

        except asyncio.TimeoutError:
            print(f"ERROR: Initializer timed out after {INITIALIZER_TIMEOUT // 60} minutes", flush=True)
            debug_log.log("INIT", "TIMEOUT - Initializer exceeded time limit",
                          timeout_minutes=INITIALIZER_TIMEOUT // 60)
            result = kill_process_tree(proc)
            debug_log.log("INIT", "Killed timed-out initializer process tree",
                          status=result.status, children_found=result.children_found)
            return False

        debug_log.log("INIT", "Initializer subprocess completed",
                      return_code=proc.returncode,
                      success=proc.returncode == 0)

        if proc.returncode != 0:
            print(f"ERROR: Initializer failed with exit code {proc.returncode}", flush=True)
            return False

        return True
1119
+
1120
    # Pattern to detect when a batch agent claims a new feature.
    # Matches log lines containing a feature_claim_and_get call and captures
    # the numeric feature id, tolerating either key=value or "key": value
    # syntax, e.g.:
    #   feature_claim_and_get ... feature_id=7
    #   feature_claim_and_get ... "feature_id": 7
    _CLAIM_FEATURE_PATTERN = re.compile(
        r"feature_claim_and_get\b.*?['\"]?feature_id['\"]?\s*[:=]\s*(\d+)"
    )
1124
+
1125
    def _read_output(
        self,
        feature_id: int | None,
        proc: subprocess.Popen,
        abort: threading.Event,
        agent_type: Literal["coding", "testing"] = "coding",
    ):
        """Read output from subprocess and emit events.

        Runs on a dedicated daemon thread per agent subprocess. Streams stdout
        line by line, forwarding each line to self.on_output (if set) or
        printing it with a [Feature #X] prefix. When the stream ends (or the
        abort event is set), the process tree is killed and completion is
        reported via _on_agent_complete().

        Args:
            feature_id: Feature the agent was started for (batch agents pass
                their primary id; may be None).
            proc: The agent subprocess whose stdout is being drained.
            abort: Event that, when set, stops reading early.
            agent_type: "coding" or "testing"; forwarded to completion handling.
        """
        # Batch agents may claim additional features mid-run; track the one
        # currently being worked on for log attribution only.
        current_feature_id = feature_id
        try:
            if proc.stdout is None:
                proc.wait()
                return
            for line in proc.stdout:
                if abort.is_set():
                    break
                line = line.rstrip()
                # Detect when a batch agent claims a new feature
                claim_match = self._CLAIM_FEATURE_PATTERN.search(line)
                if claim_match:
                    claimed_id = int(claim_match.group(1))
                    if claimed_id != current_feature_id:
                        current_feature_id = claimed_id
                if self.on_output is not None:
                    self.on_output(current_feature_id or 0, line)
                else:
                    # Both coding and testing agents now use [Feature #X] format
                    print(f"[Feature #{current_feature_id}] {line}", flush=True)
            proc.wait()
        finally:
            # CRITICAL: Kill the process tree to clean up any child processes (e.g., Claude CLI)
            # This prevents zombie processes from accumulating
            try:
                kill_process_tree(proc, timeout=2.0)
            except Exception as e:
                debug_log.log("CLEANUP", f"Error killing process tree for {agent_type} agent", error=str(e))
            # Completion is reported with the ORIGINAL feature_id (the primary),
            # not current_feature_id, so batch bookkeeping keys line up.
            self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)
1162
+
1163
+ def _signal_agent_completed(self):
1164
+ """Signal that an agent has completed, waking the main loop.
1165
+
1166
+ This method is safe to call from any thread. It schedules the event.set()
1167
+ call to run on the event loop thread to avoid cross-thread issues with
1168
+ asyncio.Event.
1169
+ """
1170
+ if self._agent_completed_event is not None and self._event_loop is not None:
1171
+ try:
1172
+ # Use the stored event loop reference to schedule the set() call
1173
+ # This is necessary because asyncio.Event is not thread-safe and
1174
+ # asyncio.get_event_loop() fails in threads without an event loop
1175
+ if self._event_loop.is_running():
1176
+ self._event_loop.call_soon_threadsafe(self._agent_completed_event.set)
1177
+ else:
1178
+ # Fallback: set directly if loop isn't running (shouldn't happen during normal operation)
1179
+ self._agent_completed_event.set()
1180
+ except RuntimeError:
1181
+ # Event loop closed, ignore (orchestrator may be shutting down)
1182
+ pass
1183
+
1184
+ async def _wait_for_agent_completion(self, timeout: float = POLL_INTERVAL):
1185
+ """Wait for an agent to complete or until timeout expires.
1186
+
1187
+ This replaces fixed `asyncio.sleep(POLL_INTERVAL)` calls with event-based
1188
+ waiting. When an agent completes, _signal_agent_completed() sets the event,
1189
+ causing this method to return immediately. If no agent completes within
1190
+ the timeout, we return anyway to check for ready features.
1191
+
1192
+ Args:
1193
+ timeout: Maximum seconds to wait (default: POLL_INTERVAL)
1194
+ """
1195
+ if self._agent_completed_event is None:
1196
+ # Fallback if event not initialized (shouldn't happen in normal operation)
1197
+ await asyncio.sleep(timeout)
1198
+ return
1199
+
1200
+ try:
1201
+ await asyncio.wait_for(self._agent_completed_event.wait(), timeout=timeout)
1202
+ # Event was set - an agent completed. Clear it for the next wait cycle.
1203
+ self._agent_completed_event.clear()
1204
+ debug_log.log("EVENT", "Woke up immediately - agent completed")
1205
+ except asyncio.TimeoutError:
1206
+ # Timeout reached without agent completion - this is normal, just check anyway
1207
+ pass
1208
+
1209
    def _on_agent_complete(
        self,
        feature_id: int | None,
        return_code: int,
        agent_type: Literal["coding", "testing"],
        proc: subprocess.Popen,
    ):
        """Handle agent completion.

        Called from the output-reader thread after the subprocess has exited
        and its process tree has been killed.

        For coding agents:
        - ALWAYS clears in_progress when agent exits, regardless of success/failure.
        - This prevents features from getting stuck if an agent crashes or is killed.
        - The agent marks features as passing BEFORE clearing in_progress, so this
          is safe.

        For testing agents:
        - Remove from running dict (no claim to release - concurrent testing is allowed).

        Args:
            feature_id: Primary feature id (required for coding agents).
            return_code: Subprocess exit code (0 = success).
            agent_type: "coding" or "testing".
            proc: The finished subprocess; its pid keys the testing-agent dict.
        """
        if agent_type == "testing":
            with self._lock:
                # Remove by PID
                self.running_testing_agents.pop(proc.pid, None)

            status = "completed" if return_code == 0 else "failed"
            print(f"Feature #{feature_id} testing {status}", flush=True)
            debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished",
                          pid=proc.pid,
                          feature_id=feature_id,
                          status=status)
            # Signal main loop that an agent slot is available
            self._signal_agent_completed()
            return

        # feature_id is required for coding agents (always passed from start_feature)
        assert feature_id is not None, "feature_id must not be None for coding agents"

        # Coding agent completion - handle both single and batch features
        batch_ids = None
        with self._lock:
            batch_ids = self._batch_features.pop(feature_id, None)
            if batch_ids:
                # Clean up reverse mapping
                for fid in batch_ids:
                    self._feature_to_primary.pop(fid, None)
            self.running_coding_agents.pop(feature_id, None)
            self.abort_events.pop(feature_id, None)

        # Single-feature agents are treated as a batch of one from here on
        all_feature_ids = batch_ids or [feature_id]

        debug_log.log("COMPLETE", f"Coding agent for feature(s) {all_feature_ids} finished",
                      return_code=return_code,
                      status="success" if return_code == 0 else "failed",
                      batch_size=len(all_feature_ids))

        # Refresh session cache to see subprocess commits
        session = self.get_session()
        try:
            session.expire_all()
            for fid in all_feature_ids:
                feature = session.query(Feature).filter(Feature.id == fid).first()
                feature_passes = feature.passes if feature else None
                feature_in_progress = feature.in_progress if feature else None
                debug_log.log("DB", f"Feature #{fid} state after session.expire_all()",
                              passes=feature_passes,
                              in_progress=feature_in_progress)
                # Still in_progress but not passing => the agent exited without
                # finishing; clear the flag so the scheduler can retry it.
                if feature and feature.in_progress and not feature.passes:
                    feature.in_progress = False
                    session.commit()
                    debug_log.log("DB", f"Cleared in_progress for feature #{fid} (agent failed)")
        finally:
            session.close()

        # Track failures for features still in_progress at exit
        if return_code != 0:
            with self._lock:
                for fid in all_feature_ids:
                    self._failure_counts[fid] = self._failure_counts.get(fid, 0) + 1
                    failure_count = self._failure_counts[fid]
                    if failure_count >= MAX_FEATURE_RETRIES:
                        print(f"Feature #{fid} has failed {failure_count} times, will not retry", flush=True)
                        debug_log.log("COMPLETE", f"Feature #{fid} exceeded max retries",
                                      failure_count=failure_count)

        status = "completed" if return_code == 0 else "failed"
        if self.on_status is not None:
            for fid in all_feature_ids:
                self.on_status(fid, status)

        # CRITICAL: Print triggers WebSocket to emit agent_update
        if batch_ids and len(batch_ids) > 1:
            ids_str = ", ".join(f"#{fid}" for fid in batch_ids)
            print(f"Features {ids_str} {status}", flush=True)
        else:
            print(f"Feature #{feature_id} {status}", flush=True)

        # Signal main loop that an agent slot is available
        self._signal_agent_completed()
1306
+
1307
+ def stop_feature(self, feature_id: int) -> tuple[bool, str]:
1308
+ """Stop a running coding agent and all its child processes."""
1309
+ with self._lock:
1310
+ # Check if this feature is part of a batch
1311
+ primary_id = self._feature_to_primary.get(feature_id, feature_id)
1312
+ if primary_id not in self.running_coding_agents:
1313
+ return False, "Feature not running"
1314
+
1315
+ abort = self.abort_events.get(primary_id)
1316
+ proc = self.running_coding_agents.get(primary_id)
1317
+
1318
+ if abort:
1319
+ abort.set()
1320
+ if proc:
1321
+ result = kill_process_tree(proc, timeout=5.0)
1322
+ debug_log.log("STOP", f"Killed feature {feature_id} (primary {primary_id}) process tree",
1323
+ status=result.status, children_found=result.children_found,
1324
+ children_terminated=result.children_terminated, children_killed=result.children_killed)
1325
+
1326
+ return True, f"Stopped feature {feature_id}"
1327
+
1328
+ def stop_all(self) -> None:
1329
+ """Stop all running agents (coding and testing)."""
1330
+ self.is_running = False
1331
+
1332
+ # Stop coding agents
1333
+ with self._lock:
1334
+ feature_ids = list(self.running_coding_agents.keys())
1335
+
1336
+ for fid in feature_ids:
1337
+ self.stop_feature(fid)
1338
+
1339
+ # Stop testing agents (no claim to release - concurrent testing is allowed)
1340
+ with self._lock:
1341
+ testing_items = list(self.running_testing_agents.items())
1342
+
1343
+ for pid, (feature_id, proc) in testing_items:
1344
+ result = kill_process_tree(proc, timeout=5.0)
1345
+ debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {pid})",
1346
+ status=result.status, children_found=result.children_found,
1347
+ children_terminated=result.children_terminated, children_killed=result.children_killed)
1348
+
1349
+ # Clear dict so get_status() doesn't report stale agents while
1350
+ # _on_agent_complete callbacks are still in flight.
1351
+ with self._lock:
1352
+ self.running_testing_agents.clear()
1353
+
1354
+ async def run_loop(self):
1355
+ """Main orchestration loop."""
1356
+ self.is_running = True
1357
+
1358
+ # Initialize the agent completion event for this run
1359
+ # Must be created in the async context where it will be used
1360
+ self._agent_completed_event = asyncio.Event()
1361
+ # Store the event loop reference for thread-safe signaling from output reader threads
1362
+ self._event_loop = asyncio.get_running_loop()
1363
+
1364
+ # Track session start for regression testing (UTC for consistency with last_tested_at)
1365
+ self.session_start_time = datetime.now(timezone.utc)
1366
+
1367
+ # Start debug logging session FIRST (clears previous logs)
1368
+ # Must happen before any debug_log.log() calls
1369
+ debug_log.start_session()
1370
+
1371
+ # Log startup to debug file
1372
+ debug_log.section("ORCHESTRATOR STARTUP")
1373
+ debug_log.log("STARTUP", "Orchestrator run_loop starting",
1374
+ project_dir=str(self.project_dir),
1375
+ max_concurrency=self.max_concurrency,
1376
+ yolo_mode=self.yolo_mode,
1377
+ testing_agent_ratio=self.testing_agent_ratio,
1378
+ session_start_time=self.session_start_time.isoformat())
1379
+
1380
+ print("=" * 70, flush=True)
1381
+ print(" UNIFIED ORCHESTRATOR SETTINGS", flush=True)
1382
+ print("=" * 70, flush=True)
1383
+ print(f"Project: {self.project_dir}", flush=True)
1384
+ print(f"Max concurrency: {self.max_concurrency} coding agents", flush=True)
1385
+ print(f"YOLO mode: {self.yolo_mode}", flush=True)
1386
+ print(f"Regression agents: {self.testing_agent_ratio} (maintained independently)", flush=True)
1387
+ print(f"Batch size: {self.batch_size} features per agent", flush=True)
1388
+ print("=" * 70, flush=True)
1389
+ print(flush=True)
1390
+
1391
+ # Phase 1: Check if initialization needed
1392
+ if not has_features(self.project_dir):
1393
+ print("=" * 70, flush=True)
1394
+ print(" INITIALIZATION PHASE", flush=True)
1395
+ print("=" * 70, flush=True)
1396
+ print("No features found - running initializer agent first...", flush=True)
1397
+ print("NOTE: This may take 10-20+ minutes to generate features.", flush=True)
1398
+ print(flush=True)
1399
+
1400
+ success = await self._run_initializer()
1401
+
1402
+ if not success or not has_features(self.project_dir):
1403
+ print("ERROR: Initializer did not create features. Exiting.", flush=True)
1404
+ return
1405
+
1406
+ print(flush=True)
1407
+ print("=" * 70, flush=True)
1408
+ print(" INITIALIZATION COMPLETE - Starting feature loop", flush=True)
1409
+ print("=" * 70, flush=True)
1410
+ print(flush=True)
1411
+
1412
+ # CRITICAL: Recreate database connection after initializer subprocess commits
1413
+ # The initializer runs as a subprocess and commits to the database file.
1414
+ # SQLAlchemy may have stale connections or cached state. Disposing the old
1415
+ # engine and creating a fresh engine/session_maker ensures we see all the
1416
+ # newly created features.
1417
+ debug_log.section("INITIALIZATION COMPLETE")
1418
+ debug_log.log("INIT", "Disposing old database engine and creating fresh connection")
1419
+ logger.debug("Recreating database connection after initialization")
1420
+ if self._engine is not None:
1421
+ self._engine.dispose()
1422
+ self._engine, self._session_maker = create_database(self.project_dir)
1423
+
1424
+ # Debug: Show state immediately after initialization
1425
+ logger.debug("Post-initialization state check")
1426
+ logger.debug("Post-initialization state: max_concurrency=%d, yolo_mode=%s, testing_agent_ratio=%d",
1427
+ self.max_concurrency, self.yolo_mode, self.testing_agent_ratio)
1428
+
1429
+ # Verify features were created and are visible
1430
+ session = self.get_session()
1431
+ try:
1432
+ feature_count = session.query(Feature).count()
1433
+ all_features = session.query(Feature).all()
1434
+ feature_names = [f"{f.id}: {f.name}" for f in all_features[:10]]
1435
+ logger.debug("Features in database: %d", feature_count)
1436
+ debug_log.log("INIT", "Post-initialization database state",
1437
+ max_concurrency=self.max_concurrency,
1438
+ yolo_mode=self.yolo_mode,
1439
+ testing_agent_ratio=self.testing_agent_ratio,
1440
+ feature_count=feature_count,
1441
+ first_10_features=feature_names)
1442
+ finally:
1443
+ session.close()
1444
+
1445
+ # Phase 2: Feature loop
1446
+ # Check for features to resume from previous session
1447
+ resumable = self.get_resumable_features()
1448
+ if resumable:
1449
+ print(f"Found {len(resumable)} feature(s) to resume from previous session:", flush=True)
1450
+ for f in resumable:
1451
+ print(f" - Feature #{f['id']}: {f['name']}", flush=True)
1452
+ print(flush=True)
1453
+
1454
+ debug_log.section("FEATURE LOOP STARTING")
1455
+ loop_iteration = 0
1456
+ while self.is_running and not self._shutdown_requested:
1457
+ loop_iteration += 1
1458
+ if loop_iteration <= 3:
1459
+ logger.debug("=== Loop iteration %d ===", loop_iteration)
1460
+
1461
+ # Query all features ONCE per iteration and build reusable snapshot.
1462
+ # Every sub-method receives this snapshot instead of re-querying the DB.
1463
+ session = self.get_session()
1464
+ session.expire_all()
1465
+ all_features = session.query(Feature).all()
1466
+ feature_dicts = [f.to_dict() for f in all_features]
1467
+ session.close()
1468
+
1469
+ # Pre-compute scheduling scores once (BFS + reverse topo sort)
1470
+ scheduling_scores = compute_scheduling_scores(feature_dicts)
1471
+
1472
+ # Log every iteration to debug file (first 10, then every 5th)
1473
+ if loop_iteration <= 10 or loop_iteration % 5 == 0:
1474
+ with self._lock:
1475
+ running_ids = list(self.running_coding_agents.keys())
1476
+ testing_count = len(self.running_testing_agents)
1477
+ debug_log.log("LOOP", f"Iteration {loop_iteration}",
1478
+ running_coding_agents=running_ids,
1479
+ running_testing_agents=testing_count,
1480
+ max_concurrency=self.max_concurrency)
1481
+
1482
+ # Full database dump every 5 iterations
1483
+ if loop_iteration == 1 or loop_iteration % 5 == 0:
1484
+ _dump_database_state(feature_dicts, f"(iteration {loop_iteration})")
1485
+
1486
+ try:
1487
+ # Check if all complete
1488
+ if self.get_all_complete(feature_dicts):
1489
+ print("\nAll features complete!", flush=True)
1490
+ break
1491
+
1492
+ # Maintain testing agents independently (runs every iteration)
1493
+ self._maintain_testing_agents(feature_dicts)
1494
+
1495
+ # Check capacity
1496
+ with self._lock:
1497
+ current = len(self.running_coding_agents)
1498
+ current_testing = len(self.running_testing_agents)
1499
+ running_ids = list(self.running_coding_agents.keys())
1500
+
1501
+ debug_log.log("CAPACITY", "Checking capacity",
1502
+ current_coding=current,
1503
+ current_testing=current_testing,
1504
+ running_coding_ids=running_ids,
1505
+ max_concurrency=self.max_concurrency,
1506
+ at_capacity=(current >= self.max_concurrency))
1507
+
1508
+ if current >= self.max_concurrency:
1509
+ debug_log.log("CAPACITY", "At max capacity, waiting for agent completion...")
1510
+ await self._wait_for_agent_completion()
1511
+ continue
1512
+
1513
+ # Priority 1: Resume features from previous session
1514
+ resumable = self.get_resumable_features(feature_dicts, scheduling_scores)
1515
+ if resumable:
1516
+ slots = self.max_concurrency - current
1517
+ for feature in resumable[:slots]:
1518
+ print(f"Resuming feature #{feature['id']}: {feature['name']}", flush=True)
1519
+ self.start_feature(feature["id"], resume=True)
1520
+ await asyncio.sleep(0.5) # Brief delay for subprocess to claim feature before re-querying
1521
+ continue
1522
+
1523
+ # Priority 2: Start new ready features
1524
+ ready = self.get_ready_features(feature_dicts, scheduling_scores)
1525
+ if not ready:
1526
+ # Wait for running features to complete
1527
+ if current > 0:
1528
+ await self._wait_for_agent_completion()
1529
+ continue
1530
+ else:
1531
+ # No ready features and nothing running
1532
+ # Force a fresh database check before declaring blocked
1533
+ # This handles the case where subprocess commits weren't visible yet
1534
+ session = self.get_session()
1535
+ try:
1536
+ session.expire_all()
1537
+ fresh_dicts = [f.to_dict() for f in session.query(Feature).all()]
1538
+ finally:
1539
+ session.close()
1540
+
1541
+ # Recheck if all features are now complete
1542
+ if self.get_all_complete(fresh_dicts):
1543
+ print("\nAll features complete!", flush=True)
1544
+ break
1545
+
1546
+ # Still have pending features but all are blocked by dependencies
1547
+ print("No ready features available. All remaining features may be blocked by dependencies.", flush=True)
1548
+ await self._wait_for_agent_completion(timeout=POLL_INTERVAL * 2)
1549
+ continue
1550
+
1551
+ # Build dependency-aware batches from ready features
1552
+ slots = self.max_concurrency - current
1553
+ batches = self.build_feature_batches(ready, feature_dicts, scheduling_scores)
1554
+
1555
+ logger.debug("Spawning loop: %d ready, %d slots available, %d batches built",
1556
+ len(ready), slots, len(batches))
1557
+
1558
+ debug_log.log("SPAWN", "Starting feature batches",
1559
+ ready_count=len(ready),
1560
+ slots_available=slots,
1561
+ batch_count=len(batches),
1562
+ batches=[[f['id'] for f in b] for b in batches[:slots]])
1563
+
1564
+ for batch in batches[:slots]:
1565
+ batch_ids = [f["id"] for f in batch]
1566
+ batch_names = [f"{f['id']}:{f['name']}" for f in batch]
1567
+ logger.debug("Starting batch: %s", batch_ids)
1568
+ success, msg = self.start_feature_batch(batch_ids)
1569
+ if not success:
1570
+ logger.debug("Failed to start batch %s: %s", batch_ids, msg)
1571
+ debug_log.log("SPAWN", f"FAILED to start batch {batch_ids}",
1572
+ batch_names=batch_names,
1573
+ error=msg)
1574
+ else:
1575
+ logger.debug("Successfully started batch %s", batch_ids)
1576
+ with self._lock:
1577
+ running_count = len(self.running_coding_agents)
1578
+ logger.debug("Running coding agents after start: %d", running_count)
1579
+ debug_log.log("SPAWN", f"Successfully started batch {batch_ids}",
1580
+ batch_names=batch_names,
1581
+ running_coding_agents=running_count)
1582
+
1583
+ await asyncio.sleep(0.5)
1584
+
1585
+ except Exception as e:
1586
+ print(f"Orchestrator error: {e}", flush=True)
1587
+ await self._wait_for_agent_completion()
1588
+
1589
+ # Wait for remaining agents to complete
1590
+ print("Waiting for running agents to complete...", flush=True)
1591
+ while True:
1592
+ with self._lock:
1593
+ coding_done = len(self.running_coding_agents) == 0
1594
+ testing_done = len(self.running_testing_agents) == 0
1595
+ if coding_done and testing_done:
1596
+ break
1597
+ # Use short timeout since we're just waiting for final agents to finish
1598
+ await self._wait_for_agent_completion(timeout=1.0)
1599
+
1600
+ print("Orchestrator finished.", flush=True)
1601
+
1602
+ def get_status(self) -> dict:
1603
+ """Get current orchestrator status."""
1604
+ with self._lock:
1605
+ return {
1606
+ "running_features": list(self.running_coding_agents.keys()),
1607
+ "coding_agent_count": len(self.running_coding_agents),
1608
+ "testing_agent_count": len(self.running_testing_agents),
1609
+ "count": len(self.running_coding_agents), # Legacy compatibility
1610
+ "max_concurrency": self.max_concurrency,
1611
+ "testing_agent_ratio": self.testing_agent_ratio,
1612
+ "is_running": self.is_running,
1613
+ "yolo_mode": self.yolo_mode,
1614
+ }
1615
+
1616
+ def cleanup(self) -> None:
1617
+ """Clean up database resources. Safe to call multiple times.
1618
+
1619
+ Forces WAL checkpoint to flush pending writes to main database file,
1620
+ then disposes engine to close all connections. Prevents stale cache
1621
+ issues when the orchestrator restarts.
1622
+ """
1623
+ # Atomically grab and clear the engine reference to prevent re-entry
1624
+ engine = self._engine
1625
+ self._engine = None
1626
+
1627
+ if engine is None:
1628
+ return # Already cleaned up
1629
+
1630
+ try:
1631
+ debug_log.log("CLEANUP", "Forcing WAL checkpoint before dispose")
1632
+ with engine.connect() as conn:
1633
+ conn.execute(text("PRAGMA wal_checkpoint(FULL)"))
1634
+ conn.commit()
1635
+ debug_log.log("CLEANUP", "WAL checkpoint completed, disposing engine")
1636
+ except Exception as e:
1637
+ debug_log.log("CLEANUP", f"WAL checkpoint failed (non-fatal): {e}")
1638
+
1639
+ try:
1640
+ engine.dispose()
1641
+ debug_log.log("CLEANUP", "Engine disposed successfully")
1642
+ except Exception as e:
1643
+ debug_log.log("CLEANUP", f"Engine dispose failed: {e}")
1644
+
1645
+
1646
async def run_parallel_orchestrator(
    project_dir: Path,
    max_concurrency: int = DEFAULT_CONCURRENCY,
    model: str | None = None,
    yolo_mode: bool = False,
    testing_agent_ratio: int = 1,
    testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
    batch_size: int = 3,
) -> None:
    """Run the unified orchestrator for a full session.

    Args:
        project_dir: Path to the project directory
        max_concurrency: Maximum number of concurrent coding agents
        model: Claude model to use
        yolo_mode: Whether to run in YOLO mode (skip testing agents)
        testing_agent_ratio: Number of regression agents to maintain (0-3)
        testing_batch_size: Number of features per testing batch (1-5)
        batch_size: Max features per coding agent batch (1-3)
    """
    print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True)
    orchestrator = ParallelOrchestrator(
        project_dir=project_dir,
        max_concurrency=max_concurrency,
        model=model,
        yolo_mode=yolo_mode,
        testing_agent_ratio=testing_agent_ratio,
        testing_batch_size=testing_batch_size,
        batch_size=batch_size,
    )

    # Register cleanup for process exit (normal return or exception paths
    # that bypass the finally below). cleanup() is idempotent, so running
    # both the atexit hook and the finally block is harmless.
    def cleanup_handler():
        debug_log.log("CLEANUP", "atexit cleanup handler invoked")
        orchestrator.cleanup()

    atexit.register(cleanup_handler)

    # SIGTERM handler does nothing but flip flags: anything more is unsafe
    # inside a signal context. The run loop polls these flags.
    def signal_handler(signum, frame):
        orchestrator._shutdown_requested = True
        orchestrator.is_running = False

    # NOTE: On Windows, SIGTERM handlers fire only for os.kill() issued from
    # within Python; external termination (Task Manager, taskkill,
    # Popen.terminate()) goes through TerminateProcess() and bypasses
    # signal handlers entirely.
    signal.signal(signal.SIGTERM, signal_handler)

    # SIGINT is deliberately left at the default so Python raises
    # KeyboardInterrupt and the except clause below can handle it.

    try:
        await orchestrator.run_loop()
    except KeyboardInterrupt:
        print("\n\nInterrupted by user. Stopping agents...", flush=True)
        orchestrator.stop_all()
    finally:
        # CRITICAL: always flush the WAL and dispose DB connections on exit.
        orchestrator.cleanup()
1708
+
1709
+
1710
def main():
    """CLI entry point: parse arguments and run the parallel orchestrator."""
    import argparse

    from dotenv import load_dotenv

    from registry import DEFAULT_MODEL, get_project_path

    load_dotenv()

    parser = argparse.ArgumentParser(
        description="Parallel Feature Orchestrator - Run multiple agent instances",
    )
    parser.add_argument(
        "--project-dir",
        type=str,
        required=True,
        help="Project directory path (absolute) or registered project name",
    )
    parser.add_argument(
        "--max-concurrency",
        "-p",
        type=int,
        default=DEFAULT_CONCURRENCY,
        help=f"Maximum concurrent agents (1-{MAX_PARALLEL_AGENTS}, default: {DEFAULT_CONCURRENCY})",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help=f"Claude model to use (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--yolo",
        action="store_true",
        default=False,
        help="Enable YOLO mode: rapid prototyping without browser testing",
    )
    parser.add_argument(
        "--testing-agent-ratio",
        type=int,
        default=1,
        help="Number of regression testing agents (0-3, default: 1). Set to 0 to disable testing agents.",
    )
    parser.add_argument(
        "--testing-batch-size",
        type=int,
        default=DEFAULT_TESTING_BATCH_SIZE,
        help=f"Number of features per testing batch (1-5, default: {DEFAULT_TESTING_BATCH_SIZE})",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=3,
        help="Max features per coding agent batch (1-5, default: 3)",
    )

    opts = parser.parse_args()

    # Resolve the project directory: absolute paths must exist on disk;
    # anything else is treated as a registered project name.
    raw_dir = opts.project_dir
    project_dir = Path(raw_dir)

    if not project_dir.is_absolute():
        registered_path = get_project_path(raw_dir)
        if registered_path is None:
            print(f"Error: Project '{raw_dir}' not found in registry", flush=True)
            sys.exit(1)
        project_dir = registered_path
    elif not project_dir.exists():
        print(f"Error: Project directory does not exist: {project_dir}", flush=True)
        sys.exit(1)

    try:
        asyncio.run(run_parallel_orchestrator(
            project_dir=project_dir,
            max_concurrency=opts.max_concurrency,
            model=opts.model,
            yolo_mode=opts.yolo,
            testing_agent_ratio=opts.testing_agent_ratio,
            testing_batch_size=opts.testing_batch_size,
            batch_size=opts.batch_size,
        ))
    except KeyboardInterrupt:
        print("\n\nInterrupted by user", flush=True)
1798
+
1799
# Allow running this module directly as a script (e.g. `python parallel_orchestrator.py`).
if __name__ == "__main__":
    main()