autoforge-ai 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autoforge-ai",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "description": "Autonomous coding agent with web UI - build complete apps with AI",
5
5
  "license": "AGPL-3.0",
6
6
  "bin": {
@@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path
17
17
  from ..utils.validation import validate_project_name
18
18
 
19
19
 
20
- def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
20
+ def _get_settings_defaults() -> tuple[bool, str, int, int, int]:
21
21
  """Get defaults from global settings.
22
22
 
23
23
  Returns:
24
- Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size)
24
+ Tuple of (yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size)
25
25
  """
26
26
  import sys
27
27
  root = Path(__file__).parent.parent.parent
@@ -40,8 +40,6 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
40
40
  except (ValueError, TypeError):
41
41
  testing_agent_ratio = 1
42
42
 
43
- playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true"
44
-
45
43
  try:
46
44
  batch_size = int(settings.get("batch_size", "3"))
47
45
  except (ValueError, TypeError):
@@ -52,7 +50,7 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
52
50
  except (ValueError, TypeError):
53
51
  testing_batch_size = 3
54
52
 
55
- return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size
53
+ return yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size
56
54
 
57
55
 
58
56
  router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
@@ -101,7 +99,7 @@ async def start_agent(
101
99
  manager = get_project_manager(project_name)
102
100
 
103
101
  # Get defaults from global settings if not provided in request
104
- default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_batch_size = _get_settings_defaults()
102
+ default_yolo, default_model, default_testing_ratio, default_batch_size, default_testing_batch_size = _get_settings_defaults()
105
103
 
106
104
  yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
107
105
  model = request.model if request.model else default_model
@@ -111,12 +109,13 @@ async def start_agent(
111
109
  batch_size = default_batch_size
112
110
  testing_batch_size = default_testing_batch_size
113
111
 
112
+ # Always run headless - the embedded browser view panel replaces desktop windows
114
113
  success, message = await manager.start(
115
114
  yolo_mode=yolo_mode,
116
115
  model=model,
117
116
  max_concurrency=max_concurrency,
118
117
  testing_agent_ratio=testing_agent_ratio,
119
- playwright_headless=playwright_headless,
118
+ playwright_headless=True,
120
119
  batch_size=batch_size,
121
120
  testing_batch_size=testing_batch_size,
122
121
  )
@@ -111,7 +111,7 @@ async def get_settings():
111
111
  glm_mode=glm_mode,
112
112
  ollama_mode=ollama_mode,
113
113
  testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
114
- playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
114
+ playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
115
115
  batch_size=_parse_int(all_settings.get("batch_size"), 3),
116
116
  testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
117
117
  api_provider=api_provider,
@@ -133,8 +133,8 @@ async def update_settings(update: SettingsUpdate):
133
133
  if update.testing_agent_ratio is not None:
134
134
  set_setting("testing_agent_ratio", str(update.testing_agent_ratio))
135
135
 
136
- if update.playwright_headless is not None:
137
- set_setting("playwright_headless", "true" if update.playwright_headless else "false")
136
+ # playwright_headless is no longer user-configurable; always headless
137
+ # with embedded browser view panel in the UI
138
138
 
139
139
  if update.batch_size is not None:
140
140
  set_setting("batch_size", str(update.batch_size))
@@ -179,7 +179,7 @@ async def update_settings(update: SettingsUpdate):
179
179
  glm_mode=glm_mode,
180
180
  ollama_mode=ollama_mode,
181
181
  testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
182
- playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
182
+ playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
183
183
  batch_size=_parse_int(all_settings.get("batch_size"), 3),
184
184
  testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
185
185
  api_provider=api_provider,
@@ -0,0 +1,280 @@
1
+ """
2
+ Browser View Service
3
+ ====================
4
+
5
+ Captures periodic screenshots from active playwright-cli browser sessions
6
+ and streams them to the UI via WebSocket callbacks.
7
+
8
+ Each agent gets an isolated browser session (e.g., coding-5, testing-0).
9
+ This service polls those sessions with `playwright-cli screenshot` and
10
+ delivers the frames to subscribed UI clients.
11
+ """
12
+
13
+ import asyncio
14
+ import base64
15
+ import logging
16
+ import shutil
17
+ import threading
18
+ from dataclasses import dataclass
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Awaitable, Callable
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ POLL_INTERVAL = 2.0 # seconds between screenshot captures
26
+ BACKOFF_INTERVAL = 10.0 # seconds after repeated failures
27
+ MAX_FAILURES_BEFORE_BACKOFF = 10
28
+ MAX_FAILURES_BEFORE_STOP = 90 # ~3 minutes at normal rate before giving up
29
+ SCREENSHOT_TIMEOUT = 5 # seconds
30
+
31
+
32
+ @dataclass
33
+ class SessionInfo:
34
+ """Metadata for an active browser session."""
35
+ session_name: str
36
+ agent_index: int
37
+ agent_type: str # "coding" or "testing"
38
+ feature_id: int
39
+ feature_name: str
40
+ consecutive_failures: int = 0
41
+ stopped: bool = False
42
+
43
+
44
+ @dataclass
45
+ class ScreenshotData:
46
+ """A captured screenshot ready for delivery."""
47
+ session_name: str
48
+ agent_index: int
49
+ agent_type: str
50
+ feature_id: int
51
+ feature_name: str
52
+ image_base64: str # base64-encoded PNG
53
+ timestamp: str
54
+
55
+
56
+ class BrowserViewService:
57
+ """Manages screenshot capture for active agent browser sessions.
58
+
59
+ Follows the same singleton-per-project pattern as DevServerProcessManager.
60
+ """
61
+
62
+ def __init__(self, project_name: str, project_dir: Path):
63
+ self.project_name = project_name
64
+ self.project_dir = project_dir
65
+ self._active_sessions: dict[str, SessionInfo] = {}
66
+ self._subscribers = 0
67
+ self._poll_task: asyncio.Task | None = None
68
+ self._screenshot_callbacks: set[Callable[[ScreenshotData], Awaitable[None]]] = set()
69
+ self._lock = asyncio.Lock()
70
+ self._playwright_cli: str | None = None
71
+
72
+ def _get_playwright_cli(self) -> str | None:
73
+ """Find playwright-cli executable."""
74
+ if self._playwright_cli is not None:
75
+ return self._playwright_cli
76
+ path = shutil.which("playwright-cli")
77
+ if path:
78
+ self._playwright_cli = path
79
+ else:
80
+ logger.warning("playwright-cli not found in PATH; browser view disabled")
81
+ return self._playwright_cli
82
+
83
+ async def register_session(
84
+ self,
85
+ session_name: str,
86
+ agent_index: int,
87
+ agent_type: str,
88
+ feature_id: int,
89
+ feature_name: str,
90
+ ) -> None:
91
+ """Register an agent's browser session for screenshot capture."""
92
+ async with self._lock:
93
+ self._active_sessions[session_name] = SessionInfo(
94
+ session_name=session_name,
95
+ agent_index=agent_index,
96
+ agent_type=agent_type,
97
+ feature_id=feature_id,
98
+ feature_name=feature_name,
99
+ )
100
+ logger.debug("Registered browser session: %s", session_name)
101
+
102
+ async def unregister_session(self, session_name: str) -> None:
103
+ """Unregister a browser session when agent completes."""
104
+ async with self._lock:
105
+ removed = self._active_sessions.pop(session_name, None)
106
+ if removed:
107
+ logger.debug("Unregistered browser session: %s", session_name)
108
+ # Clean up screenshot file
109
+ self._cleanup_screenshot_file(session_name)
110
+
111
+ def add_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
112
+ self._screenshot_callbacks.add(callback)
113
+
114
+ def remove_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
115
+ self._screenshot_callbacks.discard(callback)
116
+
117
+ async def add_subscriber(self) -> None:
118
+ """Called when a UI client wants browser screenshots."""
119
+ async with self._lock:
120
+ self._subscribers += 1
121
+ if self._subscribers == 1:
122
+ self._start_polling()
123
+
124
+ async def remove_subscriber(self) -> None:
125
+ """Called when a UI client stops wanting screenshots."""
126
+ async with self._lock:
127
+ self._subscribers = max(0, self._subscribers - 1)
128
+ if self._subscribers == 0:
129
+ self._stop_polling()
130
+
131
+ async def stop(self) -> None:
132
+ """Clean up all sessions and stop polling."""
133
+ async with self._lock:
134
+ for session_name in list(self._active_sessions):
135
+ self._cleanup_screenshot_file(session_name)
136
+ self._active_sessions.clear()
137
+ self._stop_polling()
138
+
139
+ def _start_polling(self) -> None:
140
+ """Start the screenshot polling loop."""
141
+ if self._poll_task is not None and not self._poll_task.done():
142
+ return
143
+ self._poll_task = asyncio.create_task(self._poll_loop())
144
+ logger.info("Started browser screenshot polling for %s", self.project_name)
145
+
146
+ def _stop_polling(self) -> None:
147
+ """Stop the screenshot polling loop."""
148
+ if self._poll_task is not None and not self._poll_task.done():
149
+ self._poll_task.cancel()
150
+ self._poll_task = None
151
+ logger.info("Stopped browser screenshot polling for %s", self.project_name)
152
+
153
+ async def _poll_loop(self) -> None:
154
+ """Main polling loop - capture screenshots for all active sessions."""
155
+ try:
156
+ while True:
157
+ async with self._lock:
158
+ sessions = list(self._active_sessions.values())
159
+
160
+ if sessions and self._screenshot_callbacks:
161
+ # Capture screenshots with limited concurrency
162
+ sem = asyncio.Semaphore(3)
163
+
164
+ async def capture_with_sem(session: SessionInfo) -> None:
165
+ async with sem:
166
+ await self._capture_and_deliver(session)
167
+
168
+ await asyncio.gather(
169
+ *(capture_with_sem(s) for s in sessions if not s.stopped),
170
+ return_exceptions=True,
171
+ )
172
+
173
+ await asyncio.sleep(POLL_INTERVAL)
174
+ except asyncio.CancelledError:
175
+ pass
176
+ except Exception:
177
+ logger.warning("Browser screenshot polling crashed", exc_info=True)
178
+
179
+ async def _capture_and_deliver(self, session: SessionInfo) -> None:
180
+ """Capture a screenshot for a session and deliver to callbacks."""
181
+ cli = self._get_playwright_cli()
182
+ if not cli:
183
+ return
184
+
185
+ # Determine interval based on failure count
186
+ if session.consecutive_failures >= MAX_FAILURES_BEFORE_BACKOFF:
187
+ # In backoff mode - only capture every BACKOFF_INTERVAL/POLL_INTERVAL polls
188
+ # We achieve this by checking a simple modulo on failure count
189
+ if session.consecutive_failures % int(BACKOFF_INTERVAL / POLL_INTERVAL) != 0:
190
+ return
191
+
192
+ screenshot_dir = self.project_dir / ".playwright-cli"
193
+ screenshot_dir.mkdir(parents=True, exist_ok=True)
194
+ screenshot_path = screenshot_dir / f"_view_{session.session_name}.png"
195
+
196
+ try:
197
+ proc = await asyncio.create_subprocess_exec(
198
+ cli, "-s", session.session_name, "screenshot",
199
+ f"--filename={screenshot_path}",
200
+ stdout=asyncio.subprocess.PIPE,
201
+ stderr=asyncio.subprocess.PIPE,
202
+ cwd=str(self.project_dir),
203
+ )
204
+ _, stderr = await asyncio.wait_for(proc.communicate(), timeout=SCREENSHOT_TIMEOUT)
205
+
206
+ if proc.returncode != 0:
207
+ session.consecutive_failures += 1
208
+ if session.consecutive_failures >= MAX_FAILURES_BEFORE_STOP:
209
+ session.stopped = True
210
+ logger.debug(
211
+ "Stopped polling session %s after %d failures",
212
+ session.session_name, session.consecutive_failures,
213
+ )
214
+ return
215
+
216
+ # Read and encode the screenshot
217
+ if not screenshot_path.exists():
218
+ session.consecutive_failures += 1
219
+ return
220
+
221
+ image_bytes = screenshot_path.read_bytes()
222
+ image_base64 = base64.b64encode(image_bytes).decode("ascii")
223
+
224
+ # Reset failure counter on success
225
+ session.consecutive_failures = 0
226
+ # Re-enable if previously stopped
227
+ session.stopped = False
228
+
229
+ screenshot = ScreenshotData(
230
+ session_name=session.session_name,
231
+ agent_index=session.agent_index,
232
+ agent_type=session.agent_type,
233
+ feature_id=session.feature_id,
234
+ feature_name=session.feature_name,
235
+ image_base64=image_base64,
236
+ timestamp=datetime.now().isoformat(),
237
+ )
238
+
239
+ # Deliver to all callbacks
240
+ for callback in list(self._screenshot_callbacks):
241
+ try:
242
+ await callback(screenshot)
243
+ except Exception:
244
+ pass # Connection may be closed
245
+
246
+ except asyncio.TimeoutError:
247
+ session.consecutive_failures += 1
248
+ except Exception:
249
+ session.consecutive_failures += 1
250
+ finally:
251
+ # Clean up the screenshot file
252
+ try:
253
+ screenshot_path.unlink(missing_ok=True)
254
+ except Exception:
255
+ pass
256
+
257
+ def _cleanup_screenshot_file(self, session_name: str) -> None:
258
+ """Remove a session's screenshot file."""
259
+ try:
260
+ path = self.project_dir / ".playwright-cli" / f"_view_{session_name}.png"
261
+ path.unlink(missing_ok=True)
262
+ except Exception:
263
+ pass
264
+
265
+
266
+ # ---------------------------------------------------------------------------
267
+ # Global instance management (thread-safe)
268
+ # ---------------------------------------------------------------------------
269
+
270
# Registry of services keyed by (project name, resolved project directory).
_services: dict[tuple[str, str], BrowserViewService] = {}
# Guards every read and write of the registry above.
_services_lock = threading.Lock()


def get_browser_view_service(project_name: str, project_dir: Path) -> BrowserViewService:
    """Return the BrowserViewService for a project, creating it on first use.

    Lookup and creation happen under ``_services_lock``, so repeated calls
    with the same project name and directory return the same instance.
    """
    with _services_lock:
        cache_key = (project_name, str(project_dir.resolve()))
        service = _services.get(cache_key)
        if service is None:
            service = BrowserViewService(project_name, project_dir)
            _services[cache_key] = service
        return service
@@ -16,6 +16,7 @@ from typing import Set
16
16
  from fastapi import WebSocket, WebSocketDisconnect
17
17
 
18
18
  from .schemas import AGENT_MASCOTS
19
+ from .services.browser_view_service import get_browser_view_service
19
20
  from .services.chat_constants import ROOT_DIR
20
21
  from .services.dev_server_manager import get_devserver_manager
21
22
  from .services.process_manager import get_manager
@@ -787,8 +788,39 @@ async def project_websocket(websocket: WebSocket, project_name: str):
787
788
  # Create orchestrator tracker for observability
788
789
  orchestrator_tracker = OrchestratorTracker()
789
790
 
791
+ # Get browser view service for embedded browser screenshots
792
+ browser_view_service = get_browser_view_service(project_name, project_dir)
793
+ browser_view_subscribed = False
794
+ # Counter to mirror orchestrator's testing session naming (testing-0, testing-1, ...)
795
+ testing_session_counter = 0
796
+ # Deferred session registration: store metadata at agent start, register on first browser command.
797
+ # This avoids premature polling failures when agents spend time reading/planning before opening a browser.
798
+ # Key: session_name -> registration kwargs
799
+ pending_browser_sessions: dict[str, dict] = {}
800
+ # Track which feature IDs map to which session names (for deferred lookup)
801
+ feature_to_session: dict[int, str] = {}
802
+
803
+ async def on_screenshot(screenshot):
804
+ """Handle browser screenshot - send to this WebSocket."""
805
+ try:
806
+ await websocket.send_json({
807
+ "type": "browser_screenshot",
808
+ "sessionName": screenshot.session_name,
809
+ "agentIndex": screenshot.agent_index,
810
+ "agentType": screenshot.agent_type,
811
+ "featureId": screenshot.feature_id,
812
+ "featureName": screenshot.feature_name,
813
+ "imageData": screenshot.image_base64,
814
+ "timestamp": screenshot.timestamp,
815
+ })
816
+ except Exception:
817
+ pass # Connection may be closed
818
+
819
+ browser_view_service.add_screenshot_callback(on_screenshot)
820
+
790
821
  async def on_output(line: str):
791
822
  """Handle agent output - broadcast to this WebSocket."""
823
+ nonlocal testing_session_counter
792
824
  try:
793
825
  # Extract feature ID from line if present
794
826
  feature_id = None
@@ -817,6 +849,48 @@ async def project_websocket(websocket: WebSocket, project_name: str):
817
849
  if agent_update:
818
850
  await websocket.send_json(agent_update)
819
851
 
852
+ # Register/unregister browser sessions based on agent lifecycle
853
+ update_state = agent_update.get("state")
854
+ update_type = agent_update.get("agentType", "coding")
855
+ update_feature_id = agent_update.get("featureId", 0)
856
+ update_feature_name = agent_update.get("featureName", "")
857
+ update_agent_index = agent_update.get("agentIndex", 0)
858
+
859
+ if update_state == "thinking" and agent_update.get("thought") in ("Starting work...", "Starting batch work..."):
860
+ # Agent just started - defer browser session registration until
861
+ # we detect an actual playwright-cli open/goto command. This avoids
862
+ # polling failures while the agent is still reading code / planning.
863
+ if update_type == "coding":
864
+ session_name = f"coding-{update_feature_id}"
865
+ else:
866
+ session_name = f"testing-{testing_session_counter}"
867
+ testing_session_counter += 1
868
+ pending_browser_sessions[session_name] = dict(
869
+ session_name=session_name,
870
+ agent_index=update_agent_index,
871
+ agent_type=update_type,
872
+ feature_id=update_feature_id,
873
+ feature_name=update_feature_name,
874
+ )
875
+ feature_to_session[update_feature_id] = session_name
876
+ elif update_state in ("success", "error"):
877
+ # Agent completed - unregister browser session
878
+ if update_type == "coding":
879
+ session_name = f"coding-{update_feature_id}"
880
+ await browser_view_service.unregister_session(session_name)
881
+ pending_browser_sessions.pop(session_name, None)
882
+ feature_to_session.pop(update_feature_id, None)
883
+ # Testing sessions are cleaned up on orchestrator stop
884
+
885
+ # Detect playwright-cli browser commands and activate deferred sessions
886
+ if feature_id is not None and "playwright-cli" in line and any(
887
+ kw in line for kw in ("open ", "goto ", "open\t", "goto\t")
888
+ ):
889
+ sess_name = feature_to_session.get(feature_id)
890
+ if sess_name and sess_name in pending_browser_sessions:
891
+ reg = pending_browser_sessions.pop(sess_name)
892
+ await browser_view_service.register_session(**reg)
893
+
820
894
  # Also check for orchestrator events and emit orchestrator_update messages
821
895
  orch_update = await orchestrator_tracker.process_line(line)
822
896
  if orch_update:
@@ -826,6 +900,7 @@ async def project_websocket(websocket: WebSocket, project_name: str):
826
900
 
827
901
  async def on_status_change(status: str):
828
902
  """Handle status change - broadcast to this WebSocket."""
903
+ nonlocal testing_session_counter
829
904
  try:
830
905
  await websocket.send_json({
831
906
  "type": "agent_status",
@@ -835,6 +910,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
835
910
  if status in ("stopped", "crashed"):
836
911
  await agent_tracker.reset()
837
912
  await orchestrator_tracker.reset()
913
+ await browser_view_service.stop()
914
+ testing_session_counter = 0
915
+ pending_browser_sessions.clear()
916
+ feature_to_session.clear()
838
917
  except Exception:
839
918
  pass # Connection may be closed
840
919
 
@@ -908,10 +987,23 @@ async def project_websocket(websocket: WebSocket, project_name: str):
908
987
  data = await websocket.receive_text()
909
988
  message = json.loads(data)
910
989
 
990
+ msg_type = message.get("type")
991
+
911
992
  # Handle ping
912
- if message.get("type") == "ping":
993
+ if msg_type == "ping":
913
994
  await websocket.send_json({"type": "pong"})
914
995
 
996
+ # Handle browser view subscribe/unsubscribe
997
+ elif msg_type == "browser_view_subscribe":
998
+ if not browser_view_subscribed:
999
+ browser_view_subscribed = True
1000
+ await browser_view_service.add_subscriber()
1001
+
1002
+ elif msg_type == "browser_view_unsubscribe":
1003
+ if browser_view_subscribed:
1004
+ browser_view_subscribed = False
1005
+ await browser_view_service.remove_subscriber()
1006
+
915
1007
  except WebSocketDisconnect:
916
1008
  break
917
1009
  except json.JSONDecodeError:
@@ -935,5 +1027,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
935
1027
  devserver_manager.remove_output_callback(on_dev_output)
936
1028
  devserver_manager.remove_status_callback(on_dev_status_change)
937
1029
 
1030
+ # Unregister browser view callbacks and subscriber
1031
+ browser_view_service.remove_screenshot_callback(on_screenshot)
1032
+ if browser_view_subscribed:
1033
+ await browser_view_service.remove_subscriber()
1034
+
938
1035
  # Disconnect from manager
939
1036
  await manager.disconnect(websocket, project_name)