escarp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- escarp/__init__.py +13 -0
- escarp/broker/__init__.py +8 -0
- escarp/broker/api.py +135 -0
- escarp/broker/browser.py +220 -0
- escarp/broker/daemon.py +214 -0
- escarp/broker/discovery.py +94 -0
- escarp/broker/launcher.py +122 -0
- escarp/broker/lease.py +250 -0
- escarp/broker/slots.py +115 -0
- escarp/cli.py +42 -0
- escarp/config.py +26 -0
- escarp/identity/__init__.py +4 -0
- escarp/identity/keypair.py +70 -0
- escarp/identity/signing.py +148 -0
- escarp/mcp/__init__.py +5 -0
- escarp/mcp/server.py +213 -0
- escarp-1.0.0.dist-info/METADATA +205 -0
- escarp-1.0.0.dist-info/RECORD +21 -0
- escarp-1.0.0.dist-info/WHEEL +4 -0
- escarp-1.0.0.dist-info/entry_points.txt +3 -0
- escarp-1.0.0.dist-info/licenses/LICENSE +21 -0
escarp/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Escarp -- identity-aware runtime for parallel coding agents.
|
|
2
|
+
|
|
3
|
+
Public surface is the `escarp` and `escarp-mcp` CLIs and the broker's HTTP
|
|
4
|
+
API on http://127.0.0.1:7878. See README.md for the quick-start.
|
|
5
|
+
|
|
6
|
+
For programmatic access, import directly from the submodules:
|
|
7
|
+
from escarp.broker import claim_slot, ports_for_slot
|
|
8
|
+
from escarp.broker.lease import Broker
|
|
9
|
+
from escarp.broker.discovery import discover_pool
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.0"
|
|
13
|
+
__all__ = ["__version__"]
|
escarp/broker/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Escarp v2 broker: control-plane daemon for the worktree browser pool.
|
|
2
|
+
|
|
3
|
+
See V2_PLAN.md for the design. Public surface lives in `daemon` and `cli`.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from escarp.broker.slots import SlotLease, claim_slot, ports_for_slot
|
|
7
|
+
|
|
8
|
+
__all__ = ["SlotLease", "claim_slot", "ports_for_slot"]
|
escarp/broker/api.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""HTTP layer for the broker daemon.
|
|
2
|
+
|
|
3
|
+
Bind-and-shift on EADDRINUSE per V2_PLAN.md locked decision #6. Three verbs
|
|
4
|
+
plus two read endpoints:
|
|
5
|
+
|
|
6
|
+
GET /status -> pool snapshot (no lease tokens leaked)
|
|
7
|
+
POST /acquire {holder, slot?, dev_port?} -> LeaseRecord incl. token
|
|
8
|
+
POST /heartbeat {lease_token} -> LeaseRecord (refreshed expiry)
|
|
9
|
+
POST /release {lease_token} -> LeaseRecord (now free)
|
|
10
|
+
GET /reaped -> last-50 reap log (debugging)
|
|
11
|
+
|
|
12
|
+
Returns 409 for pool_exhausted / slot_leased, 404 for unknown_slot /
|
|
13
|
+
unknown_lease, 400 for malformed payloads. The MCP shim translates these
|
|
14
|
+
into model-readable errors; the LLM never sees raw HTTP status codes.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import errno
|
|
20
|
+
import socket
|
|
21
|
+
from dataclasses import asdict
|
|
22
|
+
|
|
23
|
+
from aiohttp import web
|
|
24
|
+
|
|
25
|
+
from escarp.broker.lease import (
|
|
26
|
+
Broker,
|
|
27
|
+
PoolExhausted,
|
|
28
|
+
SlotLeased,
|
|
29
|
+
UnknownLease,
|
|
30
|
+
UnknownSlot,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
DEFAULT_PORT = 7878
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def build_app(broker: Broker) -> web.Application:
    """Build the aiohttp application that exposes ``broker`` over HTTP.

    Routes registered:
      GET  /status     -> pool size, lease TTL and the broker's slot snapshot
      POST /acquire    -> lease record for ``holder`` (optional ``slot``, ``dev_port``)
      POST /heartbeat  -> lease record for ``lease_token``
      POST /release    -> public view of the released lease
      GET  /reaped     -> the broker's reap log

    Error mapping: 400 for an unreadable body or a missing field, 409 for
    ``PoolExhausted`` / ``SlotLeased``, 404 for ``UnknownSlot`` /
    ``UnknownLease``.

    A body that parses as JSON but is not an object (a list, string, number,
    ``null``) is rejected with 400 ``bad_json``; previously it reached
    ``body.get(...)`` and surfaced as an unhandled 500.
    """
    routes = web.RouteTableDef()

    async def read_json_object(request: web.Request) -> dict | None:
        """Return the request body as a JSON object, or None if it is not one."""
        try:
            body = await request.json()
        except Exception:
            return None
        return body if isinstance(body, dict) else None

    def bad_json() -> web.Response:
        return web.json_response({"error": "bad_json"}, status=400)

    @routes.get("/status")
    async def status(_request: web.Request) -> web.Response:
        return web.json_response(
            {
                "pool_size": broker.pool_size(),
                "lease_ttl_s": broker.lease_ttl_s,
                "slots": broker.snapshot(),
            }
        )

    @routes.post("/acquire")
    async def acquire(request: web.Request) -> web.Response:
        body = await read_json_object(request)
        if body is None:
            return bad_json()
        holder = body.get("holder")
        if not holder or not isinstance(holder, str):
            return web.json_response({"error": "missing_holder"}, status=400)
        slot = body.get("slot")
        dev_port = body.get("dev_port")
        try:
            rec = await broker.acquire(holder=holder, slot=slot, dev_port=dev_port)
        except PoolExhausted:
            return web.json_response(
                {"error": "pool_exhausted", "snapshot": broker.snapshot()},
                status=409,
            )
        except SlotLeased as exc:
            return web.json_response({"error": "slot_leased", "message": str(exc)}, status=409)
        except UnknownSlot as exc:
            return web.json_response({"error": "unknown_slot", "message": str(exc)}, status=404)
        # The acquire response is the ONLY place we return the lease token.
        return web.json_response(asdict(rec))

    @routes.post("/heartbeat")
    async def heartbeat(request: web.Request) -> web.Response:
        body = await read_json_object(request)
        if body is None:
            return bad_json()
        token = body.get("lease_token")
        if not token:
            return web.json_response({"error": "missing_lease_token"}, status=400)
        try:
            rec = await broker.heartbeat(lease_token=token)
        except UnknownLease as exc:
            return web.json_response({"error": "unknown_lease", "message": str(exc)}, status=404)
        # NOTE(review): this serialises the full record via asdict(), while
        # /release uses rec.to_public(). If the record carries the lease
        # token, that contradicts the "ONLY place" comment in /acquire --
        # confirm which response shape clients rely on before changing it.
        return web.json_response(asdict(rec))

    @routes.post("/release")
    async def release(request: web.Request) -> web.Response:
        body = await read_json_object(request)
        if body is None:
            return bad_json()
        token = body.get("lease_token")
        if not token:
            return web.json_response({"error": "missing_lease_token"}, status=400)
        try:
            rec = await broker.release(lease_token=token)
        except UnknownLease as exc:
            return web.json_response({"error": "unknown_lease", "message": str(exc)}, status=404)
        return web.json_response(rec.to_public())

    @routes.get("/reaped")
    async def reaped(_request: web.Request) -> web.Response:
        return web.json_response({"reaped": broker.reaped_log()})

    app = web.Application()
    app.add_routes(routes)
    return app
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def bind_with_shift(host: str, preferred_port: int, *, max_attempts: int = 10) -> tuple[socket.socket, int]:
    """Bind a listening TCP socket; on EADDRINUSE/EACCES shift by +10 and retry.

    Per V2_PLAN.md locked decision #6: bind() is the atomic test, no
    pre-check race.

    Args:
        host: Address to bind to.
        preferred_port: First port to try. Attempt ``i`` uses
            ``preferred_port + i * 10``. Port 0 asks the OS for any free port.
        max_attempts: Upper bound on the number of ports tried.

    Returns:
        ``(bound listening socket, actual port)``. The port is read back from
        the socket, so it is the real one even when ``preferred_port`` is 0.

    Raises:
        OSError: if no candidate port could be bound, or immediately for any
            bind error other than EADDRINUSE / EACCES.
    """
    last_error: OSError | None = None
    for attempt in range(max_attempts):
        port = preferred_port + attempt * 10
        if port > 65535:
            # Past the valid TCP range; bind() would raise OverflowError.
            break
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind((host, port))
            sock.listen(128)
        except OSError as exc:
            sock.close()
            if exc.errno not in (errno.EADDRINUSE, errno.EACCES):
                raise
            last_error = exc
        except BaseException:
            # Anything else (e.g. OverflowError for a negative port) must not
            # leak the file descriptor either.
            sock.close()
            raise
        else:
            return sock, sock.getsockname()[1]
    raise OSError(
        f"could not bind {host}:{preferred_port} after {max_attempts} attempts: {last_error}"
    ) from last_error
|
escarp/broker/browser.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Chrome for Testing launcher and CDP discovery.
|
|
2
|
+
|
|
3
|
+
Phase 2 minimal: detached subprocess + HTTP /json/version discovery. No lease
|
|
4
|
+
state, no reset logic — that's Phase 3. This module owns "how do we make a
|
|
5
|
+
CfT window exist on the user's monitor and surface its CDP websocket URL."
|
|
6
|
+
|
|
7
|
+
Discovery contract (per Phase 0 smoke test, see V2_PLAN.md §9):
|
|
8
|
+
- Primary: GET http://127.0.0.1:<cdp_port>/json/version -> webSocketDebuggerUrl.
|
|
9
|
+
Empirically confirmed working in CfT 149.0.7827.54 on macOS arm64.
|
|
10
|
+
- The DevToolsActivePort file the original plan called for does NOT appear in
|
|
11
|
+
macOS profile dirs in our testing.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import shutil
|
|
18
|
+
import signal
|
|
19
|
+
import subprocess
|
|
20
|
+
import time
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import httpx
|
|
25
|
+
|
|
26
|
+
# Locked launch flags, per V2_PLAN.md §7 Phase 2.
|
|
27
|
+
_BASE_FLAGS: tuple[str, ...] = (
|
|
28
|
+
"--no-first-run",
|
|
29
|
+
"--no-default-browser-check",
|
|
30
|
+
"--disable-backgrounding-occluded-windows",
|
|
31
|
+
"--disable-renderer-backgrounding",
|
|
32
|
+
"--disable-background-timer-throttling",
|
|
33
|
+
"--disable-extensions",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BrowserLaunchError(RuntimeError):
    """Raised when the Chrome for Testing binary is missing or CDP discovery times out."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ManagedBrowser:
    """Handle for a Chrome for Testing process started by ``launch_cft``."""

    slot: int  # slot index the caller launched this browser for
    cdp_port: int  # value passed as --remote-debugging-port
    cdp_ws_url: str  # webSocketDebuggerUrl reported by /json/version
    profile_dir: Path  # value passed as --user-data-dir
    process: subprocess.Popen[bytes]  # the launched browser process

    @property
    def pid(self) -> int:
        """Process id of the underlying ``Popen`` object."""
        proc = self.process
        return proc.pid

    def is_alive(self) -> bool:
        """Return True while ``poll()`` reports no exit status."""
        exit_status = self.process.poll()
        return exit_status is None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def launch_cft(
    *,
    slot: int,
    binary: Path,
    profile_dir: Path,
    cdp_port: int,
    initial_url: str = "about:blank",
    discovery_timeout: float = 10.0,
) -> ManagedBrowser:
    """Launch a detached Chrome for Testing process and resolve its CDP URL.

    The process is started in a new session (`start_new_session=True`) so that
    when our parent shell or the daemon exits, the browser is reparented to
    launchd/init rather than being killed. This is the load-bearing detail
    for the "browsers outlive every agent" persistence contract.

    Args:
        slot: Slot index recorded on the returned ``ManagedBrowser``.
        binary: Path to the Chrome for Testing executable.
        profile_dir: User-data directory; created if it does not exist.
        cdp_port: Port passed as ``--remote-debugging-port``.
        initial_url: URL opened in the first tab.
        discovery_timeout: Seconds to wait for ``/json/version`` to answer.

    Returns:
        A ``ManagedBrowser`` carrying the process and its websocket URL.

    Raises:
        BrowserLaunchError: if ``binary`` does not exist or CDP discovery
            times out (the process is terminated first in the latter case).
    """
    if not binary.exists():
        raise BrowserLaunchError(f"Chrome for Testing binary not found at {binary}")

    profile_dir.mkdir(parents=True, exist_ok=True)

    args = [
        str(binary),
        f"--remote-debugging-port={cdp_port}",
        f"--user-data-dir={profile_dir}",
        *_BASE_FLAGS,
        initial_url,
    ]

    process = subprocess.Popen(
        args,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        stdin=subprocess.DEVNULL,
        start_new_session=True,
        close_fds=True,
    )

    try:
        ws_url = _resolve_ws_url(cdp_port, timeout=discovery_timeout)
    except BaseException:
        # Discovery failed or was interrupted; don't leak the process.
        # BaseException on purpose: the child runs in its own session, so a
        # ctrl-c at the terminal reaches us (KeyboardInterrupt) but never the
        # browser. Without this it would be orphaned with nobody holding a
        # handle to it.
        _terminate(process)
        raise

    return ManagedBrowser(
        slot=slot,
        cdp_port=cdp_port,
        cdp_ws_url=ws_url,
        profile_dir=profile_dir,
        process=process,
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _resolve_ws_url(cdp_port: int, *, timeout: float) -> str:
    """Poll ``/json/version`` on ``cdp_port`` until it yields a websocket URL.

    Each attempt uses a 1 second HTTP timeout and is followed by a 0.1 second
    pause. Any exception from an attempt is remembered and the loop goes on.

    Raises:
        BrowserLaunchError: once ``timeout`` seconds have elapsed without a
            non-empty ``webSocketDebuggerUrl``; the message includes the last
            error seen, if any.
    """
    endpoint = f"http://127.0.0.1:{cdp_port}/json/version"
    give_up_at = time.monotonic() + timeout
    failure: Exception | None = None

    while time.monotonic() < give_up_at:
        try:
            reply = httpx.get(endpoint, timeout=1.0)
            reply.raise_for_status()
            candidate = reply.json().get("webSocketDebuggerUrl")
        except Exception as exc:
            failure = exc
        else:
            if candidate:
                return candidate
        time.sleep(0.1)

    detail = f" (last error: {failure})" if failure else ""
    raise BrowserLaunchError(
        f"CDP discovery timed out after {timeout}s on port {cdp_port}" + detail
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _terminate(process: subprocess.Popen[bytes], *, grace: float = 3.0) -> None:
|
|
135
|
+
if process.poll() is not None:
|
|
136
|
+
return
|
|
137
|
+
try:
|
|
138
|
+
process.send_signal(signal.SIGTERM)
|
|
139
|
+
process.wait(timeout=grace)
|
|
140
|
+
except subprocess.TimeoutExpired:
|
|
141
|
+
process.kill()
|
|
142
|
+
process.wait(timeout=grace)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def shutdown(browser: ManagedBrowser, *, grace: float = 3.0) -> None:
    """Terminate the process behind ``browser``, allowing ``grace`` seconds per wait."""
    proc = browser.process
    _terminate(proc, grace=grace)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async def reset_browser_state(cdp_port: int, *, target_url: str = "about:blank") -> int:
    """Reset a leased browser to a clean slate.

    Per V2_PLAN.md §7 Phase 2: "on lease release, reset -- never close." Closes
    every page-type tab except a freshly-created one navigated to target_url.
    Targets whose type is not "page" are left alone.

    Uses only the CfT /json HTTP endpoints -- no websocket dance, no extra
    deps beyond httpx.

    Args:
        cdp_port: CDP port of the browser to reset.
        target_url: URL for the single tab that remains afterwards.

    Returns:
        The number of stale tabs closed (zero on a pristine browser).

    Raises:
        httpx.HTTPError: if creating the new tab or listing tabs fails.
            Failures while closing an individual tab are ignored.
    """
    from urllib.parse import quote  # stdlib; only this function needs it

    base = f"http://127.0.0.1:{cdp_port}/json"
    # /json/new takes the URL as the whole query string. Sent raw, a '#'
    # would be split off as a fragment and spaces would be invalid, so
    # percent-encode everything but ':' and '/'. The default "about:blank"
    # and plain http://host:port/ URLs stay byte-identical on the wire.
    # NOTE(review): relies on the browser percent-decoding the query --
    # confirm against the CfT build in use.
    encoded_target = quote(target_url, safe=":/")

    async with httpx.AsyncClient(timeout=5.0) as client:
        # Order matters: create the new clean tab FIRST so we never close down
        # to zero tabs (which would close the only window and kill the process).
        new_resp = await client.put(f"{base}/new?{encoded_target}")
        new_resp.raise_for_status()
        new_id = new_resp.json()["id"]

        list_resp = await client.get(f"{base}/list")
        list_resp.raise_for_status()
        tabs = list_resp.json()

        closed = 0
        for tab in tabs:
            tab_id = tab.get("id")
            # Skip non-page targets, the tab we just made, and entries with
            # no id (nothing to address in /json/close).
            if tab.get("type") != "page" or tab_id is None or tab_id == new_id:
                continue
            try:
                close_resp = await client.get(f"{base}/close/{tab_id}")
                if close_resp.status_code < 400:
                    closed += 1
            except httpx.HTTPError:
                # Best-effort: a tab that vanished mid-loop isn't a problem.
                pass
        return closed
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def find_cft_binary() -> Path | None:
    """Best-effort discovery for the Chrome for Testing binary.

    Order: $ESCARP_CFT_BINARY env, ~/.escarp/chrome/**, ./chrome/** (repo-local
    dev install), then a PATH lookup for ``chrome-for-testing``.

    Returns:
        Path to the binary, or None if nothing usable is found.
    """
    override = os.environ.get("ESCARP_CFT_BINARY")
    if override:
        override_path = Path(override)
        # Must be a file: a directory "exists" but cannot be executed.
        if override_path.is_file():
            return override_path

    search_roots = (
        Path.home() / ".escarp" / "chrome",
        Path.cwd() / "chrome",
    )
    for root in search_roots:
        if not root.is_dir():
            continue
        # @puppeteer/browsers layout: chrome/<platform>-<ver>/chrome-<plat>/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
        for app in root.rglob("Google Chrome for Testing.app"):
            macos = app / "Contents" / "MacOS" / "Google Chrome for Testing"
            if macos.exists():
                return macos
        # Linux layout: chrome/<platform>-<ver>/chrome-linux64/chrome
        for binary in root.rglob("chrome"):
            # Test the binary's own folder name, not the whole path string:
            # otherwise any ancestor containing "chrome-" (e.g. a checkout in
            # ~/chrome-tools/) makes every executable named "chrome" match.
            if (
                binary.is_file()
                and os.access(binary, os.X_OK)
                and "chrome-" in binary.parent.name
            ):
                return binary

    # Last resort: PATH lookup
    which = shutil.which("chrome-for-testing")
    return Path(which) if which else None
|
escarp/broker/daemon.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Broker daemon: discovers already-running browsers, brokers leases, runs reaper.
|
|
2
|
+
|
|
3
|
+
**Does NOT own chrome lifecycles.** Per V2_PLAN.md's persistence contract,
|
|
4
|
+
chromes are infrastructure that exists independently. To start the chromes,
|
|
5
|
+
run `escarp launch-pool` (or launchd, systemd, docker, manual shell, whatever).
|
|
6
|
+
This daemon's only relationship to a chrome is "discover via /json/version,
|
|
7
|
+
talk to it over CDP, never kill it."
|
|
8
|
+
|
|
9
|
+
End-to-end shape:
|
|
10
|
+
|
|
11
|
+
[user] $ escarp launch-pool # one-shot, exits, chromes persist
|
|
12
|
+
[user] $ escarp daemon # discovers chromes, brokers leases
|
|
13
|
+
|
|
|
14
|
+
+-- claim N slot locks (flock) <- "I am the broker for this pool"
|
|
15
|
+
+-- discover each slot via /json/version
|
|
16
|
+
+-- Broker.register() each discovered browser
|
|
17
|
+
+-- reset each browser to about:blank (state hygiene at boot)
|
|
18
|
+
+-- aiohttp server starts on 127.0.0.1:7878
|
|
19
|
+
+-- reaper task starts (sweeps every 2s)
|
|
20
|
+
+-- ^C -> cancel reaper, close server, release locks. CHROMES STAY ALIVE.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import asyncio
|
|
26
|
+
import os
|
|
27
|
+
import signal
|
|
28
|
+
import sys
|
|
29
|
+
|
|
30
|
+
from aiohttp import web
|
|
31
|
+
|
|
32
|
+
from escarp.broker.api import DEFAULT_PORT, bind_with_shift, build_app
|
|
33
|
+
from escarp.broker.browser import reset_browser_state
|
|
34
|
+
from escarp.broker.discovery import DiscoveredBrowser, discover_pool
|
|
35
|
+
from escarp.broker.lease import Broker, reaper_loop
|
|
36
|
+
from escarp.broker.slots import SlotBusy, SlotLease, claim_slot
|
|
37
|
+
|
|
38
|
+
DEFAULT_POOL_SIZE = 4
|
|
39
|
+
DEFAULT_CDP_BASE_PORT = 9222
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _env_int(name: str, default: int) -> int:
|
|
43
|
+
raw = os.environ.get(name)
|
|
44
|
+
if raw is None:
|
|
45
|
+
return default
|
|
46
|
+
try:
|
|
47
|
+
return int(raw)
|
|
48
|
+
except ValueError as exc:
|
|
49
|
+
raise SystemExit(f"{name} must be an integer, got {raw!r}") from exc
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _format_table(leases: list[SlotLease], browsers: list[DiscoveredBrowser]) -> str:
|
|
53
|
+
rows = [f"{'slot':<6}{'cdp':<8}{'frontend':<10}{'backend':<10}cdp_ws_url"]
|
|
54
|
+
for lease, browser in zip(leases, browsers, strict=True):
|
|
55
|
+
rows.append(
|
|
56
|
+
f"{lease.slot:<6}"
|
|
57
|
+
f"{lease.ports.cdp:<8}"
|
|
58
|
+
f"{lease.ports.frontend:<10}"
|
|
59
|
+
f"{lease.ports.backend:<10}"
|
|
60
|
+
f"{browser.cdp_ws_url}"
|
|
61
|
+
)
|
|
62
|
+
return "\n".join(rows)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
async def _serve_http(broker: Broker, api_port: int) -> tuple[web.AppRunner, int]:
    """Start the broker HTTP API on 127.0.0.1 and return ``(runner, actual_port)``.

    The port is chosen by ``bind_with_shift``, so ``actual_port`` may differ
    from ``api_port`` when the preferred port is taken.

    The socket is bound before any aiohttp state is created, and a failure
    during runner setup or site start cleans up both the runner and the
    socket. The caller only receives the runner on success, so nothing else
    could release them.
    """
    sock, actual_port = bind_with_shift("127.0.0.1", api_port)
    runner = web.AppRunner(build_app(broker))
    try:
        await runner.setup()
        site = web.SockSite(runner, sock)
        await site.start()
    except BaseException:
        await runner.cleanup()
        sock.close()
        raise
    return runner, actual_port
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def run_daemon(
    *,
    pool_size: int,
    cdp_base_port: int,
    api_port: int,
    lease_ttl_s: float,
    discovery_wait_s: float,
) -> int:
    """Discover the browser pool, broker leases over HTTP, block until signalled.

    Steps: discover browsers on ``cdp_base_port .. cdp_base_port+pool_size-1``;
    claim the slot lock for each one found, register it and reset it; start
    the HTTP API and the reaper task; wait for SIGINT/SIGTERM.

    Args:
        pool_size: Number of slots to probe.
        cdp_base_port: CDP port of slot 0; slot ``n`` uses ``cdp_base_port + n``.
        api_port: Preferred port for the HTTP API.
        lease_ttl_s: Lease time-to-live handed to the ``Broker``.
        discovery_wait_s: Seconds to keep retrying a silent slot at startup.

    Returns:
        0 after a clean signalled shutdown; 1 if no browser was discovered or
        no slot lock could be claimed.

    Shutdown is best-effort at every step: a failure while stopping the
    reaper or the HTTP runner never prevents the slot locks from being
    released. Browsers are never touched.
    """
    leases: list[SlotLease] = []
    browsers: list[DiscoveredBrowser] = []

    async def reset_for_port(cdp_port: int) -> None:
        # Passed to the Broker as reset_fn; must never raise into it.
        try:
            closed = await reset_browser_state(cdp_port)
            print(f"[reset] cdp_port={cdp_port} closed {closed} stale tab(s)", flush=True)
        except Exception as exc:
            print(f"[reset] cdp_port={cdp_port} failed: {exc}", file=sys.stderr, flush=True)

    broker = Broker(lease_ttl_s=lease_ttl_s, reset_fn=reset_for_port)
    runner: web.AppRunner | None = None
    reaper_task: asyncio.Task[None] | None = None
    stop_event = asyncio.Event()

    try:
        # 1. Discover whatever's already running.
        print(
            f"discovering pool: slots [0, {pool_size}) on cdp_ports "
            f"{cdp_base_port}..{cdp_base_port + pool_size - 1}",
            flush=True,
        )
        discovered, missing = await discover_pool(
            pool_size=pool_size,
            cdp_base_port=cdp_base_port,
            wait_for_each=discovery_wait_s,
        )
        for slot in missing:
            print(
                f"[slot {slot}] no chrome on cdp_port {cdp_base_port + slot}. "
                f"Run `escarp launch-pool` (or start a chrome there) first.",
                file=sys.stderr,
                flush=True,
            )
        if not discovered:
            print("nothing to broker. exiting.", file=sys.stderr, flush=True)
            return 1

        # 2. Claim slot locks for whatever's discovered; register into the broker.
        for browser in discovered:
            try:
                lease = claim_slot(browser.slot)
            except SlotBusy as exc:
                print(
                    f"[slot {browser.slot}] another broker holds the lock for this slot. {exc}",
                    file=sys.stderr,
                    flush=True,
                )
                continue
            leases.append(lease)
            browsers.append(browser)
            broker.register(
                slot=browser.slot,
                cdp_port=browser.cdp_port,
                cdp_ws_url=browser.cdp_ws_url,
                pid=-1,  # we don't own this process
            )
            try:
                stale_closed = await reset_browser_state(browser.cdp_port)
            except Exception as exc:
                stale_closed = -1
                print(f"[slot {browser.slot}] startup reset failed: {exc}", file=sys.stderr)
            print(
                f"[slot {browser.slot}] discovered ws={browser.cdp_ws_url}"
                f" (reset closed {stale_closed} stale tab(s))",
                flush=True,
            )

        if not browsers:
            print("no slots brokered. exiting.", file=sys.stderr, flush=True)
            return 1

        # 3. HTTP + reaper.
        runner, actual_port = await _serve_http(broker, api_port)
        reaper_task = asyncio.create_task(reaper_loop(broker, stop=stop_event))

        print()
        print(_format_table(leases, browsers))
        print()
        print(f"broker http api: http://127.0.0.1:{actual_port}")
        print(f"lease ttl: {lease_ttl_s}s reaper interval: 2s")
        print(f"try: curl http://127.0.0.1:{actual_port}/status | jq")
        print(
            f"\npool of {len(browsers)} brokered. chromes are NOT owned by this daemon "
            f"and will survive ctrl-c.",
            flush=True,
        )
        print("ctrl-c to release the slot locks and stop brokering.\n", flush=True)

        loop = asyncio.get_running_loop()
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, stop_event.set)

        await stop_event.wait()
        print("\nshutdown signal received. releasing slot locks; chromes left alive.")
        return 0
    finally:
        stop_event.set()
        if reaper_task is not None:
            try:
                await asyncio.wait_for(reaper_task, timeout=3.0)
            except (asyncio.TimeoutError, asyncio.CancelledError):
                # asyncio.TimeoutError is only an alias of the builtin
                # TimeoutError from Python 3.11 on; naming it explicitly keeps
                # 3.10 from skipping the cleanup below.
                reaper_task.cancel()
            except Exception as exc:
                # The reaper itself crashed; report it but keep shutting down.
                print(f"[reaper] exited with error: {exc}", file=sys.stderr, flush=True)
        try:
            if runner is not None:
                await runner.cleanup()
        finally:
            # Locks are released even if the HTTP cleanup failed, and one
            # failing release must not strand the remaining locks.
            for lease in leases:
                try:
                    lease.release()
                except Exception as exc:
                    print(
                        f"[slot {lease.slot}] failed to release slot lock: {exc}",
                        file=sys.stderr,
                        flush=True,
                    )
        # NOTE: intentionally do NOT touch chromes. They are infrastructure.
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _env_float(name: str, default: float) -> float:
    """Read environment variable ``name`` as a float, exiting cleanly if malformed."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError as exc:
        raise SystemExit(f"{name} must be a number, got {raw!r}") from exc


def main(argv: list[str] | None = None) -> int:
    """Entry point for the broker daemon; configuration comes from the environment.

    Environment variables (defaults in parentheses):
      ESCARP_POOL_SIZE (4), ESCARP_CDP_BASE (9222), ESCARP_API_PORT (7878),
      ESCARP_LEASE_TTL_S (60), ESCARP_DISCOVERY_WAIT_S (0).

    Args:
        argv: Accepted but not read by this function.

    Returns:
        The exit code from ``run_daemon``, or 0 on ``KeyboardInterrupt``.

    Raises:
        SystemExit: if a variable is malformed or ESCARP_POOL_SIZE is < 1.
    """
    pool_size = _env_int("ESCARP_POOL_SIZE", DEFAULT_POOL_SIZE)
    if pool_size < 1:
        raise SystemExit(f"ESCARP_POOL_SIZE must be >= 1, got {pool_size}")
    cdp_base_port = _env_int("ESCARP_CDP_BASE", DEFAULT_CDP_BASE_PORT)
    api_port = _env_int("ESCARP_API_PORT", DEFAULT_PORT)
    # Parsed like the int settings: a typo gives a one-line message, not a
    # ValueError traceback.
    lease_ttl_s = _env_float("ESCARP_LEASE_TTL_S", 60.0)
    discovery_wait_s = _env_float("ESCARP_DISCOVERY_WAIT_S", 0.0)

    try:
        return asyncio.run(
            run_daemon(
                pool_size=pool_size,
                cdp_base_port=cdp_base_port,
                api_port=api_port,
                lease_ttl_s=lease_ttl_s,
                discovery_wait_s=discovery_wait_s,
            )
        )
    except KeyboardInterrupt:
        return 0
|
escarp/broker/discovery.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Discover already-running Chrome for Testing instances by cdp_port.
|
|
2
|
+
|
|
3
|
+
Escarp v2's persistence contract: the broker does NOT own chrome lifecycles.
|
|
4
|
+
Chromes are infrastructure that exists independently of escarp. The daemon's
|
|
5
|
+
job is to discover what's listening on the per-slot cdp_ports and broker
|
|
6
|
+
leases against those, full stop. If a chrome dies, that's somebody else's
|
|
7
|
+
problem (or a launchd/systemd auto-restart's job).
|
|
8
|
+
|
|
9
|
+
How to start chromes:
|
|
10
|
+
- `escarp launch-pool` (dev convenience, one-shot)
|
|
11
|
+
- launchd/systemd unit
|
|
12
|
+
- manual: `chrome-for-testing --remote-debugging-port=9222 --user-data-dir=...`
|
|
13
|
+
- docker
|
|
14
|
+
- whatever you want -- as long as cdp_ports 9222..9222+N-1 are listening.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
|
|
22
|
+
import httpx
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class DiscoveredBrowser:
    """Immutable record of a browser found answering on a slot's CDP port."""

    slot: int  # slot index within the pool
    cdp_port: int  # port that was probed
    cdp_ws_url: str  # "webSocketDebuggerUrl" from /json/version
    browser_version: str  # "Browser" from /json/version ("" when absent)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def probe(cdp_port: int, *, timeout: float = 2.0) -> dict | None:
    """GET /json/version on ``cdp_port`` and return the decoded JSON object.

    Per Phase 0 findings, /json/version is the right discovery primitive on
    CfT 149+.

    Returns:
        The response as a dict, or None if nothing is listening, the request
        fails, the status is an error, or the body is not a JSON object.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(f"http://127.0.0.1:{cdp_port}/json/version")
            resp.raise_for_status()
            payload = resp.json()
    except (httpx.HTTPError, ValueError):
        return None
    # Callers call .get() on the result, so a JSON list/string/number from
    # some other service on this port counts as "no browser here".
    return payload if isinstance(payload, dict) else None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def discover_pool(
    *,
    pool_size: int,
    cdp_base_port: int = 9222,
    wait_for_each: float = 0.0,
) -> tuple[list[DiscoveredBrowser], list[int]]:
    """Probe slots [0, pool_size); slot ``n`` lives on ``cdp_base_port + n``.

    All slots are probed concurrently, so the worst-case wait is about
    ``wait_for_each`` in total rather than ``pool_size * wait_for_each``.
    Results are still reported in slot order.

    Args:
        pool_size: Number of slots to probe.
        cdp_base_port: CDP port of slot 0.
        wait_for_each: Seconds to keep retrying a slot that does not answer.

    Returns:
        ``(discovered, missing_slots)``. ``missing_slots`` lists the slot
        indices whose port did not answer or whose reply carried no
        ``webSocketDebuggerUrl`` -- the caller decides what to do about that
        (warn, retry, error out).
    """
    ports = [cdp_base_port + slot for slot in range(pool_size)]
    # gather() returns results in argument order, i.e. slot order.
    infos = await asyncio.gather(
        *(_probe_with_retry(port, wait_for_each=wait_for_each) for port in ports)
    )

    discovered: list[DiscoveredBrowser] = []
    missing: list[int] = []
    for slot, (port, info) in enumerate(zip(ports, infos)):
        ws = info.get("webSocketDebuggerUrl") if info is not None else None
        if not ws:
            missing.append(slot)
            continue
        discovered.append(
            DiscoveredBrowser(
                slot=slot,
                cdp_port=port,
                cdp_ws_url=ws,
                browser_version=str(info.get("Browser", "")),
            )
        )
    return discovered, missing
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def _probe_with_retry(cdp_port: int, *, wait_for_each: float) -> dict | None:
    """Probe ``cdp_port`` once, then keep retrying for up to ``wait_for_each`` seconds.

    With ``wait_for_each <= 0`` only the single initial probe is made.
    Retries are spaced 0.2 seconds apart. Useful right after
    `escarp launch-pool`, since chrome can take a couple of seconds to bind
    its CDP port.

    Returns:
        The first non-None probe result, or None if every attempt came back empty.
    """
    first_try = await probe(cdp_port)
    if first_try is not None:
        return first_try
    if wait_for_each <= 0:
        return None

    now = asyncio.get_running_loop().time
    give_up_at = now() + wait_for_each
    while now() < give_up_at:
        await asyncio.sleep(0.2)
        retry = await probe(cdp_port)
        if retry is not None:
            return retry
    return None
|