meshcode 2.11.134__tar.gz → 2.11.136__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.134 → meshcode-2.11.136}/PKG-INFO +1 -1
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/__init__.py +1 -1
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/hostd.py +106 -166
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/backend.py +13 -8
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/server.py +24 -4
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/protocol_handler.py +19 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.134 → meshcode-2.11.136}/pyproject.toml +1 -1
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_hostd_zombie_sessions.py +141 -3
- {meshcode-2.11.134 → meshcode-2.11.136}/README.md +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/__main__.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/_session_handoff_template.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/claude_update.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/cli.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/comms_v4.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/compat.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/daemon.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/doctor.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/invites.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/launcher.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/realtime.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/swarm.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/meshcode_mcp/test_swarm.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/preferences.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/run_agent.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/secrets.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/self_update.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/up.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode/upload.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/SOURCES.txt +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/setup.cfg +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_auto_update_hardening.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_autonomous_closegap_1.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_autonomous_closegap_2.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_autonomous_closegap_3.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_autonomous_prompt_inject.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_boot_bug_regression.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_color_truecolor.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_core.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_cross_agent_messaging.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_date_parse.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_doctor.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_epistemic_v1_python_sdk.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_epistemic_v1_stop_conditions.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_esc_deaf_state.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_exceptions.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_file_upload.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_init_device_code.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_install_guard.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_lease_sigterm_release.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_live_mesh_guard.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_mark_read_batch.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_marketplace_ratings.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_migration_integrity.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_realtime_event_freshness.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_rls_cross_tenant.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_rpc_grants.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_rpc_migrations.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_run_agent_dry_run.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_run_agent_no_server_import.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_security_regressions.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_self_update_user_site.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_sentinel.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_session_replay_gate.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_setup_path.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_sleep_signals.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_status_enum_coverage.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_stay_on_loop_hook.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_stop_ghost_terminal.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_swarm_events.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_task_progress.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_terminal_lifecycle.py +0 -0
- {meshcode-2.11.134 → meshcode-2.11.136}/tests/test_wait_open_tasks_contradiction.py +0 -0
|
@@ -1037,26 +1037,6 @@ def _recycle_blocked(st, target, now=None):
|
|
|
1037
1037
|
return None
|
|
1038
1038
|
|
|
1039
1039
|
|
|
1040
|
-
# Samuel rule 2026-06-04: never recycle a CONNECTED agent (live MCP session) except
|
|
1041
|
-
# the >3h uptime lifecycle. BUSY_STATUSES (working/online/busy) MISSES a connected-
|
|
1042
|
-
# but-idle agent (status idle/standby, window open, heartbeat fresh) — a fresh
|
|
1043
|
-
# heartbeat is the stronger 'live session' signal, so an idle-but-connected agent
|
|
1044
|
-
# was being version-recycled out from under the user (the storm he kept seeing).
|
|
1045
|
-
_CONNECTED_HEARTBEAT_S = _env_int("MESHCODE_CONNECTED_HEARTBEAT_SEC", 60, 10)
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
def _agent_connected(a) -> bool:
|
|
1049
|
-
"""True if the agent has a live MCP session: an explicitly-busy status OR a
|
|
1050
|
-
very-recent heartbeat (window open even when idle/standby)."""
|
|
1051
|
-
if (a.get("status") or "") in BUSY_STATUSES:
|
|
1052
|
-
return True
|
|
1053
|
-
hb = a.get("heartbeat_age_s")
|
|
1054
|
-
try:
|
|
1055
|
-
return hb is not None and float(hb) < _CONNECTED_HEARTBEAT_S
|
|
1056
|
-
except (TypeError, ValueError):
|
|
1057
|
-
return False
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
1040
|
# Part 2 (Samuel req #2 2026-06-04): on a VISIBLE recycle, close the OLD window so
|
|
1061
1041
|
# old+new don't both stay open (audit gap 6a203baa). DRY-RUN first (commander q2 +
|
|
1062
1042
|
# reaper safe-arm pattern): log-only until the logs confirm it's ONLY the old pid.
|
|
@@ -1350,6 +1330,46 @@ def _own_ancestry_pids() -> set:
|
|
|
1350
1330
|
return anc
|
|
1351
1331
|
|
|
1352
1332
|
|
|
1333
|
+
def _run_token_rx(target: str):
|
|
1334
|
+
"""`meshcode run <target>` token regex accepting BOTH spawn forms (task 3ed8781d).
|
|
1335
|
+
|
|
1336
|
+
BARE-NAME GAP (the Windows stop-por-RPC zombie, Samuel 2026-06-11): hostd spawns
|
|
1337
|
+
`run "project/agent"` but the FE Start click goes through protocol_handler's
|
|
1338
|
+
cmd_launch_batch, which spawns `run "agent"` (bare). The old per-call-site token
|
|
1339
|
+
(`run <project/agent>`) could NEVER match a bare-spawned session, so stop/force-kill/
|
|
1340
|
+
reap/reconcile discovery silently found nothing for protocol-launched agents — the
|
|
1341
|
+
session survived every Stop, the relaunch opened a NEW wt tab on top, and Samuel's
|
|
1342
|
+
box accumulated up to 3 sessions/tabs per agent.
|
|
1343
|
+
|
|
1344
|
+
- target 'proj/agent': matches `run [\"']?(proj/)?agent` — both qualified and bare.
|
|
1345
|
+
- target 'agent' (bare, the protocol reconcile caller): matches `run [\"']?(<any>/)?agent`
|
|
1346
|
+
— both forms again, since a survivor may have been hostd-spawned.
|
|
1347
|
+
All forms keep the optional opening quote (QUOTED-TARGET FIX 14e0760c) and the
|
|
1348
|
+
trailing lookahead, so 'qa' can never prefix-match 'qa-2'."""
|
|
1349
|
+
proj, _, agent = target.rpartition("/")
|
|
1350
|
+
if proj:
|
|
1351
|
+
pfx = r"(?:" + re.escape(proj) + r"/)?"
|
|
1352
|
+
else:
|
|
1353
|
+
pfx = r"(?:[^\s\"']*/)?"
|
|
1354
|
+
return re.compile(r"run\s+[\"']?" + pfx + re.escape(agent) + r"(?=\s|$|[\"'])")
|
|
1355
|
+
|
|
1356
|
+
|
|
1357
|
+
def _bare_match_is_foreign(cl: str, cwd: str, target: str) -> bool:
|
|
1358
|
+
"""Cross-project guard for BARE-token matches (task 3ed8781d). When `target` is
|
|
1359
|
+
project-qualified but the candidate cmdline only carries the bare agent name,
|
|
1360
|
+
a same-named agent of ANOTHER mesh on this box could collide. The workspace
|
|
1361
|
+
cwd (`~/meshcode/<project>-<agent>`) disambiguates: if the cwd basename looks
|
|
1362
|
+
like a workspace of this agent but a DIFFERENT project, skip it. Unreadable
|
|
1363
|
+
cwd or qualified-token matches are never rejected (best-effort guard only)."""
|
|
1364
|
+
proj, _, agent = target.rpartition("/")
|
|
1365
|
+
if not proj or f"{proj}/{agent}" in (cl or ""):
|
|
1366
|
+
return False # qualified match — unambiguous
|
|
1367
|
+
# separator-agnostic last path component (a Windows cwd seen from tests/tools
|
|
1368
|
+
# running under POSIX must split the same way)
|
|
1369
|
+
base = re.split(r"[\\/]", (cwd or "").rstrip("/\\"))[-1]
|
|
1370
|
+
return base.endswith(f"-{agent}") and base != f"{proj}-{agent}"
|
|
1371
|
+
|
|
1372
|
+
|
|
1353
1373
|
def _discover_agent_pids(target: str) -> list:
|
|
1354
1374
|
"""Fallback PID discovery by command line, for agents spawned before this hostd
|
|
1355
1375
|
(no recorded PID) or after a state-file loss. Matches `meshcode run <target>`.
|
|
@@ -1373,9 +1393,16 @@ def _discover_agent_pids(target: str) -> list:
|
|
|
1373
1393
|
(`run\\s+<target>`) NEVER matched on Windows — stop/force-kill discovery silently
|
|
1374
1394
|
found nothing, which is how stopped sessions survived as zombies. The token now
|
|
1375
1395
|
accepts one optional opening quote; the trailing lookahead still forbids any
|
|
1376
|
-
'qa' vs 'qa-2' prefix collision.
|
|
1396
|
+
'qa' vs 'qa-2' prefix collision.
|
|
1397
|
+
|
|
1398
|
+
BARE-NAME FIX (task 3ed8781d): the token now comes from _run_token_rx, which ALSO
|
|
1399
|
+
matches protocol-handler bare-name spawns (`run "agent"`) — the form every FE Start
|
|
1400
|
+
click produces. Bare matches are cwd-guarded against same-named agents of another
|
|
1401
|
+
mesh (psutil path only; the native fallbacks accept the regex as-is, documented
|
|
1402
|
+
tradeoff: a zombie that survives every stop is the live bug, the cross-mesh
|
|
1403
|
+
same-name collision is the rare one)."""
|
|
1377
1404
|
pids = []
|
|
1378
|
-
tok =
|
|
1405
|
+
tok = _run_token_rx(target)
|
|
1379
1406
|
ps = _psutil()
|
|
1380
1407
|
if ps is not None:
|
|
1381
1408
|
try:
|
|
@@ -1386,10 +1413,23 @@ def _discover_agent_pids(target: str) -> list:
|
|
|
1386
1413
|
continue
|
|
1387
1414
|
cl = " ".join(p.cmdline() or [])
|
|
1388
1415
|
if "meshcode" in cl and tok.search(cl) and "python" in (p.name() or "").lower():
|
|
1416
|
+
try:
|
|
1417
|
+
cwd = p.cwd() or ""
|
|
1418
|
+
except Exception:
|
|
1419
|
+
cwd = ""
|
|
1420
|
+
if _bare_match_is_foreign(cl, cwd, target):
|
|
1421
|
+
_log(f"DISCOVER {target}: pid {p.pid} bare-name match belongs to "
|
|
1422
|
+
f"another project (cwd {cwd!r}) — skip")
|
|
1423
|
+
continue
|
|
1389
1424
|
pids.append(p.pid)
|
|
1390
1425
|
except Exception:
|
|
1391
1426
|
continue
|
|
1392
|
-
|
|
1427
|
+
# GHOST-TERMINAL fix (91201315) APPLIES HERE TOO (task 3ed8781d): the old
|
|
1428
|
+
# `return pids` skipped the POSIX launcher-children fallback whenever
|
|
1429
|
+
# psutil was importable — re-defeating the '0 stopped forever' hole on
|
|
1430
|
+
# every box with psutil. Fall through to the shared tail instead.
|
|
1431
|
+
return pids + ([p for p in _discover_launcher_child_pids(target)
|
|
1432
|
+
if p not in pids] if sys.platform != "win32" else [])
|
|
1393
1433
|
except Exception:
|
|
1394
1434
|
pids = [] # psutil enumeration itself broke — fall through to native
|
|
1395
1435
|
try:
|
|
@@ -1409,8 +1449,11 @@ def _discover_agent_pids(target: str) -> list:
|
|
|
1409
1449
|
pass
|
|
1410
1450
|
block_pid = None
|
|
1411
1451
|
else:
|
|
1452
|
+
# task 3ed8781d: pattern WITHOUT the target so bare-name spawns
|
|
1453
|
+
# (`run "agent"`) are candidates too; the exact tok filter below
|
|
1454
|
+
# keeps the kill set tight.
|
|
1412
1455
|
out = subprocess.run(
|
|
1413
|
-
["pgrep", "-f",
|
|
1456
|
+
["pgrep", "-f", "meshcode.* run "],
|
|
1414
1457
|
capture_output=True, text=True, timeout=8).stdout
|
|
1415
1458
|
for ln in out.split():
|
|
1416
1459
|
try:
|
|
@@ -1494,12 +1537,17 @@ def _discover_serve_pids(target: str) -> list:
|
|
|
1494
1537
|
sibling agent's serve (same project, different agent) can never match. psutil-only
|
|
1495
1538
|
(environ() needs a real process handle); without psutil returns [] — degraded but
|
|
1496
1539
|
safe, and the session-root tree kill still takes any serve that is STILL parented
|
|
1497
|
-
under the session.
|
|
1540
|
+
under the session.
|
|
1541
|
+
|
|
1542
|
+
BARE-TARGET support (task 3ed8781d): a bare `agent` target (the protocol-handler
|
|
1543
|
+
relaunch reconcile) matches on MESHCODE_AGENT alone — the relaunch click means
|
|
1544
|
+
'fresh session of THIS agent on this box', so any project's orphan of that name
|
|
1545
|
+
is a survivor to clear. Qualified targets keep the exact two-field match."""
|
|
1498
1546
|
ps = _psutil()
|
|
1499
1547
|
if ps is None:
|
|
1500
1548
|
return []
|
|
1501
|
-
proj, _, agent = target.
|
|
1502
|
-
if not
|
|
1549
|
+
proj, _, agent = target.rpartition("/")
|
|
1550
|
+
if not agent:
|
|
1503
1551
|
return []
|
|
1504
1552
|
own = _own_pid()
|
|
1505
1553
|
out = []
|
|
@@ -1512,7 +1560,8 @@ def _discover_serve_pids(target: str) -> list:
|
|
|
1512
1560
|
if "meshcode" not in cl or "serve" not in cl or "meshcode_mcp" not in cl:
|
|
1513
1561
|
continue
|
|
1514
1562
|
env = p.environ() or {}
|
|
1515
|
-
if env.get("
|
|
1563
|
+
if env.get("MESHCODE_AGENT") == agent and \
|
|
1564
|
+
(not proj or env.get("MESHCODE_PROJECT") == proj):
|
|
1516
1565
|
out.append(p.pid)
|
|
1517
1566
|
except Exception:
|
|
1518
1567
|
continue # process vanished / access denied — never block the sweep
|
|
@@ -1527,10 +1576,16 @@ def _kill_heartbeat_fork(target: str) -> None:
|
|
|
1527
1576
|
would keep POSTing and show a stopped agent 'online'. Stop it by its pidfile
|
|
1528
1577
|
heartbeat_<proj>_<name>.pid. Best-effort."""
|
|
1529
1578
|
try:
|
|
1530
|
-
proj, _, agent = target.
|
|
1579
|
+
proj, _, agent = target.rpartition("/")
|
|
1531
1580
|
if not agent:
|
|
1532
1581
|
return
|
|
1533
|
-
|
|
1582
|
+
if proj:
|
|
1583
|
+
pidf = STATE_DIR / f"heartbeat_{proj}_{agent}.pid"
|
|
1584
|
+
else:
|
|
1585
|
+
# bare target (task 3ed8781d, protocol-handler reconcile): project
|
|
1586
|
+
# unknown — glob the agent's pidfile across projects (first hit).
|
|
1587
|
+
cands = sorted(STATE_DIR.glob(f"heartbeat_*_{agent}.pid"))
|
|
1588
|
+
pidf = cands[0] if cands else STATE_DIR / f"heartbeat__{agent}.pid"
|
|
1534
1589
|
if not pidf.exists():
|
|
1535
1590
|
return
|
|
1536
1591
|
try:
|
|
@@ -1562,15 +1617,25 @@ def _write_stop_marker(target: str) -> None:
|
|
|
1562
1617
|
(6/6 needed SIGKILL), so the wrapper sees rc=137 — indistinguishable from
|
|
1563
1618
|
a crash by exit code alone. The wrapper treats a non-zero rc as a CLEAN
|
|
1564
1619
|
stop (closes its own tab) only when this marker exists and is fresh;
|
|
1565
|
-
real crashes have no marker and keep their scrollback open. Best-effort.
|
|
1620
|
+
real crashes have no marker and keep their scrollback open. Best-effort.
|
|
1621
|
+
|
|
1622
|
+
DUAL-LABEL FIX (task 3ed8781d, Windows tab accumulation): the launcher script
|
|
1623
|
+
computes its marker name from ITS OWN spawn cmd — hostd spawns carry
|
|
1624
|
+
'project/agent' (label 'project_agent') but protocol-handler launch-batch spawns
|
|
1625
|
+
carry the BARE agent name (label 'agent'). A stop that only stamped the
|
|
1626
|
+
target-form marker left a bare-spawned wrapper seeing rc!=0 WITHOUT its marker
|
|
1627
|
+
-> `pause` -> dead wt tab held open forever; relaunch then added a fresh tab
|
|
1628
|
+
(Samuel: 'demasiadas tabs'). Stamp BOTH labels — both are consumed/GC'd."""
|
|
1566
1629
|
try:
|
|
1567
|
-
# EXACT mirror of protocol_handler._launcher_label's sanitization —
|
|
1568
|
-
# the wrapper computes the same name at generation time.
|
|
1569
|
-
safe = re.sub(r"[^A-Za-z0-9_.-]", "_", target.strip()).strip("_")[:80]
|
|
1570
|
-
if not safe:
|
|
1571
|
-
return
|
|
1572
1630
|
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
|
1573
|
-
|
|
1631
|
+
labels = {target.strip(), target.rpartition("/")[2].strip()}
|
|
1632
|
+
for raw in labels:
|
|
1633
|
+
# EXACT mirror of protocol_handler._launcher_label's sanitization —
|
|
1634
|
+
# the wrapper computes the same name at generation time.
|
|
1635
|
+
safe = re.sub(r"[^A-Za-z0-9_.-]", "_", raw).strip("_")[:80]
|
|
1636
|
+
if not safe:
|
|
1637
|
+
continue
|
|
1638
|
+
(STATE_DIR / f"stopmark_{safe}").write_text(str(time.time()), encoding="utf-8")
|
|
1574
1639
|
except Exception:
|
|
1575
1640
|
pass
|
|
1576
1641
|
|
|
@@ -1589,9 +1654,10 @@ def _session_root_pid(target: str, pid: int) -> int:
|
|
|
1589
1654
|
ps = _psutil()
|
|
1590
1655
|
if ps is None:
|
|
1591
1656
|
return pid
|
|
1592
|
-
#
|
|
1593
|
-
#
|
|
1594
|
-
|
|
1657
|
+
# Same token everywhere (task 3ed8781d): _run_token_rx accepts the quoted AND the
|
|
1658
|
+
# bare-name spawn forms, so the climb also reaches the wrapper of a protocol-
|
|
1659
|
+
# handler-launched session (`run "agent"`).
|
|
1660
|
+
tok = _run_token_rx(target)
|
|
1595
1661
|
anc = _own_ancestry_pids()
|
|
1596
1662
|
root = pid
|
|
1597
1663
|
try:
|
|
@@ -2122,57 +2188,6 @@ def _do_recycles(api_key: str, host_id: str) -> int:
|
|
|
2122
2188
|
# it stays dead even if a schedule row reappears. Crash-RESPAWN (_do_respawns) is
|
|
2123
2189
|
# UNAFFECTED — only RECYCLE triggers are killed.
|
|
2124
2190
|
return 0
|
|
2125
|
-
cfg = _rpc("mc_host_config_get", {"p_api_key": api_key, "p_host_id": host_id})
|
|
2126
|
-
if not cfg or not cfg.get("ok"):
|
|
2127
|
-
return 0
|
|
2128
|
-
pol = _rpc("mc_host_recycle_policy", {"p_api_key": api_key, "p_host_id": host_id})
|
|
2129
|
-
mode = (pol or {}).get("recycle_mode")
|
|
2130
|
-
value = (pol or {}).get("recycle_value")
|
|
2131
|
-
if mode != "time" or not value:
|
|
2132
|
-
return 0 # context-recycle is agent-cooperative; only time is daemon-driven
|
|
2133
|
-
st = _load_state()
|
|
2134
|
-
now = time.time()
|
|
2135
|
-
n = 0
|
|
2136
|
-
seen = set()
|
|
2137
|
-
for a in cfg.get("agents", []):
|
|
2138
|
-
if a.get("desired_state") != "running":
|
|
2139
|
-
continue
|
|
2140
|
-
proj, agent = a.get("project_name"), a.get("name")
|
|
2141
|
-
if not proj or not agent:
|
|
2142
|
-
continue
|
|
2143
|
-
key = f"{a.get('project_id')}/{agent}"
|
|
2144
|
-
seen.add(key)
|
|
2145
|
-
first = st.get(key)
|
|
2146
|
-
if first is None:
|
|
2147
|
-
st[key] = now # start the uptime clock on first observation
|
|
2148
|
-
continue
|
|
2149
|
-
if (a.get("status") or "") in BUSY_STATUSES:
|
|
2150
|
-
continue # task boundary only — never mid-task
|
|
2151
|
-
if (now - first) >= float(value) * 3600.0:
|
|
2152
|
-
_log(f"RECYCLE {proj}/{agent} (uptime {(now-first)/3600:.1f}h >= {value}h)")
|
|
2153
|
-
# Server-authorized clean-exit (task 548c863e, mig 364): SIGNAL the
|
|
2154
|
-
# recycle instead of spawning. A direct _spawn_agent here duplicates
|
|
2155
|
-
# the still-alive process. mc_request_recycle sets a flag; the
|
|
2156
|
-
# agent's wait-loop consumes it, returns must_exit/reason=recycle,
|
|
2157
|
-
# exits CLEAN (Stop-hook writes the handoff), then the respawn path
|
|
2158
|
-
# (_do_respawns) relaunches it fresh. Recorded via mc_record_recycle
|
|
2159
|
-
# so it's NEVER counted against the crash respawn cap.
|
|
2160
|
-
req = _rpc("mc_request_recycle",
|
|
2161
|
-
{"p_api_key": api_key, "p_project_id": a.get("project_id"), "p_agent_name": agent})
|
|
2162
|
-
if req and req.get("ok") and req.get("requested"):
|
|
2163
|
-
_rpc("mc_record_recycle",
|
|
2164
|
-
{"p_api_key": api_key, "p_project_id": a.get("project_id"), "p_agent_name": agent})
|
|
2165
|
-
st[key] = now # reset clock after signaling recycle
|
|
2166
|
-
n += 1
|
|
2167
|
-
elif req and req.get("ok") and not req.get("requested"):
|
|
2168
|
-
# Agent flipped busy between the roster read and the request —
|
|
2169
|
-
# skip; retry next sweep at the next task boundary.
|
|
2170
|
-
_log(f"SKIP recycle {proj}/{agent}: {req.get('reason','not_requested')}")
|
|
2171
|
-
# prune state for agents no longer managed on this host
|
|
2172
|
-
for k in [k for k in st if k not in seen]:
|
|
2173
|
-
st.pop(k, None)
|
|
2174
|
-
_save_state(st)
|
|
2175
|
-
return n
|
|
2176
2191
|
|
|
2177
2192
|
|
|
2178
2193
|
# ------------------------------------------------------------------
|
|
@@ -2502,10 +2517,6 @@ def _hostd_bootstrap(verbose: bool = False) -> bool:
|
|
|
2502
2517
|
return False
|
|
2503
2518
|
|
|
2504
2519
|
|
|
2505
|
-
# a69ac010-C: after this many version-recycles at the SAME cv with no advance, STOP (env won't converge).
|
|
2506
|
-
_VERREC_MAX_ATTEMPTS = 3
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
2520
|
def _do_version_recycles(api_key: str, host_id: str) -> int:
|
|
2510
2521
|
"""Recycle managed agents running an OLDER meshcode than what's installed on disk so they pick up
|
|
2511
2522
|
the auto-updated version (task 14782bb4 D). GRACIOUS (guardrail #5): idle-only (NEVER a BUSY agent /
|
|
@@ -2516,77 +2527,6 @@ def _do_version_recycles(api_key: str, host_id: str) -> int:
|
|
|
2516
2527
|
# This was the env-mismatch storm source; fixed-at-source via run_agent env-sync, but the
|
|
2517
2528
|
# whole recycle feature is being removed per owner. Crash-RESPAWN is unaffected.
|
|
2518
2529
|
return 0
|
|
2519
|
-
cfg = _rpc("mc_host_config_get", {"p_api_key": api_key, "p_host_id": host_id})
|
|
2520
|
-
if not cfg or not cfg.get("ok"):
|
|
2521
|
-
return 0
|
|
2522
|
-
try:
|
|
2523
|
-
from meshcode import self_update as _su
|
|
2524
|
-
disk = _su._current_version()
|
|
2525
|
-
except Exception:
|
|
2526
|
-
return 0
|
|
2527
|
-
if not disk:
|
|
2528
|
-
return 0
|
|
2529
|
-
st = _load_state()
|
|
2530
|
-
now = time.time()
|
|
2531
|
-
n = 0
|
|
2532
|
-
for a in cfg.get("agents", []):
|
|
2533
|
-
if a.get("desired_state") != "running":
|
|
2534
|
-
continue
|
|
2535
|
-
cv = a.get("cli_version")
|
|
2536
|
-
if not cv:
|
|
2537
|
-
continue
|
|
2538
|
-
try:
|
|
2539
|
-
if not _su._is_newer(disk, cv):
|
|
2540
|
-
continue # agent already on the on-disk version (or newer) — nothing to do
|
|
2541
|
-
except Exception:
|
|
2542
|
-
continue
|
|
2543
|
-
if _agent_connected(a):
|
|
2544
|
-
continue # Samuel rule: never version-recycle a CONNECTED agent (live MCP session,
|
|
2545
|
-
# even if idle/standby) — the >3h uptime lifecycle (_do_recycles) is the
|
|
2546
|
-
# only recycle that may touch a connected agent.
|
|
2547
|
-
proj, agent = a.get("project_name"), a.get("name")
|
|
2548
|
-
if not proj or not agent:
|
|
2549
|
-
continue
|
|
2550
|
-
if _recycle_blocked(st, f"{proj}/{agent}", now):
|
|
2551
|
-
continue # 451d33a0 unify: already blocked as a non-converging recycle — don't also version-recycle it
|
|
2552
|
-
key = f"verrec/{a.get('project_id')}/{agent}"
|
|
2553
|
-
if now - float(st.get(key, 0) or 0) < 1800:
|
|
2554
|
-
continue # rate-limit: <=1 version-recycle per agent / 30min (no recycle-storm)
|
|
2555
|
-
# a69ac010-C CONVERGENCE GUARD: if we've already version-recycled this agent _VERREC_MAX_ATTEMPTS
|
|
2556
|
-
# times at THIS exact cv and it never advanced, the agent's spawn env is OLDER than hostd's disk
|
|
2557
|
-
# and recycling will NEVER converge (respawn pulls the same stale env -> same cv -> recycle again).
|
|
2558
|
-
# STOP + alert ONCE instead of version-recycling every 30min forever (the env-mismatch storm RC).
|
|
2559
|
-
conv_key = f"verrec_conv/{a.get('project_id')}/{agent}"
|
|
2560
|
-
conv = dict(st.get(conv_key) or {})
|
|
2561
|
-
if conv.get("cv") == cv and int(conv.get("count", 0)) >= _VERREC_MAX_ATTEMPTS:
|
|
2562
|
-
if not conv.get("alerted"):
|
|
2563
|
-
conv["alerted"] = True
|
|
2564
|
-
st[conv_key] = conv
|
|
2565
|
-
_save_state(st)
|
|
2566
|
-
_log(f"VERSION-RECYCLE STUCK {proj}/{agent}: still {cv} after {conv['count']} recycles "
|
|
2567
|
-
f"(disk {disk}); spawn env older than hostd — align it (e.g. "
|
|
2568
|
-
f"~/meshcode-env/bin/pip install -U meshcode). BLOCKING further version-recycles "
|
|
2569
|
-
f"until cv advances. [recycle_blocked_reason=version_no_converge]")
|
|
2570
|
-
continue
|
|
2571
|
-
req = _rpc("mc_request_recycle",
|
|
2572
|
-
{"p_api_key": api_key, "p_project_id": a.get("project_id"), "p_agent_name": agent})
|
|
2573
|
-
if req and req.get("ok") and req.get("requested"):
|
|
2574
|
-
_rpc("mc_record_recycle",
|
|
2575
|
-
{"p_api_key": api_key, "p_project_id": a.get("project_id"), "p_agent_name": agent})
|
|
2576
|
-
st[key] = now
|
|
2577
|
-
# advance convergence counter for THIS cv; a cv change (env fixed) resets + releases the guard.
|
|
2578
|
-
if conv.get("cv") == cv:
|
|
2579
|
-
conv["count"] = int(conv.get("count", 0)) + 1
|
|
2580
|
-
else:
|
|
2581
|
-
conv = {"cv": cv, "count": 1}
|
|
2582
|
-
conv["alerted"] = False
|
|
2583
|
-
st[conv_key] = conv
|
|
2584
|
-
n += 1
|
|
2585
|
-
_log(f"VERSION-RECYCLE {proj}/{agent}: running {cv} < disk {disk} — recycle at idle to update "
|
|
2586
|
-
f"(attempt {conv['count']}/{_VERREC_MAX_ATTEMPTS})")
|
|
2587
|
-
if n:
|
|
2588
|
-
_save_state(st)
|
|
2589
|
-
return n
|
|
2590
2530
|
|
|
2591
2531
|
|
|
2592
2532
|
# c301be69: hold the singleton lock handle for the daemon's lifetime — module
|
|
@@ -907,16 +907,21 @@ def take_memory_hints() -> List[Dict]:
|
|
|
907
907
|
return out
|
|
908
908
|
|
|
909
909
|
|
|
910
|
-
def read_inbox(project_id: str, agent: str, mark_read: bool = True, api_key: Optional[str] = None
|
|
911
|
-
|
|
910
|
+
def read_inbox(project_id: str, agent: str, mark_read: bool = True, api_key: Optional[str] = None,
|
|
911
|
+
rpc_result: Optional[Dict] = None) -> List[Dict]:
|
|
912
|
+
# Use SECURITY DEFINER RPC when api_key is available (bypasses RLS safely).
|
|
913
|
+
# rpc_result (mig 521 wait-drain): a pre-fetched mc_read_inbox payload (the
|
|
914
|
+
# 'inbox' field of mc_wait_poll v5). When given, skip the RPC and run the
|
|
915
|
+
# identical post-process (decrypt, memory_hints, ack) on it.
|
|
912
916
|
global _LAST_MEMORY_HINTS
|
|
913
917
|
if api_key:
|
|
914
|
-
rpc_result
|
|
915
|
-
"
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
918
|
+
if rpc_result is None:
|
|
919
|
+
rpc_result = sb_rpc("mc_read_inbox", {
|
|
920
|
+
"p_api_key": api_key,
|
|
921
|
+
"p_project_id": project_id,
|
|
922
|
+
"p_agent_name": agent,
|
|
923
|
+
"p_mark_read": mark_read,
|
|
924
|
+
})
|
|
920
925
|
if isinstance(rpc_result, dict) and rpc_result.get("ok"):
|
|
921
926
|
# mig 220: stash memory_hints so the calling tool can attach
|
|
922
927
|
# them to its response. Reset on every call so stale hints
|
|
@@ -3665,6 +3665,11 @@ def _wait_poll_or_legacy() -> Dict[str, Any]:
|
|
|
3665
3665
|
"p_api_key": api_key,
|
|
3666
3666
|
"p_project_id": _PROJECT_ID,
|
|
3667
3667
|
"p_agent_name": AGENT_NAME,
|
|
3668
|
+
# wait-loop fase 2 (proposal 0bd63160, mig 521): the server
|
|
3669
|
+
# folds read_inbox INTO the poll when there is pending mail —
|
|
3670
|
+
# a message-carrying cycle is 1 RPC instead of 2. mc_wait_poll
|
|
3671
|
+
# v5 (5-arg, default false) serves old SDKs unchanged.
|
|
3672
|
+
"p_drain_inbox": True,
|
|
3668
3673
|
})
|
|
3669
3674
|
if isinstance(resp, dict) and resp.get("ok"):
|
|
3670
3675
|
return {
|
|
@@ -3677,6 +3682,9 @@ def _wait_poll_or_legacy() -> Dict[str, Any]:
|
|
|
3677
3682
|
# Surface it so the caller folds that RPC away. None on
|
|
3678
3683
|
# the legacy path = caller must count_pending itself.
|
|
3679
3684
|
"pending_count": resp.get("pending_count"),
|
|
3685
|
+
# mig 521: pre-drained mc_read_inbox result (same shape the
|
|
3686
|
+
# RPC returns) — present only when pending_count>0.
|
|
3687
|
+
"inbox": resp.get("inbox"),
|
|
3680
3688
|
"_via": "mc_wait_poll",
|
|
3681
3689
|
}
|
|
3682
3690
|
except Exception as e:
|
|
@@ -3687,21 +3695,30 @@ def _wait_poll_or_legacy() -> Dict[str, Any]:
|
|
|
3687
3695
|
"stop": _check_stop_request(),
|
|
3688
3696
|
"tasks": _get_pending_tasks_summary(),
|
|
3689
3697
|
"pending_count": None,
|
|
3698
|
+
"inbox": None,
|
|
3690
3699
|
"_via": "legacy",
|
|
3691
3700
|
}
|
|
3692
3701
|
|
|
3693
3702
|
|
|
3694
|
-
def _drain_unread_response(include_acks: bool
|
|
3703
|
+
def _drain_unread_response(include_acks: bool,
|
|
3704
|
+
rpc_result: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
|
|
3695
3705
|
"""SDK-EFF (task ab1f9f5a): read+mark unread DB messages and shape them as
|
|
3696
3706
|
a wait response. Extracted from the inner-loop's old final-fallback tail so
|
|
3697
3707
|
the outer loop can invoke it only when mc_wait_poll reports pending_count>0
|
|
3698
3708
|
(was: an unconditional count_pending every idle cycle). Returns None when
|
|
3699
|
-
nothing real (acks only / already seen) was found.
|
|
3709
|
+
nothing real (acks only / already seen) was found.
|
|
3710
|
+
|
|
3711
|
+
rpc_result (mig 521, proposal 0bd63160): pre-drained mc_read_inbox payload
|
|
3712
|
+
embedded in the mc_wait_poll v5 response ('inbox' key). When given, NO
|
|
3713
|
+
read_inbox RPC fires — backend.read_inbox still runs its full post-process
|
|
3714
|
+
(decrypt, memory_hints, ack) on the embedded payload, so semantics are
|
|
3715
|
+
byte-identical to the 2-RPC path."""
|
|
3700
3716
|
try:
|
|
3701
3717
|
api_key = _get_api_key()
|
|
3702
3718
|
if not api_key:
|
|
3703
3719
|
return None
|
|
3704
|
-
raw = be.read_inbox(_PROJECT_ID, AGENT_NAME, mark_read=True, api_key=api_key
|
|
3720
|
+
raw = be.read_inbox(_PROJECT_ID, AGENT_NAME, mark_read=True, api_key=api_key,
|
|
3721
|
+
rpc_result=rpc_result)
|
|
3705
3722
|
if not raw:
|
|
3706
3723
|
return None
|
|
3707
3724
|
msgs = [
|
|
@@ -4591,7 +4608,10 @@ async def meshcode_wait(timeout_seconds: int = 20, include_acks: bool = False) -
|
|
|
4591
4608
|
except Exception:
|
|
4592
4609
|
_db_pending = 0
|
|
4593
4610
|
if _db_pending > 0:
|
|
4594
|
-
|
|
4611
|
+
# mig 521: the poll already drained the inbox server-side
|
|
4612
|
+
# (1 RPC); pass it through. None => classic read_inbox RPC.
|
|
4613
|
+
_drained = _drain_unread_response(include_acks,
|
|
4614
|
+
rpc_result=_wp.get("inbox"))
|
|
4595
4615
|
if _drained:
|
|
4596
4616
|
result = _drained
|
|
4597
4617
|
_set_state("online", "")
|
|
@@ -1047,6 +1047,7 @@ def cmd_launch_batch(agent_names: Iterable[str], repo_path: Optional[str] = None
|
|
|
1047
1047
|
|
|
1048
1048
|
launched: list[str] = []
|
|
1049
1049
|
skipped: list[dict] = []
|
|
1050
|
+
reconciled: dict = {} # agent -> surviving session trees killed pre-spawn (3ed8781d)
|
|
1050
1051
|
|
|
1051
1052
|
# Resolve `meshcode` binary path (CLI wrapper installed by pip).
|
|
1052
1053
|
mc_bin = shutil.which("meshcode") or "meshcode"
|
|
@@ -1094,6 +1095,22 @@ def cmd_launch_batch(agent_names: Iterable[str], repo_path: Optional[str] = None
|
|
|
1094
1095
|
skipped.append({"agent": name,
|
|
1095
1096
|
"reason": f"cooldown {_LAUNCH_COOLDOWN_S}s (recently launched)"})
|
|
1096
1097
|
continue
|
|
1098
|
+
# RELAUNCH RECONCILIATION (task 3ed8781d — the hostd respawn path has had this
|
|
1099
|
+
# since 14e0760c, but THIS path — every FE Start click — never did): a
|
|
1100
|
+
# stopped-but-surviving session (stale heartbeat, so DEDUP 1 doesn't see it)
|
|
1101
|
+
# would get a fresh session spawned ON TOP, splitting the inbox and stacking
|
|
1102
|
+
# wt tabs (Samuel's Windows box: up to 3 sessions/tabs per agent). Kill any
|
|
1103
|
+
# surviving session tree of this agent BEFORE opening the new one — strictly
|
|
1104
|
+
# pre-spawn, so the fresh session can never be in the kill set. hostd's
|
|
1105
|
+
# machinery (token discovery + tree kill + stop-marker so the old tab
|
|
1106
|
+
# self-closes) accepts bare names since task 3ed8781d. Best-effort.
|
|
1107
|
+
try:
|
|
1108
|
+
from meshcode import hostd as _hostd
|
|
1109
|
+
_rec = _hostd._reconcile_target(name)
|
|
1110
|
+
if _rec:
|
|
1111
|
+
reconciled[name] = _rec
|
|
1112
|
+
except Exception:
|
|
1113
|
+
pass
|
|
1097
1114
|
# PER-PLATFORM quoting (mesh-core FIX2): cmd.exe wants double-quotes, not POSIX shlex
|
|
1098
1115
|
# single-quotes (cmd.exe passes single-quotes through literally -> file-not-found).
|
|
1099
1116
|
if sys.platform == "win32":
|
|
@@ -1113,6 +1130,8 @@ def cmd_launch_batch(agent_names: Iterable[str], repo_path: Optional[str] = None
|
|
|
1113
1130
|
"agents": launched,
|
|
1114
1131
|
"skipped": skipped,
|
|
1115
1132
|
}
|
|
1133
|
+
if reconciled:
|
|
1134
|
+
out["reconciled"] = reconciled
|
|
1116
1135
|
print(json.dumps(out))
|
|
1117
1136
|
return 0 if not skipped else 1
|
|
1118
1137
|
|
|
@@ -34,13 +34,17 @@ class FakeNoSuchProcess(Exception):
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class FakeProc:
|
|
37
|
-
def __init__(self, table, pid, ppid, cmdline, name="", env=None):
|
|
37
|
+
def __init__(self, table, pid, ppid, cmdline, name="", env=None, cwd=""):
|
|
38
38
|
self._table = table
|
|
39
39
|
self.pid = pid
|
|
40
40
|
self._ppid = ppid
|
|
41
41
|
self._cmdline = cmdline
|
|
42
42
|
self._name = name or (cmdline.split()[0] if cmdline else "")
|
|
43
43
|
self._env = env or {}
|
|
44
|
+
self._cwd = cwd
|
|
45
|
+
|
|
46
|
+
def cwd(self):
|
|
47
|
+
return self._cwd
|
|
44
48
|
|
|
45
49
|
# --- psutil.Process API surface used by hostd ---
|
|
46
50
|
def cmdline(self):
|
|
@@ -81,8 +85,8 @@ class FakePsutil:
|
|
|
81
85
|
self.live = set()
|
|
82
86
|
self.killed = [] # kill order, for pre-spawn ordering assertions
|
|
83
87
|
|
|
84
|
-
def add(self, pid, ppid, cmdline, name="", env=None):
|
|
85
|
-
self.procs[pid] = FakeProc(self, pid, ppid, cmdline, name=name, env=env)
|
|
88
|
+
def add(self, pid, ppid, cmdline, name="", env=None, cwd=""):
|
|
89
|
+
self.procs[pid] = FakeProc(self, pid, ppid, cmdline, name=name, env=env, cwd=cwd)
|
|
86
90
|
self.live.add(pid)
|
|
87
91
|
return self.procs[pid]
|
|
88
92
|
|
|
@@ -427,5 +431,139 @@ class DiscoveryTests(ZombieFixBase):
|
|
|
427
431
|
self.assertEqual(hostd._discover_agent_pids(TARGET), [qa["run"]])
|
|
428
432
|
|
|
429
433
|
|
|
434
|
+
class BareNameSpawnTests(ZombieFixBase):
    """Regression tests for task 3ed8781d (Windows stop-via-RPC + relaunch zombie).

    Background recorded with the task: protocol_handler's cmd_launch_batch (every
    FE Start click) spawns `meshcode run "agent"` with a BARE agent name, while the
    old per-site token (`run <project/agent>`) never matched that cmdline. Stop,
    reap and reconcile therefore found nothing, and sessions plus wt tabs stacked
    up to 3 per agent.
    """

    def add_bare_session(self, base, agent="qa", cwd=""):
        """Register a protocol-handler-shaped process tree in the fake psutil.

        Layout: WindowsTerminal (pid 50) -> cmd wrapper running the launcher
        script (pid ``base``; no token in its cmdline post-a4001d59) -> python
        ``run`` with the bare agent name (pid ``base + 1``, carrying *cwd*)
        -> claude (pid ``base + 2``).

        Returns a dict mapping "cmd", "run" and "claude" to those pids.
        """
        wrapper_pid = base
        run_pid = base + 1
        claude_pid = base + 2
        register = self.ps.add
        register(50, 1, "WindowsTerminal.exe", name="WindowsTerminal.exe")
        register(
            wrapper_pid,
            50,
            f'cmd /c C:\\u\\.meshcode\\launchers\\{agent}.cmd',
            name="cmd.exe",
        )
        register(
            run_pid,
            wrapper_pid,
            f'C:\\py\\python.exe -m meshcode run "{agent}"',
            name="python.exe",
            cwd=cwd,
        )
        register(
            claude_pid,
            run_pid,
            "C:\\claude\\claude.exe --session x",
            name="claude.exe",
        )
        return {"cmd": wrapper_pid, "run": run_pid, "claude": claude_pid}

    def test_qualified_target_discovers_bare_spawn(self):
        """LOAD-BEARING: discovery for the qualified target ('proj/agent', as used
        by hostd stop sweeps) must return the run pid of a protocol-launched
        session whose cmdline only carries the bare agent name."""
        self.add_hostd()
        tree = self.add_bare_session(100)
        discovered = hostd._discover_agent_pids(TARGET)
        self.assertEqual(discovered, [tree["run"]])

    def test_bare_target_discovers_qualified_spawn(self):
        """Reverse direction: discovery for the bare target 'qa' (the form used by
        the protocol-handler relaunch reconcile) must return the run pid of a
        session registered through add_session (hostd-spawned, qualified name)."""
        self.add_hostd()
        session = self.add_session(100)
        discovered = hostd._discover_agent_pids("qa")
        self.assertEqual(discovered, [session["run"]])

    def test_bare_forms_keep_prefix_safety(self):
        """Neither the qualified nor the bare target may match agent 'qa-2',
        whether 'qa-2' itself was spawned bare or qualified."""
        self.add_hostd()
        lookalikes = (
            (601, 'python.exe -m meshcode run "qa-2"'),
            (602, 'python.exe -m meshcode run "mesh-core/qa-2"'),
        )
        for pid, cmdline in lookalikes:
            self.ps.add(pid, 1, cmdline, name="python.exe")
        for target in (TARGET, "qa"):
            self.assertEqual(hostd._discover_agent_pids(target), [])

    def test_bare_match_foreign_project_cwd_is_skipped(self):
        """Cross-mesh guard: a bare-spawned 'qa' whose cwd is ANOTHER project's
        workspace (other-mesh-qa) must not be discovered via the qualified
        target."""
        self.add_hostd()
        self.add_bare_session(100, cwd="C:\\u\\meshcode\\other-mesh-qa")
        discovered = hostd._discover_agent_pids(TARGET)
        self.assertEqual(discovered, [])

    def test_bare_match_own_project_cwd_is_accepted(self):
        """A bare-spawned 'qa' whose cwd is the mesh-core-qa workspace is
        discovered via the qualified target."""
        self.add_hostd()
        tree = self.add_bare_session(100, cwd="C:\\u\\meshcode\\mesh-core-qa")
        discovered = hostd._discover_agent_pids(TARGET)
        self.assertEqual(discovered, [tree["run"]])

    def test_stop_kills_bare_spawned_session_tree(self):
        """End-to-end: with mc_agents_to_stop scripted to list mesh-core/qa,
        _do_stops reports one stop and removes the python run and claude pids
        from the live set, while the terminal host (pid 50) stays alive.

        Per the task notes the launcher cmd wrapper is meant to survive (no token
        in its cmdline) so it can run the stopmark epilogue and let the tab close
        itself; this test does not assert on the wrapper pid.
        """
        self.add_hostd()
        tree = self.add_bare_session(100)
        stop_reply = {
            "ok": True,
            "agents": [{"project_name": "mesh-core", "agent": "qa"}],
        }
        self.rpc.script("mc_agents_to_stop", stop_reply)
        stopped = hostd._do_stops("k", "h")
        self.assertEqual(stopped, 1)
        self.assertNotIn(tree["run"], self.ps.live, "bare-spawned run survived the stop")
        self.assertNotIn(tree["claude"], self.ps.live, "bare-spawned claude survived the stop")
        self.assertIn(50, self.ps.live, "terminal host was killed")

    def test_serve_discovery_bare_target_matches_agent_env(self):
        """Serve discovery with a bare target returns the serve process whose
        MESHCODE_AGENT env var equals that name, and nothing for another agent."""
        serve_env = {"MESHCODE_PROJECT": "mesh-core", "MESHCODE_AGENT": "qa"}
        self.ps.add(
            200,
            1,
            "python.exe -m meshcode.meshcode_mcp serve",
            name="python.exe",
            env=serve_env,
        )
        self.assertEqual(hostd._discover_serve_pids("qa"), [200])
        self.assertEqual(hostd._discover_serve_pids("backend"), [])
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class StopMarkerDualLabelTests(unittest.TestCase):
    """Tests for hostd._write_stop_marker label coverage (task 3ed8781d).

    Rationale recorded with the task: the marker must be stamped under BOTH the
    target-form label (hostd spawns) and the bare-agent label (protocol-handler
    spawns), because the launcher script checks the label derived from ITS OWN
    spawn cmd, and a missing marker flips an intentional stop into a
    `pause`-held dead wt tab.
    """

    def _marker_names(self, target):
        """Call _write_stop_marker(target) with hostd.STATE_DIR pointed at a fresh
        temporary directory and return the sorted names of the stopmark_* files
        found there."""
        import tempfile
        from pathlib import Path

        with tempfile.TemporaryDirectory() as scratch:
            state_dir = Path(scratch)
            with mock.patch.object(hostd, "STATE_DIR", state_dir):
                hostd._write_stop_marker(target)
            # Collect the names before the temporary directory is removed.
            found = [entry.name for entry in state_dir.glob("stopmark_*")]
        found.sort()
        return found

    def test_writes_both_labels(self):
        """A qualified target yields the qualified and the bare marker file."""
        self.assertEqual(
            self._marker_names("mesh-core/qa"),
            ["stopmark_mesh-core_qa", "stopmark_qa"],
        )

    def test_bare_target_writes_single_label(self):
        """A bare target yields exactly one marker file."""
        self.assertEqual(self._marker_names("qa"), ["stopmark_qa"])
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
class LaunchBatchReconcileTests(unittest.TestCase):
    """task 3ed8781d — cmd_launch_batch (every FE Start click / launch-url) must
    reconcile surviving sessions pre-spawn, like the hostd respawn path has since
    14e0760c."""

    def _run(self, reconcile_side_effect, events=None):
        """Run ``cmd_launch_batch(["qa"])`` with its collaborators stubbed out.

        Stubs: live_agent_names returns an empty set, _read_cooldowns returns {},
        _record_spawn is a no-op, _spawn_terminal reports success, and
        hostd._reconcile_target is replaced by *reconcile_side_effect*.
        builtins.print is captured so the JSON summary can be parsed.

        Args:
            reconcile_side_effect: callable taking the agent name; stands in for
                hostd._reconcile_target.
            events: optional list; when given, the spawn stub appends "spawn" to
                it on every call so callers can check ordering against their own
                reconcile events.

        Returns:
            (return code, parsed JSON of the last printed value).
        """
        from meshcode import protocol_handler as ph
        import json as _json

        printed = []

        def fake_spawn(cmd):
            # Record the spawn so a test can prove the reconcile ran first.
            if events is not None:
                events.append("spawn")
            return (True, "wt(fleet-tab)")

        with mock.patch.object(ph, "live_agent_names", lambda names, project=None: set()), \
                mock.patch.object(ph, "_read_cooldowns", lambda: {}), \
                mock.patch.object(ph, "_record_spawn", lambda n, now=None: None), \
                mock.patch.object(ph, "_spawn_terminal", fake_spawn), \
                mock.patch.object(hostd, "_reconcile_target", reconcile_side_effect), \
                mock.patch("builtins.print", lambda *a, **k: printed.append(a[0] if a else "")):
            rc = ph.cmd_launch_batch(["qa"])
        return rc, _json.loads(printed[-1])

    def test_reconcile_called_before_spawn_and_reported(self):
        """Reconcile runs once for the agent, ahead of any recorded spawn, and its
        result is surfaced under "reconciled" in the printed summary."""
        events = []
        calls = []

        def reconcile(name):
            calls.append(name)
            events.append("reconcile")
            return 2

        rc, out = self._run(reconcile, events=events)
        self.assertEqual(rc, 0)
        self.assertEqual(calls, ["qa"])
        # Ordering check: with a single reconcile call, a first event of
        # "reconcile" means every recorded spawn happened after it.
        self.assertEqual(events[:1], ["reconcile"],
                         "a spawn was recorded before the reconcile")
        self.assertEqual(out.get("reconciled"), {"qa": 2})
        self.assertEqual(out["agents"], ["qa"])

    def test_reconcile_failure_never_blocks_launch(self):
        """A reconcile that raises must not stop the launch: the agent is still
        reported as launched and no "reconciled" key is emitted."""
        def boom(name):
            raise RuntimeError("psutil exploded")

        rc, out = self._run(boom)
        self.assertEqual(rc, 0)
        self.assertEqual(out["agents"], ["qa"])
        self.assertNotIn("reconciled", out)
|
|
566
|
+
|
|
567
|
+
|
|
430
568
|
if __name__ == "__main__":
|
|
431
569
|
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|