meshcode 2.11.114rc1__tar.gz → 2.11.116__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (99)
  1. {meshcode-2.11.114rc1 → meshcode-2.11.116}/PKG-INFO +1 -1
  2. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/__init__.py +1 -1
  3. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/_session_handoff_template.py +49 -0
  4. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/comms_v4.py +1 -1
  5. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hostd.py +0 -236
  6. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/server.py +115 -244
  7. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/protocol_handler.py +34 -1
  8. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/run_agent.py +37 -17
  9. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/PKG-INFO +1 -1
  10. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/SOURCES.txt +37 -7
  11. {meshcode-2.11.114rc1 → meshcode-2.11.116}/pyproject.toml +1 -1
  12. meshcode-2.11.116/tests/test_auto_update_hardening.py +295 -0
  13. meshcode-2.11.116/tests/test_autonomous_closegap_1.py +164 -0
  14. meshcode-2.11.116/tests/test_autonomous_closegap_2.py +210 -0
  15. meshcode-2.11.116/tests/test_autonomous_closegap_3.py +163 -0
  16. meshcode-2.11.116/tests/test_autonomous_prompt_inject.py +126 -0
  17. meshcode-2.11.116/tests/test_boot_bug_regression.py +205 -0
  18. meshcode-2.11.116/tests/test_color_truecolor.py +83 -0
  19. meshcode-2.11.116/tests/test_core.py +216 -0
  20. meshcode-2.11.116/tests/test_cross_agent_messaging.py +366 -0
  21. meshcode-2.11.116/tests/test_date_parse.py +112 -0
  22. meshcode-2.11.116/tests/test_doctor.py +123 -0
  23. meshcode-2.11.116/tests/test_epistemic_v1_python_sdk.py +177 -0
  24. meshcode-2.11.116/tests/test_epistemic_v1_stop_conditions.py +158 -0
  25. meshcode-2.11.116/tests/test_esc_deaf_state.py +361 -0
  26. meshcode-2.11.116/tests/test_exceptions.py +107 -0
  27. meshcode-2.11.116/tests/test_file_upload.py +171 -0
  28. meshcode-2.11.116/tests/test_init_device_code.py +68 -0
  29. meshcode-2.11.116/tests/test_install_guard.py +170 -0
  30. meshcode-2.11.116/tests/test_lease_sigterm_release.py +299 -0
  31. meshcode-2.11.116/tests/test_mark_read_batch.py +200 -0
  32. meshcode-2.11.116/tests/test_marketplace_ratings.py +174 -0
  33. meshcode-2.11.116/tests/test_migration_integrity.py +176 -0
  34. meshcode-2.11.116/tests/test_realtime_event_freshness.py +236 -0
  35. meshcode-2.11.116/tests/test_rls_cross_tenant.py +255 -0
  36. meshcode-2.11.116/tests/test_rpc_grants.py +76 -0
  37. meshcode-2.11.116/tests/test_rpc_migrations.py +452 -0
  38. meshcode-2.11.116/tests/test_run_agent_dry_run.py +128 -0
  39. meshcode-2.11.116/tests/test_run_agent_no_server_import.py +85 -0
  40. meshcode-2.11.116/tests/test_security_regressions.py +228 -0
  41. meshcode-2.11.116/tests/test_self_update_user_site.py +139 -0
  42. meshcode-2.11.116/tests/test_sentinel.py +148 -0
  43. meshcode-2.11.116/tests/test_setup_path.py +66 -0
  44. meshcode-2.11.116/tests/test_sleep_signals.py +160 -0
  45. meshcode-2.11.116/tests/test_status_enum_coverage.py +231 -0
  46. meshcode-2.11.116/tests/test_stay_on_loop_hook.py +302 -0
  47. meshcode-2.11.116/tests/test_wait_open_tasks_contradiction.py +87 -0
  48. meshcode-2.11.114rc1/meshcode/_session_handoff_template 2.py +0 -296
  49. meshcode-2.11.114rc1/meshcode/_session_handoff_template 3.py +0 -296
  50. meshcode-2.11.114rc1/meshcode/claude_update 2.py +0 -258
  51. meshcode-2.11.114rc1/meshcode/claude_update 3.py +0 -258
  52. meshcode-2.11.114rc1/meshcode/hostd 2.py +0 -1269
  53. meshcode-2.11.114rc1/meshcode/up 2.py +0 -257
  54. {meshcode-2.11.114rc1 → meshcode-2.11.116}/README.md +0 -0
  55. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/__main__.py +0 -0
  56. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/_stop_hook_template.py +0 -0
  57. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/ascii_art.py +0 -0
  58. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/atomic_push.py +0 -0
  59. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/claude_update.py +0 -0
  60. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/cli.py +0 -0
  61. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/compat.py +0 -0
  62. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/daemon.py +0 -0
  63. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/date_parse.py +0 -0
  64. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/doctor.py +0 -0
  65. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/error_hints.py +0 -0
  66. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/exceptions.py +0 -0
  67. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hooks/__init__.py +0 -0
  68. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hooks/repo_path_lock.py +0 -0
  69. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/invites.py +0 -0
  70. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/launcher.py +0 -0
  71. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/launcher_install.py +0 -0
  72. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/__init__.py +0 -0
  73. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/__main__.py +0 -0
  74. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/backend.py +0 -0
  75. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/realtime.py +0 -0
  76. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
  77. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_backend.py +0 -0
  78. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
  79. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
  80. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
  81. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_realtime.py +0 -0
  82. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
  83. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/preferences.py +0 -0
  84. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/protocol_v2.py +0 -0
  85. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/quickstart.py +0 -0
  86. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/rpc_allowlist.py +0 -0
  87. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/scripts/check_secrets.py +0 -0
  88. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/scripts/race_rate_harness.py +0 -0
  89. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/secrets.py +0 -0
  90. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/self_update.py +0 -0
  91. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/setup_clients.py +0 -0
  92. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/supervisor.py +0 -0
  93. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/up.py +0 -0
  94. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/upload.py +0 -0
  95. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/dependency_links.txt +0 -0
  96. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/entry_points.txt +0 -0
  97. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/requires.txt +0 -0
  98. {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/top_level.txt +0 -0
  99. {meshcode-2.11.114rc1 → meshcode-2.11.116}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshcode
3
- Version: 2.11.114rc1
3
+ Version: 2.11.116
4
4
  Summary: Real-time communication between AI agents — Supabase-backed CLI
5
5
  Author-email: MeshCode <hello@meshcode.io>
6
6
  License: MIT
@@ -1,5 +1,5 @@
1
1
  """MeshCode — Real-time communication between AI agents."""
2
- __version__ = "2.11.114rc1"
2
+ __version__ = "2.11.116"
3
3
 
4
4
  # Exception hierarchy — eagerly imported (lightweight, no deps)
5
5
  from meshcode.exceptions import ( # noqa: F401
@@ -161,6 +161,53 @@ def _request_recycle_if_marked(project_dir) -> None:
161
161
  sys.stderr.write(f"[session_handoff_write] recycle-request skipped: {e}\\n")
162
162
 
163
163
 
164
+ def _persist_handoff_to_memory(project_dir, handoff) -> None:
165
+ """L6 M6.1 (task 84c426d4, 2.11.114): mirror the handoff into
166
+ mc_agent_memory key='session_handoff' so the server-side boot
167
+ continuity_capsule (mig 456) can surface it on the NEXT session even
168
+ when the local handoff.json is gone (new host, wiped workspace).
169
+ Same best-effort creds pattern as _request_recycle_if_marked —
170
+ any failure silently skips; handoff.json already covers the local path.
171
+ """
172
+ try:
173
+ mcp = json.loads((project_dir / ".mcp.json").read_text(encoding="utf-8"))
174
+ env = (next(iter((mcp.get("mcpServers") or {}).values()), {}) or {}).get("env", {}) or {}
175
+ url = env.get("SUPABASE_URL"); key = env.get("SUPABASE_KEY")
176
+ agent = env.get("MESHCODE_AGENT"); project = env.get("MESHCODE_PROJECT")
177
+ if not (url and key and agent):
178
+ return
179
+ api_key = os.environ.get("MESHCODE_API_KEY")
180
+ if not api_key:
181
+ try:
182
+ import importlib
183
+ api_key = importlib.import_module("meshcode.secrets").get_api_key(
184
+ profile=env.get("MESHCODE_KEYCHAIN_PROFILE") or "default")
185
+ except Exception:
186
+ api_key = None
187
+ if not api_key:
188
+ return
189
+ turns = handoff.get("turns") or []
190
+ compact = {
191
+ "trigger": handoff.get("trigger"),
192
+ "captured_at_session": handoff.get("session_id"),
193
+ "tail": [{"role": t["role"], "text": t["text"][:400]} for t in turns[-8:]],
194
+ }
195
+ import urllib.request as _u
196
+ body = json.dumps({
197
+ "p_api_key": api_key, "p_agent_name": agent,
198
+ "p_key": "session_handoff", "p_value": compact,
199
+ "p_tier": "episodic", "p_project_name": project,
200
+ }).encode("utf-8")
201
+ req = _u.Request(
202
+ url.rstrip("/") + "/rest/v1/rpc/mc_memory_set",
203
+ data=body, method="POST",
204
+ headers={"apikey": key, "Authorization": "Bearer " + key,
205
+ "Content-Type": "application/json"})
206
+ _u.urlopen(req, timeout=5).read()
207
+ except Exception as e: # noqa: BLE001 — never block compaction
208
+ sys.stderr.write(f"[session_handoff_write] memory-persist skipped: {e}\\n")
209
+
210
+
164
211
  def main() -> int:
165
212
  try:
166
213
  raw = sys.stdin.read()
@@ -185,6 +232,8 @@ def main() -> int:
185
232
  tmp.replace(d / "handoff.json")
186
233
  except OSError as e:
187
234
  sys.stderr.write(f"[session_handoff_write] skipped: {e}\\n")
235
+ # L6 M6.1: mirror to server-side memory for the boot continuity capsule.
236
+ _persist_handoff_to_memory(_project_dir(), handoff)
188
237
  # CTX-CLOSE-RELAUNCH (task 400fc536): now that the thread is snapshotted,
189
238
  # commander-tier sessions ask the server to recycle at the next task-edge.
190
239
  _request_recycle_if_marked(_project_dir())
@@ -1886,7 +1886,7 @@ def _start_heartbeat_daemon(project, name, agent_pid=None):
1886
1886
  " if not check_still_leased(pid):\n"
1887
1887
  " sys.exit(0)\n"
1888
1888
  " post('/rest/v1/rpc/mc_heartbeat', {'p_project_id':pid,'p_agent_name':name})\n"
1889
- " time.sleep(30)\n"
1889
+ " time.sleep(10)\n" # R2-3 (.116): 30s->10s so the fork's agent-alive check (self-exit on recycle/stop) tightens the stale-heartbeat window to <=10s
1890
1890
  )
1891
1891
  # Windows: start_new_session kwarg doesn't exist. Use creationflags.
1892
1892
  _popen_kwargs = {
@@ -816,32 +816,6 @@ def _pid_cmdline(pid: int) -> str:
816
816
  return ""
817
817
 
818
818
 
819
- def _pid_alive(pid: int) -> bool:
820
- """True iff `pid` is a live process. Signal-0 probe on POSIX; tasklist on Windows.
821
- Best-effort — on any unexpected error returns False (treat unknown as not-alive so
822
- the ghost detector never mis-reports). PermissionError means the process exists but
823
- isn't ours = alive; ProcessLookupError means dead."""
824
- try:
825
- pid = int(pid)
826
- if pid <= 0:
827
- return False
828
- if sys.platform == "win32":
829
- # /FO CSV so the pid is a quoted field — anchor on `"<pid>"` instead of a bare
830
- # substring (a bare pid can match a memory/session column → false-positive).
831
- out = subprocess.run(["tasklist", "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"],
832
- capture_output=True, text=True, timeout=5).stdout or ""
833
- return f'"{pid}"' in out
834
- try:
835
- os.kill(pid, 0)
836
- except ProcessLookupError:
837
- return False
838
- except PermissionError:
839
- return True
840
- return True
841
- except Exception:
842
- return False
843
-
844
-
845
819
  def _discover_agent_pids(target: str) -> list:
846
820
  """Fallback PID discovery by command line, for agents spawned before this hostd
847
821
  (no recorded PID) or after a state-file loss. Matches `meshcode run <target>`.
@@ -1228,215 +1202,6 @@ def _do_recycle_enforce(api_key: str, host_id: str) -> int:
1228
1202
  return n
1229
1203
 
1230
1204
 
1231
- # ------------------------------------------------------------------
1232
- # FOCUS-1 GHOST SESSIONS (back-2, task 09fca8fe) — dead-MCP detector.
1233
- #
1234
- # OS-ghost = a `claude` agent process still ALIVE but its `meshcode_mcp serve`
1235
- # child DEAD → no heartbeat → the dashboard shows it offline/invisible while a
1236
- # terminal window is still open ("connected-but-invisible"). Two local pidfiles
1237
- # correlate it, no roster RPC needed for detection:
1238
- # S3 claude alive — ~/.meshcode/pids/<project>__<agent>.pid, stamped by
1239
- # run_agent right before os.execvp (the pid survives exec => == claude pid).
1240
- # S2 mcp dead — {tempdir}/meshcode_mcp_<project>_<agent>.pid, written by the
1241
- # MCP server (meshcode_mcp/server.py). If that pid is dead/missing, mcp died.
1242
- # A ghost = S3 alive AND S2 dead. The eventual reap (kill the orphaned claude so
1243
- # the respawn sweep relaunches it clean) ALSO needs S1 = the server's is_ghost /
1244
- # effective_status='offline' (prod mc_agent_liveness) as a third independent
1245
- # guard, plus the cmdline reuse-guard — wired when arming.
1246
- #
1247
- # SHIPS LOG-ONLY (_GHOST_REAP_DRYRUN=True): logs GHOST-DRYRUN candidates so we can
1248
- # confirm ZERO false positives across the live fleet BEFORE flipping to a real
1249
- # kill (commander-mandated staged arming). Kills NOTHING while dry-run.
1250
- #
1251
- # GUARDS folded from back's cross-review (HIGH_1/HIGH_2) — all enforced even in the
1252
- # DRY-RUN classification so the logs only ever show TRUE ghosts:
1253
- # - boot-grace: skip until spawned_age (pidfile mtime) > GHOST_BOOT_GRACE_SEC, so a
1254
- # freshly-booting agent (claude up, MCP still connecting) is NOT mis-flagged.
1255
- # - cwd-guard: the live pid's working dir MUST equal the agent's recorded launch cwd
1256
- # (pidfile {cwd}, or the workspace as legacy fallback). Anti PID-reuse mis-kill —
1257
- # post-execvp claude's cmdline loses <target>, so cwd is the stable correlator. If
1258
- # cwd is unreadable on this platform → fail-SAFE (skip, never reap on uncertainty).
1259
- # ------------------------------------------------------------------
1260
- _GHOST_REAP_DRYRUN = True
1261
- GHOST_BOOT_GRACE_SEC = _env_int("MESHCODE_GHOST_BOOT_GRACE_SEC", 120, 30) # mac boots slow; >> _REAP grace + MCP reconnect
1262
- GHOST_PERSIST_SEC = _env_int("MESHCODE_GHOST_PERSIST_SEC", 90, 30) # ghost must hold this long → a brief mcp restart doesn't count (MED_restart)
1263
-
1264
-
1265
- def _mcp_serve_pid(project: str, agent: str) -> Optional[int]:
1266
- """Read the MCP server's recorded pid for this agent (meshcode_mcp/server.py
1267
- lockfile). Returns the pid, or None if the lockfile is missing/unparseable.
1268
- Path + JSON|bare-int format mirror _pid_lockfile_path()/_read_pid_lockfile()."""
1269
- try:
1270
- import tempfile as _tf
1271
- safe = f"meshcode_mcp_{project}_{agent}.pid".replace("/", "_").replace(" ", "_")
1272
- path = os.path.join(_tf.gettempdir(), safe)
1273
- if not os.path.exists(path):
1274
- return None
1275
- raw = open(path, "r").read().strip()
1276
- if not raw:
1277
- return None
1278
- try:
1279
- data = json.loads(raw)
1280
- if isinstance(data, dict) and "pid" in data:
1281
- return int(data["pid"])
1282
- except (ValueError, TypeError):
1283
- pass
1284
- return int(raw)
1285
- except Exception:
1286
- return None
1287
-
1288
-
1289
- def _agent_launcher_record(project: str, agent: str):
1290
- """Read run_agent's pre-execvp pidfile (~/.meshcode/pids/<project>__<agent>.pid).
1291
- Returns (pid:int|None, cwd:str|None). New format is JSON {"pid","cwd"}; tolerates
1292
- the legacy bare-int (no recorded cwd)."""
1293
- try:
1294
- safe = f"{project}__{agent}".replace("/", "_").replace("\\", "_").replace(" ", "_")
1295
- path = STATE_DIR / "pids" / f"{safe}.pid"
1296
- if not path.exists():
1297
- return None, None
1298
- raw = path.read_text(encoding="utf-8").strip()
1299
- if not raw:
1300
- return None, None
1301
- try:
1302
- data = json.loads(raw)
1303
- if isinstance(data, dict) and "pid" in data:
1304
- return int(data["pid"]), (data.get("cwd") or None)
1305
- except (ValueError, TypeError):
1306
- pass
1307
- return int(raw), None
1308
- except Exception:
1309
- return None, None
1310
-
1311
-
1312
- def _read_pid_cwd(pid: int) -> Optional[str]:
1313
- """Best-effort working directory of a live pid (time-boxed). None if unreadable.
1314
- linux : /proc/<pid>/cwd symlink
1315
- macOS : lsof -a -p <pid> -d cwd -Fn → the 'n' line (no Full Disk Access needed)
1316
- win : None (run_agent does NOT execvp there; cmdline keeps `run <target>`, so
1317
- the existing cmdline token-guard already correlates — cwd not needed)."""
1318
- try:
1319
- if sys.platform == "win32":
1320
- return None
1321
- if sys.platform == "darwin":
1322
- out = subprocess.run(["lsof", "-a", "-p", str(int(pid)), "-d", "cwd", "-Fn"],
1323
- capture_output=True, text=True, timeout=5).stdout or ""
1324
- for line in out.splitlines():
1325
- if line.startswith("n"):
1326
- return line[1:].strip() or None
1327
- return None
1328
- return os.readlink(f"/proc/{int(pid)}/cwd") # linux
1329
- except Exception:
1330
- return None
1331
-
1332
-
1333
- def _pid_cwd_matches(pid: int, expected_cwd: Optional[str], project: str, agent: str):
1334
- """Tri-state: True if the pid's live cwd == the agent's launch cwd; False if it
1335
- differs; None if unreadable. `expected_cwd` is the pidfile-recorded cwd; when absent
1336
- (legacy pidfile) fall back to the canonical workspace dir. realpath both sides
1337
- (mac /Users vs /private symlinks, --repo dirs). Autonomous reap requires True."""
1338
- live = _read_pid_cwd(pid)
1339
- if not live:
1340
- return None # unreadable → unknown → fail-safe skip
1341
- exp = set()
1342
- if expected_cwd:
1343
- try: exp.add(os.path.realpath(os.path.expanduser(expected_cwd)))
1344
- except Exception: pass
1345
- try:
1346
- exp.add(os.path.realpath(os.path.join(os.path.expanduser("~"), "meshcode", f"{project}-{agent}")))
1347
- except Exception:
1348
- pass
1349
- try:
1350
- return os.path.realpath(live) in exp
1351
- except Exception:
1352
- return None
1353
-
1354
-
1355
- def _do_ghost_reap(api_key: str, host_id: str) -> int:
1356
- """Detect dead-MCP ghosts (claude ALIVE + mcp_serve DEAD) from local pidfiles.
1357
- LOG-ONLY while _GHOST_REAP_DRYRUN — confirm zero false positives in the fleet logs
1358
- before arming. Returns the count that WOULD be reaped.
1359
-
1360
- Self-contained: enumerates this host's launcher pidfiles (agents WE launched), so
1361
- no roster RPC is needed for detection. All HIGH guards (boot-grace + cwd-match) are
1362
- enforced here so DRYRUN logs only ever show TRUE ghosts. Fail-open at the top level
1363
- so a bug here can never wedge the sweep."""
1364
- n = 0
1365
- try:
1366
- pid_dir = STATE_DIR / "pids"
1367
- if not pid_dir.is_dir():
1368
- return 0
1369
- now = time.time()
1370
- st = _load_state()
1371
- seen = dict(st.get("ghost_seen") or {}) # {target: first_candidate_ts} — MED_restart persistence
1372
- still = set() # targets that are candidates THIS sweep (others get cleared)
1373
- for pf in sorted(pid_dir.glob("*.pid")):
1374
- try:
1375
- stem = pf.stem # "<project>__<agent>"
1376
- if "__" not in stem:
1377
- continue
1378
- project, agent = stem.split("__", 1)
1379
- target = f"{project}/{agent}"
1380
- claude_pid, rec_cwd = _agent_launcher_record(project, agent)
1381
- if not claude_pid or not _pid_alive(claude_pid):
1382
- # MED_pidfile_stale: the agent exited (run_agent execvp'd, can't self-clean) →
1383
- # the launcher pid is dead. Prune the stale pidfile so a future reused pid can't FP.
1384
- try:
1385
- pf.unlink()
1386
- except Exception:
1387
- pass
1388
- continue # S3 fail: no live claude → not a ghost
1389
- # HIGH_2 boot-grace: pidfile mtime ≈ spawn time (written just before execvp).
1390
- try:
1391
- spawned_age = now - pf.stat().st_mtime
1392
- except Exception:
1393
- spawned_age = 1e9
1394
- if spawned_age < GHOST_BOOT_GRACE_SEC:
1395
- continue # still booting / MCP may be connecting → never reap
1396
- mcp_pid = _mcp_serve_pid(project, agent)
1397
- if mcp_pid and _pid_alive(mcp_pid):
1398
- continue # S2 fail: mcp_serve alive → healthy
1399
- mcp_state = (f"pid {mcp_pid} DEAD" if mcp_pid else "lockfile MISSING")
1400
- # HIGH_1 cwd-guard: the live pid MUST be running in this agent's launch cwd.
1401
- live_cwd = _read_pid_cwd(claude_pid)
1402
- cwd_match = _pid_cwd_matches(claude_pid, rec_cwd, project, agent)
1403
- if cwd_match is not True:
1404
- _log(f"GHOST-SKIP {target}: claude pid {claude_pid} + mcp {mcp_state} but cwd-guard "
1405
- f"{'UNREADABLE' if cwd_match is None else 'MISMATCH'} "
1406
- f"(live_cwd={live_cwd!r} expected~{rec_cwd!r}) — NOT a confirmed ghost, no reap.")
1407
- continue # fail-SAFE: never reap on cwd uncertainty/mismatch
1408
- # Candidate (S3 alive ∧ boot-grace ∧ S2 dead ∧ cwd-match). MED_restart persistence:
1409
- # only COUNT after the ghost has held GHOST_PERSIST_SEC, so a brief mcp restart
1410
- # (dead for one sweep) never registers.
1411
- still.add(target)
1412
- first_ts = seen.get(target) or now
1413
- seen[target] = first_ts
1414
- held = now - first_ts
1415
- if held < GHOST_PERSIST_SEC:
1416
- _log(f"GHOST-PENDING {target}: dead-MCP candidate held {int(held)}s (< {GHOST_PERSIST_SEC}s) "
1417
- f"— waiting for persistence before counting (guards against a brief mcp restart).")
1418
- continue
1419
- # CONFIRMED, PERSISTENT dead-MCP ghost.
1420
- n += 1
1421
- cmdline = _pid_cmdline(claude_pid).strip()[:120]
1422
- _log(f"GHOST-DRYRUN {target}: claude pid {claude_pid} ALIVE (cwd={live_cwd!r}, "
1423
- f"spawned_age={int(spawned_age)}s, ghost_held={int(held)}s) but meshcode_mcp serve "
1424
- f"{mcp_state} — connected-but-invisible. WOULD reap claude → respawn relaunches clean "
1425
- f"[log-only; arming also needs S1 is_ghost + human-defer + spawn-breaker]. cmdline={cmdline!r}")
1426
- # ARMED PATH (not yet enabled): AND S1 (roster is_ghost) AND not _human_recently_active(target)
1427
- # AND _spawn_rate_ok(target), then _kill_headless_pid(target, claude_pid) → respawn relaunches.
1428
- except Exception:
1429
- continue
1430
- # Clear persistence for targets that recovered (mcp back, agent gone) so a future
1431
- # transient doesn't inherit a stale clock.
1432
- seen = {t: ts for t, ts in seen.items() if t in still}
1433
- st["ghost_seen"] = seen
1434
- _save_state(st)
1435
- except Exception:
1436
- pass
1437
- return n
1438
-
1439
-
1440
1205
  def _do_recycles(api_key: str, host_id: str) -> int:
1441
1206
  """Uptime-based recycle at task boundary. Returns number recycled."""
1442
1207
  # DEAD-FEATURE (task 222b1b02, Samuel 2026-06-04): the RECYCLE feature is disabled in
@@ -1994,7 +1759,6 @@ def cmd_hostd(args: list) -> int:
1994
1759
  stopped = _do_stops(api_key, host_id)
1995
1760
  force_killed = _do_force_kills(api_key, host_id) # 38523a98 Gap1: visible explicit human stop
1996
1761
  reaped = _do_reap(api_key, host_id) # 38523a98: kill ghosts/dup-PIDs/crashed-orphans
1997
- ghosts = _do_ghost_reap(api_key, host_id) # FOCUS-1 09fca8fe: dead-MCP ghost detector (LOG-ONLY)
1998
1762
  _gc_headless_pids() # cb90b058: drop dead PIDs (stale entry can't mask a live agent)
1999
1763
  _up = int(time.monotonic() - _spawn_mono)
2000
1764
  if relaunched or recycled or ver_recycled or stopped or enforced or reaped or force_killed: