meshcode 2.11.114rc1__tar.gz → 2.11.116__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/PKG-INFO +1 -1
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/__init__.py +1 -1
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/_session_handoff_template.py +49 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/comms_v4.py +1 -1
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hostd.py +0 -236
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/server.py +115 -244
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/protocol_handler.py +34 -1
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/run_agent.py +37 -17
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/SOURCES.txt +37 -7
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/pyproject.toml +1 -1
- meshcode-2.11.116/tests/test_auto_update_hardening.py +295 -0
- meshcode-2.11.116/tests/test_autonomous_closegap_1.py +164 -0
- meshcode-2.11.116/tests/test_autonomous_closegap_2.py +210 -0
- meshcode-2.11.116/tests/test_autonomous_closegap_3.py +163 -0
- meshcode-2.11.116/tests/test_autonomous_prompt_inject.py +126 -0
- meshcode-2.11.116/tests/test_boot_bug_regression.py +205 -0
- meshcode-2.11.116/tests/test_color_truecolor.py +83 -0
- meshcode-2.11.116/tests/test_core.py +216 -0
- meshcode-2.11.116/tests/test_cross_agent_messaging.py +366 -0
- meshcode-2.11.116/tests/test_date_parse.py +112 -0
- meshcode-2.11.116/tests/test_doctor.py +123 -0
- meshcode-2.11.116/tests/test_epistemic_v1_python_sdk.py +177 -0
- meshcode-2.11.116/tests/test_epistemic_v1_stop_conditions.py +158 -0
- meshcode-2.11.116/tests/test_esc_deaf_state.py +361 -0
- meshcode-2.11.116/tests/test_exceptions.py +107 -0
- meshcode-2.11.116/tests/test_file_upload.py +171 -0
- meshcode-2.11.116/tests/test_init_device_code.py +68 -0
- meshcode-2.11.116/tests/test_install_guard.py +170 -0
- meshcode-2.11.116/tests/test_lease_sigterm_release.py +299 -0
- meshcode-2.11.116/tests/test_mark_read_batch.py +200 -0
- meshcode-2.11.116/tests/test_marketplace_ratings.py +174 -0
- meshcode-2.11.116/tests/test_migration_integrity.py +176 -0
- meshcode-2.11.116/tests/test_realtime_event_freshness.py +236 -0
- meshcode-2.11.116/tests/test_rls_cross_tenant.py +255 -0
- meshcode-2.11.116/tests/test_rpc_grants.py +76 -0
- meshcode-2.11.116/tests/test_rpc_migrations.py +452 -0
- meshcode-2.11.116/tests/test_run_agent_dry_run.py +128 -0
- meshcode-2.11.116/tests/test_run_agent_no_server_import.py +85 -0
- meshcode-2.11.116/tests/test_security_regressions.py +228 -0
- meshcode-2.11.116/tests/test_self_update_user_site.py +139 -0
- meshcode-2.11.116/tests/test_sentinel.py +148 -0
- meshcode-2.11.116/tests/test_setup_path.py +66 -0
- meshcode-2.11.116/tests/test_sleep_signals.py +160 -0
- meshcode-2.11.116/tests/test_status_enum_coverage.py +231 -0
- meshcode-2.11.116/tests/test_stay_on_loop_hook.py +302 -0
- meshcode-2.11.116/tests/test_wait_open_tasks_contradiction.py +87 -0
- meshcode-2.11.114rc1/meshcode/_session_handoff_template 2.py +0 -296
- meshcode-2.11.114rc1/meshcode/_session_handoff_template 3.py +0 -296
- meshcode-2.11.114rc1/meshcode/claude_update 2.py +0 -258
- meshcode-2.11.114rc1/meshcode/claude_update 3.py +0 -258
- meshcode-2.11.114rc1/meshcode/hostd 2.py +0 -1269
- meshcode-2.11.114rc1/meshcode/up 2.py +0 -257
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/README.md +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/__main__.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/claude_update.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/cli.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/compat.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/daemon.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/doctor.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/invites.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/launcher.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/backend.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/realtime.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/preferences.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/secrets.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/self_update.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/up.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode/upload.py +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.114rc1 → meshcode-2.11.116}/setup.cfg +0 -0
|
@@ -161,6 +161,53 @@ def _request_recycle_if_marked(project_dir) -> None:
|
|
|
161
161
|
sys.stderr.write(f"[session_handoff_write] recycle-request skipped: {e}\\n")
|
|
162
162
|
|
|
163
163
|
|
|
164
|
+
def _persist_handoff_to_memory(project_dir, handoff) -> None:
|
|
165
|
+
"""L6 M6.1 (task 84c426d4, 2.11.114): mirror the handoff into
|
|
166
|
+
mc_agent_memory key='session_handoff' so the server-side boot
|
|
167
|
+
continuity_capsule (mig 456) can surface it on the NEXT session even
|
|
168
|
+
when the local handoff.json is gone (new host, wiped workspace).
|
|
169
|
+
Same best-effort creds pattern as _request_recycle_if_marked —
|
|
170
|
+
any failure silently skips; handoff.json already covers the local path.
|
|
171
|
+
"""
|
|
172
|
+
try:
|
|
173
|
+
mcp = json.loads((project_dir / ".mcp.json").read_text(encoding="utf-8"))
|
|
174
|
+
env = (next(iter((mcp.get("mcpServers") or {}).values()), {}) or {}).get("env", {}) or {}
|
|
175
|
+
url = env.get("SUPABASE_URL"); key = env.get("SUPABASE_KEY")
|
|
176
|
+
agent = env.get("MESHCODE_AGENT"); project = env.get("MESHCODE_PROJECT")
|
|
177
|
+
if not (url and key and agent):
|
|
178
|
+
return
|
|
179
|
+
api_key = os.environ.get("MESHCODE_API_KEY")
|
|
180
|
+
if not api_key:
|
|
181
|
+
try:
|
|
182
|
+
import importlib
|
|
183
|
+
api_key = importlib.import_module("meshcode.secrets").get_api_key(
|
|
184
|
+
profile=env.get("MESHCODE_KEYCHAIN_PROFILE") or "default")
|
|
185
|
+
except Exception:
|
|
186
|
+
api_key = None
|
|
187
|
+
if not api_key:
|
|
188
|
+
return
|
|
189
|
+
turns = handoff.get("turns") or []
|
|
190
|
+
compact = {
|
|
191
|
+
"trigger": handoff.get("trigger"),
|
|
192
|
+
"captured_at_session": handoff.get("session_id"),
|
|
193
|
+
"tail": [{"role": t["role"], "text": t["text"][:400]} for t in turns[-8:]],
|
|
194
|
+
}
|
|
195
|
+
import urllib.request as _u
|
|
196
|
+
body = json.dumps({
|
|
197
|
+
"p_api_key": api_key, "p_agent_name": agent,
|
|
198
|
+
"p_key": "session_handoff", "p_value": compact,
|
|
199
|
+
"p_tier": "episodic", "p_project_name": project,
|
|
200
|
+
}).encode("utf-8")
|
|
201
|
+
req = _u.Request(
|
|
202
|
+
url.rstrip("/") + "/rest/v1/rpc/mc_memory_set",
|
|
203
|
+
data=body, method="POST",
|
|
204
|
+
headers={"apikey": key, "Authorization": "Bearer " + key,
|
|
205
|
+
"Content-Type": "application/json"})
|
|
206
|
+
_u.urlopen(req, timeout=5).read()
|
|
207
|
+
except Exception as e: # noqa: BLE001 — never block compaction
|
|
208
|
+
sys.stderr.write(f"[session_handoff_write] memory-persist skipped: {e}\\n")
|
|
209
|
+
|
|
210
|
+
|
|
164
211
|
def main() -> int:
|
|
165
212
|
try:
|
|
166
213
|
raw = sys.stdin.read()
|
|
@@ -185,6 +232,8 @@ def main() -> int:
|
|
|
185
232
|
tmp.replace(d / "handoff.json")
|
|
186
233
|
except OSError as e:
|
|
187
234
|
sys.stderr.write(f"[session_handoff_write] skipped: {e}\\n")
|
|
235
|
+
# L6 M6.1: mirror to server-side memory for the boot continuity capsule.
|
|
236
|
+
_persist_handoff_to_memory(_project_dir(), handoff)
|
|
188
237
|
# CTX-CLOSE-RELAUNCH (task 400fc536): now that the thread is snapshotted,
|
|
189
238
|
# commander-tier sessions ask the server to recycle at the next task-edge.
|
|
190
239
|
_request_recycle_if_marked(_project_dir())
|
|
@@ -1886,7 +1886,7 @@ def _start_heartbeat_daemon(project, name, agent_pid=None):
|
|
|
1886
1886
|
" if not check_still_leased(pid):\n"
|
|
1887
1887
|
" sys.exit(0)\n"
|
|
1888
1888
|
" post('/rest/v1/rpc/mc_heartbeat', {'p_project_id':pid,'p_agent_name':name})\n"
|
|
1889
|
-
" time.sleep(30)\n"
|
|
1889
|
+
" time.sleep(10)\n" # R2-3 (.116): 30s->10s so the fork's agent-alive check (self-exit on recycle/stop) tightens the stale-heartbeat window to <=10s
|
|
1890
1890
|
)
|
|
1891
1891
|
# Windows: start_new_session kwarg doesn't exist. Use creationflags.
|
|
1892
1892
|
_popen_kwargs = {
|
|
@@ -816,32 +816,6 @@ def _pid_cmdline(pid: int) -> str:
|
|
|
816
816
|
return ""
|
|
817
817
|
|
|
818
818
|
|
|
819
|
-
def _pid_alive(pid: int) -> bool:
|
|
820
|
-
"""True iff `pid` is a live process. Signal-0 probe on POSIX; tasklist on Windows.
|
|
821
|
-
Best-effort — on any unexpected error returns False (treat unknown as not-alive so
|
|
822
|
-
the ghost detector never mis-reports). PermissionError means the process exists but
|
|
823
|
-
isn't ours = alive; ProcessLookupError means dead."""
|
|
824
|
-
try:
|
|
825
|
-
pid = int(pid)
|
|
826
|
-
if pid <= 0:
|
|
827
|
-
return False
|
|
828
|
-
if sys.platform == "win32":
|
|
829
|
-
# /FO CSV so the pid is a quoted field — anchor on `"<pid>"` instead of a bare
|
|
830
|
-
# substring (a bare pid can match a memory/session column → false-positive).
|
|
831
|
-
out = subprocess.run(["tasklist", "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"],
|
|
832
|
-
capture_output=True, text=True, timeout=5).stdout or ""
|
|
833
|
-
return f'"{pid}"' in out
|
|
834
|
-
try:
|
|
835
|
-
os.kill(pid, 0)
|
|
836
|
-
except ProcessLookupError:
|
|
837
|
-
return False
|
|
838
|
-
except PermissionError:
|
|
839
|
-
return True
|
|
840
|
-
return True
|
|
841
|
-
except Exception:
|
|
842
|
-
return False
|
|
843
|
-
|
|
844
|
-
|
|
845
819
|
def _discover_agent_pids(target: str) -> list:
|
|
846
820
|
"""Fallback PID discovery by command line, for agents spawned before this hostd
|
|
847
821
|
(no recorded PID) or after a state-file loss. Matches `meshcode run <target>`.
|
|
@@ -1228,215 +1202,6 @@ def _do_recycle_enforce(api_key: str, host_id: str) -> int:
|
|
|
1228
1202
|
return n
|
|
1229
1203
|
|
|
1230
1204
|
|
|
1231
|
-
# ------------------------------------------------------------------
|
|
1232
|
-
# FOCUS-1 GHOST SESSIONS (back-2, task 09fca8fe) — dead-MCP detector.
|
|
1233
|
-
#
|
|
1234
|
-
# OS-ghost = a `claude` agent process still ALIVE but its `meshcode_mcp serve`
|
|
1235
|
-
# child DEAD → no heartbeat → the dashboard shows it offline/invisible while a
|
|
1236
|
-
# terminal window is still open ("connected-but-invisible"). Two local pidfiles
|
|
1237
|
-
# correlate it, no roster RPC needed for detection:
|
|
1238
|
-
# S3 claude alive — ~/.meshcode/pids/<project>__<agent>.pid, stamped by
|
|
1239
|
-
# run_agent right before os.execvp (the pid survives exec => == claude pid).
|
|
1240
|
-
# S2 mcp dead — {tempdir}/meshcode_mcp_<project>_<agent>.pid, written by the
|
|
1241
|
-
# MCP server (meshcode_mcp/server.py). If that pid is dead/missing, mcp died.
|
|
1242
|
-
# A ghost = S3 alive AND S2 dead. The eventual reap (kill the orphaned claude so
|
|
1243
|
-
# the respawn sweep relaunches it clean) ALSO needs S1 = the server's is_ghost /
|
|
1244
|
-
# effective_status='offline' (prod mc_agent_liveness) as a third independent
|
|
1245
|
-
# guard, plus the cmdline reuse-guard — wired when arming.
|
|
1246
|
-
#
|
|
1247
|
-
# SHIPS LOG-ONLY (_GHOST_REAP_DRYRUN=True): logs GHOST-DRYRUN candidates so we can
|
|
1248
|
-
# confirm ZERO false positives across the live fleet BEFORE flipping to a real
|
|
1249
|
-
# kill (commander-mandated staged arming). Kills NOTHING while dry-run.
|
|
1250
|
-
#
|
|
1251
|
-
# GUARDS folded from back's cross-review (HIGH_1/HIGH_2) — all enforced even in the
|
|
1252
|
-
# DRY-RUN classification so the logs only ever show TRUE ghosts:
|
|
1253
|
-
# - boot-grace: skip until spawned_age (pidfile mtime) > GHOST_BOOT_GRACE_SEC, so a
|
|
1254
|
-
# freshly-booting agent (claude up, MCP still connecting) is NOT mis-flagged.
|
|
1255
|
-
# - cwd-guard: the live pid's working dir MUST equal the agent's recorded launch cwd
|
|
1256
|
-
# (pidfile {cwd}, or the workspace as legacy fallback). Anti PID-reuse mis-kill —
|
|
1257
|
-
# post-execvp claude's cmdline loses <target>, so cwd is the stable correlator. If
|
|
1258
|
-
# cwd is unreadable on this platform → fail-SAFE (skip, never reap on uncertainty).
|
|
1259
|
-
# ------------------------------------------------------------------
|
|
1260
|
-
_GHOST_REAP_DRYRUN = True
|
|
1261
|
-
GHOST_BOOT_GRACE_SEC = _env_int("MESHCODE_GHOST_BOOT_GRACE_SEC", 120, 30) # mac boots slow; >> _REAP grace + MCP reconnect
|
|
1262
|
-
GHOST_PERSIST_SEC = _env_int("MESHCODE_GHOST_PERSIST_SEC", 90, 30) # ghost must hold this long → a brief mcp restart doesn't count (MED_restart)
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
def _mcp_serve_pid(project: str, agent: str) -> Optional[int]:
|
|
1266
|
-
"""Read the MCP server's recorded pid for this agent (meshcode_mcp/server.py
|
|
1267
|
-
lockfile). Returns the pid, or None if the lockfile is missing/unparseable.
|
|
1268
|
-
Path + JSON|bare-int format mirror _pid_lockfile_path()/_read_pid_lockfile()."""
|
|
1269
|
-
try:
|
|
1270
|
-
import tempfile as _tf
|
|
1271
|
-
safe = f"meshcode_mcp_{project}_{agent}.pid".replace("/", "_").replace(" ", "_")
|
|
1272
|
-
path = os.path.join(_tf.gettempdir(), safe)
|
|
1273
|
-
if not os.path.exists(path):
|
|
1274
|
-
return None
|
|
1275
|
-
raw = open(path, "r").read().strip()
|
|
1276
|
-
if not raw:
|
|
1277
|
-
return None
|
|
1278
|
-
try:
|
|
1279
|
-
data = json.loads(raw)
|
|
1280
|
-
if isinstance(data, dict) and "pid" in data:
|
|
1281
|
-
return int(data["pid"])
|
|
1282
|
-
except (ValueError, TypeError):
|
|
1283
|
-
pass
|
|
1284
|
-
return int(raw)
|
|
1285
|
-
except Exception:
|
|
1286
|
-
return None
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
def _agent_launcher_record(project: str, agent: str):
|
|
1290
|
-
"""Read run_agent's pre-execvp pidfile (~/.meshcode/pids/<project>__<agent>.pid).
|
|
1291
|
-
Returns (pid:int|None, cwd:str|None). New format is JSON {"pid","cwd"}; tolerates
|
|
1292
|
-
the legacy bare-int (no recorded cwd)."""
|
|
1293
|
-
try:
|
|
1294
|
-
safe = f"{project}__{agent}".replace("/", "_").replace("\\", "_").replace(" ", "_")
|
|
1295
|
-
path = STATE_DIR / "pids" / f"{safe}.pid"
|
|
1296
|
-
if not path.exists():
|
|
1297
|
-
return None, None
|
|
1298
|
-
raw = path.read_text(encoding="utf-8").strip()
|
|
1299
|
-
if not raw:
|
|
1300
|
-
return None, None
|
|
1301
|
-
try:
|
|
1302
|
-
data = json.loads(raw)
|
|
1303
|
-
if isinstance(data, dict) and "pid" in data:
|
|
1304
|
-
return int(data["pid"]), (data.get("cwd") or None)
|
|
1305
|
-
except (ValueError, TypeError):
|
|
1306
|
-
pass
|
|
1307
|
-
return int(raw), None
|
|
1308
|
-
except Exception:
|
|
1309
|
-
return None, None
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
def _read_pid_cwd(pid: int) -> Optional[str]:
|
|
1313
|
-
"""Best-effort working directory of a live pid (time-boxed). None if unreadable.
|
|
1314
|
-
linux : /proc/<pid>/cwd symlink
|
|
1315
|
-
macOS : lsof -a -p <pid> -d cwd -Fn → the 'n' line (no Full Disk Access needed)
|
|
1316
|
-
win : None (run_agent does NOT execvp there; cmdline keeps `run <target>`, so
|
|
1317
|
-
the existing cmdline token-guard already correlates — cwd not needed)."""
|
|
1318
|
-
try:
|
|
1319
|
-
if sys.platform == "win32":
|
|
1320
|
-
return None
|
|
1321
|
-
if sys.platform == "darwin":
|
|
1322
|
-
out = subprocess.run(["lsof", "-a", "-p", str(int(pid)), "-d", "cwd", "-Fn"],
|
|
1323
|
-
capture_output=True, text=True, timeout=5).stdout or ""
|
|
1324
|
-
for line in out.splitlines():
|
|
1325
|
-
if line.startswith("n"):
|
|
1326
|
-
return line[1:].strip() or None
|
|
1327
|
-
return None
|
|
1328
|
-
return os.readlink(f"/proc/{int(pid)}/cwd") # linux
|
|
1329
|
-
except Exception:
|
|
1330
|
-
return None
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
def _pid_cwd_matches(pid: int, expected_cwd: Optional[str], project: str, agent: str):
|
|
1334
|
-
"""Tri-state: True if the pid's live cwd == the agent's launch cwd; False if it
|
|
1335
|
-
differs; None if unreadable. `expected_cwd` is the pidfile-recorded cwd; when absent
|
|
1336
|
-
(legacy pidfile) fall back to the canonical workspace dir. realpath both sides
|
|
1337
|
-
(mac /Users vs /private symlinks, --repo dirs). Autonomous reap requires True."""
|
|
1338
|
-
live = _read_pid_cwd(pid)
|
|
1339
|
-
if not live:
|
|
1340
|
-
return None # unreadable → unknown → fail-safe skip
|
|
1341
|
-
exp = set()
|
|
1342
|
-
if expected_cwd:
|
|
1343
|
-
try: exp.add(os.path.realpath(os.path.expanduser(expected_cwd)))
|
|
1344
|
-
except Exception: pass
|
|
1345
|
-
try:
|
|
1346
|
-
exp.add(os.path.realpath(os.path.join(os.path.expanduser("~"), "meshcode", f"{project}-{agent}")))
|
|
1347
|
-
except Exception:
|
|
1348
|
-
pass
|
|
1349
|
-
try:
|
|
1350
|
-
return os.path.realpath(live) in exp
|
|
1351
|
-
except Exception:
|
|
1352
|
-
return None
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
def _do_ghost_reap(api_key: str, host_id: str) -> int:
|
|
1356
|
-
"""Detect dead-MCP ghosts (claude ALIVE + mcp_serve DEAD) from local pidfiles.
|
|
1357
|
-
LOG-ONLY while _GHOST_REAP_DRYRUN — confirm zero false positives in the fleet logs
|
|
1358
|
-
before arming. Returns the count that WOULD be reaped.
|
|
1359
|
-
|
|
1360
|
-
Self-contained: enumerates this host's launcher pidfiles (agents WE launched), so
|
|
1361
|
-
no roster RPC is needed for detection. All HIGH guards (boot-grace + cwd-match) are
|
|
1362
|
-
enforced here so DRYRUN logs only ever show TRUE ghosts. Fail-open at the top level
|
|
1363
|
-
so a bug here can never wedge the sweep."""
|
|
1364
|
-
n = 0
|
|
1365
|
-
try:
|
|
1366
|
-
pid_dir = STATE_DIR / "pids"
|
|
1367
|
-
if not pid_dir.is_dir():
|
|
1368
|
-
return 0
|
|
1369
|
-
now = time.time()
|
|
1370
|
-
st = _load_state()
|
|
1371
|
-
seen = dict(st.get("ghost_seen") or {}) # {target: first_candidate_ts} — MED_restart persistence
|
|
1372
|
-
still = set() # targets that are candidates THIS sweep (others get cleared)
|
|
1373
|
-
for pf in sorted(pid_dir.glob("*.pid")):
|
|
1374
|
-
try:
|
|
1375
|
-
stem = pf.stem # "<project>__<agent>"
|
|
1376
|
-
if "__" not in stem:
|
|
1377
|
-
continue
|
|
1378
|
-
project, agent = stem.split("__", 1)
|
|
1379
|
-
target = f"{project}/{agent}"
|
|
1380
|
-
claude_pid, rec_cwd = _agent_launcher_record(project, agent)
|
|
1381
|
-
if not claude_pid or not _pid_alive(claude_pid):
|
|
1382
|
-
# MED_pidfile_stale: the agent exited (run_agent execvp'd, can't self-clean) →
|
|
1383
|
-
# the launcher pid is dead. Prune the stale pidfile so a future reused pid can't FP.
|
|
1384
|
-
try:
|
|
1385
|
-
pf.unlink()
|
|
1386
|
-
except Exception:
|
|
1387
|
-
pass
|
|
1388
|
-
continue # S3 fail: no live claude → not a ghost
|
|
1389
|
-
# HIGH_2 boot-grace: pidfile mtime ≈ spawn time (written just before execvp).
|
|
1390
|
-
try:
|
|
1391
|
-
spawned_age = now - pf.stat().st_mtime
|
|
1392
|
-
except Exception:
|
|
1393
|
-
spawned_age = 1e9
|
|
1394
|
-
if spawned_age < GHOST_BOOT_GRACE_SEC:
|
|
1395
|
-
continue # still booting / MCP may be connecting → never reap
|
|
1396
|
-
mcp_pid = _mcp_serve_pid(project, agent)
|
|
1397
|
-
if mcp_pid and _pid_alive(mcp_pid):
|
|
1398
|
-
continue # S2 fail: mcp_serve alive → healthy
|
|
1399
|
-
mcp_state = (f"pid {mcp_pid} DEAD" if mcp_pid else "lockfile MISSING")
|
|
1400
|
-
# HIGH_1 cwd-guard: the live pid MUST be running in this agent's launch cwd.
|
|
1401
|
-
live_cwd = _read_pid_cwd(claude_pid)
|
|
1402
|
-
cwd_match = _pid_cwd_matches(claude_pid, rec_cwd, project, agent)
|
|
1403
|
-
if cwd_match is not True:
|
|
1404
|
-
_log(f"GHOST-SKIP {target}: claude pid {claude_pid} + mcp {mcp_state} but cwd-guard "
|
|
1405
|
-
f"{'UNREADABLE' if cwd_match is None else 'MISMATCH'} "
|
|
1406
|
-
f"(live_cwd={live_cwd!r} expected~{rec_cwd!r}) — NOT a confirmed ghost, no reap.")
|
|
1407
|
-
continue # fail-SAFE: never reap on cwd uncertainty/mismatch
|
|
1408
|
-
# Candidate (S3 alive ∧ boot-grace ∧ S2 dead ∧ cwd-match). MED_restart persistence:
|
|
1409
|
-
# only COUNT after the ghost has held GHOST_PERSIST_SEC, so a brief mcp restart
|
|
1410
|
-
# (dead for one sweep) never registers.
|
|
1411
|
-
still.add(target)
|
|
1412
|
-
first_ts = seen.get(target) or now
|
|
1413
|
-
seen[target] = first_ts
|
|
1414
|
-
held = now - first_ts
|
|
1415
|
-
if held < GHOST_PERSIST_SEC:
|
|
1416
|
-
_log(f"GHOST-PENDING {target}: dead-MCP candidate held {int(held)}s (< {GHOST_PERSIST_SEC}s) "
|
|
1417
|
-
f"— waiting for persistence before counting (guards against a brief mcp restart).")
|
|
1418
|
-
continue
|
|
1419
|
-
# CONFIRMED, PERSISTENT dead-MCP ghost.
|
|
1420
|
-
n += 1
|
|
1421
|
-
cmdline = _pid_cmdline(claude_pid).strip()[:120]
|
|
1422
|
-
_log(f"GHOST-DRYRUN {target}: claude pid {claude_pid} ALIVE (cwd={live_cwd!r}, "
|
|
1423
|
-
f"spawned_age={int(spawned_age)}s, ghost_held={int(held)}s) but meshcode_mcp serve "
|
|
1424
|
-
f"{mcp_state} — connected-but-invisible. WOULD reap claude → respawn relaunches clean "
|
|
1425
|
-
f"[log-only; arming also needs S1 is_ghost + human-defer + spawn-breaker]. cmdline={cmdline!r}")
|
|
1426
|
-
# ARMED PATH (not yet enabled): AND S1 (roster is_ghost) AND not _human_recently_active(target)
|
|
1427
|
-
# AND _spawn_rate_ok(target), then _kill_headless_pid(target, claude_pid) → respawn relaunches.
|
|
1428
|
-
except Exception:
|
|
1429
|
-
continue
|
|
1430
|
-
# Clear persistence for targets that recovered (mcp back, agent gone) so a future
|
|
1431
|
-
# transient doesn't inherit a stale clock.
|
|
1432
|
-
seen = {t: ts for t, ts in seen.items() if t in still}
|
|
1433
|
-
st["ghost_seen"] = seen
|
|
1434
|
-
_save_state(st)
|
|
1435
|
-
except Exception:
|
|
1436
|
-
pass
|
|
1437
|
-
return n
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
1205
|
def _do_recycles(api_key: str, host_id: str) -> int:
|
|
1441
1206
|
"""Uptime-based recycle at task boundary. Returns number recycled."""
|
|
1442
1207
|
# DEAD-FEATURE (task 222b1b02, Samuel 2026-06-04): the RECYCLE feature is disabled in
|
|
@@ -1994,7 +1759,6 @@ def cmd_hostd(args: list) -> int:
|
|
|
1994
1759
|
stopped = _do_stops(api_key, host_id)
|
|
1995
1760
|
force_killed = _do_force_kills(api_key, host_id) # 38523a98 Gap1: visible explicit human stop
|
|
1996
1761
|
reaped = _do_reap(api_key, host_id) # 38523a98: kill ghosts/dup-PIDs/crashed-orphans
|
|
1997
|
-
ghosts = _do_ghost_reap(api_key, host_id) # FOCUS-1 09fca8fe: dead-MCP ghost detector (LOG-ONLY)
|
|
1998
1762
|
_gc_headless_pids() # cb90b058: drop dead PIDs (stale entry can't mask a live agent)
|
|
1999
1763
|
_up = int(time.monotonic() - _spawn_mono)
|
|
2000
1764
|
if relaunched or recycled or ver_recycled or stopped or enforced or reaped or force_killed:
|