meshcode 2.11.125__tar.gz → 2.11.127__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {meshcode-2.11.125 → meshcode-2.11.127}/PKG-INFO +1 -1
  2. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/__init__.py +1 -1
  3. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/hostd.py +267 -2
  4. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/protocol_handler.py +8 -5
  5. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/PKG-INFO +1 -1
  6. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/SOURCES.txt +1 -0
  7. {meshcode-2.11.125 → meshcode-2.11.127}/pyproject.toml +1 -1
  8. meshcode-2.11.127/tests/test_stop_ghost_terminal.py +211 -0
  9. {meshcode-2.11.125 → meshcode-2.11.127}/README.md +0 -0
  10. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/__main__.py +0 -0
  11. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/_session_handoff_template.py +0 -0
  12. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/_stop_hook_template.py +0 -0
  13. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/ascii_art.py +0 -0
  14. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/atomic_push.py +0 -0
  15. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/claude_update.py +0 -0
  16. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/cli.py +0 -0
  17. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/comms_v4.py +0 -0
  18. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/compat.py +0 -0
  19. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/daemon.py +0 -0
  20. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/date_parse.py +0 -0
  21. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/doctor.py +0 -0
  22. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/error_hints.py +0 -0
  23. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/exceptions.py +0 -0
  24. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/hooks/__init__.py +0 -0
  25. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/hooks/repo_path_lock.py +0 -0
  26. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/invites.py +0 -0
  27. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/launcher.py +0 -0
  28. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/launcher_install.py +0 -0
  29. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/__init__.py +0 -0
  30. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/__main__.py +0 -0
  31. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/backend.py +0 -0
  32. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/realtime.py +0 -0
  33. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/server.py +0 -0
  34. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
  35. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/swarm.py +0 -0
  36. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_backend.py +0 -0
  37. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
  38. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
  39. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
  40. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_realtime.py +0 -0
  41. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
  42. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/meshcode_mcp/test_swarm.py +0 -0
  43. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/preferences.py +0 -0
  44. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/protocol_v2.py +0 -0
  45. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/quickstart.py +0 -0
  46. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/rpc_allowlist.py +0 -0
  47. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/run_agent.py +0 -0
  48. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/scripts/check_secrets.py +0 -0
  49. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/scripts/race_rate_harness.py +0 -0
  50. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/secrets.py +0 -0
  51. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/self_update.py +0 -0
  52. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/setup_clients.py +0 -0
  53. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/supervisor.py +0 -0
  54. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/up.py +0 -0
  55. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode/upload.py +0 -0
  56. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/dependency_links.txt +0 -0
  57. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/entry_points.txt +0 -0
  58. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/requires.txt +0 -0
  59. {meshcode-2.11.125 → meshcode-2.11.127}/meshcode.egg-info/top_level.txt +0 -0
  60. {meshcode-2.11.125 → meshcode-2.11.127}/setup.cfg +0 -0
  61. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_auto_update_hardening.py +0 -0
  62. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_autonomous_closegap_1.py +0 -0
  63. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_autonomous_closegap_2.py +0 -0
  64. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_autonomous_closegap_3.py +0 -0
  65. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_autonomous_prompt_inject.py +0 -0
  66. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_boot_bug_regression.py +0 -0
  67. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_color_truecolor.py +0 -0
  68. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_core.py +0 -0
  69. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_cross_agent_messaging.py +0 -0
  70. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_date_parse.py +0 -0
  71. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_doctor.py +0 -0
  72. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_epistemic_v1_python_sdk.py +0 -0
  73. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_epistemic_v1_stop_conditions.py +0 -0
  74. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_esc_deaf_state.py +0 -0
  75. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_exceptions.py +0 -0
  76. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_file_upload.py +0 -0
  77. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_init_device_code.py +0 -0
  78. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_install_guard.py +0 -0
  79. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_lease_sigterm_release.py +0 -0
  80. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_mark_read_batch.py +0 -0
  81. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_marketplace_ratings.py +0 -0
  82. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_migration_integrity.py +0 -0
  83. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_realtime_event_freshness.py +0 -0
  84. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_rls_cross_tenant.py +0 -0
  85. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_rpc_grants.py +0 -0
  86. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_rpc_migrations.py +0 -0
  87. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_run_agent_dry_run.py +0 -0
  88. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_run_agent_no_server_import.py +0 -0
  89. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_security_regressions.py +0 -0
  90. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_self_update_user_site.py +0 -0
  91. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_sentinel.py +0 -0
  92. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_setup_path.py +0 -0
  93. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_sleep_signals.py +0 -0
  94. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_status_enum_coverage.py +0 -0
  95. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_stay_on_loop_hook.py +0 -0
  96. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_swarm_events.py +0 -0
  97. {meshcode-2.11.125 → meshcode-2.11.127}/tests/test_wait_open_tasks_contradiction.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshcode
3
- Version: 2.11.125
3
+ Version: 2.11.127
4
4
  Summary: Real-time communication between AI agents — Supabase-backed CLI
5
5
  Author-email: MeshCode <hello@meshcode.io>
6
6
  License: MIT
@@ -1,5 +1,5 @@
1
1
  """MeshCode — Real-time communication between AI agents."""
2
- __version__ = "2.11.125"
2
+ __version__ = "2.11.127"
3
3
 
4
4
  # Exception hierarchy — eagerly imported (lightweight, no deps)
5
5
  from meshcode.exceptions import ( # noqa: F401
@@ -209,6 +209,34 @@ SPAWN_MIN_INTERVAL_SEC = _env_int("MESHCODE_SPAWN_MIN_INTERVAL_SEC", 45, 5) #
209
209
  SPAWN_BURST_CAP = _env_int("MESHCODE_SPAWN_BURST_CAP", 5, 2) # max spawns ...
210
210
  SPAWN_BURST_WINDOW_SEC = _env_int("MESHCODE_SPAWN_BURST_WINDOW_SEC", 600, 60) # ... within this rolling window before tripping
211
211
 
212
+ # ------------------------------------------------------------------
213
+ # Plain-respawn guards (Samuel P0 2026-06-10, terminal storm): the burst
214
+ # breaker above NEVER saw the overnight storm — hostd respawned zylo-bemate
215
+ # every ~16 min for 13 h (125 terminals): slow boots under load kept resetting
216
+ # the server-side respawn cap (the agent heartbeats briefly, count -> 0), the
217
+ # ~16 min cadence stays under 5 spawns/10 min, and every respawn STACKED a new
218
+ # window on top of the still-alive previous session, adding load that made the
219
+ # next boot even slower. Two guards on the plain-respawn path in _do_respawns:
220
+ #
221
+ # LIVE-SESSION GUARD — never open a second terminal while the previous
222
+ # session's process is still alive: HOLD (spawn nothing), hard-kill the stuck
223
+ # session after _RESPAWN_STUCK_KILL_S, and relaunch only once it is fully
224
+ # gone. An explicit Start (fresh spawned_at) kills the old session at once —
225
+ # a restart the human asked for must first fully close the old terminal.
226
+ #
227
+ # CONVERGENCE GUARD — mirror of the recycle guard (451d33a0) for plain
228
+ # respawns, keyed on time BETWEEN consecutive respawns so it catches ANY
229
+ # cadence: _RESPAWN_CONVERGE_MAX consecutive respawns each started less than
230
+ # _RESPAWN_CONVERGE_LIFETIME_S after the previous one -> BLOCK further
231
+ # respawns for _RESPAWN_CONVERGE_BLOCK_TTL_S + resolve the pending launch as
232
+ # failed (FE toast). An explicit Start clears the block.
233
+ # ------------------------------------------------------------------
234
+ _RESPAWN_STUCK_KILL_S = _env_int("MESHCODE_RESPAWN_STUCK_KILL_SEC", 600, 60) # kill a stale-but-alive session after this hold
235
+ _RESPAWN_CONVERGE_MAX = _env_int("MESHCODE_RESPAWN_CONVERGE_MAX", 3, 2) # consecutive short-lived respawns before blocking
236
+ _RESPAWN_CONVERGE_LIFETIME_S = _env_int("MESHCODE_RESPAWN_CONVERGE_LIFETIME_SEC", 1800, 120) # "short-lived" = next respawn needed within this
237
+ _RESPAWN_CONVERGE_BLOCK_TTL_S = _env_int("MESHCODE_RESPAWN_CONVERGE_BLOCK_TTL_SEC", 21600, 600) # block duration before one clean retry
238
+ _RESPAWN_FRESH_CLICK_S = _env_int("MESHCODE_RESPAWN_FRESH_CLICK_SEC", 90, 30) # spawned_age_s under this = explicit Start click
239
+
212
240
 
213
241
  def _log(msg: str) -> None:
214
242
  line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
@@ -535,6 +563,88 @@ def _do_respawns(api_key: str, host_id: str) -> int:
535
563
  _rrall[_target] = _rr
536
564
  _rst["recyrespawn"] = _rrall
537
565
  _save_state(_rst)
566
+ # ---- plain-respawn guards (Samuel P0 2026-06-10, terminal storm; see
567
+ # ---- constants block above for the full incident rationale) ----
568
+ if not _is_recycle:
569
+ _now2 = time.time()
570
+ _st2 = _load_state()
571
+ try:
572
+ _fresh_click = float(c.get("spawned_age_s")) < _RESPAWN_FRESH_CLICK_S
573
+ except (TypeError, ValueError):
574
+ _fresh_click = False
575
+ _cv_all = dict(_st2.get("respawn_conv") or {})
576
+ _cv = dict(_cv_all.get(_target) or {})
577
+ if _fresh_click and (_cv or _target in (_st2.get("respawn_hold") or {})):
578
+ # explicit Start (fresh spawned_at) = human permission: clear guard state
579
+ _cv = {}
580
+ _cv_all.pop(_target, None)
581
+ _hh = dict(_st2.get("respawn_hold") or {})
582
+ _hh.pop(_target, None)
583
+ _st2["respawn_conv"] = _cv_all
584
+ _st2["respawn_hold"] = _hh
585
+ _save_state(_st2)
586
+ _log(f"RESPAWN-GUARD {_target}: explicit Start — cleared hold/convergence state")
587
+ elif _cv.get("blocked_ts"):
588
+ if (_now2 - float(_cv["blocked_ts"])) < _RESPAWN_CONVERGE_BLOCK_TTL_S:
589
+ _log(f"SKIP respawn {_target}: BLOCKED (respawn_no_converge, "
590
+ f"{_cv.get('count')} short-lived respawns) — holding until TTL or a manual Start")
591
+ continue
592
+ _cv.pop("blocked_ts", None)
593
+ _cv["count"] = 0 # TTL elapsed -> one clean retry; re-blocks if it storms again
594
+ # LIVE-SESSION GUARD: never stack a second terminal on a still-alive session.
595
+ _live = [p for p in _discover_agent_pids(_target) if _pid_alive(p)]
596
+ if _live:
597
+ _hold_all = dict(_st2.get("respawn_hold") or {})
598
+ _hold = dict(_hold_all.get(_target) or {})
599
+ _first = float(_hold.get("first_ts") or _now2)
600
+ if _fresh_click or (_now2 - _first) >= _RESPAWN_STUCK_KILL_S:
601
+ _killed = sum(1 for _p in _live if _kill_headless_pid(_target, _p))
602
+ _hold_all.pop(_target, None)
603
+ _log(f"RESPAWN-STUCK-KILL {_target}: closed {_killed}/{len(_live)} stale-but-alive "
604
+ f"session pid(s) {_live} "
605
+ f"({'explicit Start' if _fresh_click else 'stuck > ' + str(_RESPAWN_STUCK_KILL_S) + 's'}) "
606
+ f"— relaunch on next sweep once fully gone")
607
+ else:
608
+ _hold["first_ts"] = _first
609
+ _hold_all[_target] = _hold
610
+ _log(f"RESPAWN-HOLD {_target}: previous session still ALIVE (pids {_live}) — "
611
+ f"NOT opening another terminal; stuck-kill in "
612
+ f"{int(_RESPAWN_STUCK_KILL_S - (_now2 - _first))}s unless it heartbeats")
613
+ _st2["respawn_hold"] = _hold_all
614
+ _save_state(_st2)
615
+ continue
616
+ # previous session fully gone — count this relaunch against convergence
617
+ _hold_all = dict(_st2.get("respawn_hold") or {})
618
+ if _target in _hold_all:
619
+ _hold_all.pop(_target, None)
620
+ _st2["respawn_hold"] = _hold_all
621
+ if _cv.get("last_ts") and (_now2 - float(_cv["last_ts"])) <= _RESPAWN_CONVERGE_LIFETIME_S:
622
+ _cv["count"] = int(_cv.get("count", 0)) + 1
623
+ else:
624
+ _cv = {"count": 1} # converged/idle long enough -> fresh count
625
+ _cv["last_ts"] = _now2
626
+ if _cv["count"] >= _RESPAWN_CONVERGE_MAX:
627
+ _cv["blocked_ts"] = _now2
628
+ _cv_all[_target] = _cv
629
+ _st2["respawn_conv"] = _cv_all
630
+ _save_state(_st2)
631
+ _log(f"ALERT {_target}: respawn NOT CONVERGING — {_cv['count']} consecutive respawns "
632
+ f"each <{_RESPAWN_CONVERGE_LIFETIME_S}s apart; BLOCKING respawns for "
633
+ f"{_RESPAWN_CONVERGE_BLOCK_TTL_S}s (a manual Start clears the block). "
634
+ f"[respawn_blocked_reason=respawn_no_converge]")
635
+ try: # stop the dashboard's eternal 'launching…' spinner + actionable toast
636
+ _rpc("mc_resolve_launch", {
637
+ "p_api_key": api_key, "p_project_id": c.get("project_id"), "p_agent": agent,
638
+ "p_status": "failed", "p_reason": "respawn_no_converge",
639
+ "p_detail": f"agent kept dying or booting too slowly {_cv['count']}x in a row — "
640
+ f"respawns paused to protect your machine and tokens; press Start "
641
+ f"to retry when ready"})
642
+ except Exception:
643
+ pass
644
+ continue
645
+ _cv_all[_target] = _cv
646
+ _st2["respawn_conv"] = _cv_all
647
+ _save_state(_st2)
538
648
  _ok, _burst, _why = _spawn_rate_ok(_target)
539
649
  if not _ok:
540
650
  _log(f"SKIP {'recycle-' if _is_recycle else ''}respawn {_target}: rate-limited ({_why})")
@@ -941,9 +1051,63 @@ def _discover_agent_pids(target: str) -> list:
941
1051
  pids.append(p)
942
1052
  except Exception:
943
1053
  pass
1054
+ # GHOST-TERMINAL fix (task 91201315): on POSIX `meshcode run` EXECVP's claude, so the
1055
+ # live agent's cmdline is `claude …` — no 'meshcode run <target>' survives and the
1056
+ # pgrep above sees NOTHING for a visible macOS agent (the '0 stopped forever' hole).
1057
+ # Its launcher bash (~/.meshcode/launchers/<label>.command) DOES keep the target in
1058
+ # its cmdline for the window's whole life — so also return that bash's direct agent
1059
+ # children. Launcher children are hostd-spawned BY CONSTRUCTION: a session the human
1060
+ # opened by hand (`claude --resume` in a plain tab) can NEVER match.
1061
+ if sys.platform != "win32":
1062
+ try:
1063
+ pids += [p for p in _discover_launcher_child_pids(target) if p not in pids]
1064
+ except Exception:
1065
+ pass
944
1066
  return pids
945
1067
 
946
1068
 
1069
+ def _discover_launcher_child_pids(target: str) -> list:
1070
+ """PIDs of `target`'s agent process(es) running UNDER its launcher bash
1071
+ (~/.meshcode/launchers/<label>.command — the macOS visible spawn path).
1072
+
1073
+ One ps snapshot: find the bash whose cmdline ends in the target's launcher
1074
+ file, then return its DIRECT children whose cmdline looks like the agent
1075
+ (claude/meshcode). The marker is re.escape'd and anchored (whitespace/EOL
1076
+ lookahead) so 'mesh-dev_back.command' can never prefix-match
1077
+ 'mesh-dev_back-2.command' (same WHOLE-token rule as _discover_agent_pids).
1078
+ Best-effort; [] on non-POSIX or any failure."""
1079
+ if sys.platform == "win32":
1080
+ return []
1081
+ marker = _terminal_window_marker(target) # '<sanitized label>.command'
1082
+ if not marker:
1083
+ return []
1084
+ rx = re.compile(r"launchers/" + re.escape(marker) + r"(?=\s|$)")
1085
+ kids = []
1086
+ try:
1087
+ out = subprocess.run(["ps", "-axo", "pid=,ppid=,args="],
1088
+ capture_output=True, text=True, timeout=8).stdout
1089
+ rows, bash_pids = [], set()
1090
+ for ln in out.splitlines():
1091
+ parts = ln.split(None, 2)
1092
+ if len(parts) < 3:
1093
+ continue
1094
+ try:
1095
+ pid, ppid = int(parts[0]), int(parts[1])
1096
+ except Exception:
1097
+ continue
1098
+ rows.append((pid, ppid, parts[2]))
1099
+ if rx.search(parts[2]):
1100
+ bash_pids.add(pid)
1101
+ for pid, ppid, args in rows:
1102
+ if ppid in bash_pids and pid not in bash_pids:
1103
+ low = args.lower()
1104
+ if "claude" in low or "meshcode" in low:
1105
+ kids.append(pid)
1106
+ except Exception:
1107
+ pass
1108
+ return kids
1109
+
1110
+
947
1111
  def _kill_heartbeat_fork(target: str) -> None:
948
1112
  """Kill the agent's heartbeat daemon fork (Fix B, task 24e3dd44). The fork is detached into its
949
1113
  OWN session, so os.killpg(getpgid(agent_pid)) in _kill_headless_pid does NOT take it down — it
@@ -1055,6 +1219,104 @@ def _do_stops(api_key: str, host_id: str) -> int:
1055
1219
  return n
1056
1220
 
1057
1221
 
1222
+ def _term_ghost_pid(target: str, pid: int) -> bool:
1223
+ """Kill a stop-ghost agent process WITHOUT killpg (task 91201315). The launcher
1224
+ bash shares the pgid — it must SURVIVE the kill so it sees rc=143 and the
1225
+ wrapper's clean-close path (protocol_handler) closes its own Terminal window.
1226
+ SIGTERM the pid only, escalate to SIGKILL (pid only) after a short grace.
1227
+ Cmdline reuse-guard is STRICTER than _kill_headless_pid's: an EMPTY cmdline is
1228
+ a skip too (we only ever kill a process we can positively read as the agent).
1229
+ Windows has no .command wrapper — delegate to _kill_headless_pid (tree kill).
1230
+ Returns True if the ghost was killed."""
1231
+ if not pid:
1232
+ return False
1233
+ if sys.platform == "win32":
1234
+ return _kill_headless_pid(target, pid)
1235
+ cl = _pid_cmdline(pid).lower()
1236
+ if "meshcode" not in cl and "claude" not in cl:
1237
+ _log(f"GHOST {target}: pid {pid} cmdline unreadable/mismatch — skip kill")
1238
+ return False
1239
+ try:
1240
+ os.kill(pid, _signal.SIGTERM)
1241
+ except ProcessLookupError:
1242
+ return False
1243
+ except Exception as e:
1244
+ _log(f"WARN: GHOST {target}: SIGTERM pid {pid} failed: {e}")
1245
+ return False
1246
+ time.sleep(2.0)
1247
+ try:
1248
+ os.kill(pid, 0) # still alive?
1249
+ os.kill(pid, _signal.SIGKILL) # didn't honor SIGTERM
1250
+ _log(f"GHOST {target}: pid {pid} ignored SIGTERM — SIGKILLed")
1251
+ except ProcessLookupError:
1252
+ pass
1253
+ except Exception:
1254
+ pass
1255
+ _kill_heartbeat_fork(target) # its detached heartbeat fork won't die with the pid
1256
+ _log(f"GHOST {target}: killed stop-ghost pid {pid} (desired=stopped, heartbeat stale)")
1257
+ return True
1258
+
1259
+
1260
+ def _do_stopped_ghost_sweep(api_key: str, host_id: str) -> int:
1261
+ """GHOST-TERMINAL kill sweep (task 91201315; Ian's live repro: back-2 pid 26078,
1262
+ hostd logging '0 stopped' forever). The hole: on Stop the agent exits its loop
1263
+ cleanly, clears its instance_id and STOPS heartbeating — and BOTH kill RPCs
1264
+ (mc_agents_to_stop, mc_agents_to_force_kill) gate on last_heartbeat < 90s, so
1265
+ once the heartbeat goes stale the still-alive claude process is INVISIBLE to
1266
+ every RPC sweep and its terminal window stays open with claude running.
1267
+
1268
+ Sweep the ROSTER instead (mc_host_config_get — no fresh-heartbeat gate): every
1269
+ desired_state='stopped' agent whose heartbeat is STALE or absent (>=90s; fresh
1270
+ ones stay with the RPC sweeps so the cooperative must_exit gets first crack)
1271
+ gets PID discovery; any live process found IS a ghost — desired says stopped,
1272
+ nothing legit can be running. Kill via _term_ghost_pid (SIGTERM, no killpg) so
1273
+ the launcher bash survives, sees rc=143 and closes its own window; then the
1274
+ dead-window reap below mops up any '[Process completed]' leftovers (SIGKILL
1275
+ path / wrappers generated before the rc=143 fix).
1276
+
1277
+ NEVER touches a hand-opened session: discovery matches ONLY 'meshcode run
1278
+ <target>' cmdlines (token-safe) or direct children of the target's own
1279
+ hostd launcher .command — a human `claude --resume` in a plain tab can't
1280
+ match either. Returns number of ghosts killed."""
1281
+ cfg = _rpc("mc_host_config_get", {"p_api_key": api_key, "p_host_id": host_id})
1282
+ if not cfg or not cfg.get("ok"):
1283
+ return 0
1284
+ st = _load_state()
1285
+ pids = st.get("headless_pids") or {}
1286
+ n = 0
1287
+ for a in (cfg.get("agents") or []):
1288
+ if a.get("desired_state") != "stopped":
1289
+ continue
1290
+ hb = a.get("heartbeat_age_s")
1291
+ if hb is not None and hb < 90:
1292
+ continue # fresh heartbeat -> mc_agents_to_stop/_force_kill own this window
1293
+ proj, agent = a.get("project_name"), a.get("name")
1294
+ if not proj or not agent:
1295
+ continue
1296
+ target = f"{proj}/{agent}"
1297
+ killed_here = 0
1298
+ for pid in _discover_agent_pids(target):
1299
+ if _term_ghost_pid(target, pid):
1300
+ killed_here += 1
1301
+ if not killed_here:
1302
+ continue
1303
+ n += killed_here
1304
+ pids.pop(target, None)
1305
+ # The human pressed Stop — finish the job like _do_force_kills does: brief
1306
+ # settle, then close any window left dead (SIGKILL fallback / old wrapper).
1307
+ try:
1308
+ time.sleep(1.5)
1309
+ _wins = _list_dead_terminal_windows(target)
1310
+ if _wins:
1311
+ _close_dead_terminal_windows(target, _wins)
1312
+ except Exception:
1313
+ pass
1314
+ if n:
1315
+ st["headless_pids"] = pids
1316
+ _save_state(st)
1317
+ return n
1318
+
1319
+
1058
1320
  # 38523a98 Gap 1: explicit human force-kill of VISIBLE agents. ENABLED in 2.11.112 (task fa11ff48):
1059
1321
  # Samuel-blessed + 0 false-positives verified (backend2 pre-check + empty owner-scoped queue on our box).
1060
1322
  # _REAP_DRYRUN (autonomous reaper, below) stays True — SEPARATE gate, needs its own 0-FP + Samuel OK.
@@ -1907,11 +2169,14 @@ def cmd_hostd(args: list) -> int:
1907
2169
  ver_recycled = _do_version_recycles(api_key, host_id)
1908
2170
  stopped = _do_stops(api_key, host_id)
1909
2171
  force_killed = _do_force_kills(api_key, host_id) # 38523a98 Gap1: visible explicit human stop
2172
+ # 91201315: stopped agents whose instance/heartbeat already cleared —
2173
+ # invisible to both RPC sweeps above (their <90s heartbeat gate).
2174
+ ghost_killed = _do_stopped_ghost_sweep(api_key, host_id)
1910
2175
  reaped = _do_reap(api_key, host_id) # 38523a98: kill ghosts/dup-PIDs/crashed-orphans
1911
2176
  _gc_headless_pids() # cb90b058: drop dead PIDs (stale entry can't mask a live agent)
1912
2177
  _up = int(time.monotonic() - _spawn_mono)
1913
- if relaunched or recycled or ver_recycled or stopped or enforced or reaped or force_killed:
1914
- _log(f"sweep done (uptime={_up}s) — {relaunched} respawned, {recycled} recycled, {ver_recycled} version-recycled, {stopped} stopped, {force_killed} force-killed, {enforced} recycle-enforced, {reaped} reaped")
2178
+ if relaunched or recycled or ver_recycled or stopped or enforced or reaped or force_killed or ghost_killed:
2179
+ _log(f"sweep done (uptime={_up}s) — {relaunched} respawned, {recycled} recycled, {ver_recycled} version-recycled, {stopped} stopped, {force_killed} force-killed, {ghost_killed} ghost-killed, {enforced} recycle-enforced, {reaped} reaped")
1915
2180
  elif time.monotonic() - _last_alive_log >= 60:
1916
2181
  _log(f"alive — uptime={_up}s")
1917
2182
  _last_alive_log = time.monotonic()
@@ -224,11 +224,14 @@ def _spawn_terminal_macos(cmd: str) -> tuple[bool, str]:
224
224
  _run = cmd
225
225
  lines.append(_run)
226
226
  lines.append('MC_RC=$?')
227
- # Close THIS Terminal window ONLY on a clean exit (recycle/stop => 0). On a CRASH
228
- # (non-zero) leave it OPEN so the scrollback is available for debugging (commander
229
- # condition: debugging > clean window). The own-$MC_TTY filter closes ONLY this
230
- # window never another agent's. saving no => no "close?" prompt. macOS-only path.
231
- lines.append('if [ "$MC_RC" = "0" ] && [ -n "$MC_TTY" ]; then')
227
+ # Close THIS Terminal window ONLY on a clean exit: rc=0 (recycle/stop self-exit)
228
+ # OR rc=143 (SIGTERM hostd's stop/ghost kill sweep ended the agent because the
229
+ # human pressed Stop, task 91201315; that's the job FINISHING, not a crash). Any
230
+ # OTHER rc is a crash -> leave the window OPEN so the scrollback is available for
231
+ # debugging (commander condition: debugging > clean window). The own-$MC_TTY
232
+ # filter closes ONLY this window — never another agent's. saving no => no
233
+ # "close?" prompt. macOS-only path.
234
+ lines.append('if { [ "$MC_RC" = "0" ] || [ "$MC_RC" = "143" ]; } && [ -n "$MC_TTY" ]; then')
232
235
  lines.append(
233
236
  " /usr/bin/osascript"
234
237
  " -e 'tell application \"Terminal\"'"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshcode
3
- Version: 2.11.125
3
+ Version: 2.11.127
4
4
  Summary: Real-time communication between AI agents — Supabase-backed CLI
5
5
  Author-email: MeshCode <hello@meshcode.io>
6
6
  License: MIT
@@ -90,5 +90,6 @@ tests/test_setup_path.py
90
90
  tests/test_sleep_signals.py
91
91
  tests/test_status_enum_coverage.py
92
92
  tests/test_stay_on_loop_hook.py
93
+ tests/test_stop_ghost_terminal.py
93
94
  tests/test_swarm_events.py
94
95
  tests/test_wait_open_tasks_contradiction.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meshcode"
7
- version = "2.11.125"
7
+ version = "2.11.127"
8
8
  description = "Real-time communication between AI agents — Supabase-backed CLI"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -0,0 +1,211 @@
1
+ """
2
+ Stop-Ghost-Terminal Regression Tests (task 91201315)
3
+ =====================================================
4
+ Bug (Ian live repro 2026-06-10, back-2 pid 26078): dashboard Stop -> agent
5
+ exits its loop cleanly, clears instance_id and stops heartbeating BEFORE the
6
+ hostd sweep -> mc_agents_to_stop / mc_agents_to_force_kill (both gate on
7
+ last_heartbeat < 90s) never return it -> hostd logs '0 stopped' forever and
8
+ the Terminal window stays open with a live claude.
9
+
10
+ Fix under test:
11
+ 1. hostd._do_stopped_ghost_sweep — roster-based sweep (mc_host_config_get)
12
+ for desired_state='stopped' + stale heartbeat, wired into the poll loop.
13
+ 2. hostd._discover_launcher_child_pids — finds the agent process under its
14
+ launcher bash (on POSIX `meshcode run` execvp's claude, so the old
15
+ `pgrep -f "meshcode run <target>"` sees nothing for visible agents).
16
+ 3. hostd._term_ghost_pid — SIGTERM the agent pid WITHOUT killpg so the
17
+ launcher bash survives to see rc=143.
18
+ 4. protocol_handler wrapper — rc=143 (SIGTERM) closes the window like a
19
+ clean exit; any other non-zero rc still leaves it open for debugging.
20
+
21
+ Safety invariants (commander conditions):
22
+ - NEVER kill a session the human opened by hand (`claude --resume` in a
23
+ plain tab): discovery only matches `meshcode run <target>` cmdlines or
24
+ direct children of the target's own launcher .command.
25
+ - Token-safe target matching: 'back' must never match 'back-2'.
26
+
27
+ No live DB or processes needed: pure-function tests with subprocess mocked,
28
+ plus source-pattern checks (pattern of tests/test_security_regressions.py).
29
+
30
+ Usage:
31
+ pytest tests/test_stop_ghost_terminal.py -v
32
+ """
33
+
34
+ import inspect
35
+ import re
36
+ import sys
37
+ import types
38
+ from pathlib import Path
39
+
40
+ sys.path.insert(0, str(Path(__file__).parent.parent))
41
+
42
+ from meshcode import hostd, protocol_handler # noqa: E402
43
+
44
+
45
+ def _code_only(func) -> str:
46
+ """Source of `func` with the docstring and #-comments stripped, so the
47
+ assertions below test the CODE, not prose that mentions the same names."""
48
+ src = inspect.getsource(func)
49
+ src = re.sub(r'""".*?"""', "", src, count=1, flags=re.DOTALL)
50
+ return "\n".join(ln.split("#", 1)[0] for ln in src.splitlines())
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # 1. Launcher-child discovery (the execvp'd-claude hole)
55
+ # ---------------------------------------------------------------------------
56
+
57
+ # Shaped like real `ps -axo pid=,ppid=,args=` output from the live repro box:
58
+ # back-2 ghost tree + a SIBLING agent 'back' launcher (prefix trap) + a claude
59
+ # the human opened by hand in a plain zsh tab (must NEVER be discovered).
60
+ _FAKE_PS = """\
61
+ 26061 26060 login -pf user
62
+ 26075 26061 /bin/bash /Users/u/.meshcode/launchers/mesh-dev_back-2.command
63
+ 26078 26075 claude --dangerously-skip-permissions -- boot
64
+ 26140 26078 /usr/bin/python3 -m meshcode.meshcode_mcp serve
65
+ 30001 26061 /bin/bash /Users/u/.meshcode/launchers/mesh-dev_back.command
66
+ 30002 30001 claude --dangerously-skip-permissions -- boot
67
+ 40001 40000 -zsh
68
+ 40002 40001 claude --resume abc123
69
+ 50001 50000 vim notes.txt
70
+ """
71
+
72
+
73
+ def _patch_ps(monkeypatch, stdout=_FAKE_PS):
74
+ def fake_run(cmd, **kw):
75
+ assert cmd[0] == "ps", f"unexpected subprocess in discovery: {cmd}"
76
+ return types.SimpleNamespace(stdout=stdout, returncode=0)
77
+ monkeypatch.setattr(hostd.subprocess, "run", fake_run)
78
+
79
+
80
+ class TestLauncherChildDiscovery:
81
+ def test_finds_claude_child_of_target_launcher(self, monkeypatch):
82
+ _patch_ps(monkeypatch)
83
+ assert hostd._discover_launcher_child_pids("mesh-dev/back-2") == [26078]
84
+
85
+ def test_token_safe_back_does_not_match_back2(self, monkeypatch):
86
+ _patch_ps(monkeypatch)
87
+ # 'mesh-dev/back' must resolve to ITS OWN launcher child (30002),
88
+ # never to back-2's tree — the 24e3dd44 prefix-kill class of bug.
89
+ assert hostd._discover_launcher_child_pids("mesh-dev/back") == [30002]
90
+
91
+ def test_never_matches_hand_opened_claude_resume(self, monkeypatch):
92
+ _patch_ps(monkeypatch)
93
+ for target in ("mesh-dev/back-2", "mesh-dev/back"):
94
+ kids = hostd._discover_launcher_child_pids(target)
95
+ assert 40002 not in kids, "human `claude --resume` session would be killed!"
96
+
97
+ def test_grandchild_mcp_serve_not_returned(self, monkeypatch):
98
+ # SIGTERM goes to the claude child only; the mcp-serve grandchild exits
99
+ # with its stdio. Returning it would double-kill into the wrong layer.
100
+ _patch_ps(monkeypatch)
101
+ assert 26140 not in hostd._discover_launcher_child_pids("mesh-dev/back-2")
102
+
103
+ def test_empty_on_ps_failure(self, monkeypatch):
104
+ def boom(cmd, **kw):
105
+ raise OSError("no ps")
106
+ monkeypatch.setattr(hostd.subprocess, "run", boom)
107
+ assert hostd._discover_launcher_child_pids("mesh-dev/back-2") == []
108
+
109
+ def test_discover_agent_pids_includes_launcher_children(self, monkeypatch):
110
+ """_discover_agent_pids (used by _do_stops/_do_force_kills/_do_reap too)
111
+ must now see the execvp'd claude via the launcher path."""
112
+ def fake_run(cmd, **kw):
113
+ if cmd[0] == "pgrep": # old path: nothing post-execvp
114
+ return types.SimpleNamespace(stdout="", returncode=1)
115
+ return types.SimpleNamespace(stdout=_FAKE_PS, returncode=0)
116
+ monkeypatch.setattr(hostd.subprocess, "run", fake_run)
117
+ assert 26078 in hostd._discover_agent_pids("mesh-dev/back-2")
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # 2. _term_ghost_pid kill semantics
122
+ # ---------------------------------------------------------------------------
123
+
124
+ class TestTermGhostPid:
125
+ def test_no_killpg_ever(self):
126
+ """killpg would take the launcher bash down with the agent — the bash must
127
+ survive to see rc=143 and close its own window."""
128
+ assert "killpg" not in _code_only(hostd._term_ghost_pid)
129
+
130
+ def test_skips_unreadable_or_mismatched_cmdline(self, monkeypatch):
131
+ monkeypatch.setattr(hostd, "_pid_cmdline", lambda p: "")
132
+ sent = []
133
+ monkeypatch.setattr(hostd.os, "kill", lambda *a: sent.append(a))
134
+ monkeypatch.setattr(hostd.sys, "platform", "darwin")
135
+ assert hostd._term_ghost_pid("mesh-dev/back-2", 12345) is False
136
+ assert sent == [], "killed a pid whose cmdline could not be verified"
137
+
138
+ def test_sigterm_then_done_when_process_exits(self, monkeypatch):
139
+ monkeypatch.setattr(hostd.sys, "platform", "darwin")
140
+ monkeypatch.setattr(hostd, "_pid_cmdline",
141
+ lambda p: "claude --dangerously-skip-permissions -- boot")
142
+ monkeypatch.setattr(hostd.time, "sleep", lambda s: None)
143
+ monkeypatch.setattr(hostd, "_kill_heartbeat_fork", lambda t: None)
144
+ calls = []
145
+
146
+ def fake_kill(pid, sig):
147
+ calls.append((pid, sig))
148
+ if sig == 0: # liveness probe: already gone
149
+ raise ProcessLookupError()
150
+ monkeypatch.setattr(hostd.os, "kill", fake_kill)
151
+ assert hostd._term_ghost_pid("mesh-dev/back-2", 26078) is True
152
+ assert (26078, hostd._signal.SIGTERM) in calls
153
+ assert (26078, hostd._signal.SIGKILL) not in calls
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # 3. Ghost sweep gating + loop wiring (source checks)
158
+ # ---------------------------------------------------------------------------
159
+
160
+ class TestGhostSweep:
161
+ SRC = None
162
+
163
+ @classmethod
164
+ def setup_class(cls):
165
+ cls.SRC = _code_only(hostd._do_stopped_ghost_sweep)
166
+
167
+ def test_only_desired_state_stopped(self):
168
+ assert '"stopped"' in self.SRC and "desired_state" in self.SRC
169
+
170
+ def test_fresh_heartbeat_left_to_rpc_sweeps(self):
171
+ # < 90s heartbeat = the cooperative must_exit / RPC kill paths own it.
172
+ assert re.search(r"heartbeat_age_s", self.SRC) and "90" in self.SRC
173
+
174
+ def test_uses_roster_not_heartbeat_gated_rpc(self):
175
+ assert "mc_host_config_get" in self.SRC
176
+ assert "mc_agents_to_stop" not in self.SRC
177
+
178
+ def test_wired_into_poll_loop(self):
179
+ full = Path(hostd.__file__).read_text(encoding="utf-8", errors="replace")
180
+ m = re.search(r"_do_stopped_ghost_sweep\(api_key,\s*host_id\)", full)
181
+ assert m, "_do_stopped_ghost_sweep is defined but never called in the poll loop"
182
+
183
+ def test_sweep_runs_dead_window_reap(self):
184
+ assert "_close_dead_terminal_windows" in self.SRC
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # 4. Wrapper rc=143 clean-close
189
+ # ---------------------------------------------------------------------------
190
+
191
+ class TestWrapperRc143:
192
+ SRC = None
193
+
194
+ @classmethod
195
+ def setup_class(cls):
196
+ cls.SRC = Path(protocol_handler.__file__).read_text(encoding="utf-8",
197
+ errors="replace")
198
+
199
+ def test_close_condition_accepts_0_and_143(self):
200
+ m = re.search(r"lines\.append\('if (.+?)'\)", self.SRC)
201
+ assert m, "wrapper close-condition line not found"
202
+ cond = m.group(1)
203
+ assert '"$MC_RC" = "0"' in cond
204
+ assert '"$MC_RC" = "143"' in cond
205
+
206
+ def test_other_nonzero_rcs_stay_open(self):
207
+ m = re.search(r"lines\.append\('if (.+?)'\)", self.SRC)
208
+ cond = m.group(1)
209
+ # Only the two literal values may close the window — no wildcard/negation.
210
+ assert "!=" not in cond and "-ne" not in cond
211
+ assert re.findall(r'"\$MC_RC" = "(\d+)"', cond) == ["0", "143"]
File without changes
File without changes
File without changes
File without changes