meshcode 2.11.170__tar.gz → 2.11.172__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.170 → meshcode-2.11.172}/PKG-INFO +1 -1
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/__init__.py +1 -1
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/claude_update.py +51 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/hostd.py +41 -8
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/server.py +73 -10
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/SOURCES.txt +1 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/pyproject.toml +1 -1
- {meshcode-2.11.170 → meshcode-2.11.172}/README.md +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/__main__.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/_launch_smoke.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/_session_handoff_template.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/_update_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/cli.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/comms_v4.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/compat.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/daemon.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/doctor.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/hooks/push_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/invites.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/launcher.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/backend.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/realtime.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/preferences.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/protocol_handler.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/run_agent.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/secrets.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/self_update.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/terminal_mirror_runner.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/up.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode/upload.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/setup.cfg +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_auto_update_hardening.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_autonomous_closegap_1.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_autonomous_closegap_2.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_autonomous_closegap_3.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_autonomous_prompt_inject.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_boot_bug_regression.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_color_truecolor.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_core.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_cross_agent_messaging.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_date_parse.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_doctor.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_ensure_boot_env_urgent_wake.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_epistemic_v1_python_sdk.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_epistemic_v1_stop_conditions.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_esc_deaf_state.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_exceptions.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_file_upload.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_fleet_reaper.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_hostd_launch_pinned_env.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_hostd_serve_discovery_split.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_hostd_zombie_sessions.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_init_device_code.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_install_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_launch_smoke.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_lease_sigterm_release.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_live_mesh_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_mark_read_batch.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_marketplace_ratings.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_migration_integrity.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_no_appleevents_on_sweep.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_preflight_hb_gate.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_pretrust_claude.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_prompt_dedup_budget.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_push_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_realtime_event_freshness.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_replica_base_workspace_fallback.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_replica_boot_protocol_unconditional.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_rls_cross_tenant.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_rm_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_rpc_grants.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_rpc_migrations.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_run_agent_dry_run.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_run_agent_no_server_import.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_security_regressions.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_self_update_user_site.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_sentinel.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_session_replay_gate.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_setup_path.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_sleep_signals.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_status_enum_coverage.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_stay_on_loop_hook.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_stop_ghost_terminal.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_task_progress.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_terminal_lifecycle.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_up_launch_cmd.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_update_guard.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_urgent_wake_tmux.py +0 -0
- {meshcode-2.11.170 → meshcode-2.11.172}/tests/test_wait_open_tasks_contradiction.py +0 -0
|
@@ -45,6 +45,34 @@ INSTALL_TIMEOUT_SEC = 90 # npm i -g can be slow on cold-cache machines
|
|
|
45
45
|
STATE_DIR = Path.home() / ".meshcode"
|
|
46
46
|
LOG_PATH = STATE_DIR / "claude_update.log"
|
|
47
47
|
|
|
48
|
+
# --- P0 launch-latency (task 9fb81ff5 / 1d4276c3): never run a doomed global install ---
|
|
49
|
+
SKIP_STATE = STATE_DIR / "claude_update_skip.json"
|
|
50
|
+
NEG_CACHE_SEC = 6 * 3600 # a failed target version is not retried for 6h
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _global_prefix_writable(npm: str) -> bool:
|
|
54
|
+
"""True if `npm i -g` can succeed (global node_modules writable).
|
|
55
|
+
|
|
56
|
+
On boxes where /usr/local/lib/node_modules is root-owned, every blocking
|
|
57
|
+
global install hits EACCES — retried on EVERY launch, it can never
|
|
58
|
+
succeed (446 doomed attempts observed in claude_update.log, 2026-07-03).
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
r = subprocess.run([npm, "prefix", "-g"], capture_output=True, text=True, timeout=5)
|
|
62
|
+
root = os.path.join((r.stdout or "").strip(), "lib", "node_modules")
|
|
63
|
+
probe = os.path.join(root, NPM_PKG.split("/")[0]) # @anthropic-ai scope dir
|
|
64
|
+
return os.access(probe if os.path.isdir(probe) else root, os.W_OK)
|
|
65
|
+
except Exception:
|
|
66
|
+
return True # fail-open: unknown -> let npm try once
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _mark_skip(version: str, reason: str) -> None:
    """Record a failed/doomed target version so it is not retried for 6h.

    Writes a JSON object with ``version``, ``ts`` (current ``time.time()``)
    and ``reason`` to SKIP_STATE. The parent directory is created first, so
    the marker also persists on a machine where the state directory does not
    exist yet. The payload is written to a per-process temp file and then
    renamed over SKIP_STATE, so a concurrent reader never sees a half-written
    file.

    Best-effort: every failure is swallowed, because a missing marker only
    costs one extra install attempt on the next launch.

    Args:
        version: The target version that failed or can never be installed.
        reason: Short machine-readable cause (e.g. "eacces", "timeout").
    """
    tmp = None
    try:
        payload = json.dumps({"version": version, "ts": time.time(), "reason": reason})
        SKIP_STATE.parent.mkdir(parents=True, exist_ok=True)
        tmp = SKIP_STATE.with_name(f"{SKIP_STATE.name}.{os.getpid()}.tmp")
        tmp.write_text(payload)
        tmp.replace(SKIP_STATE)
    except Exception:
        # Deliberately silent; just avoid leaving a stray temp file behind.
        if tmp is not None:
            try:
                tmp.unlink()
            except OSError:
                pass
|
|
75
|
+
|
|
48
76
|
|
|
49
77
|
# ============================================================
|
|
50
78
|
# Skip-condition helpers
|
|
@@ -214,6 +242,27 @@ def check_and_maybe_update_claude_blocking(
|
|
|
214
242
|
)
|
|
215
243
|
return None
|
|
216
244
|
|
|
245
|
+
# negative cache: same target version failed <6h ago -> skip silently
|
|
246
|
+
try:
|
|
247
|
+
st = json.loads(SKIP_STATE.read_text())
|
|
248
|
+
if st.get("version") == latest and time.time() - st.get("ts", 0) < NEG_CACHE_SEC:
|
|
249
|
+
return None
|
|
250
|
+
except Exception:
|
|
251
|
+
pass
|
|
252
|
+
|
|
253
|
+
if not _global_prefix_writable(npm):
|
|
254
|
+
# global install can NEVER succeed (EACCES) -> route latest via npx instead
|
|
255
|
+
# (run_agent npx launch path picks this env up; user npm cache, no root needed)
|
|
256
|
+
os.environ["_MESHCODE_NPX_CLAUDE_VERSION"] = latest
|
|
257
|
+
_mark_skip(latest, "eacces")
|
|
258
|
+
if verbose:
|
|
259
|
+
print(
|
|
260
|
+
f"[meshcode] WARN: npm global prefix not writable; using npx claude@{latest}. "
|
|
261
|
+
f"Permanent fix: sudo chown -R $(whoami) /usr/local/lib/node_modules",
|
|
262
|
+
file=sys.stderr,
|
|
263
|
+
)
|
|
264
|
+
return None
|
|
265
|
+
|
|
217
266
|
if verbose:
|
|
218
267
|
print(
|
|
219
268
|
f"[meshcode] Upgrading claude {current} -> {latest} (blocking, can take ~30s)...",
|
|
@@ -234,6 +283,7 @@ def check_and_maybe_update_claude_blocking(
|
|
|
234
283
|
if verbose:
|
|
235
284
|
print(f"[meshcode] claude upgraded to {latest}", file=sys.stderr)
|
|
236
285
|
return latest
|
|
286
|
+
_mark_skip(latest, f"exit_{proc.returncode}")
|
|
237
287
|
if verbose:
|
|
238
288
|
print(
|
|
239
289
|
f"[meshcode] WARN: npm install exit {proc.returncode}; "
|
|
@@ -242,6 +292,7 @@ def check_and_maybe_update_claude_blocking(
|
|
|
242
292
|
)
|
|
243
293
|
return None
|
|
244
294
|
except subprocess.TimeoutExpired:
|
|
295
|
+
_mark_skip(latest, "timeout")
|
|
245
296
|
if verbose:
|
|
246
297
|
print(
|
|
247
298
|
f"[meshcode] WARN: npm install timed out after {timeout_sec}s; "
|
|
@@ -525,8 +525,27 @@ def _api_key() -> Optional[str]:
|
|
|
525
525
|
return None
|
|
526
526
|
|
|
527
527
|
|
|
528
|
+
# P0 launch-latency FIX B (task 9fb81ff5 / 1d4276c3): during a DB degradation
|
|
529
|
+
# (e.g. PostgREST pool exhaustion, 2026-07-03) each sweep runs 5-7 sequential
|
|
530
|
+
# RPCs; at the old hard timeout=15 a fully-degraded sweep took 75-105s and
|
|
531
|
+
# Launch clicks sat unhonored for minutes. 6s is >> p99 healthy RPC latency
|
|
532
|
+
# and cuts the worst-case sweep to ~42s. Override via MESHCODE_HOSTD_RPC_TIMEOUT.
|
|
533
|
+
try:
|
|
534
|
+
_RPC_TIMEOUT = int(os.environ.get("MESHCODE_HOSTD_RPC_TIMEOUT", "6"))
|
|
535
|
+
except ValueError:
|
|
536
|
+
_RPC_TIMEOUT = 6
|
|
537
|
+
|
|
538
|
+
# Consecutive _rpc transport failures (timeout / unreachable / None). Reset on
|
|
539
|
+
# success OR HTTPError (an HTTP error proves the cloud is reachable — that's a
|
|
540
|
+
# server-side refusal, not degradation). At >=3 the sweep loop skips its
|
|
541
|
+
# non-critical phases (stops / force-kills / ghost sweep / reap) so heartbeat +
|
|
542
|
+
# respawns — the paths that honor an explicit human Launch — stay first-class.
|
|
543
|
+
_rpc_fail_streak = 0
|
|
544
|
+
|
|
545
|
+
|
|
528
546
|
def _rpc(fn: str, payload: dict) -> Optional[dict]:
|
|
529
547
|
"""Call a PostgREST RPC. Returns parsed JSON or None on any failure."""
|
|
548
|
+
global _rpc_fail_streak
|
|
530
549
|
url, key = _supabase_cfg()
|
|
531
550
|
if not url or not key:
|
|
532
551
|
_log("WARN: SUPABASE_URL/KEY not set — cannot reach cloud")
|
|
@@ -542,12 +561,15 @@ def _rpc(fn: str, payload: dict) -> Optional[dict]:
|
|
|
542
561
|
},
|
|
543
562
|
method="POST",
|
|
544
563
|
)
|
|
545
|
-
with urllib.request.urlopen(req, timeout=
|
|
546
|
-
|
|
564
|
+
with urllib.request.urlopen(req, timeout=_RPC_TIMEOUT) as resp:
|
|
565
|
+
out = json.loads(resp.read().decode("utf-8"))
|
|
566
|
+
_rpc_fail_streak = 0
|
|
567
|
+
return out
|
|
547
568
|
except urllib.error.HTTPError as e:
|
|
548
569
|
# task 89d50a14 [E]: log the PostgREST error BODY, not just the status line.
|
|
549
570
|
# mc_log_respawn_event failed 400 for DAYS and the cause (42804 host_id
|
|
550
571
|
# uuid-vs-text) was undiagnosable from "HTTP Error 400: Bad Request" alone.
|
|
572
|
+
_rpc_fail_streak = 0 # cloud reachable — server refusal, not degradation
|
|
551
573
|
try:
|
|
552
574
|
body = e.read().decode("utf-8", "replace").strip()[:300]
|
|
553
575
|
except Exception:
|
|
@@ -555,6 +577,7 @@ def _rpc(fn: str, payload: dict) -> Optional[dict]:
|
|
|
555
577
|
_log(f"WARN: rpc {fn} failed: {e}" + (f" — {body}" if body else ""))
|
|
556
578
|
return None
|
|
557
579
|
except Exception as e:
|
|
580
|
+
_rpc_fail_streak += 1
|
|
558
581
|
_log(f"WARN: rpc {fn} failed: {e}")
|
|
559
582
|
return None
|
|
560
583
|
|
|
@@ -3346,12 +3369,22 @@ def cmd_hostd(args: list) -> int:
|
|
|
3346
3369
|
# always run on a stale-running host without manual intervention.
|
|
3347
3370
|
_maybe_self_restart_on_version_drift()
|
|
3348
3371
|
relaunched = _do_respawns(api_key, host_id)
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
#
|
|
3352
|
-
#
|
|
3353
|
-
|
|
3354
|
-
|
|
3372
|
+
# P0 FIX B (task 9fb81ff5 / 1d4276c3): degraded-sweep short-circuit.
|
|
3373
|
+
# Heartbeat + respawns above ALWAYS run — they honor an explicit
|
|
3374
|
+
# human Launch. When the cloud is degraded (>=3 consecutive _rpc
|
|
3375
|
+
# transport failures), skip the non-critical phases this sweep so
|
|
3376
|
+
# a fully-degraded pass stays ~2 RPCs instead of 5-7 × timeout.
|
|
3377
|
+
# Stops/kills/reaps resume on the next healthy sweep.
|
|
3378
|
+
if _rpc_fail_streak >= 3:
|
|
3379
|
+
_log(f"sweep degraded (rpc_fail_streak={_rpc_fail_streak}) — skipping non-critical phases (stops/force-kills/ghost-sweep/reap)")
|
|
3380
|
+
stopped = force_killed = ghost_killed = reaped = 0
|
|
3381
|
+
else:
|
|
3382
|
+
stopped = _do_stops(api_key, host_id)
|
|
3383
|
+
force_killed = _do_force_kills(api_key, host_id) # 38523a98 Gap1: visible explicit human stop
|
|
3384
|
+
# 91201315: stopped agents whose instance/heartbeat already cleared —
|
|
3385
|
+
# invisible to both RPC sweeps above (their <90s heartbeat gate).
|
|
3386
|
+
ghost_killed = _do_stopped_ghost_sweep(api_key, host_id)
|
|
3387
|
+
reaped = _do_reap(api_key, host_id) # 38523a98: kill ghosts/dup-PIDs/crashed-orphans
|
|
3355
3388
|
_gc_headless_pids() # cb90b058: drop dead PIDs (stale entry can't mask a live agent)
|
|
3356
3389
|
_gc_stop_markers() # a4001d59: stale stop-markers can't flip a later crash into a silent close
|
|
3357
3390
|
_up = int(time.monotonic() - _spawn_mono)
|
|
@@ -1138,6 +1138,10 @@ def with_working_status(func):
|
|
|
1138
1138
|
_check_hot_reload()
|
|
1139
1139
|
_capture_session() # stash session on first tool call for silent auto-wake
|
|
1140
1140
|
_touch_active_bg() # foreground last_active_at stamp (covers wait too, before skip)
|
|
1141
|
+
if not skip: # meshcode_wait stays ungated (read-only, must always run)
|
|
1142
|
+
_blocked = await _lease_gate_async(name)
|
|
1143
|
+
if _blocked is not None:
|
|
1144
|
+
return _blocked
|
|
1141
1145
|
if not skip:
|
|
1142
1146
|
global _CONSECUTIVE_IDLE_SECONDS
|
|
1143
1147
|
_CONSECUTIVE_IDLE_SECONDS = 0 # any non-wait tool resets idle timer
|
|
@@ -1186,6 +1190,10 @@ def with_working_status(func):
|
|
|
1186
1190
|
_check_hot_reload()
|
|
1187
1191
|
_capture_session() # stash session on first tool call for silent auto-wake
|
|
1188
1192
|
_touch_active_bg() # foreground last_active_at stamp (covers wait too, before skip)
|
|
1193
|
+
if not skip:
|
|
1194
|
+
_blocked = _lease_gate_sync(name)
|
|
1195
|
+
if _blocked is not None:
|
|
1196
|
+
return _blocked
|
|
1189
1197
|
if not skip:
|
|
1190
1198
|
global _CONSECUTIVE_IDLE_SECONDS
|
|
1191
1199
|
_CONSECUTIVE_IDLE_SECONDS = 0 # any non-wait tool resets idle timer
|
|
@@ -1329,6 +1337,40 @@ _INSTANCE_ID = f"mcp-{_uuid.uuid4().hex[:12]}"
|
|
|
1329
1337
|
# false-terminate a legitimately-detached process.
|
|
1330
1338
|
_BOOT_PPID = os.getppid()
|
|
1331
1339
|
|
|
1340
|
+
# ---- Single-instance lease GATE (fix f43cf72f, sec bba8e9d0) ---------------
|
|
1341
|
+
# The lease acquire is deferred off the MCP handshake path (see lifespan +
|
|
1342
|
+
# run_server changes below) so `initialize` answers in <1s instead of blocking
|
|
1343
|
+
# on contended lease RPCs (3x sb_rpc retries x 10s socket timeout). But tools
|
|
1344
|
+
# must NOT dispatch before the lease is held, or a second live instance /
|
|
1345
|
+
# a tombstoned agent could mutate shared state (split-brain / tombstone-bypass).
|
|
1346
|
+
# _LEASE_ACQUIRED is set() ONLY after a genuine acquire; every tool waits on it.
|
|
1347
|
+
_LEASE_ACQUIRED = _threading.Event() # module-level alias (L1024); no plain `import threading` at module scope
|
|
1348
|
+
_LEASE_GATE_TIMEOUT = 30.0 # max seconds a tool blocks waiting for the lease
|
|
1349
|
+
|
|
1350
|
+
def _lease_gate_sync(tool_name: str):
    """Hold a synchronous tool call until the single-instance lease is held.

    Returns None as soon as _LEASE_ACQUIRED is set, waiting at most
    _LEASE_GATE_TIMEOUT seconds for it. If the event is still unset after
    that wait, returns an error dict (``error_code`` "lease_pending") that
    the caller hands back instead of running the tool. Per the original
    note, the process is os._exit-ing on conflict, so this tail is rare.

    Args:
        tool_name: Name of the gated tool, echoed back in the error dict.
    """
    # Fast path first; only fall into the blocking wait when the flag is unset.
    lease_held = _LEASE_ACQUIRED.is_set() or _LEASE_ACQUIRED.wait(timeout=_LEASE_GATE_TIMEOUT)
    if lease_held:
        return None
    return {
        "error": "single-instance lease not yet acquired — retry shortly",
        "error_code": "lease_pending",
        "tool": tool_name,
    }
|
|
1360
|
+
|
|
1361
|
+
async def _lease_gate_async(tool_name: str):
    """Async twin of _lease_gate_sync — never blocks the event loop.

    Polls _LEASE_ACQUIRED every 50 ms using ``asyncio.sleep`` rather than a
    blocking ``Event.wait``. Returns None once the event is set, or an error
    dict (``error_code`` "lease_pending") once more than _LEASE_GATE_TIMEOUT
    seconds of loop time have passed without it being set.

    Args:
        tool_name: Name of the gated tool, echoed back in the error dict.
    """
    if not _LEASE_ACQUIRED.is_set():
        loop = asyncio.get_running_loop()
        give_up_at = loop.time() + _LEASE_GATE_TIMEOUT
        while True:
            if _LEASE_ACQUIRED.is_set():
                break
            if loop.time() > give_up_at:
                return {
                    "error": "single-instance lease not yet acquired — retry shortly",
                    "error_code": "lease_pending",
                    "tool": tool_name,
                }
            # Yield to the loop between polls so other coroutines keep running.
            await asyncio.sleep(0.05)
    return None
|
|
1373
|
+
|
|
1332
1374
|
|
|
1333
1375
|
def _stdin_peer_dead() -> bool:
|
|
1334
1376
|
"""Non-destructively check whether stdin's peer has closed.
|
|
@@ -1389,7 +1431,7 @@ def _acquire_lease() -> bool:
|
|
|
1389
1431
|
"p_api_key": api_key,
|
|
1390
1432
|
"p_project_id": _PROJECT_ID,
|
|
1391
1433
|
"p_agent_name": AGENT_NAME,
|
|
1392
|
-
})
|
|
1434
|
+
}, _max_retries=1)
|
|
1393
1435
|
except Exception as e:
|
|
1394
1436
|
# Non-fatal: RPC might not exist on older servers.
|
|
1395
1437
|
_mc_log(f"stale-lease pre-clean skipped: {e}", "warn")
|
|
@@ -1400,7 +1442,7 @@ def _acquire_lease() -> bool:
|
|
|
1400
1442
|
"p_project_id": _PROJECT_ID,
|
|
1401
1443
|
"p_agent_name": AGENT_NAME,
|
|
1402
1444
|
"p_instance_id": _INSTANCE_ID,
|
|
1403
|
-
})
|
|
1445
|
+
}, _max_retries=1)
|
|
1404
1446
|
if isinstance(r, dict) and r.get("ok"):
|
|
1405
1447
|
global _CONSECUTIVE_IDLE_SECONDS
|
|
1406
1448
|
_CONSECUTIVE_IDLE_SECONDS = 0 # P6: reset idle counter on lease success
|
|
@@ -1453,7 +1495,7 @@ def _acquire_lease() -> bool:
|
|
|
1453
1495
|
"p_project_id": _PROJECT_ID,
|
|
1454
1496
|
"p_agent_name": AGENT_NAME,
|
|
1455
1497
|
"p_instance_id": _INSTANCE_ID,
|
|
1456
|
-
})
|
|
1498
|
+
}, _max_retries=1)
|
|
1457
1499
|
if isinstance(r2, dict) and r2.get("ok"):
|
|
1458
1500
|
_mc_log("Lease acquired after force-release.")
|
|
1459
1501
|
return True
|
|
@@ -2730,6 +2772,31 @@ async def lifespan(_app):
|
|
|
2730
2772
|
|
|
2731
2773
|
asyncio.create_task(_bg_realtime_start())
|
|
2732
2774
|
|
|
2775
|
+
# Single-instance lease — acquired OFF the handshake path (fix f43cf72f).
|
|
2776
|
+
# Mirrors the c0e7de87 deferral of Realtime/heartbeat: run the blocking
|
|
2777
|
+
# _acquire_lease() in a worker thread so the lifespan yields immediately and
|
|
2778
|
+
# `initialize` is answered fast; gate all tool dispatch on _LEASE_ACQUIRED
|
|
2779
|
+
# until it lands. Hard-exit (os._exit) on tombstone/kick or a genuine
|
|
2780
|
+
# single-instance conflict — before any gated tool can unblock.
|
|
2781
|
+
async def _bg_acquire_lease():
|
|
2782
|
+
try:
|
|
2783
|
+
ok = await asyncio.to_thread(_acquire_lease)
|
|
2784
|
+
except SystemExit as _se: # "kicked" tombstone path
|
|
2785
|
+
os._exit(_se.code if isinstance(_se.code, int) else 0)
|
|
2786
|
+
return
|
|
2787
|
+
except Exception as _le: # never fatal — degrade to no-lease, stay gated
|
|
2788
|
+
log.warning(f"[meshcode] bg lease acquire error (non-fatal): {_le}")
|
|
2789
|
+
return
|
|
2790
|
+
if not ok: # another live instance holds it
|
|
2791
|
+
try:
|
|
2792
|
+
sys.stderr.write("[meshcode-mcp] lease held by another live instance — exiting\n")
|
|
2793
|
+
sys.stderr.flush()
|
|
2794
|
+
except Exception:
|
|
2795
|
+
pass
|
|
2796
|
+
os._exit(2)
|
|
2797
|
+
_LEASE_ACQUIRED.set() # release the dispatch gate — ONLY on real acquire
|
|
2798
|
+
asyncio.create_task(_bg_acquire_lease())
|
|
2799
|
+
|
|
2733
2800
|
def _initial_heartbeat_bg():
|
|
2734
2801
|
"""Send first heartbeat + flip status to 'idle' in a daemon thread.
|
|
2735
2802
|
|
|
@@ -8262,13 +8329,9 @@ def run_server():
|
|
|
8262
8329
|
# only the lifespan re-runs. `SystemExit` is let through so `sys.exit`
|
|
8263
8330
|
# from config-validation paths still works. stdin EOF returns from
|
|
8264
8331
|
# `mcp.run()` normally, which breaks the loop.
|
|
8265
|
-
# Lease acquire
|
|
8266
|
-
#
|
|
8267
|
-
#
|
|
8268
|
-
# _kill_stale_mcp_process() is already called in the lifespan; only the
|
|
8269
|
-
# lease acquire needs an explicit call here before mcp.run().
|
|
8270
|
-
if not _acquire_lease():
|
|
8271
|
-
sys.exit(2)
|
|
8332
|
+
# Lease acquire is DEFERRED into the lifespan (_bg_acquire_lease, fix
|
|
8333
|
+
# f43cf72f) so it never blocks `initialize`. Tool dispatch is gated on
|
|
8334
|
+
# _LEASE_ACQUIRED; conflict/kick hard-exits from that background task.
|
|
8272
8335
|
|
|
8273
8336
|
import time as _time_mod
|
|
8274
8337
|
_restart_count = 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|