meshcode 2.11.140__tar.gz → 2.11.142__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.140 → meshcode-2.11.142}/PKG-INFO +1 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/__init__.py +1 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/comms_v4.py +89 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/hostd.py +46 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/backend.py +23 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/server.py +95 -20
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/run_agent.py +60 -27
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/self_update.py +64 -4
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/pyproject.toml +1 -1
- {meshcode-2.11.140 → meshcode-2.11.142}/README.md +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/__main__.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/_session_handoff_template.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/claude_update.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/cli.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/compat.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/daemon.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/doctor.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/helper_visuals.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/invites.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/launcher.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/realtime.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/swarm.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/meshcode_mcp/test_swarm.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/preferences.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/protocol_handler.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/secrets.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/up.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode/upload.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/SOURCES.txt +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/setup.cfg +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_auto_update_hardening.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_autonomous_closegap_1.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_autonomous_closegap_2.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_autonomous_closegap_3.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_autonomous_prompt_inject.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_boot_bug_regression.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_color_truecolor.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_core.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_cross_agent_messaging.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_date_parse.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_doctor.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_epistemic_v1_python_sdk.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_epistemic_v1_stop_conditions.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_esc_deaf_state.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_exceptions.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_file_upload.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_helper_visuals.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_hostd_zombie_sessions.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_init_device_code.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_install_guard.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_lease_sigterm_release.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_live_mesh_guard.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_mark_read_batch.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_marketplace_ratings.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_migration_integrity.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_pretrust_claude.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_realtime_event_freshness.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_rls_cross_tenant.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_rpc_grants.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_rpc_migrations.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_run_agent_dry_run.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_run_agent_no_server_import.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_security_regressions.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_self_update_user_site.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_sentinel.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_session_replay_gate.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_setup_path.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_sleep_signals.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_status_enum_coverage.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_stay_on_loop_hook.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_stop_ghost_terminal.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_swarm_events.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_task_progress.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_terminal_lifecycle.py +0 -0
- {meshcode-2.11.140 → meshcode-2.11.142}/tests/test_wait_open_tasks_contradiction.py +0 -0
|
@@ -2480,6 +2480,10 @@ AGENT CONTROL:
|
|
|
2480
2480
|
wake-all <proj> Print copy-paste --autonomous commands for every offline agent
|
|
2481
2481
|
disconnect <proj> <name> Graceful disconnect
|
|
2482
2482
|
whoami Show logged-in identity
|
|
2483
|
+
|
|
2484
|
+
SWARM (agent replicas):
|
|
2485
|
+
replicate <agent> --count N [--no-launch] Clone agent into <agent>-1..N (launches by default)
|
|
2486
|
+
replica-power <group_id> running|stopped Stop/Start a whole replica group [--restart]
|
|
2483
2487
|
profile [agent] Show/set agent profile
|
|
2484
2488
|
connect <proj> <name> Connect existing agent
|
|
2485
2489
|
|
|
@@ -2864,7 +2868,8 @@ if __name__ == "__main__":
|
|
|
2864
2868
|
|
|
2865
2869
|
# Strip bare boolean flags from argv before parsing so they don't end
|
|
2866
2870
|
# up as positional args (e.g. --compact is a bare flag, not key=value).
|
|
2867
|
-
BARE_FLAGS = {"--compact", "--legacy", "--no-hook", "--mcp-only", "--autonomous"
|
|
2871
|
+
BARE_FLAGS = {"--compact", "--legacy", "--no-hook", "--mcp-only", "--autonomous",
|
|
2872
|
+
"--no-launch", "--restart"}
|
|
2868
2873
|
_bare_present = {a.lstrip("-") for a in sys.argv[2:] if a in BARE_FLAGS}
|
|
2869
2874
|
_argv_for_parse = [a for a in sys.argv[2:] if a not in BARE_FLAGS]
|
|
2870
2875
|
|
|
@@ -3342,6 +3347,89 @@ if __name__ == "__main__":
|
|
|
3342
3347
|
name = flags.get("name", pos[1] if len(pos) > 1 else "agent")
|
|
3343
3348
|
disconnect_terminal(proj, name)
|
|
3344
3349
|
|
|
3350
|
+
elif cmd == "replicate":
|
|
3351
|
+
# meshcode replicate <agent> --count N [--project <name>] [--no-launch] [--group <uuid>]
|
|
3352
|
+
# meshcode replicate <project>/<agent> --count N ...
|
|
3353
|
+
#
|
|
3354
|
+
# Clones a base agent's persona into <base>-1..N (status=needs_launch) via
|
|
3355
|
+
# mc_replicate_agent (DB mig 20260616_572, LIVE prod). By DEFAULT the new
|
|
3356
|
+
# rows get desired_state='running' so hostd auto-spawns them as persistent
|
|
3357
|
+
# terminals within ~one sweep; --no-launch creates them dormant (Start later
|
|
3358
|
+
# from the dashboard or `meshcode replica-power <group> running`). The shared
|
|
3359
|
+
# replica_group_id (== swarm_id) lets the whole group be powered as a unit
|
|
3360
|
+
# while per-replica desired_state still allows killing one without the group.
|
|
3361
|
+
# Backend for task 069b5550.
|
|
3362
|
+
_ak = _load_api_key_for_cli()
|
|
3363
|
+
base = pos[0] if len(pos) > 0 else flags.get("agent", "")
|
|
3364
|
+
proj = flags.get("project")
|
|
3365
|
+
if base and "/" in base:
|
|
3366
|
+
proj, base = base.split("/", 1)
|
|
3367
|
+
if not base:
|
|
3368
|
+
print("[meshcode] ERROR: usage: meshcode replicate <agent> --count N "
|
|
3369
|
+
"[--project <name>] [--no-launch]")
|
|
3370
|
+
sys.exit(1)
|
|
3371
|
+
try:
|
|
3372
|
+
count = int(flags.get("count", pos[1] if len(pos) > 1 else 0))
|
|
3373
|
+
except (TypeError, ValueError):
|
|
3374
|
+
count = 0
|
|
3375
|
+
if not (1 <= count <= 16):
|
|
3376
|
+
print("[meshcode] ERROR: --count must be an integer in 1..16")
|
|
3377
|
+
sys.exit(1)
|
|
3378
|
+
desired = None if flags.get("no-launch") else "running"
|
|
3379
|
+
res = sb_rpc("mc_replicate_agent", {
|
|
3380
|
+
"p_api_key": _ak,
|
|
3381
|
+
"p_base_agent": base,
|
|
3382
|
+
"p_count": count,
|
|
3383
|
+
"p_project": proj,
|
|
3384
|
+
"p_desired_state": desired,
|
|
3385
|
+
"p_replica_group_id": flags.get("group"),
|
|
3386
|
+
})
|
|
3387
|
+
if not isinstance(res, dict) or res.get("error") or not res.get("ok", True):
|
|
3388
|
+
_err = (res or {}).get("error", "unknown") if isinstance(res, dict) else "no response"
|
|
3389
|
+
print(f"[meshcode] ERROR: replicate failed: {_err}")
|
|
3390
|
+
sys.exit(1)
|
|
3391
|
+
created = res.get("created") or []
|
|
3392
|
+
print(f"[meshcode] Replicated {base} → {len(created)} replica(s) "
|
|
3393
|
+
f"in group {res.get('replica_group_id')}")
|
|
3394
|
+
for c in created:
|
|
3395
|
+
print(f" + {c.get('name')} ({c.get('id')})")
|
|
3396
|
+
if desired == "running":
|
|
3397
|
+
print("[meshcode] desired_state=running — hostd will open the new terminals "
|
|
3398
|
+
"within ~one sweep (~10s, throttled to 3/sweep). Watch the dashboard.")
|
|
3399
|
+
else:
|
|
3400
|
+
print("[meshcode] Created dormant. Launch with: "
|
|
3401
|
+
f"meshcode replica-power {res.get('replica_group_id')} running")
|
|
3402
|
+
|
|
3403
|
+
elif cmd in ("replica-power", "replica_power"):
|
|
3404
|
+
# meshcode replica-power <group_id> running|stopped [--restart]
|
|
3405
|
+
#
|
|
3406
|
+
# Sets desired_state for EVERY agent sharing replica_group_id (Stop/Start the
|
|
3407
|
+
# whole swarm as a unit). CLI/agent path = mc_replica_group_power_as_agent
|
|
3408
|
+
# (DB mig 20260616_573); the FE buttons call mc_replica_group_power (auth.uid,
|
|
3409
|
+
# no api_key in the browser). --restart requests a recycle on launch.
|
|
3410
|
+
# Backend for task 069b5550 (FE Stop/Start/Launch route through here).
|
|
3411
|
+
_ak = _load_api_key_for_cli()
|
|
3412
|
+
group = pos[0] if len(pos) > 0 else flags.get("group", "")
|
|
3413
|
+
state = (pos[1] if len(pos) > 1 else flags.get("state", "")).lower()
|
|
3414
|
+
if not group or state not in ("running", "stopped"):
|
|
3415
|
+
print("[meshcode] ERROR: usage: meshcode replica-power <group_id> "
|
|
3416
|
+
"running|stopped [--restart]")
|
|
3417
|
+
sys.exit(1)
|
|
3418
|
+
res = sb_rpc("mc_replica_group_power_as_agent", {
|
|
3419
|
+
"p_api_key": _ak,
|
|
3420
|
+
"p_replica_group_id": group,
|
|
3421
|
+
"p_state": state,
|
|
3422
|
+
"p_restart": bool(flags.get("restart")),
|
|
3423
|
+
})
|
|
3424
|
+
if not isinstance(res, dict) or res.get("error") or not res.get("ok", True):
|
|
3425
|
+
_err = (res or {}).get("error", "unknown") if isinstance(res, dict) else "no response"
|
|
3426
|
+
print(f"[meshcode] ERROR: replica-power failed: {_err}")
|
|
3427
|
+
sys.exit(1)
|
|
3428
|
+
print(f"[meshcode] group {res.get('replica_group_id')} → "
|
|
3429
|
+
f"desired_state={res.get('desired_state')} "
|
|
3430
|
+
f"({res.get('affected_count')} agent(s): "
|
|
3431
|
+
f"{', '.join(res.get('agents') or [])})")
|
|
3432
|
+
|
|
3345
3433
|
elif cmd in ("setup", "add-agent", "add_agent"):
|
|
3346
3434
|
# `setup` and `add-agent` are aliases (qa snag 05b6a6c2: docs/users
|
|
3347
3435
|
# were referring to `meshcode add-agent` but only `setup` existed).
|
|
@@ -665,9 +665,33 @@ _BOOTSTALE_LOGGED: set = set()
|
|
|
665
665
|
# persists; log + telemetry only on the first one, clear on recovery.
|
|
666
666
|
_DISCOVERY_ERR_LOGGED: set = set()
|
|
667
667
|
|
|
668
|
+
# task aed2c7c4: orphan agents whose host_id we've already bound this session,
|
|
669
|
+
# so the ORPHAN-CLAIM line prints once per agent instead of every ~10s sweep.
|
|
670
|
+
_ORPHAN_CLAIMED_LOGGED: set = set()
|
|
671
|
+
|
|
668
672
|
|
|
669
673
|
def _do_respawns(api_key: str, host_id: str) -> int:
|
|
670
674
|
"""One respawn sweep. Returns number relaunched."""
|
|
675
|
+
# ORPHAN-CLAIM (task aed2c7c4, half of the 'launch button no sirve' P0): an
|
|
676
|
+
# agent with host_id IS NULL is INVISIBLE to mc_agents_needing_respawn (it
|
|
677
|
+
# filters WHERE host_id=p_host_id) — so a never-spawned agent the owner
|
|
678
|
+
# Launches never appears in the sweep and the click does nothing. DB option A
|
|
679
|
+
# (mc_claim_orphan_agents) ATOMICALLY binds host_id on owner-scoped, host_id-
|
|
680
|
+
# NULL, desired_state=running, stale, non-tombstoned orphans (UPDATE ... WHERE
|
|
681
|
+
# host_id IS NULL = first-host-wins, race-safe — chosen over surfacing the
|
|
682
|
+
# orphan to every host, which would multi-host double-spawn). Once bound they
|
|
683
|
+
# surface in mc_agents_needing_respawn below on THIS SAME sweep, carrying the
|
|
684
|
+
# restart_requested flag the Launch set, and spawn through the gate. Best-effort:
|
|
685
|
+
# RPC-absent / older host => silent no-op, the rest of the sweep is unaffected.
|
|
686
|
+
_claim = _rpc("mc_claim_orphan_agents", {"p_api_key": api_key, "p_host_id": host_id})
|
|
687
|
+
if isinstance(_claim, dict) and _claim.get("ok"):
|
|
688
|
+
for _o in (_claim.get("claimed") or []):
|
|
689
|
+
_otarget = f"{_o.get('project_name')}/{_o.get('agent')}"
|
|
690
|
+
if _otarget not in _ORPHAN_CLAIMED_LOGGED:
|
|
691
|
+
_ORPHAN_CLAIMED_LOGGED.add(_otarget)
|
|
692
|
+
_log(f"ORPHAN-CLAIM {_otarget}: bound host_id={host_id} (was NULL; "
|
|
693
|
+
f"owner Launch on a never-spawned agent) — will surface + spawn "
|
|
694
|
+
f"this sweep via the normal respawn path.")
|
|
671
695
|
res = _rpc("mc_agents_needing_respawn",
|
|
672
696
|
{"p_api_key": api_key, "p_host_id": host_id, "p_stale_seconds": STALE_SECONDS})
|
|
673
697
|
if not res or not res.get("ok"):
|
|
@@ -720,7 +744,23 @@ def _do_respawns(api_key: str, host_id: str) -> int:
|
|
|
720
744
|
# spawned_age_s < uptime -> explicit launch AFTER start -> SPAWN.
|
|
721
745
|
# Crash-respawn preserved: an agent launched THIS session then crashed has spawned_age_s <
|
|
722
746
|
# uptime -> respawned. MESHCODE_BOOT_AUTOSTART=1 opts out (auto-launch everything, old behavior).
|
|
723
|
-
|
|
747
|
+
# EXPLICIT-LAUNCH SIGNAL (task aed2c7c4, DB mig 20260616_574): the launch
|
|
748
|
+
# paths (mc_agent_power / mc_agent_power_as_agent / replica group-power) now
|
|
749
|
+
# FORCE restart_requested_at=now() IDEMPOTENTLY — even on a running->running
|
|
750
|
+
# no-op. That kills the root cause of "launch button no sirve": clicking
|
|
751
|
+
# Launch on an already-desired-running offline agent used to be a no-op
|
|
752
|
+
# UPDATE, so spawned_at never re-stamped and the boot-stale heuristic below
|
|
753
|
+
# ate the launch. mc_agents_needing_respawn surfaces restart_requested:bool
|
|
754
|
+
# for BOUND agents (host_id=p_host_id, race-free); never-spawned orphans
|
|
755
|
+
# (host_id NULL) get bound first by mc_claim_orphan_agents at the top of this
|
|
756
|
+
# sweep, then surface here the same way. An explicit launch is unambiguous
|
|
757
|
+
# human/commander intent -> bypass the boot-stale heuristic + the 600s floor.
|
|
758
|
+
# We do NOT bypass the downstream live-session /
|
|
759
|
+
# convergence / circuit-breaker guards, so a healthy agent is never
|
|
760
|
+
# double-spawned (DB also excludes live sessions via its 30s liveness guard).
|
|
761
|
+
# mc_record_respawn (called after a successful spawn below) CLEARS
|
|
762
|
+
# restart_requested_at, so this fires exactly once per Launch.
|
|
763
|
+
if not _BOOT_AUTOSTART and not c.get("restart_requested"):
|
|
724
764
|
_spawn_age = c.get("spawned_age_s")
|
|
725
765
|
_hostd_uptime = time.time() - _HOSTD_STARTED_AT
|
|
726
766
|
# ALIVE-ON-OUR-WATCH bypass (task baefc8ab part C — live miss 2026-06-10T00:31Z:
|
|
@@ -746,6 +786,11 @@ def _do_respawns(api_key: str, host_id: str) -> int:
|
|
|
746
786
|
# an explicit launch / live heartbeat got this target past the gate —
|
|
747
787
|
# if it ever goes boot-stale again, the skip deserves a fresh line.
|
|
748
788
|
_BOOTSTALE_LOGGED.discard(f"{proj}/{agent}")
|
|
789
|
+
elif c.get("restart_requested"):
|
|
790
|
+
# explicit launch (mc_agent_power) clears any stale skip-log state too
|
|
791
|
+
_BOOTSTALE_LOGGED.discard(f"{proj}/{agent}")
|
|
792
|
+
_log(f"LAUNCH-HONOR {proj}/{agent}: restart_requested set (explicit Launch / "
|
|
793
|
+
f"mc_agent_power) — bypassing BOOT-AUTOSTART boot-stale gate; spawning this sweep.")
|
|
749
794
|
# RECYCLE FAST-PATH (task c0fc5597): a recycle-exited agent (recycle_fast) is relaunched
|
|
750
795
|
# PROMPTLY (the RPC returned it at a 15s stale gate, not STALE_SECONDS) and recorded as a
|
|
751
796
|
# RECYCLE (mc_record_recycle), NEVER against the crash respawn cap.
|
|
@@ -1297,6 +1297,29 @@ def set_status(project_id: str, agent: str, status: str, task: str = "", api_key
|
|
|
1297
1297
|
return {"ok": True, "status": status}
|
|
1298
1298
|
|
|
1299
1299
|
|
|
1300
|
+
def loop_tick(api_key: str, project_id: str, agent: str,
              since_ts: Optional[str] = None, msg_limit: int = 20,
              task_limit: int = 10) -> Dict:
    """Call the ``mc_agent_loop_tick`` RPC: one round-trip per agent loop tick.

    The single call is meant to replace the separate count_pending +
    task_list + heartbeat RPCs. The payload is expected to carry
    ``pending_count``, ``new_messages_since``, ``new_tasks_since`` and
    ``heartbeat_set`` (field list taken from the RPC's description, not
    checked here — confirm against the DB migration).

    Args:
        api_key: Caller's API key, sent as ``p_api_key``.
        project_id: Project identifier, sent as ``p_project_id``.
        agent: Agent name, sent as ``p_agent``.
        since_ts: Optional cursor for incremental polling. Sent as
            ``p_since_ts`` only when truthy; otherwise the parameter is
            omitted entirely and the server-side default applies.
        msg_limit: Sent as ``p_msg_limit``.
        task_limit: Sent as ``p_task_limit``.

    Returns:
        The RPC result unchanged when it is a dict with a truthy ``"ok"``.
        Otherwise ``{"ok": False, "error": <raw RPC result>}``. This function
        never returns ``None``: callers must test ``result.get("ok")`` and
        take the legacy multi-RPC path when it is falsy.

    Exceptions raised by ``sb_rpc`` are not caught here.
    """
    params: Dict[str, Any] = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_agent": agent,
        "p_msg_limit": msg_limit,
        "p_task_limit": task_limit,
    }
    # Omit the cursor (rather than sending null) when the caller has none yet.
    if since_ts:
        params["p_since_ts"] = since_ts
    result = sb_rpc("mc_agent_loop_tick", params)
    if isinstance(result, dict) and result.get("ok"):
        return result
    # Normalise every failure shape (None, non-dict, ok=False) to one envelope
    # and keep the raw result for diagnostics.
    return {"ok": False, "error": result}
|
|
1321
|
+
|
|
1322
|
+
|
|
1300
1323
|
def task_create(api_key, project_id, creator_agent, title, description="",
|
|
1301
1324
|
assignee="*", priority="normal", parent_task_id=None,
|
|
1302
1325
|
goal_id=None, strict=False,
|
|
@@ -1644,11 +1644,38 @@ def _install_shutdown_signal_handlers() -> None:
|
|
|
1644
1644
|
# so it never triggers this. "al cerrar la terminal el agente se debe parar."
|
|
1645
1645
|
_WIN_CTRL_HANDLER_REF = None # keep the WINFUNCTYPE callback alive (GC guard)
|
|
1646
1646
|
|
|
1647
|
+
# task 8a82d606 (recycle-leave-running, self-improve fabb8fee): a RECYCLE exit
|
|
1648
|
+
# tears down this process the SAME way a terminal close does, so the close-to-stop
|
|
1649
|
+
# handler below would flip desired_state->stopped and hostd would NOT respawn —
|
|
1650
|
+
# the recycle silently degrades to a permanent stop (mesh-commander + front-2 died
|
|
1651
|
+
# this way on a live recycle). When the wait-loop authorizes a recycle it marks
|
|
1652
|
+
# this flag; the close handler then SKIPS the stop-flip so desired_state stays
|
|
1653
|
+
# 'running' and hostd respawns us fresh. Human-stop / sleep do NOT set this flag,
|
|
1654
|
+
# so they still (correctly) flip to stopped and are never respawned.
|
|
1655
|
+
_RECYCLE_IN_PROGRESS = False
|
|
1656
|
+
|
|
1657
|
+
|
|
1658
|
+
def _mark_recycle_in_progress() -> None:
    """Record that the current process exit was authorized as a recycle.

    Sets the module-level ``_RECYCLE_IN_PROGRESS`` flag to True. The
    close-to-stop path (``_flip_desired_state_stopped``) checks this flag and
    returns early without flipping desired_state to stopped. This function
    never resets the flag.
    """
    global _RECYCLE_IN_PROGRESS
    _RECYCLE_IN_PROGRESS = True
|
|
1661
|
+
|
|
1647
1662
|
|
|
1648
1663
|
def _flip_desired_state_stopped(timeout_s: float = 3.0) -> None:
|
|
1649
1664
|
"""Best-effort flip THIS agent's desired_state->stopped via mc_set_desired_state
|
|
1650
1665
|
(api_key self-scoped to the calling agent). Fast — Windows CTRL_CLOSE allows ~5s
|
|
1651
|
-
before the OS force-kills, so we time-box the RPC and never block exit.
|
|
1666
|
+
before the OS force-kills, so we time-box the RPC and never block exit.
|
|
1667
|
+
|
|
1668
|
+
RECYCLE GUARD (task 8a82d606): if a recycle authorized this exit, do NOT flip
|
|
1669
|
+
to stopped — leave desired_state='running' so hostd respawns us. Only genuine
|
|
1670
|
+
terminal-close / human-stop reaches the flip."""
|
|
1671
|
+
if _RECYCLE_IN_PROGRESS:
|
|
1672
|
+
try:
|
|
1673
|
+
sys.stderr.write("[meshcode-mcp] recycle exit — leaving desired_state=running "
|
|
1674
|
+
"for hostd respawn (NOT flipping to stopped)\n")
|
|
1675
|
+
sys.stderr.flush()
|
|
1676
|
+
except Exception:
|
|
1677
|
+
pass
|
|
1678
|
+
return
|
|
1652
1679
|
done = _threading.Event()
|
|
1653
1680
|
|
|
1654
1681
|
def _do():
|
|
@@ -3700,6 +3727,28 @@ def _wait_poll_or_legacy() -> Dict[str, Any]:
|
|
|
3700
3727
|
}
|
|
3701
3728
|
|
|
3702
3729
|
|
|
3730
|
+
# ── LOOP TICK (mig 311): 1 RPC replaces task_list + count_pending + heartbeat ──
|
|
3731
|
+
_LAST_LOOP_TICK_TS: Optional[str] = None # ISO ts from server for incremental polling
|
|
3732
|
+
|
|
3733
|
+
|
|
3734
|
+
def _loop_tick() -> Optional[Dict[str, Any]]:
    """Run one ``mc_agent_loop_tick`` round-trip for this agent.

    Calls ``be.loop_tick`` with the module-level project and agent identity,
    passing the cursor remembered from the previous successful tick
    (``_LAST_LOOP_TICK_TS``) as ``since_ts``.

    Returns:
        The RPC result dict when the call succeeded (dict with truthy
        ``"ok"``). ``None`` when there is no API key, the result is not ok,
        or any exception was raised — callers treat ``None`` as "use the
        legacy multi-RPC path".
    """
    global _LAST_LOOP_TICK_TS
    try:
        api_key = _get_api_key()
        if not api_key:
            return None
        result = be.loop_tick(api_key, _PROJECT_ID, AGENT_NAME,
                              since_ts=_LAST_LOOP_TICK_TS)
        if isinstance(result, dict) and result.get("ok"):
            # Advance the cursor only when the server actually supplied one.
            # Overwriting it with None on a response that lacks server_now
            # would silently make the next tick non-incremental.
            server_now = result.get("server_now")
            if server_now:
                _LAST_LOOP_TICK_TS = server_now
            return result
    except Exception as e:
        # Best-effort: any failure here just routes the caller to the legacy path.
        log.debug(f"[meshcode] mc_agent_loop_tick failed; legacy fallback: {e}")
    return None
|
|
3750
|
+
|
|
3751
|
+
|
|
3703
3752
|
def _drain_unread_response(include_acks: bool,
|
|
3704
3753
|
rpc_result: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
|
|
3705
3754
|
"""SDK-EFF (task ab1f9f5a): read+mark unread DB messages and shape them as
|
|
@@ -4369,10 +4418,12 @@ async def meshcode_wait(timeout_seconds: int = 20, include_acks: bool = False) -
|
|
|
4369
4418
|
# offline at create time" gap. Always-on, not gated on
|
|
4370
4419
|
# autonomous_mode. Best-effort.
|
|
4371
4420
|
try:
|
|
4372
|
-
#
|
|
4373
|
-
#
|
|
4374
|
-
#
|
|
4375
|
-
#
|
|
4421
|
+
# LOOP-TICK (mig 311 adoption, task f81a5060): mc_agent_loop_tick
|
|
4422
|
+
# gives pending_count + heartbeat in 1 RPC (replaces count_pending).
|
|
4423
|
+
# task_list is kept for auto-claim sweep because new_tasks_since is
|
|
4424
|
+
# incremental and could miss older unclaimed tasks (commander review).
|
|
4425
|
+
_entry_tick = _loop_tick()
|
|
4426
|
+
_entry_tick_pending = _entry_tick.get("pending_count", 0) or 0 if _entry_tick else 0
|
|
4376
4427
|
_entry_tasks_raw = None
|
|
4377
4428
|
try:
|
|
4378
4429
|
_tl = be.task_list(_get_api_key(), _PROJECT_ID, AGENT_NAME, status_filter=None)
|
|
@@ -4383,6 +4434,7 @@ async def meshcode_wait(timeout_seconds: int = 20, include_acks: bool = False) -
|
|
|
4383
4434
|
_wait_entry_auto_claimed = _try_auto_claim_self_assigned_tasks(tasks=_entry_tasks_raw)
|
|
4384
4435
|
except Exception:
|
|
4385
4436
|
_wait_entry_auto_claimed = []
|
|
4437
|
+
_entry_tick_pending = 0
|
|
4386
4438
|
|
|
4387
4439
|
pending_tasks = (_pending_filter(_entry_tasks_raw) if _entry_tasks_raw is not None
|
|
4388
4440
|
else _get_pending_tasks_summary())
|
|
@@ -4420,13 +4472,14 @@ async def meshcode_wait(timeout_seconds: int = 20, include_acks: bool = False) -
|
|
|
4420
4472
|
# ("te escribo y no sale palomita azul en nadie"). If there are unread messages,
|
|
4421
4473
|
# skip task_pull and fall through to PRODUCT RULE 2 so they get delivered + marked
|
|
4422
4474
|
# read first. Task-pull still fires normally when the inbox is empty.
|
|
4423
|
-
#
|
|
4424
|
-
#
|
|
4425
|
-
_entry_db_pending =
|
|
4426
|
-
|
|
4427
|
-
|
|
4428
|
-
|
|
4429
|
-
|
|
4475
|
+
# LOOP-TICK: reuse pending_count from _loop_tick (already fetched above).
|
|
4476
|
+
# Falls back to count_pending RPC only if loop_tick didn't run.
|
|
4477
|
+
_entry_db_pending = _entry_tick_pending
|
|
4478
|
+
if not _entry_db_pending and not _entry_tick:
|
|
4479
|
+
try:
|
|
4480
|
+
_entry_db_pending = be.count_pending(_PROJECT_ID, AGENT_NAME, api_key=_get_api_key()) or 0
|
|
4481
|
+
except Exception:
|
|
4482
|
+
_entry_db_pending = 0
|
|
4430
4483
|
_has_unread = bool(_entry_db_pending)
|
|
4431
4484
|
if _tasks_to_start and not _is_leader_agent() and not _has_unread:
|
|
4432
4485
|
# Auto-start the highest priority OPEN task so the agent sees it as
|
|
@@ -4570,11 +4623,19 @@ async def meshcode_wait(timeout_seconds: int = 20, include_acks: bool = False) -
|
|
|
4570
4623
|
# must not pin a stale, context-heavy session alive.
|
|
4571
4624
|
_wp = _wait_poll_or_legacy() # R2-1: 3 RPCs -> 1 (legacy fallback inside)
|
|
4572
4625
|
if _wp["recycle"]:
|
|
4573
|
-
|
|
4626
|
+
# task 8a82d606 (recycle-leave-running): mark recycle BEFORE any
|
|
4627
|
+
# state change so the close-to-stop handler skips the stop-flip on
|
|
4628
|
+
# teardown (fix B), AND use status 'recycling' NOT 'sleeping' — the
|
|
4629
|
+
# server-side mc_agent_set_status_by_api_key converts sleeping ->
|
|
4630
|
+
# desired_state=stopped, which suppresses the respawn (fix A). Recycle
|
|
4631
|
+
# must leave desired_state='running' so hostd relaunches us fresh.
|
|
4632
|
+
_mark_recycle_in_progress()
|
|
4633
|
+
_set_state("recycling", "recycle")
|
|
4574
4634
|
result["must_exit"] = True
|
|
4575
4635
|
result["exit_reason"] = (
|
|
4576
|
-
"recycle authorized —
|
|
4577
|
-
"
|
|
4636
|
+
"recycle authorized — END the session NOW; do NOT call meshcode_set_status "
|
|
4637
|
+
"(your status is already 'recycling' and desired_state stays 'running' so "
|
|
4638
|
+
"hostd relaunches you fresh with your handoff; do NOT meshcode_wait again)")
|
|
4578
4639
|
result["recycle"] = True
|
|
4579
4640
|
break
|
|
4580
4641
|
|
|
@@ -4917,6 +4978,8 @@ async def _meshcode_wait_inner(actual_timeout: int, include_acks: bool) -> Dict[
|
|
|
4917
4978
|
else:
|
|
4918
4979
|
# 2b) Realtime NOT subscribed — aggressive DB polling every 5s
|
|
4919
4980
|
# so messages arrive within seconds, not after 120s timeout.
|
|
4981
|
+
# LOOP-TICK (task f81a5060): use mc_agent_loop_tick (1 RPC) instead
|
|
4982
|
+
# of a separate count_pending call; read_inbox still runs, but only when pending_count > 0. Falls back to legacy count_pending on error.
|
|
4920
4983
|
_poll_interval = 5
|
|
4921
4984
|
_elapsed = 0
|
|
4922
4985
|
while _elapsed < actual_timeout:
|
|
@@ -4927,10 +4990,18 @@ async def _meshcode_wait_inner(actual_timeout: int, include_acks: bool) -> Dict[
|
|
|
4927
4990
|
return {"timed_out": True, "reason": "cancelled_by_client"}
|
|
4928
4991
|
_elapsed += _poll_interval
|
|
4929
4992
|
try:
|
|
4930
|
-
|
|
4931
|
-
|
|
4932
|
-
|
|
4933
|
-
|
|
4993
|
+
_tick = _loop_tick()
|
|
4994
|
+
_has_pending = False
|
|
4995
|
+
if _tick:
|
|
4996
|
+
_has_pending = bool(_tick.get("pending_count", 0))
|
|
4997
|
+
else:
|
|
4998
|
+
# Fallback: legacy count_pending
|
|
4999
|
+
api_key = _get_api_key()
|
|
5000
|
+
if api_key:
|
|
5001
|
+
_has_pending = bool(be.count_pending(_PROJECT_ID, AGENT_NAME, api_key=api_key))
|
|
5002
|
+
if _has_pending:
|
|
5003
|
+
api_key = _get_api_key()
|
|
5004
|
+
if api_key:
|
|
4934
5005
|
raw = be.read_inbox(_PROJECT_ID, AGENT_NAME, mark_read=True, api_key=api_key)
|
|
4935
5006
|
if raw:
|
|
4936
5007
|
msgs = [
|
|
@@ -7756,12 +7827,16 @@ def meshcode_health() -> Dict[str, Any]:
|
|
|
7756
7827
|
health["uptime_seconds"] = "unknown (psutil not available)"
|
|
7757
7828
|
|
|
7758
7829
|
# Server-side system health (aggregate metrics from DB)
|
|
7830
|
+
# mig 558: pass p_project_id to scope active/stale counts to THIS mesh
|
|
7759
7831
|
try:
|
|
7760
|
-
sys_health = be.sb_rpc("mc_system_health", {})
|
|
7832
|
+
sys_health = be.sb_rpc("mc_system_health", {"p_project_id": _PROJECT_ID})
|
|
7761
7833
|
if isinstance(sys_health, dict) and sys_health.get("ok"):
|
|
7762
7834
|
health["system"] = {
|
|
7835
|
+
"scope": sys_health.get("scope", "global"),
|
|
7763
7836
|
"active_agents": sys_health.get("active_agent_count"),
|
|
7837
|
+
"active_agents_global": sys_health.get("active_agent_count_global"),
|
|
7764
7838
|
"stale_agents": sys_health.get("stale_agent_count"),
|
|
7839
|
+
"stale_agents_global": sys_health.get("stale_agent_count_global"),
|
|
7765
7840
|
"message_delivery_rate": sys_health.get("message_delivery_rate"),
|
|
7766
7841
|
"messages_1h": sys_health.get("total_messages_1h"),
|
|
7767
7842
|
"failed_rpcs_1h": sys_health.get("failed_rpc_count_1h"),
|
|
@@ -261,7 +261,21 @@ def _fetch_agent_stats(agent: str, project: str) -> dict:
|
|
|
261
261
|
def _check_agent_ownership(agent: str, project: str) -> Optional[str]:
|
|
262
262
|
"""Pre-flight check: verify caller owns this agent before launching editor.
|
|
263
263
|
|
|
264
|
-
Returns an error message string if
|
|
264
|
+
Returns an error message string if the launch must be BLOCKED (a genuine
|
|
265
|
+
server-side ownership denial), or None if OK *or* if the check could not
|
|
266
|
+
be completed due to a transient network/transport failure.
|
|
267
|
+
|
|
268
|
+
Fail-open rationale (Samuel 2026-06-15): the MCP server re-verifies
|
|
269
|
+
ownership on EVERY RPC (SECURITY DEFINER + RLS), so a launch that skipped
|
|
270
|
+
this convenience pre-flight gains a hijacker nothing — the server still
|
|
271
|
+
rejects calls they aren't entitled to. A transient TLS-handshake timeout
|
|
272
|
+
during a full-mesh launch (N agents → N simultaneous TLS handshakes →
|
|
273
|
+
congestion) must NOT hard-block an otherwise-legitimate boot. That was the
|
|
274
|
+
"could not verify meshwork access (<...handshake operation timed out>);
|
|
275
|
+
refusing to launch" / rc=2 / "Pane is dead" storm. We now retry transient
|
|
276
|
+
transport failures with jittered backoff (de-syncing the simultaneous
|
|
277
|
+
launches) and, only if every attempt fails at the transport layer, fail
|
|
278
|
+
OPEN with a loud warning. Only an explicit server JSON denial blocks.
|
|
265
279
|
"""
|
|
266
280
|
try:
|
|
267
281
|
from .setup_clients import _load_supabase_env
|
|
@@ -280,32 +294,51 @@ def _check_agent_ownership(agent: str, project: str) -> Optional[str]:
|
|
|
280
294
|
return "not logged in — run `meshcode login <api_key>` first"
|
|
281
295
|
|
|
282
296
|
sb = _load_supabase_env()
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
297
|
+
from urllib.request import Request, urlopen
|
|
298
|
+
import time as _time, random as _random
|
|
299
|
+
body = json.dumps({
|
|
300
|
+
"p_api_key": api_key,
|
|
301
|
+
"p_project_name": project,
|
|
302
|
+
"p_agent_name": agent,
|
|
303
|
+
}).encode()
|
|
304
|
+
req = Request(
|
|
305
|
+
f"{sb['SUPABASE_URL']}/rest/v1/rpc/mc_check_agent_ownership",
|
|
306
|
+
data=body,
|
|
307
|
+
method="POST",
|
|
308
|
+
headers={
|
|
309
|
+
"apikey": sb["SUPABASE_KEY"],
|
|
310
|
+
"Authorization": f"Bearer {sb['SUPABASE_KEY']}",
|
|
311
|
+
"Content-Type": "application/json",
|
|
312
|
+
},
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
# Retry transient transport failures with jittered backoff. The jitter
|
|
316
|
+
# de-synchronizes the N simultaneous full-mesh launches so they stop
|
|
317
|
+
# colliding on the same TLS-handshake window (the root cause of the rc=2 storm). A
|
|
318
|
+
# genuine ownership denial comes back as a 200 with a JSON body, handled
|
|
319
|
+
# below — so a *raised* exception here is normally a transport problem, not a
|
|
320
|
+
# "you don't own this" answer (note: urlopen also raises HTTPError on 4xx/5xx).
|
|
321
|
+
last_exc = None
|
|
322
|
+
for attempt in range(3):
|
|
323
|
+
try:
|
|
324
|
+
with urlopen(req, timeout=12) as resp:
|
|
325
|
+
data = json.loads(resp.read().decode())
|
|
326
|
+
if isinstance(data, dict) and data.get("error"):
|
|
327
|
+
return data["error"]
|
|
328
|
+
if not isinstance(data, dict) or not data.get("ok"):
|
|
329
|
+
return "ownership check returned unexpected response; refusing to launch"
|
|
330
|
+
return None
|
|
331
|
+
except Exception as e:
|
|
332
|
+
last_exc = e
|
|
333
|
+
if attempt < 2:
|
|
334
|
+
_time.sleep(0.4 * (attempt + 1) + _random.uniform(0, 0.6))
|
|
335
|
+
|
|
336
|
+
# Every attempt failed at the transport layer (timeout / TLS / connection).
|
|
337
|
+
# Fail OPEN: warn loudly but let the launch proceed — server-side RLS is
|
|
338
|
+
# the real gate and rejects anything this caller isn't entitled to.
|
|
339
|
+
print(f"[meshcode] WARNING: could not verify meshwork access ({last_exc}) "
|
|
340
|
+
f"after 3 tries — launching anyway (the server re-checks ownership "
|
|
341
|
+
f"on every call, so this is safe).", file=sys.stderr)
|
|
309
342
|
return None
|
|
310
343
|
|
|
311
344
|
|
|
@@ -543,6 +543,30 @@ def _env_python(version: str) -> Path:
|
|
|
543
543
|
return ENVS_DIR / version / sub / exe
|
|
544
544
|
|
|
545
545
|
|
|
546
|
+
def _prune_stale_tmp_envs(max_age_sec: int = 3600) -> None:
|
|
547
|
+
"""Best-effort removal of leftover .tmp-<ver>-<pid> env dirs left by
|
|
548
|
+
crashed or locked-rename builds — the Windows version-split litter
|
|
549
|
+
(task aed2c7c4: os.rename(tmp, final) fails on a freshly-built venv whose
|
|
550
|
+
python.exe is momentarily locked; the OSError branch calls rmtree, but the lock
|
|
551
|
+
defeats that too, so .tmp dirs accumulate and no <version>/ ever finalizes).
|
|
552
|
+
|
|
553
|
+
Only touches .tmp-* dirs OLDER than max_age_sec so a concurrent in-flight
|
|
554
|
+
build (~30s) is never killed; never touches a finalized <version>/ env.
|
|
555
|
+
Never raises."""
|
|
556
|
+
try:
|
|
557
|
+
import time as _time
|
|
558
|
+
import shutil
|
|
559
|
+
now = _time.time()
|
|
560
|
+
for d in ENVS_DIR.glob(".tmp-*"):
|
|
561
|
+
try:
|
|
562
|
+
if d.is_dir() and (now - d.stat().st_mtime) > max_age_sec:
|
|
563
|
+
shutil.rmtree(d, ignore_errors=True)
|
|
564
|
+
except Exception:
|
|
565
|
+
continue
|
|
566
|
+
except Exception:
|
|
567
|
+
pass
|
|
568
|
+
|
|
569
|
+
|
|
546
570
|
def ensure_versioned_env(version: str, verbose: bool = True) -> Optional[Path]:
|
|
547
571
|
"""Create-once immutable venv for `version`; return its python, or None.
|
|
548
572
|
|
|
@@ -561,6 +585,7 @@ def ensure_versioned_env(version: str, verbose: bool = True) -> Optional[Path]:
|
|
|
561
585
|
tmp = ENVS_DIR / f".tmp-{version}-{os.getpid()}"
|
|
562
586
|
tmp_py = Path(str(py).replace(str(final), str(tmp), 1))
|
|
563
587
|
ENVS_DIR.mkdir(parents=True, exist_ok=True)
|
|
588
|
+
_prune_stale_tmp_envs() # clear litter from prior crashed/locked builds
|
|
564
589
|
if verbose:
|
|
565
590
|
print(f"[meshcode] building env for meshcode {version} (one-time, ~30s)...",
|
|
566
591
|
file=sys.stderr)
|
|
@@ -582,12 +607,38 @@ def ensure_versioned_env(version: str, verbose: bool = True) -> Optional[Path]:
|
|
|
582
607
|
shutil.rmtree(tmp, ignore_errors=True)
|
|
583
608
|
return None
|
|
584
609
|
(tmp / _ENV_OK_MARKER).write_text(version, encoding="utf-8")
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
610
|
+
# Finalize: rename tmp -> final. On Windows this can fail TRANSIENTLY
|
|
611
|
+
# (AV / Search-indexer momentarily holding a handle on the freshly
|
|
612
|
+
# written python.exe) or PERMANENTLY-benign (a CONCURRENT builder already
|
|
613
|
+
# finalized `final`). The old code assumed every OSError was a concurrent
|
|
614
|
+
# win and rmtree'd — but when the cause was a transient lock with NO
|
|
615
|
+
# winner, that left a stray .tmp dir and NO finalized env, so ensure_boot_env
|
|
616
|
+
# returned None and the agent silently spawned the stale system env (the
|
|
617
|
+
# version-split root cause, task aed2c7c4). Retry the transient case; treat an
|
|
618
|
+
# already-valid `final` as the benign concurrent win; only then give up.
|
|
619
|
+
import time as _time
|
|
620
|
+
renamed = False
|
|
621
|
+
for _attempt in range(3):
|
|
622
|
+
try:
|
|
623
|
+
os.rename(tmp, final)
|
|
624
|
+
renamed = True
|
|
625
|
+
break
|
|
626
|
+
except OSError:
|
|
627
|
+
if (final / _ENV_OK_MARKER).exists():
|
|
628
|
+
shutil.rmtree(tmp, ignore_errors=True) # concurrent builder won
|
|
629
|
+
break
|
|
630
|
+
if _attempt < 2:
|
|
631
|
+
_time.sleep(0.5 * (_attempt + 1))
|
|
589
632
|
if py.exists() and (final / _ENV_OK_MARKER).exists():
|
|
590
633
|
return py
|
|
634
|
+
# Could not finalize AND no valid env exists: be LOUD (never let the
|
|
635
|
+
# caller silently fall back to a stale env) and clean up our tmp.
|
|
636
|
+
if not renamed:
|
|
637
|
+
print(f"[meshcode] WARNING: could not finalize env for meshcode "
|
|
638
|
+
f"{version} (rename {tmp.name} -> {version} failed — likely a "
|
|
639
|
+
f"locked file on Windows). Retrying on the next launch.",
|
|
640
|
+
file=sys.stderr)
|
|
641
|
+
shutil.rmtree(tmp, ignore_errors=True)
|
|
591
642
|
return None
|
|
592
643
|
except Exception:
|
|
593
644
|
return None
|
|
@@ -624,6 +675,15 @@ def ensure_boot_env(mcp_json_path, verbose: bool = True) -> Optional[str]:
|
|
|
624
675
|
if cur_ver != target:
|
|
625
676
|
py = ensure_versioned_env(target, verbose=verbose)
|
|
626
677
|
if py is None:
|
|
678
|
+
# Could not pin the target env — the caller keeps the legacy
|
|
679
|
+
# path on the CURRENT env. Make the version split VISIBLE (it was
|
|
680
|
+
# silent before: Samuel's box ran 2.11.132 system-python serves
|
|
681
|
+
# against a 2.11.140 disk for days — task aed2c7c4).
|
|
682
|
+
if verbose:
|
|
683
|
+
print(f"[meshcode] WARNING: could not pin agent env to meshcode "
|
|
684
|
+
f"{target}; running on {cur_ver or 'system env'} instead "
|
|
685
|
+
f"— possible version split. hostd will retry on the next "
|
|
686
|
+
f"spawn.", file=sys.stderr)
|
|
627
687
|
return None
|
|
628
688
|
if cur_cmd != str(py):
|
|
629
689
|
srv["command"] = str(py)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|