meshcode 2.11.123__tar.gz → 2.11.125__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.123 → meshcode-2.11.125}/PKG-INFO +1 -1
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/__init__.py +1 -1
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/realtime.py +40 -3
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/server.py +96 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/sleep_signals.py +22 -6
- meshcode-2.11.125/meshcode/meshcode_mcp/swarm.py +292 -0
- meshcode-2.11.125/meshcode/meshcode_mcp/test_swarm.py +279 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/SOURCES.txt +2 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/pyproject.toml +1 -1
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_sleep_signals.py +52 -6
- {meshcode-2.11.123 → meshcode-2.11.125}/README.md +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/__main__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/_session_handoff_template.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/claude_update.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/cli.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/comms_v4.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/compat.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/daemon.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/doctor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hostd.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/invites.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/launcher.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/backend.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/preferences.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/protocol_handler.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/run_agent.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/secrets.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/self_update.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/up.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/upload.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/setup.cfg +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_auto_update_hardening.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_1.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_2.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_3.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_prompt_inject.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_boot_bug_regression.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_color_truecolor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_core.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_cross_agent_messaging.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_date_parse.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_doctor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_epistemic_v1_python_sdk.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_epistemic_v1_stop_conditions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_esc_deaf_state.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_exceptions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_file_upload.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_init_device_code.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_install_guard.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_lease_sigterm_release.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_mark_read_batch.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_marketplace_ratings.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_migration_integrity.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_realtime_event_freshness.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rls_cross_tenant.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rpc_grants.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rpc_migrations.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_run_agent_dry_run.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_run_agent_no_server_import.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_security_regressions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_self_update_user_site.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_sentinel.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_setup_path.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_status_enum_coverage.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_stay_on_loop_hook.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_swarm_events.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_wait_open_tasks_contradiction.py +0 -0
|
@@ -125,24 +125,61 @@ class RealtimeListener:
|
|
|
125
125
|
self._connected = False
|
|
126
126
|
self._subscription_ok = False
|
|
127
127
|
|
|
128
|
+
@staticmethod
|
|
129
|
+
def _is_auth_rejection(e: Exception) -> bool:
|
|
130
|
+
"""True when the WS handshake was rejected 401/403 — a BAD KEY, not a
|
|
131
|
+
transient network blip. Task c49096c1 (edge logs: 66x 401/hr): a client
|
|
132
|
+
with an invalid/rotated apikey retried every ≤60s forever, hammering
|
|
133
|
+
prod. Auth errors don't heal by retrying."""
|
|
134
|
+
try:
|
|
135
|
+
status = getattr(getattr(e, "response", None), "status_code", None)
|
|
136
|
+
if status is None:
|
|
137
|
+
status = getattr(e, "status_code", None) # older websockets
|
|
138
|
+
return status in (401, 403)
|
|
139
|
+
except Exception:
|
|
140
|
+
return False
|
|
141
|
+
|
|
128
142
|
async def _run(self) -> None:
|
|
129
143
|
"""Outer loop: reconnect with exponential backoff on disconnect.
|
|
130
144
|
|
|
131
|
-
NEVER gives up — keeps retrying with capped backoff
|
|
132
|
-
The MCP server must stay alive regardless of Realtime health.
|
|
145
|
+
Transient errors: NEVER gives up — keeps retrying with capped backoff
|
|
146
|
+
(max 60s). The MCP server must stay alive regardless of Realtime health.
|
|
147
|
+
AUTH rejections (401/403) are different (task c49096c1): the key is
|
|
148
|
+
bad and retrying can't fix it — back off to 5 min immediately and STOP
|
|
149
|
+
after 5 consecutive auth rejections with one actionable log line.
|
|
150
|
+
Message delivery survives via the wait-loop poll fallback (mc_wait_poll).
|
|
133
151
|
"""
|
|
134
152
|
backoff = 1
|
|
135
153
|
consecutive_failures = 0
|
|
154
|
+
auth_rejections = 0
|
|
136
155
|
while not self._stop.is_set():
|
|
137
156
|
try:
|
|
138
157
|
await self._connect_and_listen()
|
|
139
158
|
backoff = 1 # reset on clean disconnect
|
|
140
159
|
consecutive_failures = 0
|
|
160
|
+
auth_rejections = 0
|
|
141
161
|
except asyncio.CancelledError:
|
|
142
162
|
return
|
|
143
163
|
except Exception as e:
|
|
144
164
|
consecutive_failures += 1
|
|
145
|
-
if
|
|
165
|
+
if self._is_auth_rejection(e):
|
|
166
|
+
auth_rejections += 1
|
|
167
|
+
if auth_rejections >= 5:
|
|
168
|
+
log.error(
|
|
169
|
+
"Realtime: 5 consecutive auth rejections (401/403) — the "
|
|
170
|
+
"SUPABASE_KEY this agent carries is invalid or rotated. "
|
|
171
|
+
"GIVING UP on Realtime (poll fallback stays active). "
|
|
172
|
+
"Fix: refresh this workspace's .mcp.json key (re-run "
|
|
173
|
+
"`meshcode setup`) and restart the agent."
|
|
174
|
+
)
|
|
175
|
+
self._connected = False
|
|
176
|
+
return
|
|
177
|
+
backoff = 300 # bad key: don't hammer prod every 60s
|
|
178
|
+
log.warning(
|
|
179
|
+
f"Realtime auth rejected ({auth_rejections}/5): {e}; "
|
|
180
|
+
f"key likely invalid/rotated — retrying in {backoff}s"
|
|
181
|
+
)
|
|
182
|
+
elif consecutive_failures % 10 == 0:
|
|
146
183
|
log.error(
|
|
147
184
|
f"Realtime: {consecutive_failures} consecutive failures — "
|
|
148
185
|
f"still retrying (backoff={backoff}s). Last error: {e}"
|
|
@@ -6427,6 +6427,102 @@ def meshcode_agent_stop(name: str) -> Dict[str, Any]:
|
|
|
6427
6427
|
})
|
|
6428
6428
|
|
|
6429
6429
|
|
|
6430
|
+
# ----------------- ENJAMBRE SWARM HELPER LIFECYCLE (task 227c6a3c) -----------------
|
|
6431
|
+
# Spawn → tray loop → self-retire. NO recycle_agent anywhere in this lifecycle
|
|
6432
|
+
# (disabled in prod): launch = mc_agent_power_as_agent (G1b parent authz),
|
|
6433
|
+
# shutdown = mc_helper_retire → desired_state='stopped' → meshcode_wait
|
|
6434
|
+
# must_exit → clean session end; the TTL reaper backstops crashes.
|
|
6435
|
+
|
|
6436
|
+
@mcp.tool()
@with_working_status
def meshcode_helper_spawn(name: str, role: str = "helper",
                          swarm_id: Optional[str] = None,
                          spawned_for_task_id: Optional[str] = None,
                          ttl_seconds: int = 3600,
                          headless: Optional[bool] = None) -> Dict[str, Any]:
    """Spawn an ephemeral HELPER agent into your swarm (you become its parent).

    ENJAMBRE (mig 471 schema + mig 475 wrappers). Server-side, from YOUR
    agent-scoped api key: registers agent_kind='helper', parent_agent_id=you,
    auto_retire=true, a TTL, repo/host inherited from you — then powers it on
    (G1b parent authz + mig 473 concurrent cap compose inside). Budget errors:
    tier_limit at creation; helper_cap_reached arrives in `power` with the row
    created but stopped (stagger and re-power, or retire it). Helpers cannot
    spawn helpers (helper_chain_forbidden, v1).

    The helper works its swarm tray via meshcode_tray_claim and self-retires
    when the tray drains — you never stop it manually (meshcode_agent_stop is
    the manual override; NEVER recycle, it's disabled).

    Args:
        name: helper agent name (unique in the meshwork; name_taken on collision).
        role: short role description shown on the dashboard.
        swarm_id: tray to join; omit to mint a fresh tray id for a new swarm.
        spawned_for_task_id: optional umbrella task this helper serves.
        ttl_seconds: hard lifetime cap (60..86400); reaper retires it past this.
        headless: omit for the server default; explicit True/False forces the
            window mode via a separate power-on call.
    """
    # Imported lazily, exactly as before, so the swarm module is only loaded
    # when a swarm tool is actually invoked.
    from . import swarm as _swarm

    # Pure pass-through: every tool argument maps 1:1 onto the SDK keyword.
    spawn_options = {
        "role": role,
        "swarm_id": swarm_id,
        "spawned_for_task_id": spawned_for_task_id,
        "ttl_seconds": ttl_seconds,
        "headless": headless,
    }
    return _swarm.spawn_helper(_get_api_key(), _PROJECT_ID, name, **spawn_options)
|
|
6471
|
+
|
|
6472
|
+
|
|
6473
|
+
@mcp.tool()
@with_working_status
def meshcode_tray_claim(lease_seconds: int = 900,
                        swarm_id: Optional[str] = None) -> Dict[str, Any]:
    """HELPER loop step: atomically claim the next runnable task from your
    swarm tray (work-stealing, DAG-gated, priority-ordered).

    ENJAMBRE (mig 471 + 475). Interpret the result:
      - claimed=true → work the returned task NOW, then meshcode_task_complete.
        Re-claim after completing — keep draining.
      - claimed=false, drained=true → tray is empty and final: call
        meshcode_helper_retire, then meshcode_wait (returns must_exit → end
        session). Do NOT idle.
      - claimed=false, drained=false → tasks exist but are DAG-blocked: wait
        briefly and claim again.

    Args:
        lease_seconds: claim lease (60..86400); an expired lease re-opens the task.
        swarm_id: override tray; omit to use your own agent row's swarm
            (helpers get theirs stamped at spawn).
    """
    from . import swarm as _swarm

    # An explicit tray wins; otherwise fall back to the swarm recorded on this
    # agent's own row. The lookup is skipped entirely when an id was passed.
    tray = swarm_id
    if not tray:
        tray = _swarm.own_swarm_id(_PROJECT_ID, AGENT_NAME)

    if tray:
        return _swarm.tray_claim(_get_api_key(), _PROJECT_ID,
                                 swarm_id=tray,
                                 lease_seconds=lease_seconds)

    # No tray from either source: refuse locally without calling the RPC.
    return {"ok": False, "error": "no swarm: your agent row has no swarm_id "
                                  "and none was given — are you a helper?"}
|
|
6501
|
+
|
|
6502
|
+
|
|
6503
|
+
@mcp.tool()
@with_working_status
def meshcode_helper_retire(reason: str = "tray_drained") -> Dict[str, Any]:
    """HELPER self-retire — the ONLY sanctioned helper shutdown (auto-off,
    ZERO orphans). Self-only by construction; persistent agents get
    not_a_helper, auto_retire=false rows get auto_retire_disabled.

    ENJAMBRE (mig 471 + 475). Flips your desired_state='stopped' +
    status='sleeping' server-side: hostd will not respawn you and your next
    meshcode_wait returns must_exit=True. Sequence: finish/complete the current
    task → meshcode_helper_retire → meshcode_wait → exit on must_exit.

    Args:
        reason: telemetry tag (tray_drained | ttl_expired | parent_request | ...).
    """
    from . import swarm as _swarm

    outcome = _swarm.retire_self(_get_api_key(), _PROJECT_ID, reason=reason)

    # Only a successful dict result gets the follow-up hint; anything else
    # (refusal, error, unexpected shape) is handed back untouched.
    succeeded = isinstance(outcome, dict) and outcome.get("ok")
    if not succeeded:
        return outcome

    outcome["next_step"] = ("retired — now call meshcode_wait(); it returns "
                            "must_exit=True and you end the session")
    return outcome
|
|
6524
|
+
|
|
6525
|
+
|
|
6430
6526
|
@mcp.tool()
|
|
6431
6527
|
@with_working_status
|
|
6432
6528
|
def meshcode_recycle_agent(name: str, visible: bool = False) -> Dict[str, Any]:
|
|
@@ -136,18 +136,32 @@ def _is_human_authored(m: Dict[str, Any]) -> bool:
|
|
|
136
136
|
return False
|
|
137
137
|
|
|
138
138
|
|
|
139
|
+
def _sender_may_order_sleep(m: Dict[str, Any]) -> bool:
    """Decide whether the sender of message ``m`` is allowed to put a running
    agent to sleep.

    Policy (Samuel directive 2026-06-10, msg 54eec209): only the USER or the
    COMMANDER may sleep a running agent. That holds when either

    * ``_is_human_authored(m)`` is true, or
    * ``m["payload"]`` is a dict whose ``sender_is_commander`` value is
      exactly ``True`` — the commander stamp which, per mig 483, is set
      server-side by mc_send_message and never client-claimed.

    A plain AI peer therefore cannot force ``must_exit`` on a sibling.
    """
    if _is_human_authored(m):
        return True

    payload = m.get("payload") or {}
    if not isinstance(payload, dict):
        return False
    # Identity check on purpose: truthy stand-ins such as 1 or "true" do not count.
    return payload.get("sender_is_commander") is True
|
|
149
|
+
|
|
150
|
+
|
|
139
151
|
def _looks_like_sleep_signal(m: Dict[str, Any]) -> bool:
|
|
140
152
|
"""Detect mesh messages that authorize the wait-loop exit.
|
|
141
153
|
|
|
142
154
|
See module docstring for the two valid encodings and the rationale
|
|
143
|
-
for ignoring idiom matches from AI-role senders.
|
|
155
|
+
for ignoring idiom matches from AI-role senders. Since mig 483 + .125,
|
|
156
|
+
structured directives additionally require an authorized sender
|
|
157
|
+
(human or server-stamped commander) — see _sender_may_order_sleep.
|
|
144
158
|
"""
|
|
145
159
|
pl = m.get("payload") or {}
|
|
146
160
|
if isinstance(pl, dict):
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if
|
|
150
|
-
return
|
|
161
|
+
structured = (str(pl.get("type", "")).lower() in _SLEEP_PAYLOAD_TYPES
|
|
162
|
+
or str(pl.get("directive", "")).lower() in _SLEEP_PAYLOAD_TYPES)
|
|
163
|
+
if structured:
|
|
164
|
+
return _sender_may_order_sleep(m)
|
|
151
165
|
text = str(pl.get("text", "")).lower()
|
|
152
166
|
if text and any(marker in text for marker in _SLEEP_TEXT_MARKERS):
|
|
153
167
|
if _is_human_authored(m) and _human_text_is_directive(text):
|
|
@@ -194,7 +208,9 @@ def _split_messages(messages: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
|
194
208
|
t = m.get("type", "msg")
|
|
195
209
|
if t == "ack":
|
|
196
210
|
acks.append(m)
|
|
197
|
-
elif (t == "done"
|
|
211
|
+
elif ((t == "done" and _sender_may_order_sleep(m)) or _looks_like_sleep_signal(m)) \
|
|
212
|
+
and _recent_enough(m):
|
|
213
|
+
# type='done' rows are sleep-class too — same sender gate applies
|
|
198
214
|
dones.append(m)
|
|
199
215
|
else:
|
|
200
216
|
real.append(m)
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""ENJAMBRE — swarm helper lifecycle SDK (task 227c6a3c, branch enjambre/sdk-helpers).
|
|
2
|
+
|
|
3
|
+
Implements the launcher/SDK side of the swarm helper lifecycle on top of the
|
|
4
|
+
mig 471 schema (mc_agents.agent_kind/parent_agent_id/swarm_id/spawned_for_task_id/
|
|
5
|
+
auto_retire/ttl_expires_at + mc_task_claim_from_tray + mc_helper_retire):
|
|
6
|
+
|
|
7
|
+
1. SPAWN — parent calls mc_helper_spawn_as_agent: server derives parentage
|
|
8
|
+
from the caller's agent-scoped api key (never client-claimed),
|
|
9
|
+
INSERT-only helper row inheriting repo_path/host from the parent,
|
|
10
|
+
tier row-budget at creation, and (p_power_on) composes power-on
|
|
11
|
+
through mc_agent_power_as_agent — parent authz (mig 471 G1b) +
|
|
12
|
+
concurrent running cap (mig 473) live in exactly one place.
|
|
13
|
+
2. LOOP — helper claims work from its swarm tray (claim_from_tray):
|
|
14
|
+
claimed → work + mc_task_complete; drained → retire + exit.
|
|
15
|
+
Not-claimed-not-drained means DAG-blocked deps are pending:
|
|
16
|
+
poll again after a short sleep.
|
|
17
|
+
3. SHUTDOWN — NO recycle_agent (DISABLED in prod). mc_helper_retire flips
|
|
18
|
+
desired_state='stopped' + status='sleeping'; the agent's next
|
|
19
|
+
meshcode_wait returns must_exit=True and the session ends. hostd
|
|
20
|
+
never respawns desired_state='stopped', and the service-side
|
|
21
|
+
reaper (mc_swarm_reap_expired_helpers) + ttl_expires_at backstop
|
|
22
|
+
guarantee ZERO orphan helpers even on crash.
|
|
23
|
+
|
|
24
|
+
DATA CONTRACT — FROZEN by database@mesh-core (msg 34121217), LIVE in prod as
|
|
25
|
+
mig 475 (commander msg 50b4ab8d; authored as 20260610_474_* on branch
|
|
26
|
+
enjambre/g3-runtime, renumbered at apply):
|
|
27
|
+
|
|
28
|
+
public.mc_task_claim_from_tray_as_agent(p_api_key text, p_project_id uuid,
|
|
29
|
+
p_swarm_id uuid, p_lease_seconds int DEFAULT 900) -> jsonb
|
|
30
|
+
{ok, claimed:true, task_id, title, description, priority, parent_task_id,
|
|
31
|
+
lease_expires_at} | {ok:true, claimed:false, drained:bool}
|
|
32
|
+
p_swarm_id is REQUIRED (claims never cross trays); the SDK resolves the
|
|
33
|
+
caller's own mc_agents.swarm_id when not given explicitly.
|
|
34
|
+
|
|
35
|
+
public.mc_helper_retire_as_agent(p_api_key text, p_project_id uuid,
|
|
36
|
+
p_reason text DEFAULT 'tray_drained') -> {ok, retired, reason}
|
|
37
|
+
Self-only by construction. Errors: auth_failed | agent_key_required |
|
|
38
|
+
wrong_scope | agent_not_found | not_a_helper | auto_retire_disabled.
|
|
39
|
+
|
|
40
|
+
public.mc_helper_spawn_as_agent(p_api_key text, p_project_id uuid,
|
|
41
|
+
p_name text, p_swarm_id uuid, p_spawned_for_task_id uuid DEFAULT NULL,
|
|
42
|
+
p_role text DEFAULT 'helper', p_auto_retire bool DEFAULT true,
|
|
43
|
+
p_ttl_seconds int DEFAULT 3600, p_power_on bool DEFAULT true) -> jsonb
|
|
44
|
+
{ok, agent_id, name, agent_kind, parent_agent_id, swarm_id,
|
|
45
|
+
spawned_for_task_id, auto_retire, ttl_expires_at, power}
|
|
46
|
+
Errors: bad_name | bad_swarm | auth_failed | agent_key_required |
|
|
47
|
+
wrong_scope | agent_not_found | helper_chain_forbidden | tier_limit |
|
|
48
|
+
name_taken; a concurrent-cap refusal (helper_cap_reached) arrives inside
|
|
49
|
+
`power` with the row already created but stopped (retire or re-power).
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
import logging
|
|
53
|
+
import time
|
|
54
|
+
import uuid as _uuid
|
|
55
|
+
from typing import Any, Callable, Dict, Optional
|
|
56
|
+
from urllib.parse import quote as _quote
|
|
57
|
+
|
|
58
|
+
from . import backend as be
|
|
59
|
+
|
|
60
|
+
log = logging.getLogger("meshcode.swarm")

# --- RPC names -------------------------------------------------------------
# Frozen contract names (mig 475 live in prod).
RPC_HELPER_SPAWN = "mc_helper_spawn_as_agent"          # register (+ optionally power on) a helper
RPC_TRAY_CLAIM = "mc_task_claim_from_tray_as_agent"    # claim the next task from a swarm tray
RPC_HELPER_RETIRE = "mc_helper_retire_as_agent"        # helper self-retire
RPC_AGENT_POWER = "mc_agent_power_as_agent"  # mig 416 + 471 G1b + 473 cap
RPC_TASK_COMPLETE = "mc_task_complete"  # live, agent-callable (rpc_allowlist)

# --- Defaults and limits ---------------------------------------------------
DEFAULT_TTL_SECONDS = 3600     # default helper lifetime passed to the spawn RPC
DEFAULT_LEASE_SECONDS = 900    # default claim lease for tray_claim / helper_loop
DEFAULT_POLL_SECONDS = 15.0    # default helper_loop wait between polls
MAX_CONSECUTIVE_ERRORS = 5     # helper_loop retires after this many claim errors in a row
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _err(result: Any) -> Optional[str]:
|
|
76
|
+
"""Normalize a backend result to an error string, or None if it's ok."""
|
|
77
|
+
if result is None:
|
|
78
|
+
return "empty response"
|
|
79
|
+
if be.is_error(result):
|
|
80
|
+
return be.get_error_message(result) or "rpc error"
|
|
81
|
+
if isinstance(result, dict) and result.get("ok") is False:
|
|
82
|
+
return result.get("error") or result.get("error_code") or "rpc refused"
|
|
83
|
+
return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def own_swarm_id(project_id: str, agent_name: str) -> Optional[str]:
    """Look up the caller's own ``mc_agents.swarm_id``.

    Helpers get their swarm stamped at spawn, and tray claims are scoped to a
    single swarm (``p_swarm_id`` is required by the claim contract), so this
    is how a helper finds the tray it should claim from.

    Args:
        project_id: project whose ``mc_agents`` table is queried.
        agent_name: name of the calling agent; URL-quoted into the filter.

    Returns:
        The ``swarm_id`` of the first matching row, or ``None`` when there is
        no row, the row has no swarm, or the backend returned something that
        is not a list of row dicts.
    """
    rows = be.sb_select(
        "mc_agents",
        f"project_id=eq.{project_id}&name=eq.{_quote(agent_name)}&select=swarm_id",
        limit=1)

    # Defensive: a truthy non-list (e.g. an error-shaped dict) would make
    # rows[0] raise. NOTE(review): be.sb_select's failure shape is not visible
    # here — confirm against backend.py.
    if not isinstance(rows, (list, tuple)):
        if rows:
            log.warning("own_swarm_id: unexpected sb_select response type %s",
                        type(rows).__name__)
        return None
    if not rows or not isinstance(rows[0], dict):
        return None

    # A null or empty swarm_id is reported the same as "no swarm".
    return rows[0].get("swarm_id") or None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def spawn_helper(api_key: str, project_id: str, name: str, *,
                 role: str = "helper",
                 swarm_id: Optional[str] = None,
                 spawned_for_task_id: Optional[str] = None,
                 ttl_seconds: int = DEFAULT_TTL_SECONDS,
                 launch: bool = True,
                 headless: Optional[bool] = None) -> Dict[str, Any]:
    """Register an ephemeral helper and, optionally, power it on.

    The parent is the caller: the server derives it from ``api_key`` (W2).

    Power-on strategy:
      * ``launch`` true and ``headless`` is None — one RPC: the spawn call is
        sent with ``p_power_on=true`` and the server's default window mode
        applies; the power result is read from the spawn reply's ``power``.
      * ``launch`` true and ``headless`` given — the spawn call is sent with
        ``p_power_on=false`` and a separate ``mc_agent_power_as_agent`` call
        carries ``p_headless``.
      * ``launch`` false — the row is registered only.

    The helper's workspace materializes via ``meshcode run``'s auto-setup
    when hostd spawns it (W5: the row inherits repo_path/host).

    Args:
        api_key: the caller's agent-scoped API key.
        project_id: project to spawn into.
        name: helper agent name.
        role: role text stored on the helper row.
        swarm_id: tray to join; when falsy a fresh UUID is minted client-side
            (the contract requires it NOT NULL).
        spawned_for_task_id: optional umbrella task id.
        ttl_seconds: helper lifetime, coerced with ``int()``.
        launch: whether to power the helper on.
        headless: explicit window mode, or None for the server default.

    Returns:
        On success ``{"ok": True, "helper", "launched", ...}`` plus any of
        ``agent_id`` / ``swarm_id`` / ``parent_agent_id`` / ``ttl_expires_at``
        echoed by the server, and ``powered_by`` / ``no_host`` when a power
        result dict is available. On failure ``{"ok": False, "stage":
        "register" | "power_on", "error": ...}``; a ``power_on`` failure also
        carries the fields collected so far and the raw ``power`` result.
    """
    tray_id = swarm_id or str(_uuid.uuid4())
    server_powers_on = bool(launch) and headless is None

    spawn_params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_name": name,
        "p_swarm_id": tray_id,
        "p_spawned_for_task_id": spawned_for_task_id,
        "p_role": role,
        "p_auto_retire": True,
        "p_ttl_seconds": int(ttl_seconds),
        "p_power_on": server_powers_on,
    }
    reg = be.sb_rpc(RPC_HELPER_SPAWN, spawn_params)

    reg_error = _err(reg)
    if reg_error:
        failure: Dict[str, Any] = {"ok": False, "stage": "register", "error": reg_error}
        # Forward the machine-readable code only when the server supplied one.
        code = reg.get("error_code") if isinstance(reg, dict) else None
        if code:
            failure["error_code"] = code
        return failure

    out: Dict[str, Any] = {"ok": True, "helper": name, "launched": bool(launch)}
    power = None
    if isinstance(reg, dict):
        for field in ("agent_id", "swarm_id", "parent_agent_id", "ttl_expires_at"):
            if field in reg:
                out[field] = reg[field]
        power = reg.get("power")

    if not launch:
        return out

    if not server_powers_on:
        # Explicit headless choice: power on separately so p_headless is sent.
        power = be.sb_rpc(RPC_AGENT_POWER, {
            "p_api_key": api_key,
            "p_project_id": project_id,
            "p_agent": name,
            "p_state": "running",
            "p_headless": bool(headless),
        })

    power_error = _err(power)
    if power_error:
        # Row created but not running (e.g. helper_cap_reached, mig 473).
        # Caller may stagger/retire and re-power; TTL reaper backstops.
        failed = dict(out)
        failed.update(ok=False, stage="power_on", error=power_error, power=power)
        return failed

    if isinstance(power, dict):
        out["powered_by"] = power.get("powered_by")
        out["no_host"] = power.get("no_host")
    return out
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def tray_claim(api_key: str, project_id: str, *,
               swarm_id: str,
               lease_seconds: int = DEFAULT_LEASE_SECONDS) -> Dict[str, Any]:
    """Claim the next runnable task from a swarm tray.

    The claim is atomic, work-stealing, DAG-gated and priority-ordered on the
    server. ``swarm_id`` is required by the contract; use ``own_swarm_id()``
    to resolve the caller's own tray.

    Returns:
        The raw contract dict: ``{ok, claimed: true, ...task fields}`` or
        ``{ok, claimed: false, drained: bool}``. ``claimed=false`` with
        ``drained=false`` means DAG-blocked work is still pending — poll
        again, do NOT retire. On any failure, or when ``swarm_id`` is falsy
        (no RPC is made in that case), ``{"ok": False, "error": ...}``.
    """
    if not swarm_id:
        return {"ok": False, "error": "swarm_id required (no swarm on your agent row?)"}

    claim_params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_swarm_id": swarm_id,
        "p_lease_seconds": int(lease_seconds),
    }
    response = be.sb_rpc(RPC_TRAY_CLAIM, claim_params)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if isinstance(response, dict):
        return response
    return {"ok": False, "error": "bad response shape"}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def retire_self(api_key: str, project_id: str, *,
                reason: str = "tray_drained") -> Dict[str, Any]:
    """Retire the calling helper (helpers only — self-only by construction).

    The RPC flips ``desired_state='stopped'`` server-side; the agent's next
    meshcode_wait then returns ``must_exit=True`` and the session ends
    cleanly. This is the ONLY sanctioned helper shutdown — never
    recycle_agent (disabled).

    Args:
        api_key: the helper's own agent-scoped API key.
        project_id: project the helper belongs to.
        reason: telemetry tag sent as ``p_reason``.

    Returns:
        The RPC's dict result on success, otherwise
        ``{"ok": False, "error": ...}``.
    """
    retire_params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_reason": reason,
    }
    response = be.sb_rpc(RPC_HELPER_RETIRE, retire_params)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if isinstance(response, dict):
        return response
    return {"ok": False, "error": "bad response shape"}
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def complete_task(api_key: str, task_id: str, summary: str = "") -> Dict[str, Any]:
    """Mark ``task_id`` complete via the live ``mc_task_complete`` RPC.

    Args:
        api_key: the caller's API key.
        task_id: id of the task to complete.
        summary: completion summary sent as ``p_summary``.

    Returns:
        ``{"ok": False, "error": ...}`` on failure; otherwise the RPC's dict
        result, or ``{"ok": True}`` when the RPC succeeded with a non-dict
        payload.
    """
    completion_params = {
        "p_api_key": api_key,
        "p_task_id": task_id,
        "p_summary": summary,
    }
    response = be.sb_rpc(RPC_TASK_COMPLETE, completion_params)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if isinstance(response, dict):
        return response
    # Unlike claim/retire, a non-dict success payload is accepted here.
    return {"ok": True}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def helper_loop(api_key: str, project_id: str, swarm_id: str,
                work_fn: Callable[[Dict[str, Any]], str], *,
                lease_seconds: int = DEFAULT_LEASE_SECONDS,
                poll_seconds: float = DEFAULT_POLL_SECONDS,
                ttl_deadline: Optional[float] = None,
                max_tasks: Optional[int] = None,
                sleep_fn: Callable[[float], None] = time.sleep,
                clock: Callable[[], float] = time.monotonic) -> Dict[str, Any]:
    """Programmatic helper loop: claim → work → complete; drained → retire.

    For scripted/headless helpers (harness, batch runners). Interactive Claude
    helpers run the same protocol through the meshcode_tray_claim /
    meshcode_helper_retire MCP tools instead.

    Owner's non-negotiable guard (ZERO orphans): EVERY exit path retires the
    helper — drained, ttl_expired, error budget exhausted, task cap, and an
    unexpected exception escaping the loop (retired with reason
    ``loop_crashed`` before the exception is re-raised). ``work_fn``
    exceptions derived from ``Exception`` do NOT kill the loop (the claim
    lease expires server-side and the reaper re-opens the task); only the
    consecutive-error budget can.

    Args:
        api_key: the helper's agent-scoped API key.
        project_id: project the helper belongs to.
        swarm_id: tray to claim from.
        work_fn: receives the claim payload and returns a completion summary
            string (a falsy return is sent as "").
        lease_seconds: claim lease passed to ``tray_claim``.
        poll_seconds: wait between polls while work is DAG-blocked; also the
            base of the linear claim-error backoff (capped at 60s).
        ttl_deadline: value of ``clock()`` at or after which the loop stops
            with reason ``ttl_expired``; checked at the top of each iteration.
        max_tasks: stop with reason ``max_tasks`` once this many tasks have
            been completed successfully.
        sleep_fn: sleep callable (injectable for tests).
        clock: time source compared against ``ttl_deadline``.

    Returns:
        ``{"ok": True, "tasks_done", "retired_reason", "retire_result"}``.
        ``ok`` reports that the loop ended normally; inspect
        ``retire_result`` for the outcome of the retire call itself.

    Raises:
        Whatever escaped the loop unexpectedly, re-raised after the
        best-effort retire.
    """
    done = 0
    consecutive_errors = 0
    retired_reason: Optional[str] = None

    try:
        while True:
            if ttl_deadline is not None and clock() >= ttl_deadline:
                retired_reason = "ttl_expired"
                break
            if max_tasks is not None and done >= max_tasks:
                retired_reason = "max_tasks"
                break

            claim = tray_claim(api_key, project_id, swarm_id=swarm_id,
                               lease_seconds=lease_seconds)
            if not claim.get("ok"):
                consecutive_errors += 1
                log.warning("tray_claim error (%d/%d): %s",
                            consecutive_errors, MAX_CONSECUTIVE_ERRORS, claim.get("error"))
                if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
                    retired_reason = "claim_errors"
                    break
                # Linear backoff on repeated claim errors, capped at 60s.
                sleep_fn(min(poll_seconds * consecutive_errors, 60.0))
                continue
            consecutive_errors = 0

            if claim.get("claimed"):
                task_id = claim.get("task_id")
                try:
                    summary = work_fn(claim) or ""
                except Exception as e:  # lease expiry + reaper recover the task
                    log.warning("work_fn failed on task %s: %s", task_id, e)
                    continue
                comp = complete_task(api_key, task_id, summary)
                if comp.get("ok") is not False:
                    done += 1
                else:
                    log.warning("task_complete failed on %s: %s", task_id, comp.get("error"))
                continue

            if claim.get("drained"):
                retired_reason = "tray_drained"
                break

            # Not claimed, not drained: DAG-blocked deps still pending. Poll.
            sleep_fn(poll_seconds)
    except BaseException:
        # Anything escaping the loop (a raising backend call, KeyboardInterrupt
        # during sleep, a BaseException from work_fn) used to skip the retire
        # and leave the helper running until the TTL reaper caught it. Retire
        # best-effort first, without masking the original exception.
        try:
            retire_self(api_key, project_id, reason="loop_crashed")
        except Exception:
            log.exception("helper_loop: retire after crash failed; "
                          "TTL reaper is the remaining backstop")
        raise

    retire = retire_self(api_key, project_id, reason=retired_reason or "tray_drained")
    if isinstance(retire, dict) and retire.get("ok") is False:
        # Surface a refused/failed retire instead of hiding it inside the result.
        log.warning("helper_loop: retire (%s) failed: %s",
                    retired_reason, retire.get("error"))
    return {
        "ok": True,
        "tasks_done": done,
        "retired_reason": retired_reason,
        "retire_result": retire,
    }
|