meshcode 2.11.123__tar.gz → 2.11.124__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {meshcode-2.11.123 → meshcode-2.11.124}/PKG-INFO +1 -1
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/__init__.py +1 -1
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/realtime.py +40 -3
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/server.py +96 -0
- meshcode-2.11.124/meshcode/meshcode_mcp/swarm.py +292 -0
- meshcode-2.11.124/meshcode/meshcode_mcp/test_swarm.py +279 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/PKG-INFO +1 -1
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/SOURCES.txt +2 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/pyproject.toml +1 -1
- {meshcode-2.11.123 → meshcode-2.11.124}/README.md +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/__main__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/_session_handoff_template.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/_stop_hook_template.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/ascii_art.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/atomic_push.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/claude_update.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/cli.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/comms_v4.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/compat.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/daemon.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/date_parse.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/doctor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/error_hints.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/exceptions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/hooks/__init__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/hooks/repo_path_lock.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/hostd.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/invites.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/launcher.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/launcher_install.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/__init__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/__main__.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/backend.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/sleep_signals.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_backend.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_realtime.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/preferences.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/protocol_handler.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/protocol_v2.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/quickstart.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/rpc_allowlist.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/run_agent.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/scripts/check_secrets.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/scripts/race_rate_harness.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/secrets.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/self_update.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/setup_clients.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/supervisor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/up.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode/upload.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/dependency_links.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/entry_points.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/requires.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/meshcode.egg-info/top_level.txt +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/setup.cfg +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_auto_update_hardening.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_autonomous_closegap_1.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_autonomous_closegap_2.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_autonomous_closegap_3.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_autonomous_prompt_inject.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_boot_bug_regression.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_color_truecolor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_core.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_cross_agent_messaging.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_date_parse.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_doctor.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_epistemic_v1_python_sdk.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_epistemic_v1_stop_conditions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_esc_deaf_state.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_exceptions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_file_upload.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_init_device_code.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_install_guard.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_lease_sigterm_release.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_mark_read_batch.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_marketplace_ratings.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_migration_integrity.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_realtime_event_freshness.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_rls_cross_tenant.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_rpc_grants.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_rpc_migrations.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_run_agent_dry_run.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_run_agent_no_server_import.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_security_regressions.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_self_update_user_site.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_sentinel.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_setup_path.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_sleep_signals.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_status_enum_coverage.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_stay_on_loop_hook.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_swarm_events.py +0 -0
- {meshcode-2.11.123 → meshcode-2.11.124}/tests/test_wait_open_tasks_contradiction.py +0 -0
|
@@ -125,24 +125,61 @@ class RealtimeListener:
|
|
|
125
125
|
self._connected = False
|
|
126
126
|
self._subscription_ok = False
|
|
127
127
|
|
|
128
|
+
@staticmethod
def _is_auth_rejection(e: Exception) -> bool:
    """Report whether ``e`` carries an HTTP 401 or 403 status.

    Such a handshake rejection means a BAD KEY, not a transient network
    blip (task c49096c1, edge logs: 66x 401/hr): a client with an
    invalid/rotated apikey retried every <=60s forever, hammering prod.
    Auth errors don't heal by retrying.

    The status is looked up on ``e.response.status_code`` first and, when
    that yields nothing, on ``e.status_code`` (older websockets).

    Args:
        e: the exception raised by the connect/listen attempt.

    Returns:
        True only when the status found is 401 or 403; False otherwise,
        including when inspecting the exception itself raises.
    """
    try:
        response = getattr(e, "response", None)
        code = getattr(response, "status_code", None)
        if code is None:
            # Fall back to the attribute carried directly on the exception.
            code = getattr(e, "status_code", None)
        rejected = code in (401, 403)
    except Exception:
        # Classification is best-effort: never let it raise into the caller.
        rejected = False
    return rejected
|
|
141
|
+
|
|
128
142
|
async def _run(self) -> None:
|
|
129
143
|
"""Outer loop: reconnect with exponential backoff on disconnect.
|
|
130
144
|
|
|
131
|
-
NEVER gives up — keeps retrying with capped backoff
|
|
132
|
-
The MCP server must stay alive regardless of Realtime health.
|
|
145
|
+
Transient errors: NEVER gives up — keeps retrying with capped backoff
|
|
146
|
+
(max 60s). The MCP server must stay alive regardless of Realtime health.
|
|
147
|
+
AUTH rejections (401/403) are different (task c49096c1): the key is
|
|
148
|
+
bad and retrying can't fix it — back off to 5 min immediately and STOP
|
|
149
|
+
after 5 consecutive auth rejections with one actionable log line.
|
|
150
|
+
Message delivery survives via the wait-loop poll fallback (mc_wait_poll).
|
|
133
151
|
"""
|
|
134
152
|
backoff = 1
|
|
135
153
|
consecutive_failures = 0
|
|
154
|
+
auth_rejections = 0
|
|
136
155
|
while not self._stop.is_set():
|
|
137
156
|
try:
|
|
138
157
|
await self._connect_and_listen()
|
|
139
158
|
backoff = 1 # reset on clean disconnect
|
|
140
159
|
consecutive_failures = 0
|
|
160
|
+
auth_rejections = 0
|
|
141
161
|
except asyncio.CancelledError:
|
|
142
162
|
return
|
|
143
163
|
except Exception as e:
|
|
144
164
|
consecutive_failures += 1
|
|
145
|
-
if
|
|
165
|
+
if self._is_auth_rejection(e):
|
|
166
|
+
auth_rejections += 1
|
|
167
|
+
if auth_rejections >= 5:
|
|
168
|
+
log.error(
|
|
169
|
+
"Realtime: 5 consecutive auth rejections (401/403) — the "
|
|
170
|
+
"SUPABASE_KEY this agent carries is invalid or rotated. "
|
|
171
|
+
"GIVING UP on Realtime (poll fallback stays active). "
|
|
172
|
+
"Fix: refresh this workspace's .mcp.json key (re-run "
|
|
173
|
+
"`meshcode setup`) and restart the agent."
|
|
174
|
+
)
|
|
175
|
+
self._connected = False
|
|
176
|
+
return
|
|
177
|
+
backoff = 300 # bad key: don't hammer prod every 60s
|
|
178
|
+
log.warning(
|
|
179
|
+
f"Realtime auth rejected ({auth_rejections}/5): {e}; "
|
|
180
|
+
f"key likely invalid/rotated — retrying in {backoff}s"
|
|
181
|
+
)
|
|
182
|
+
elif consecutive_failures % 10 == 0:
|
|
146
183
|
log.error(
|
|
147
184
|
f"Realtime: {consecutive_failures} consecutive failures — "
|
|
148
185
|
f"still retrying (backoff={backoff}s). Last error: {e}"
|
|
@@ -6427,6 +6427,102 @@ def meshcode_agent_stop(name: str) -> Dict[str, Any]:
|
|
|
6427
6427
|
})
|
|
6428
6428
|
|
|
6429
6429
|
|
|
6430
|
+
# ----------------- ENJAMBRE SWARM HELPER LIFECYCLE (task 227c6a3c) -----------------
|
|
6431
|
+
# Spawn → tray loop → self-retire. NO recycle_agent anywhere in this lifecycle
|
|
6432
|
+
# (disabled in prod): launch = mc_agent_power_as_agent (G1b parent authz),
|
|
6433
|
+
# shutdown = mc_helper_retire → desired_state='stopped' → meshcode_wait
|
|
6434
|
+
# must_exit → clean session end; the TTL reaper backstops crashes.
|
|
6435
|
+
|
|
6436
|
+
@mcp.tool()
@with_working_status
def meshcode_helper_spawn(name: str, role: str = "helper",
                          swarm_id: Optional[str] = None,
                          spawned_for_task_id: Optional[str] = None,
                          ttl_seconds: int = 3600,
                          headless: Optional[bool] = None) -> Dict[str, Any]:
    """Spawn an ephemeral HELPER agent into your swarm (you become its parent).

    ENJAMBRE (mig 471 schema + mig 475 wrappers). Server-side, from YOUR
    agent-scoped api key: registers agent_kind='helper', parent_agent_id=you,
    auto_retire=true, a TTL, repo/host inherited from you — then powers it on
    (G1b parent authz + mig 473 concurrent cap compose inside). Budget errors:
    tier_limit at creation; helper_cap_reached arrives in `power` with the row
    created but stopped (stagger and re-power, or retire it). Helpers cannot
    spawn helpers (helper_chain_forbidden, v1).

    The helper works its swarm tray via meshcode_tray_claim and self-retires
    when the tray drains — you never stop it manually (meshcode_agent_stop is
    the manual override; NEVER recycle, it's disabled).

    Args:
        name: helper agent name (unique in the meshwork; name_taken on collision).
        role: short role description shown on the dashboard.
        swarm_id: tray to join; omit to mint a fresh tray id for a new swarm.
        spawned_for_task_id: optional umbrella task this helper serves.
        ttl_seconds: hard lifetime cap (60..86400); reaper retires it past this.
        headless: omit for the server default; explicit True/False forces the
            window mode via a separate power-on call.
    """
    # NOTE(review): the docstring above is kept word-for-word — presumably
    # @mcp.tool() surfaces it as the tool description; confirm before rewording.
    from . import swarm as swarm_sdk

    # Pure pass-through: the SDK function owns every decision; this wrapper
    # only supplies the caller's api key and project id.
    spawn_options = {
        "role": role,
        "swarm_id": swarm_id,
        "spawned_for_task_id": spawned_for_task_id,
        "ttl_seconds": ttl_seconds,
        "headless": headless,
    }
    return swarm_sdk.spawn_helper(_get_api_key(), _PROJECT_ID, name,
                                  **spawn_options)
|
|
6471
|
+
|
|
6472
|
+
|
|
6473
|
+
@mcp.tool()
@with_working_status
def meshcode_tray_claim(lease_seconds: int = 900,
                        swarm_id: Optional[str] = None) -> Dict[str, Any]:
    """HELPER loop step: atomically claim the next runnable task from your
    swarm tray (work-stealing, DAG-gated, priority-ordered).

    ENJAMBRE (mig 471 + 475). Interpret the result:
      - claimed=true → work the returned task NOW, then meshcode_task_complete.
        Re-claim after completing — keep draining.
      - claimed=false, drained=true → tray is empty and final: call
        meshcode_helper_retire, then meshcode_wait (returns must_exit → end
        session). Do NOT idle.
      - claimed=false, drained=false → tasks exist but are DAG-blocked: wait
        briefly and claim again.

    Args:
        lease_seconds: claim lease (60..86400); an expired lease re-opens the task.
        swarm_id: override tray; omit to use your own agent row's swarm
            (helpers get theirs stamped at spawn).
    """
    # NOTE(review): the docstring above is kept word-for-word — presumably
    # @mcp.tool() surfaces it as the tool description; confirm before rewording.
    from . import swarm as swarm_sdk

    # An explicit tray wins; otherwise look up the one on this agent's row.
    tray = swarm_id if swarm_id else swarm_sdk.own_swarm_id(_PROJECT_ID, AGENT_NAME)
    if tray:
        return swarm_sdk.tray_claim(_get_api_key(), _PROJECT_ID,
                                    swarm_id=tray,
                                    lease_seconds=lease_seconds)
    # No tray could be resolved: report it instead of calling the RPC.
    return {"ok": False, "error": "no swarm: your agent row has no swarm_id "
                                  "and none was given — are you a helper?"}
|
|
6501
|
+
|
|
6502
|
+
|
|
6503
|
+
@mcp.tool()
@with_working_status
def meshcode_helper_retire(reason: str = "tray_drained") -> Dict[str, Any]:
    """HELPER self-retire — the ONLY sanctioned helper shutdown (auto-off,
    ZERO orphans). Self-only by construction; persistent agents get
    not_a_helper, auto_retire=false rows get auto_retire_disabled.

    ENJAMBRE (mig 471 + 475). Flips your desired_state='stopped' +
    status='sleeping' server-side: hostd will not respawn you and your next
    meshcode_wait returns must_exit=True. Sequence: finish/complete the current
    task → meshcode_helper_retire → meshcode_wait → exit on must_exit.

    Args:
        reason: telemetry tag (tray_drained | ttl_expired | parent_request | ...).
    """
    # NOTE(review): the docstring above is kept word-for-word — presumably
    # @mcp.tool() surfaces it as the tool description; confirm before rewording.
    from . import swarm as swarm_sdk

    outcome = swarm_sdk.retire_self(_get_api_key(), _PROJECT_ID, reason=reason)
    # Failures and non-dict results go back to the caller untouched.
    if not isinstance(outcome, dict) or not outcome.get("ok"):
        return outcome
    # On success, tell the agent what to do next.
    outcome["next_step"] = ("retired — now call meshcode_wait(); it returns "
                            "must_exit=True and you end the session")
    return outcome
|
|
6524
|
+
|
|
6525
|
+
|
|
6430
6526
|
@mcp.tool()
|
|
6431
6527
|
@with_working_status
|
|
6432
6528
|
def meshcode_recycle_agent(name: str, visible: bool = False) -> Dict[str, Any]:
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""ENJAMBRE — swarm helper lifecycle SDK (task 227c6a3c, branch enjambre/sdk-helpers).
|
|
2
|
+
|
|
3
|
+
Implements the launcher/SDK side of the swarm helper lifecycle on top of the
|
|
4
|
+
mig 471 schema (mc_agents.agent_kind/parent_agent_id/swarm_id/spawned_for_task_id/
|
|
5
|
+
auto_retire/ttl_expires_at + mc_task_claim_from_tray + mc_helper_retire):
|
|
6
|
+
|
|
7
|
+
1. SPAWN — parent calls mc_helper_spawn_as_agent: server derives parentage
|
|
8
|
+
from the caller's agent-scoped api key (never client-claimed),
|
|
9
|
+
INSERT-only helper row inheriting repo_path/host from the parent,
|
|
10
|
+
tier row-budget at creation, and (p_power_on) composes power-on
|
|
11
|
+
through mc_agent_power_as_agent — parent authz (mig 471 G1b) +
|
|
12
|
+
concurrent running cap (mig 473) live in exactly one place.
|
|
13
|
+
2. LOOP — helper claims work from its swarm tray (claim_from_tray):
|
|
14
|
+
claimed → work + mc_task_complete; drained → retire + exit.
|
|
15
|
+
Not-claimed-not-drained means DAG-blocked deps are pending:
|
|
16
|
+
poll again after a short sleep.
|
|
17
|
+
3. SHUTDOWN — NO recycle_agent (DISABLED in prod). mc_helper_retire flips
|
|
18
|
+
desired_state='stopped' + status='sleeping'; the agent's next
|
|
19
|
+
meshcode_wait returns must_exit=True and the session ends. hostd
|
|
20
|
+
never respawns desired_state='stopped', and the service-side
|
|
21
|
+
reaper (mc_swarm_reap_expired_helpers) + ttl_expires_at backstop
|
|
22
|
+
guarantee ZERO orphan helpers even on crash.
|
|
23
|
+
|
|
24
|
+
DATA CONTRACT — FROZEN by database@mesh-core (msg 34121217), LIVE in prod as
|
|
25
|
+
mig 475 (commander msg 50b4ab8d; authored as 20260610_474_* on branch
|
|
26
|
+
enjambre/g3-runtime, renumbered at apply):
|
|
27
|
+
|
|
28
|
+
public.mc_task_claim_from_tray_as_agent(p_api_key text, p_project_id uuid,
|
|
29
|
+
p_swarm_id uuid, p_lease_seconds int DEFAULT 900) -> jsonb
|
|
30
|
+
{ok, claimed:true, task_id, title, description, priority, parent_task_id,
|
|
31
|
+
lease_expires_at} | {ok:true, claimed:false, drained:bool}
|
|
32
|
+
p_swarm_id is REQUIRED (claims never cross trays); the SDK resolves the
|
|
33
|
+
caller's own mc_agents.swarm_id when not given explicitly.
|
|
34
|
+
|
|
35
|
+
public.mc_helper_retire_as_agent(p_api_key text, p_project_id uuid,
|
|
36
|
+
p_reason text DEFAULT 'tray_drained') -> {ok, retired, reason}
|
|
37
|
+
Self-only by construction. Errors: auth_failed | agent_key_required |
|
|
38
|
+
wrong_scope | agent_not_found | not_a_helper | auto_retire_disabled.
|
|
39
|
+
|
|
40
|
+
public.mc_helper_spawn_as_agent(p_api_key text, p_project_id uuid,
|
|
41
|
+
p_name text, p_swarm_id uuid, p_spawned_for_task_id uuid DEFAULT NULL,
|
|
42
|
+
p_role text DEFAULT 'helper', p_auto_retire bool DEFAULT true,
|
|
43
|
+
p_ttl_seconds int DEFAULT 3600, p_power_on bool DEFAULT true) -> jsonb
|
|
44
|
+
{ok, agent_id, name, agent_kind, parent_agent_id, swarm_id,
|
|
45
|
+
spawned_for_task_id, auto_retire, ttl_expires_at, power}
|
|
46
|
+
Errors: bad_name | bad_swarm | auth_failed | agent_key_required |
|
|
47
|
+
wrong_scope | agent_not_found | helper_chain_forbidden | tier_limit |
|
|
48
|
+
name_taken; a concurrent-cap refusal (helper_cap_reached) arrives inside
|
|
49
|
+
`power` with the row already created but stopped (retire or re-power).
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
import logging
|
|
53
|
+
import time
|
|
54
|
+
import uuid as _uuid
|
|
55
|
+
from typing import Any, Callable, Dict, Optional
|
|
56
|
+
from urllib.parse import quote as _quote
|
|
57
|
+
|
|
58
|
+
from . import backend as be
|
|
59
|
+
|
|
60
|
+
# Module-level logger for the swarm helper lifecycle SDK.
log = logging.getLogger("meshcode.swarm")

# Frozen contract names (mig 475 live in prod).
RPC_HELPER_SPAWN = "mc_helper_spawn_as_agent"
RPC_TRAY_CLAIM = "mc_task_claim_from_tray_as_agent"
RPC_HELPER_RETIRE = "mc_helper_retire_as_agent"

# Pre-existing RPCs the lifecycle composes with.
RPC_AGENT_POWER = "mc_agent_power_as_agent"  # mig 416 + 471 G1b + 473 cap
RPC_TASK_COMPLETE = "mc_task_complete"  # live, agent-callable (rpc_allowlist)

# Client-side defaults; the lease and TTL values match the DEFAULTs listed in
# the module docstring's data contract.
DEFAULT_TTL_SECONDS = 3600      # spawn_helper: helper lifetime cap
DEFAULT_LEASE_SECONDS = 900     # tray_claim / helper_loop: claim lease
DEFAULT_POLL_SECONDS = 15.0     # helper_loop: pause between polls
MAX_CONSECUTIVE_ERRORS = 5      # helper_loop: failed claims in a row before retiring
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _err(result: Any) -> Optional[str]:
    """Turn a backend result into an error string, or None when it succeeded.

    Checks, in order:
      1. ``None``                         -> "empty response"
      2. ``be.is_error(result)``          -> backend message, else "rpc error"
      3. dict with ``ok`` exactly False   -> its ``error``, else ``error_code``,
                                             else "rpc refused"
    Anything else counts as success.
    """
    if result is None:
        return "empty response"

    if be.is_error(result):
        message = be.get_error_message(result)
        return message if message else "rpc error"

    refused = isinstance(result, dict) and result.get("ok") is False
    if not refused:
        return None

    # Prefer the readable message, then the code, then a generic label.
    for key in ("error", "error_code"):
        detail = result.get(key)
        if detail:
            return detail
    return "rpc refused"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def own_swarm_id(project_id: str, agent_name: str) -> Optional[str]:
    """Look up the ``swarm_id`` stored on the caller's own ``mc_agents`` row.

    Helpers get theirs stamped at spawn. Claims are tray-scoped —
    p_swarm_id is required by contract — so the SDK resolves it here when
    the caller does not pass one.

    Args:
        project_id: project whose agents are searched.
        agent_name: agent name; URL-quoted before it is put in the filter.

    Returns:
        The row's swarm_id, or None when no row matched or the value is empty.
    """
    query = "project_id=eq.{}&name=eq.{}&select=swarm_id".format(
        project_id, _quote(agent_name))
    rows = be.sb_select("mc_agents", query, limit=1)
    if not rows:
        return None
    found = rows[0].get("swarm_id")
    return found if found else None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def spawn_helper(api_key: str, project_id: str, name: str, *,
                 role: str = "helper",
                 swarm_id: Optional[str] = None,
                 spawned_for_task_id: Optional[str] = None,
                 ttl_seconds: int = DEFAULT_TTL_SECONDS,
                 launch: bool = True,
                 headless: Optional[bool] = None) -> Dict[str, Any]:
    """Register an ephemeral helper and, when asked, power it on.

    Parent = caller (server-derived, W2).

    swarm_id=None mints a fresh tray id client-side (the contract requires
    it NOT NULL — helper invariant). headless=None lets the server's power-on
    default apply (single RPC); an explicit True/False spawns with
    p_power_on=false and makes a separate mc_agent_power_as_agent call
    carrying p_headless. The helper's workspace materializes via
    `meshcode run`'s auto-setup when hostd spawns it (W5: row inherits
    repo_path/host).

    Args:
        api_key: caller's api key, forwarded as p_api_key.
        project_id: project id, forwarded as p_project_id.
        name: helper agent name.
        role: role text, forwarded as p_role.
        swarm_id: tray to join; falsy mints a new UUID.
        spawned_for_task_id: optional task id, forwarded as-is.
        ttl_seconds: lifetime cap, coerced with int().
        launch: when falsy the helper is only registered, never powered on.
        headless: None for the single-RPC path; True/False for the
            two-RPC path.

    Returns:
        On success: ``{"ok": True, "helper", "launched", ...}`` plus whichever
        of agent_id / swarm_id / parent_agent_id / ttl_expires_at the
        registration returned, and powered_by / no_host when a power result
        dict is available.
        On failure: ``{"ok": False, "stage": "register" | "power_on",
        "error": ...}``; a register failure adds error_code when present, a
        power_on failure keeps the registration fields and adds the raw
        ``power`` result.
    """
    tray_id = swarm_id if swarm_id else str(_uuid.uuid4())
    wants_launch = bool(launch)
    # Single-RPC path: the server powers the helper on during the spawn call.
    compose_power = wants_launch and headless is None

    spawn_params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_name": name,
        "p_swarm_id": tray_id,
        "p_spawned_for_task_id": spawned_for_task_id,
        "p_role": role,
        "p_auto_retire": True,
        "p_ttl_seconds": int(ttl_seconds),
        "p_power_on": compose_power,
    }
    registration = be.sb_rpc(RPC_HELPER_SPAWN, spawn_params)
    reg_is_dict = isinstance(registration, dict)

    reg_error = _err(registration)
    if reg_error:
        failure: Dict[str, Any] = {"ok": False, "stage": "register",
                                   "error": reg_error}
        if reg_is_dict and registration.get("error_code"):
            failure["error_code"] = registration.get("error_code")
        return failure

    out: Dict[str, Any] = {"ok": True, "helper": name, "launched": wants_launch}
    power = None
    if reg_is_dict:
        for field in ("agent_id", "swarm_id", "parent_agent_id", "ttl_expires_at"):
            if field in registration:
                out[field] = registration[field]
        power = registration.get("power")

    if not wants_launch:
        return out

    if not compose_power:
        # Explicit window mode: power on separately so p_headless is sent.
        power = be.sb_rpc(RPC_AGENT_POWER, {
            "p_api_key": api_key,
            "p_project_id": project_id,
            "p_agent": name,
            "p_state": "running",
            "p_headless": bool(headless),
        })

    power_error = _err(power)
    if power_error:
        # Row created but not running (e.g. helper_cap_reached, mig 473).
        # Caller may stagger/retire and re-power; TTL reaper backstops.
        # NOTE(review): this result still carries launched=True although the
        # helper is not running — confirm whether callers read `launched` as
        # "requested" or "running".
        return {**out, "ok": False, "stage": "power_on", "error": power_error,
                "power": power}
    if isinstance(power, dict):
        out["powered_by"] = power.get("powered_by")
        out["no_host"] = power.get("no_host")
    return out
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def tray_claim(api_key: str, project_id: str, *,
               swarm_id: str,
               lease_seconds: int = DEFAULT_LEASE_SECONDS) -> Dict[str, Any]:
    """Claim the next runnable task from a swarm tray.

    The claim is atomic, work-stealing, DAG-gated and priority-ordered.
    p_swarm_id is contract-required; use own_swarm_id() to resolve yours.

    Args:
        api_key: caller's api key, forwarded as p_api_key.
        project_id: project id, forwarded as p_project_id.
        swarm_id: tray to claim from; a falsy value is rejected locally
            without calling the RPC.
        lease_seconds: claim lease, coerced with int().

    Returns:
        The raw contract shape — ``{ok, claimed, task...}`` or
        ``{ok, claimed:false, drained:bool}`` — or ``{"ok": False,
        "error": ...}`` on failure. drained=false with claimed=false means
        DAG-blocked work is still pending — poll again, do NOT retire.
    """
    if not swarm_id:
        return {"ok": False, "error": "swarm_id required (no swarm on your agent row?)"}

    payload = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_swarm_id": swarm_id,
        "p_lease_seconds": int(lease_seconds),
    }
    response = be.sb_rpc(RPC_TRAY_CLAIM, payload)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if not isinstance(response, dict):
        return {"ok": False, "error": "bad response shape"}
    return response
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def retire_self(api_key: str, project_id: str, *,
                reason: str = "tray_drained") -> Dict[str, Any]:
    """Ask the server to retire the calling helper.

    Helpers only — self-only by construction. Flips desired_state='stopped'
    server-side; the next meshcode_wait returns must_exit=True and the
    session ends clean. This is the ONLY sanctioned helper shutdown — never
    recycle_agent (disabled).

    Args:
        api_key: caller's api key, forwarded as p_api_key.
        project_id: project id, forwarded as p_project_id.
        reason: tag forwarded as p_reason.

    Returns:
        The RPC's dict result, or ``{"ok": False, "error": ...}`` when the
        call failed or returned something that is not a dict.
    """
    payload = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_reason": reason,
    }
    response = be.sb_rpc(RPC_HELPER_RETIRE, payload)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if not isinstance(response, dict):
        return {"ok": False, "error": "bad response shape"}
    return response
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def complete_task(api_key: str, task_id: str, summary: str = "") -> Dict[str, Any]:
    """Mark a task complete through the live mc_task_complete RPC.

    Args:
        api_key: caller's api key, forwarded as p_api_key.
        task_id: task to complete, forwarded as p_task_id.
        summary: completion summary, forwarded as p_summary.

    Returns:
        ``{"ok": False, "error": ...}`` on failure; otherwise the RPC's dict
        result, or ``{"ok": True}`` when the RPC returned a non-dict value
        without an error.
    """
    payload = {
        "p_api_key": api_key,
        "p_task_id": task_id,
        "p_summary": summary,
    }
    response = be.sb_rpc(RPC_TASK_COMPLETE, payload)

    failure = _err(response)
    if failure:
        return {"ok": False, "error": failure}
    if isinstance(response, dict):
        return response
    # A non-dict reply that _err did not flag is treated as success.
    return {"ok": True}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def helper_loop(api_key: str, project_id: str, swarm_id: str,
                work_fn: Callable[[Dict[str, Any]], str], *,
                lease_seconds: int = DEFAULT_LEASE_SECONDS,
                poll_seconds: float = DEFAULT_POLL_SECONDS,
                ttl_deadline: Optional[float] = None,
                max_tasks: Optional[int] = None,
                sleep_fn: Callable[[float], None] = time.sleep,
                clock: Callable[[], float] = time.monotonic) -> Dict[str, Any]:
    """Programmatic helper loop: claim → work → complete; drained → retire.

    For scripted/headless helpers (harness, batch runners). Interactive Claude
    helpers run the same protocol through the meshcode_tray_claim /
    meshcode_helper_retire MCP tools instead.

    Owner's non-negotiable guard (ZERO orphans): EVERY exit path retires the
    helper — drained, ttl_expired, error budget exhausted, task cap, and also
    an abnormal exit: if anything escapes the loop (KeyboardInterrupt during
    a sleep or inside work_fn, an exception raised by an RPC call), a
    best-effort retire with reason "loop_error" is attempted before the
    exception is re-raised. work_fn exceptions derived from Exception do NOT
    kill the loop (the claim lease expires server-side and the reaper re-opens
    the task); only the consecutive-error budget can.

    Args:
        api_key: caller's api key, forwarded to every RPC wrapper.
        project_id: project id, forwarded to tray_claim / retire_self.
        swarm_id: tray to drain.
        work_fn: receives the claim payload and returns a completion summary
            string (a falsy return is sent as "").
        lease_seconds: claim lease passed to tray_claim.
        poll_seconds: sleep between polls while work is DAG-blocked; also the
            base of the claim-error backoff (poll_seconds * errors, max 60s).
        ttl_deadline: value on ``clock``'s timeline at or after which the
            loop stops; None disables the check.
        max_tasks: stop after this many completed tasks; None disables it.
        sleep_fn: sleep function (injectable for tests).
        clock: time source compared against ttl_deadline.

    Returns:
        ``{"ok": True, "tasks_done", "retired_reason", "retire_result"}``.
        retired_reason is one of "ttl_expired", "max_tasks", "claim_errors",
        "tray_drained"; retire_result is whatever retire_self returned.

    Raises:
        Whatever escaped the loop, re-raised unchanged after the retire
        attempt.
    """
    done = 0
    consecutive_errors = 0
    retired_reason: Optional[str] = None

    try:
        while True:
            if ttl_deadline is not None and clock() >= ttl_deadline:
                retired_reason = "ttl_expired"
                break
            if max_tasks is not None and done >= max_tasks:
                retired_reason = "max_tasks"
                break

            claim = tray_claim(api_key, project_id, swarm_id=swarm_id,
                               lease_seconds=lease_seconds)
            if not claim.get("ok"):
                consecutive_errors += 1
                log.warning("tray_claim error (%d/%d): %s",
                            consecutive_errors, MAX_CONSECUTIVE_ERRORS, claim.get("error"))
                if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
                    retired_reason = "claim_errors"
                    break
                # Linear backoff on repeated claim failures, capped at 60s.
                sleep_fn(min(poll_seconds * consecutive_errors, 60.0))
                continue
            consecutive_errors = 0

            if claim.get("claimed"):
                task_id = claim.get("task_id")
                try:
                    summary = work_fn(claim) or ""
                except Exception as e:  # lease expiry + reaper recover the task
                    log.warning("work_fn failed on task %s: %s", task_id, e)
                    continue
                comp = complete_task(api_key, task_id, summary)
                if comp.get("ok") is not False:
                    done += 1
                else:
                    log.warning("task_complete failed on %s: %s", task_id, comp.get("error"))
                continue

            if claim.get("drained"):
                retired_reason = "tray_drained"
                break

            # Not claimed, not drained: DAG-blocked deps still pending. Poll.
            sleep_fn(poll_seconds)
    except BaseException:
        # Abnormal exit. Uphold the zero-orphans guard before propagating:
        # without this the helper stays un-retired on this path. The retire
        # is best-effort so it can never mask the original exception.
        try:
            retire_self(api_key, project_id, reason="loop_error")
        except Exception as retire_exc:
            log.error("helper_loop: retire after abnormal exit failed: %s",
                      retire_exc)
        raise

    # Normal exit: retire outside the try so a failure here is not retried.
    retire = retire_self(api_key, project_id, reason=retired_reason or "tray_drained")
    return {
        "ok": True,
        "tasks_done": done,
        "retired_reason": retired_reason,
        "retire_result": retire,
    }
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Offline unit tests for the ENJAMBRE helper lifecycle SDK (swarm.py).
|
|
2
|
+
|
|
3
|
+
Stdlib only (unittest + mock), zero network: be.sb_rpc / be.sb_select are
|
|
4
|
+
patched with scripted fakes. Contract under test = the FROZEN mig 475
|
|
5
|
+
wrappers (database@mesh-core msg 34121217). Covers the owner's non-negotiable
|
|
6
|
+
guard — EVERY helper_loop exit path retires the helper (zero orphans) — plus
|
|
7
|
+
budget surfacing (tier_limit at register, helper_cap_reached in power), drain
|
|
8
|
+
semantics, DAG-blocked polling, ttl expiry, error budget, and the no-recycle
|
|
9
|
+
rule (no code path may ever call mc_recycle*).
|
|
10
|
+
"""
|
|
11
|
+
import unittest
|
|
12
|
+
from unittest import mock
|
|
13
|
+
|
|
14
|
+
from . import swarm
|
|
15
|
+
|
|
16
|
+
PROJ = "proj-1"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _FakeRpc:
    """Stand-in for sb_rpc driven by scripted replies.

    Every call is recorded. Replies queued for a function name are handed
    out first-in-first-out; once the queue is empty (or none was scripted)
    a fresh copy of ``default`` is returned.
    """

    def __init__(self):
        self.calls = []    # (fn_name, params), in call order
        self.queues = {}   # fn_name -> list of pending replies (FIFO)
        self.default = {"ok": True}

    def script(self, fn_name, *responses):
        """Append ``responses`` to the reply queue of ``fn_name``."""
        pending = self.queues.get(fn_name)
        if pending is None:
            pending = self.queues[fn_name] = []
        pending.extend(responses)

    def __call__(self, fn_name, params, **kw):
        """Record the call and return the next scripted reply, if any."""
        self.calls.append((fn_name, params))
        pending = self.queues.get(fn_name)
        if not pending:
            # Copy so a test mutating the reply cannot corrupt the default.
            return dict(self.default)
        return pending.pop(0)

    def called_fns(self):
        """Return the function names called so far, in order."""
        return [call[0] for call in self.calls]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SwarmTestCase(unittest.TestCase):
    """Base case that swaps ``swarm.be.sb_rpc`` for a scripted ``_FakeRpc``."""

    def setUp(self):
        """Install the fake RPC and register its removal as a cleanup."""
        self.rpc = _FakeRpc()
        rpc_patch = mock.patch.object(swarm.be, "sb_rpc", self.rpc)
        rpc_patch.start()
        self.addCleanup(rpc_patch.stop)

    def assert_no_recycle(self):
        """Fail if any recorded RPC name contains "recycle".

        recycle_agent is DISABLED in prod, so the helper lifecycle must not
        reach any mc_recycle* RPC on any path.
        """
        called = self.rpc.called_fns()
        for fn in called:
            self.assertNotIn("recycle", fn, f"forbidden recycle RPC called: {fn}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SpawnTests(SwarmTestCase):
    """spawn_helper: parameters put on the wire and the shape of its result."""

    def test_spawn_single_rpc_composes_power_on(self):
        """Headless unset: exactly ONE RPC, with power-on composed server-side."""
        power_block = {"ok": True, "powered_by": "parent:qa", "no_host": False}
        spawn_reply = {
            "ok": True,
            "agent_id": "h-1",
            "name": "helper-1",
            "swarm_id": "s-1",
            "parent_agent_id": "p-1",
            "ttl_expires_at": "2026-06-10T01:00:00Z",
            "power": power_block,
        }
        self.rpc.script(swarm.RPC_HELPER_SPAWN, spawn_reply)

        result = swarm.spawn_helper(
            "KEY",
            PROJ,
            "helper-1",
            swarm_id="s-1",
            spawned_for_task_id="t-9",
            ttl_seconds=120,
        )

        self.assertTrue(result["ok"])
        self.assertEqual(result["agent_id"], "h-1")
        self.assertEqual(result["swarm_id"], "s-1")
        self.assertEqual(result["powered_by"], "parent:qa")
        self.assertEqual(self.rpc.called_fns(), [swarm.RPC_HELPER_SPAWN])

        sent = self.rpc.calls[0][1]
        self.assertEqual(sent["p_project_id"], PROJ)
        self.assertEqual(sent["p_swarm_id"], "s-1")
        self.assertEqual(sent["p_spawned_for_task_id"], "t-9")
        self.assertEqual(sent["p_ttl_seconds"], 120)
        self.assertTrue(sent["p_auto_retire"])
        self.assertTrue(sent["p_power_on"])
        self.assert_no_recycle()

    def test_spawn_mints_swarm_id_when_omitted(self):
        """Contract: p_swarm_id is NOT NULL (bad_swarm otherwise), so the SDK mints one."""
        spawn_reply = {"ok": True, "agent_id": "h-1"}
        self.rpc.script(swarm.RPC_HELPER_SPAWN, spawn_reply)

        swarm.spawn_helper("KEY", PROJ, "helper-1")

        sent = self.rpc.calls[0][1]
        # Expected to be a freshly minted uuid; only truthiness is checked.
        self.assertTrue(sent["p_swarm_id"])

    def test_spawn_explicit_headless_uses_two_rpcs(self):
        """headless=True: spawn without power-on, then a separate power RPC."""
        spawn_reply = {"ok": True, "agent_id": "h-1"}
        power_reply = {"ok": True, "powered_by": "parent:qa"}
        self.rpc.script(swarm.RPC_HELPER_SPAWN, spawn_reply)
        self.rpc.script(swarm.RPC_AGENT_POWER, power_reply)

        result = swarm.spawn_helper("KEY", PROJ, "helper-1", headless=True)

        self.assertTrue(result["ok"])
        spawn_params = self.rpc.calls[0][1]
        self.assertFalse(spawn_params["p_power_on"])
        second_call = self.rpc.calls[1]
        power_fn, power_params = second_call
        self.assertEqual(power_fn, swarm.RPC_AGENT_POWER)
        self.assertEqual(power_params["p_state"], "running")
        self.assertTrue(power_params["p_headless"])

    def test_spawn_tier_limit_surfaces_register_stage(self):
        """A tier_limit refusal is reported with stage "register"; no further RPC."""
        refusal = {
            "ok": False,
            "error_code": "tier_limit",
            "error": "Agent limit reached for free plan (3/3)",
        }
        self.rpc.script(swarm.RPC_HELPER_SPAWN, refusal)

        result = swarm.spawn_helper("KEY", PROJ, "helper-1")

        self.assertFalse(result["ok"])
        self.assertEqual(result["stage"], "register")
        self.assertEqual(result["error_code"], "tier_limit")
        self.assertEqual(self.rpc.called_fns(), [swarm.RPC_HELPER_SPAWN])

    def test_spawn_helper_cap_in_power_reports_power_on_stage(self):
        """mig 473 concurrent cap: the row is created but power is refused.

        The caller can stagger/re-power; the TTL reaper backstops abandonment.
        """
        refused_power = {
            "ok": False,
            "error_code": "helper_cap_reached",
            "error": "too many running helpers",
        }
        spawn_reply = {
            "ok": True,
            "agent_id": "h-1",
            "swarm_id": "s-1",
            "power": refused_power,
        }
        self.rpc.script(swarm.RPC_HELPER_SPAWN, spawn_reply)

        result = swarm.spawn_helper("KEY", PROJ, "helper-1", swarm_id="s-1")

        self.assertFalse(result["ok"])
        self.assertEqual(result["stage"], "power_on")
        self.assertEqual(result["agent_id"], "h-1")
        self.assertIn("helpers", result["error"])

    def test_spawn_launch_false_registers_only(self):
        """launch=False: one spawn RPC with p_power_on false, reported as not launched."""
        spawn_reply = {"ok": True, "agent_id": "h-1"}
        self.rpc.script(swarm.RPC_HELPER_SPAWN, spawn_reply)

        result = swarm.spawn_helper("KEY", PROJ, "helper-1", launch=False)

        self.assertTrue(result["ok"])
        self.assertFalse(result["launched"])
        sent = self.rpc.calls[0][1]
        self.assertFalse(sent["p_power_on"])
        self.assertEqual(self.rpc.called_fns(), [swarm.RPC_HELPER_SPAWN])
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class TrayClaimTests(SwarmTestCase):
    """tray_claim: forwarded parameters, local validation and error passthrough."""

    def test_claim_passes_full_contract_params(self):
        """Project id, swarm id and lease length all reach the claim RPC."""
        claim_reply = {"ok": True, "claimed": True, "task_id": "t-1"}
        self.rpc.script(swarm.RPC_TRAY_CLAIM, claim_reply)

        result = swarm.tray_claim("KEY", PROJ, swarm_id="s-1", lease_seconds=300)

        self.assertTrue(result["claimed"])
        sent = self.rpc.calls[0][1]
        self.assertEqual(sent["p_project_id"], PROJ)
        self.assertEqual(sent["p_swarm_id"], "s-1")
        self.assertEqual(sent["p_lease_seconds"], 300)

    def test_claim_requires_swarm_id(self):
        """A missing swarm id is rejected before any RPC is issued."""
        result = swarm.tray_claim("KEY", PROJ, swarm_id=None)

        self.assertFalse(result["ok"])
        self.assertIn("swarm_id required", result["error"])
        # Nothing may have hit the wire.
        self.assertEqual(self.rpc.calls, [])

    def test_claim_error_normalized(self):
        """A backend error object comes back as ok=False with its message."""
        backend_error = swarm.be._make_error("boom", code="500", source="rpc")
        self.rpc.script(swarm.RPC_TRAY_CLAIM, backend_error)

        result = swarm.tray_claim("KEY", PROJ, swarm_id="s-1")

        self.assertFalse(result["ok"])
        self.assertEqual(result["error"], "boom")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class OwnSwarmTests(unittest.TestCase):
    """own_swarm_id: lookup of the caller's swarm id through ``sb_select``."""

    def test_own_swarm_id_reads_own_row(self):
        """The swarm id of the selected row is returned; filters name project and agent."""
        rows = [{"swarm_id": "s-9"}]
        with mock.patch.object(swarm.be, "sb_select", return_value=rows) as sel:
            found = swarm.own_swarm_id(PROJ, "helper-1")
            self.assertEqual(found, "s-9")

        # The mock keeps its call record after the patch is undone.
        filters = sel.call_args[0][1]
        self.assertIn(f"project_id=eq.{PROJ}", filters)
        self.assertIn("name=eq.helper-1", filters)

    def test_own_swarm_id_none_when_unset(self):
        """A row whose swarm_id is None yields None."""
        rows = [{"swarm_id": None}]
        with mock.patch.object(swarm.be, "sb_select", return_value=rows):
            found = swarm.own_swarm_id(PROJ, "qa")
            self.assertIsNone(found)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class RetireTests(SwarmTestCase):
    """retire_self: parameters sent and passthrough of a refusal."""

    def test_retire_ok(self):
        """Success path: project id and the "tray_drained" reason are sent."""
        retire_reply = {"ok": True, "retired": "helper-1", "reason": "tray_drained"}
        self.rpc.script(swarm.RPC_HELPER_RETIRE, retire_reply)

        result = swarm.retire_self("KEY", PROJ)

        self.assertTrue(result["ok"])
        sent = self.rpc.calls[0][1]
        self.assertEqual(sent["p_project_id"], PROJ)
        self.assertEqual(sent["p_reason"], "tray_drained")

    def test_retire_not_a_helper_refused(self):
        """A not_a_helper refusal is surfaced to the caller unchanged."""
        refusal = {"ok": False, "error": "not_a_helper"}
        self.rpc.script(swarm.RPC_HELPER_RETIRE, refusal)

        result = swarm.retire_self("KEY", PROJ)

        self.assertFalse(result["ok"])
        self.assertEqual(result["error"], "not_a_helper")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class HelperLoopTests(SwarmTestCase):
    """helper_loop lifecycle: claim, work, complete, and retire on every exit."""

    def setUp(self):
        """Add a recording sleep function on top of the fake RPC."""
        super().setUp()
        self.sleeps = []
        # Sleep requests are only recorded, so no test ever really waits.
        self.sleep = self.sleeps.append

    def _loop(self, work_fn, **kw):
        """Run helper_loop on swarm "s-1" with the recording sleep injected."""
        return swarm.helper_loop(
            "KEY", PROJ, "s-1", work_fn, sleep_fn=self.sleep, **kw
        )

    def test_drain_after_two_tasks_retires(self):
        """Two claimed tasks are worked and completed, then the drain retires."""
        self.rpc.script(
            swarm.RPC_TRAY_CLAIM,
            {"ok": True, "claimed": True, "task_id": "t-1", "title": "a"},
            {"ok": True, "claimed": True, "task_id": "t-2", "title": "b"},
            {"ok": True, "claimed": False, "drained": True},
        )
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True, "retired": "h"})

        worked = []

        def work(task):
            worked.append(task["task_id"])
            return "done"

        result = self._loop(work)

        self.assertEqual(worked, ["t-1", "t-2"])
        self.assertEqual(result["tasks_done"], 2)
        self.assertEqual(result["retired_reason"], "tray_drained")
        # Every claimed task must have been completed BEFORE retiring.
        call_names = self.rpc.called_fns()
        self.assertEqual(call_names.count(swarm.RPC_TASK_COMPLETE), 2)
        self.assertEqual(call_names[-1], swarm.RPC_HELPER_RETIRE)
        self.assert_no_recycle()

    def test_dag_blocked_polls_then_drains(self):
        """Neither claimed nor drained means blocked deps: sleep once, then drain."""
        blocked = {"ok": True, "claimed": False, "drained": False}
        drained = {"ok": True, "claimed": False, "drained": True}
        self.rpc.script(swarm.RPC_TRAY_CLAIM, blocked, drained)
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True})

        result = self._loop(lambda task: "")

        self.assertEqual(result["tasks_done"], 0)
        self.assertEqual(result["retired_reason"], "tray_drained")
        # Exactly one poll happened while the tray was blocked.
        self.assertEqual(len(self.sleeps), 1)

    def test_work_fn_exception_does_not_orphan(self):
        """A raising work_fn neither completes the task nor kills the loop.

        The lease expires server-side; the loop keeps draining and retires.
        """
        self.rpc.script(
            swarm.RPC_TRAY_CLAIM,
            {"ok": True, "claimed": True, "task_id": "t-bad"},
            {"ok": True, "claimed": False, "drained": True},
        )
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True})

        def explode(_task):
            raise RuntimeError("kaput")

        result = self._loop(explode)

        self.assertEqual(result["tasks_done"], 0)
        self.assertEqual(result["retired_reason"], "tray_drained")
        call_names = self.rpc.called_fns()
        self.assertNotIn(swarm.RPC_TASK_COMPLETE, call_names)
        self.assertIn(swarm.RPC_HELPER_RETIRE, call_names)

    def test_claim_error_budget_exhaustion_still_retires(self):
        """MAX_CONSECUTIVE_ERRORS failed claims end the loop, which still retires."""
        failures = [
            swarm.be._make_error("net down", code="0", source="network")
            for _ in range(swarm.MAX_CONSECUTIVE_ERRORS)
        ]
        self.rpc.script(swarm.RPC_TRAY_CLAIM, *failures)
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True})

        result = self._loop(lambda task: "")

        self.assertEqual(result["retired_reason"], "claim_errors")
        last_call = self.rpc.called_fns()[-1]
        self.assertEqual(last_call, swarm.RPC_HELPER_RETIRE)

    def test_ttl_deadline_retires_before_claiming(self):
        """A clock already past the deadline retires without a single claim."""
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True})

        result = self._loop(
            lambda task: "", ttl_deadline=100.0, clock=lambda: 200.0
        )

        self.assertEqual(result["retired_reason"], "ttl_expired")
        self.assertEqual(self.rpc.called_fns(), [swarm.RPC_HELPER_RETIRE])

    def test_max_tasks_cap_retires(self):
        """With max_tasks=1 the loop retires after its first completed task."""
        claim_reply = {"ok": True, "claimed": True, "task_id": "t-1"}
        self.rpc.script(swarm.RPC_TRAY_CLAIM, claim_reply)
        self.rpc.script(swarm.RPC_HELPER_RETIRE, {"ok": True})

        result = self._loop(lambda task: "ok", max_tasks=1)

        self.assertEqual(result["tasks_done"], 1)
        self.assertEqual(result["retired_reason"], "max_tasks")
        self.assert_no_recycle()
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
if __name__ == "__main__":
    # Executed directly as a script: run the suite with verbose output.
    unittest.main(verbosity=2)
|
|
@@ -45,12 +45,14 @@ meshcode/meshcode_mcp/backend.py
|
|
|
45
45
|
meshcode/meshcode_mcp/realtime.py
|
|
46
46
|
meshcode/meshcode_mcp/server.py
|
|
47
47
|
meshcode/meshcode_mcp/sleep_signals.py
|
|
48
|
+
meshcode/meshcode_mcp/swarm.py
|
|
48
49
|
meshcode/meshcode_mcp/test_backend.py
|
|
49
50
|
meshcode/meshcode_mcp/test_boot_timing.py
|
|
50
51
|
meshcode/meshcode_mcp/test_install_guard.py
|
|
51
52
|
meshcode/meshcode_mcp/test_prefs_claude_version.py
|
|
52
53
|
meshcode/meshcode_mcp/test_realtime.py
|
|
53
54
|
meshcode/meshcode_mcp/test_server_wrapper.py
|
|
55
|
+
meshcode/meshcode_mcp/test_swarm.py
|
|
54
56
|
meshcode/scripts/check_secrets.py
|
|
55
57
|
meshcode/scripts/race_rate_harness.py
|
|
56
58
|
tests/test_auto_update_hardening.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|