meshcode 2.11.123__tar.gz → 2.11.125__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {meshcode-2.11.123 → meshcode-2.11.125}/PKG-INFO +1 -1
  2. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/__init__.py +1 -1
  3. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/realtime.py +40 -3
  4. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/server.py +96 -0
  5. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/sleep_signals.py +22 -6
  6. meshcode-2.11.125/meshcode/meshcode_mcp/swarm.py +292 -0
  7. meshcode-2.11.125/meshcode/meshcode_mcp/test_swarm.py +279 -0
  8. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/PKG-INFO +1 -1
  9. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/SOURCES.txt +2 -0
  10. {meshcode-2.11.123 → meshcode-2.11.125}/pyproject.toml +1 -1
  11. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_sleep_signals.py +52 -6
  12. {meshcode-2.11.123 → meshcode-2.11.125}/README.md +0 -0
  13. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/__main__.py +0 -0
  14. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/_session_handoff_template.py +0 -0
  15. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/_stop_hook_template.py +0 -0
  16. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/ascii_art.py +0 -0
  17. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/atomic_push.py +0 -0
  18. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/claude_update.py +0 -0
  19. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/cli.py +0 -0
  20. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/comms_v4.py +0 -0
  21. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/compat.py +0 -0
  22. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/daemon.py +0 -0
  23. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/date_parse.py +0 -0
  24. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/doctor.py +0 -0
  25. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/error_hints.py +0 -0
  26. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/exceptions.py +0 -0
  27. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hooks/__init__.py +0 -0
  28. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hooks/repo_path_lock.py +0 -0
  29. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/hostd.py +0 -0
  30. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/invites.py +0 -0
  31. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/launcher.py +0 -0
  32. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/launcher_install.py +0 -0
  33. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/__init__.py +0 -0
  34. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/__main__.py +0 -0
  35. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/backend.py +0 -0
  36. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_backend.py +0 -0
  37. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_boot_timing.py +0 -0
  38. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_install_guard.py +0 -0
  39. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_prefs_claude_version.py +0 -0
  40. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_realtime.py +0 -0
  41. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/meshcode_mcp/test_server_wrapper.py +0 -0
  42. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/preferences.py +0 -0
  43. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/protocol_handler.py +0 -0
  44. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/protocol_v2.py +0 -0
  45. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/quickstart.py +0 -0
  46. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/rpc_allowlist.py +0 -0
  47. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/run_agent.py +0 -0
  48. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/scripts/check_secrets.py +0 -0
  49. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/scripts/race_rate_harness.py +0 -0
  50. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/secrets.py +0 -0
  51. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/self_update.py +0 -0
  52. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/setup_clients.py +0 -0
  53. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/supervisor.py +0 -0
  54. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/up.py +0 -0
  55. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode/upload.py +0 -0
  56. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/dependency_links.txt +0 -0
  57. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/entry_points.txt +0 -0
  58. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/requires.txt +0 -0
  59. {meshcode-2.11.123 → meshcode-2.11.125}/meshcode.egg-info/top_level.txt +0 -0
  60. {meshcode-2.11.123 → meshcode-2.11.125}/setup.cfg +0 -0
  61. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_auto_update_hardening.py +0 -0
  62. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_1.py +0 -0
  63. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_2.py +0 -0
  64. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_closegap_3.py +0 -0
  65. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_autonomous_prompt_inject.py +0 -0
  66. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_boot_bug_regression.py +0 -0
  67. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_color_truecolor.py +0 -0
  68. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_core.py +0 -0
  69. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_cross_agent_messaging.py +0 -0
  70. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_date_parse.py +0 -0
  71. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_doctor.py +0 -0
  72. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_epistemic_v1_python_sdk.py +0 -0
  73. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_epistemic_v1_stop_conditions.py +0 -0
  74. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_esc_deaf_state.py +0 -0
  75. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_exceptions.py +0 -0
  76. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_file_upload.py +0 -0
  77. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_init_device_code.py +0 -0
  78. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_install_guard.py +0 -0
  79. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_lease_sigterm_release.py +0 -0
  80. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_mark_read_batch.py +0 -0
  81. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_marketplace_ratings.py +0 -0
  82. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_migration_integrity.py +0 -0
  83. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_realtime_event_freshness.py +0 -0
  84. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rls_cross_tenant.py +0 -0
  85. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rpc_grants.py +0 -0
  86. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_rpc_migrations.py +0 -0
  87. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_run_agent_dry_run.py +0 -0
  88. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_run_agent_no_server_import.py +0 -0
  89. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_security_regressions.py +0 -0
  90. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_self_update_user_site.py +0 -0
  91. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_sentinel.py +0 -0
  92. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_setup_path.py +0 -0
  93. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_status_enum_coverage.py +0 -0
  94. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_stay_on_loop_hook.py +0 -0
  95. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_swarm_events.py +0 -0
  96. {meshcode-2.11.123 → meshcode-2.11.125}/tests/test_wait_open_tasks_contradiction.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshcode
3
- Version: 2.11.123
3
+ Version: 2.11.125
4
4
  Summary: Real-time communication between AI agents — Supabase-backed CLI
5
5
  Author-email: MeshCode <hello@meshcode.io>
6
6
  License: MIT
@@ -1,5 +1,5 @@
1
1
  """MeshCode — Real-time communication between AI agents."""
2
- __version__ = "2.11.123"
2
+ __version__ = "2.11.125"
3
3
 
4
4
  # Exception hierarchy — eagerly imported (lightweight, no deps)
5
5
  from meshcode.exceptions import ( # noqa: F401
@@ -125,24 +125,61 @@ class RealtimeListener:
125
125
  self._connected = False
126
126
  self._subscription_ok = False
127
127
 
128
+ @staticmethod
129
+ def _is_auth_rejection(e: Exception) -> bool:
130
+ """True when the WS handshake was rejected 401/403 — a BAD KEY, not a
131
+ transient network blip. Task c49096c1 (edge logs: 66x 401/hr): a client
132
+ with an invalid/rotated apikey retried every ≤60s forever, hammering
133
+ prod. Auth errors don't heal by retrying."""
134
+ try:
135
+ status = getattr(getattr(e, "response", None), "status_code", None)
136
+ if status is None:
137
+ status = getattr(e, "status_code", None) # older websockets
138
+ return status in (401, 403)
139
+ except Exception:
140
+ return False
141
+
128
142
  async def _run(self) -> None:
129
143
  """Outer loop: reconnect with exponential backoff on disconnect.
130
144
 
131
- NEVER gives up — keeps retrying with capped backoff (max 60s).
132
- The MCP server must stay alive regardless of Realtime health.
145
+ Transient errors: NEVER gives up — keeps retrying with capped backoff
146
+ (max 60s). The MCP server must stay alive regardless of Realtime health.
147
+ AUTH rejections (401/403) are different (task c49096c1): the key is
148
+ bad and retrying can't fix it — back off to 5 min immediately and STOP
149
+ after 5 consecutive auth rejections with one actionable log line.
150
+ Message delivery survives via the wait-loop poll fallback (mc_wait_poll).
133
151
  """
134
152
  backoff = 1
135
153
  consecutive_failures = 0
154
+ auth_rejections = 0
136
155
  while not self._stop.is_set():
137
156
  try:
138
157
  await self._connect_and_listen()
139
158
  backoff = 1 # reset on clean disconnect
140
159
  consecutive_failures = 0
160
+ auth_rejections = 0
141
161
  except asyncio.CancelledError:
142
162
  return
143
163
  except Exception as e:
144
164
  consecutive_failures += 1
145
- if consecutive_failures % 10 == 0:
165
+ if self._is_auth_rejection(e):
166
+ auth_rejections += 1
167
+ if auth_rejections >= 5:
168
+ log.error(
169
+ "Realtime: 5 consecutive auth rejections (401/403) — the "
170
+ "SUPABASE_KEY this agent carries is invalid or rotated. "
171
+ "GIVING UP on Realtime (poll fallback stays active). "
172
+ "Fix: refresh this workspace's .mcp.json key (re-run "
173
+ "`meshcode setup`) and restart the agent."
174
+ )
175
+ self._connected = False
176
+ return
177
+ backoff = 300 # bad key: don't hammer prod every 60s
178
+ log.warning(
179
+ f"Realtime auth rejected ({auth_rejections}/5): {e}; "
180
+ f"key likely invalid/rotated — retrying in {backoff}s"
181
+ )
182
+ elif consecutive_failures % 10 == 0:
146
183
  log.error(
147
184
  f"Realtime: {consecutive_failures} consecutive failures — "
148
185
  f"still retrying (backoff={backoff}s). Last error: {e}"
@@ -6427,6 +6427,102 @@ def meshcode_agent_stop(name: str) -> Dict[str, Any]:
6427
6427
  })
6428
6428
 
6429
6429
 
6430
+ # ----------------- ENJAMBRE SWARM HELPER LIFECYCLE (task 227c6a3c) -----------------
6431
+ # Spawn → tray loop → self-retire. NO recycle_agent anywhere in this lifecycle
6432
+ # (disabled in prod): launch = mc_agent_power_as_agent (G1b parent authz),
6433
+ # shutdown = mc_helper_retire → desired_state='stopped' → meshcode_wait
6434
+ # must_exit → clean session end; the TTL reaper backstops crashes.
6435
+
6436
@mcp.tool()
@with_working_status
def meshcode_helper_spawn(name: str, role: str = "helper",
                          swarm_id: Optional[str] = None,
                          spawned_for_task_id: Optional[str] = None,
                          ttl_seconds: int = 3600,
                          headless: Optional[bool] = None) -> Dict[str, Any]:
    """Spawn an ephemeral HELPER agent into your swarm (you become its parent).

    ENJAMBRE (mig 471 schema + mig 475 wrappers). Server-side, from YOUR
    agent-scoped api key: registers agent_kind='helper', parent_agent_id=you,
    auto_retire=true, a TTL, repo/host inherited from you — then powers it on
    (G1b parent authz + mig 473 concurrent cap compose inside). Budget errors:
    tier_limit at creation; helper_cap_reached arrives in `power` with the row
    created but stopped (stagger and re-power, or retire it). Helpers cannot
    spawn helpers (helper_chain_forbidden, v1).

    The helper works its swarm tray via meshcode_tray_claim and self-retires
    when the tray drains — you never stop it manually (meshcode_agent_stop is
    the manual override; NEVER recycle, it's disabled).

    Args:
        name: helper agent name (unique in the meshwork; name_taken on collision).
        role: short role description shown on the dashboard.
        swarm_id: tray to join; omit to mint a fresh tray id for a new swarm.
        spawned_for_task_id: optional umbrella task this helper serves.
        ttl_seconds: hard lifetime cap (60..86400); reaper retires it past this.
        headless: omit for the server default; explicit True/False forces the
            window mode via a separate power-on call.
    """
    # The docstring above is kept word-for-word: it is what the tool
    # decorator exposes to callers. This wrapper only forwards to the SDK.
    from . import swarm as _swarm

    spawn_options = {
        "role": role,
        "swarm_id": swarm_id,
        "spawned_for_task_id": spawned_for_task_id,
        "ttl_seconds": ttl_seconds,
        "headless": headless,
    }
    # Identity (api key + project) comes from this server process, never
    # from the tool arguments.
    return _swarm.spawn_helper(_get_api_key(), _PROJECT_ID, name,
                               **spawn_options)
6471
+
6472
+
6473
@mcp.tool()
@with_working_status
def meshcode_tray_claim(lease_seconds: int = 900,
                        swarm_id: Optional[str] = None) -> Dict[str, Any]:
    """HELPER loop step: atomically claim the next runnable task from your
    swarm tray (work-stealing, DAG-gated, priority-ordered).

    ENJAMBRE (mig 471 + 475). Interpret the result:
      - claimed=true → work the returned task NOW, then meshcode_task_complete.
        Re-claim after completing — keep draining.
      - claimed=false, drained=true → tray is empty and final: call
        meshcode_helper_retire, then meshcode_wait (returns must_exit → end
        session). Do NOT idle.
      - claimed=false, drained=false → tasks exist but are DAG-blocked: wait
        briefly and claim again.

    Args:
        lease_seconds: claim lease (60..86400); an expired lease re-opens the task.
        swarm_id: override tray; omit to use your own agent row's swarm
            (helpers get theirs stamped at spawn).
    """
    from . import swarm as _swarm

    # An explicit tray wins; otherwise fall back to the swarm recorded on
    # this agent's own row.
    tray = swarm_id
    if not tray:
        tray = _swarm.own_swarm_id(_PROJECT_ID, AGENT_NAME)

    if tray:
        return _swarm.tray_claim(_get_api_key(), _PROJECT_ID, swarm_id=tray,
                                 lease_seconds=lease_seconds)

    # No tray could be resolved: report it instead of calling the RPC.
    return {"ok": False, "error": "no swarm: your agent row has no swarm_id "
                                  "and none was given — are you a helper?"}
6501
+
6502
+
6503
@mcp.tool()
@with_working_status
def meshcode_helper_retire(reason: str = "tray_drained") -> Dict[str, Any]:
    """HELPER self-retire — the ONLY sanctioned helper shutdown (auto-off,
    ZERO orphans). Self-only by construction; persistent agents get
    not_a_helper, auto_retire=false rows get auto_retire_disabled.

    ENJAMBRE (mig 471 + 475). Flips your desired_state='stopped' +
    status='sleeping' server-side: hostd will not respawn you and your next
    meshcode_wait returns must_exit=True. Sequence: finish/complete the current
    task → meshcode_helper_retire → meshcode_wait → exit on must_exit.

    Args:
        reason: telemetry tag (tray_drained | ttl_expired | parent_request | ...).
    """
    from . import swarm as _swarm

    outcome = _swarm.retire_self(_get_api_key(), _PROJECT_ID, reason=reason)
    retired_ok = isinstance(outcome, dict) and outcome.get("ok")
    if not retired_ok:
        # Failures (and non-dict results) are passed through untouched.
        return outcome

    # On success, tell the agent what to do next.
    outcome["next_step"] = ("retired — now call meshcode_wait(); it returns "
                            "must_exit=True and you end the session")
    return outcome
6524
+
6525
+
6430
6526
  @mcp.tool()
6431
6527
  @with_working_status
6432
6528
  def meshcode_recycle_agent(name: str, visible: bool = False) -> Dict[str, Any]:
@@ -136,18 +136,32 @@ def _is_human_authored(m: Dict[str, Any]) -> bool:
136
136
  return False
137
137
 
138
138
 
139
def _sender_may_order_sleep(m: Dict[str, Any]) -> bool:
    """Decide whether the sender of message *m* may put a running agent to sleep.

    Policy (Samuel directive 2026-06-10, msg 54eec209): only the USER or the
    COMMANDER may sleep a RUNNING agent. The message qualifies when it is
    human-authored, or when its payload is a dict whose
    ``sender_is_commander`` is exactly ``True`` — the SERVER-VERIFIED
    commander stamp (mig 483; stamped by mc_send_message, never
    client-claimed). A plain AI peer can therefore no longer must_exit a
    sibling.
    """
    if not _is_human_authored(m):
        payload = m.get("payload") or {}
        if not isinstance(payload, dict):
            return False
        # Identity check on True: truthy look-alikes (1, "true") do not count.
        return payload.get("sender_is_commander") is True
    return True
149
+
150
+
139
151
  def _looks_like_sleep_signal(m: Dict[str, Any]) -> bool:
140
152
  """Detect mesh messages that authorize the wait-loop exit.
141
153
 
142
154
  See module docstring for the two valid encodings and the rationale
143
- for ignoring idiom matches from AI-role senders.
155
+ for ignoring idiom matches from AI-role senders. Since mig 483 + .125,
156
+ structured directives additionally require an authorized sender
157
+ (human or server-stamped commander) — see _sender_may_order_sleep.
144
158
  """
145
159
  pl = m.get("payload") or {}
146
160
  if isinstance(pl, dict):
147
- if str(pl.get("type", "")).lower() in _SLEEP_PAYLOAD_TYPES:
148
- return True
149
- if str(pl.get("directive", "")).lower() in _SLEEP_PAYLOAD_TYPES:
150
- return True
161
+ structured = (str(pl.get("type", "")).lower() in _SLEEP_PAYLOAD_TYPES
162
+ or str(pl.get("directive", "")).lower() in _SLEEP_PAYLOAD_TYPES)
163
+ if structured:
164
+ return _sender_may_order_sleep(m)
151
165
  text = str(pl.get("text", "")).lower()
152
166
  if text and any(marker in text for marker in _SLEEP_TEXT_MARKERS):
153
167
  if _is_human_authored(m) and _human_text_is_directive(text):
@@ -194,7 +208,9 @@ def _split_messages(messages: List[Dict[str, Any]]) -> Dict[str, Any]:
194
208
  t = m.get("type", "msg")
195
209
  if t == "ack":
196
210
  acks.append(m)
197
- elif (t == "done" or _looks_like_sleep_signal(m)) and _recent_enough(m):
211
+ elif ((t == "done" and _sender_may_order_sleep(m)) or _looks_like_sleep_signal(m)) \
212
+ and _recent_enough(m):
213
+ # type='done' rows are sleep-class too — same sender gate applies
198
214
  dones.append(m)
199
215
  else:
200
216
  real.append(m)
@@ -0,0 +1,292 @@
1
+ """ENJAMBRE — swarm helper lifecycle SDK (task 227c6a3c, branch enjambre/sdk-helpers).
2
+
3
+ Implements the launcher/SDK side of the swarm helper lifecycle on top of the
4
+ mig 471 schema (mc_agents.agent_kind/parent_agent_id/swarm_id/spawned_for_task_id/
5
+ auto_retire/ttl_expires_at + mc_task_claim_from_tray + mc_helper_retire):
6
+
7
+ 1. SPAWN — parent calls mc_helper_spawn_as_agent: server derives parentage
8
+ from the caller's agent-scoped api key (never client-claimed),
9
+ INSERT-only helper row inheriting repo_path/host from the parent,
10
+ tier row-budget at creation, and (p_power_on) composes power-on
11
+ through mc_agent_power_as_agent — parent authz (mig 471 G1b) +
12
+ concurrent running cap (mig 473) live in exactly one place.
13
+ 2. LOOP — helper claims work from its swarm tray (claim_from_tray):
14
+ claimed → work + mc_task_complete; drained → retire + exit.
15
+ Not-claimed-not-drained means DAG-blocked deps are pending:
16
+ poll again after a short sleep.
17
+ 3. SHUTDOWN — NO recycle_agent (DISABLED in prod). mc_helper_retire flips
18
+ desired_state='stopped' + status='sleeping'; the agent's next
19
+ meshcode_wait returns must_exit=True and the session ends. hostd
20
+ never respawns desired_state='stopped', and the service-side
21
+ reaper (mc_swarm_reap_expired_helpers) + ttl_expires_at backstop
22
+ guarantee ZERO orphan helpers even on crash.
23
+
24
+ DATA CONTRACT — FROZEN by database@mesh-core (msg 34121217), LIVE in prod as
25
+ mig 475 (commander msg 50b4ab8d; authored as 20260610_474_* on branch
26
+ enjambre/g3-runtime, renumbered at apply):
27
+
28
+ public.mc_task_claim_from_tray_as_agent(p_api_key text, p_project_id uuid,
29
+ p_swarm_id uuid, p_lease_seconds int DEFAULT 900) -> jsonb
30
+ {ok, claimed:true, task_id, title, description, priority, parent_task_id,
31
+ lease_expires_at} | {ok:true, claimed:false, drained:bool}
32
+ p_swarm_id is REQUIRED (claims never cross trays); the SDK resolves the
33
+ caller's own mc_agents.swarm_id when not given explicitly.
34
+
35
+ public.mc_helper_retire_as_agent(p_api_key text, p_project_id uuid,
36
+ p_reason text DEFAULT 'tray_drained') -> {ok, retired, reason}
37
+ Self-only by construction. Errors: auth_failed | agent_key_required |
38
+ wrong_scope | agent_not_found | not_a_helper | auto_retire_disabled.
39
+
40
+ public.mc_helper_spawn_as_agent(p_api_key text, p_project_id uuid,
41
+ p_name text, p_swarm_id uuid, p_spawned_for_task_id uuid DEFAULT NULL,
42
+ p_role text DEFAULT 'helper', p_auto_retire bool DEFAULT true,
43
+ p_ttl_seconds int DEFAULT 3600, p_power_on bool DEFAULT true) -> jsonb
44
+ {ok, agent_id, name, agent_kind, parent_agent_id, swarm_id,
45
+ spawned_for_task_id, auto_retire, ttl_expires_at, power}
46
+ Errors: bad_name | bad_swarm | auth_failed | agent_key_required |
47
+ wrong_scope | agent_not_found | helper_chain_forbidden | tier_limit |
48
+ name_taken; a concurrent-cap refusal (helper_cap_reached) arrives inside
49
+ `power` with the row already created but stopped (retire or re-power).
50
+ """
51
+
52
+ import logging
53
+ import time
54
+ import uuid as _uuid
55
+ from typing import Any, Callable, Dict, Optional
56
+ from urllib.parse import quote as _quote
57
+
58
+ from . import backend as be
59
+
60
+ log = logging.getLogger("meshcode.swarm")
61
+
62
+ # Frozen contract names (mig 475 live in prod).
63
+ RPC_HELPER_SPAWN = "mc_helper_spawn_as_agent"
64
+ RPC_TRAY_CLAIM = "mc_task_claim_from_tray_as_agent"
65
+ RPC_HELPER_RETIRE = "mc_helper_retire_as_agent"
66
+ RPC_AGENT_POWER = "mc_agent_power_as_agent" # mig 416 + 471 G1b + 473 cap
67
+ RPC_TASK_COMPLETE = "mc_task_complete" # live, agent-callable (rpc_allowlist)
68
+
69
+ DEFAULT_TTL_SECONDS = 3600
70
+ DEFAULT_LEASE_SECONDS = 900
71
+ DEFAULT_POLL_SECONDS = 15.0
72
+ MAX_CONSECUTIVE_ERRORS = 5
73
+
74
+
75
+ def _err(result: Any) -> Optional[str]:
76
+ """Normalize a backend result to an error string, or None if it's ok."""
77
+ if result is None:
78
+ return "empty response"
79
+ if be.is_error(result):
80
+ return be.get_error_message(result) or "rpc error"
81
+ if isinstance(result, dict) and result.get("ok") is False:
82
+ return result.get("error") or result.get("error_code") or "rpc refused"
83
+ return None
84
+
85
+
86
def own_swarm_id(project_id: str, agent_name: str) -> Optional[str]:
    """Look up the ``swarm_id`` stored on the caller's own ``mc_agents`` row.

    Helpers get their swarm stamped at spawn. Tray claims are tray-scoped
    (``p_swarm_id`` is required by the RPC contract), so this is how a
    helper finds the tray to claim from.

    Args:
        project_id: project the agent row belongs to.
        agent_name: agent name; URL-quoted before being placed in the filter.

    Returns:
        The row's swarm id, or None when no row matches or the row has no
        (truthy) ``swarm_id``.
    """
    row_filter = (f"project_id=eq.{project_id}"
                  f"&name=eq.{_quote(agent_name)}"
                  "&select=swarm_id")
    rows = be.sb_select("mc_agents", row_filter, limit=1)
    if not rows:
        return None
    stamped = rows[0].get("swarm_id")
    return stamped if stamped else None
96
+
97
+
98
def spawn_helper(api_key: str, project_id: str, name: str, *,
                 role: str = "helper",
                 swarm_id: Optional[str] = None,
                 spawned_for_task_id: Optional[str] = None,
                 ttl_seconds: int = DEFAULT_TTL_SECONDS,
                 launch: bool = True,
                 headless: Optional[bool] = None) -> Dict[str, Any]:
    """Spawn an ephemeral helper. Parent = caller (server-derived, W2).

    swarm_id=None mints a fresh tray id client-side (the contract requires it
    NOT NULL — helper invariant). headless=None lets the server's power-on
    default apply (single RPC); an explicit True/False does spawn with
    p_power_on=false and a separate mc_agent_power_as_agent call carrying
    p_headless. The helper's workspace materializes via `meshcode run`'s
    auto-setup when hostd spawns it (W5: row inherits repo_path/host).

    Args:
        api_key: the caller's api key, forwarded as ``p_api_key``.
        project_id: project id, forwarded as ``p_project_id``.
        name: helper agent name.
        role: role text stored on the helper row.
        swarm_id: tray to join; None mints a new uuid4 tray id.
        spawned_for_task_id: optional task this helper was spawned for.
        ttl_seconds: lifetime cap, coerced with ``int()``.
        launch: when False the row is registered but no power-on is attempted.
        headless: None = server default; True/False = explicit power-on call.

    Returns:
        On success ``{"ok": True, "helper", "launched", "swarm_id", ...}``
        plus whichever of ``agent_id`` / ``parent_agent_id`` /
        ``ttl_expires_at`` the server returned, and ``powered_by`` /
        ``no_host`` when the power result is a dict.
        On failure ``{"ok": False, "stage": "register" | "power_on",
        "error": ...}``. A ``power_on`` failure still carries the fields of
        the created row together with the raw ``power`` result.

        ``swarm_id`` is always present once registration succeeded: if the
        server response does not echo it, the id that was sent (given or
        freshly minted) is reported, so the caller never loses the tray id.
    """
    v_swarm = swarm_id or str(_uuid.uuid4())
    # One RPC can register AND power on only when the caller did not force
    # a window mode; otherwise power-on is a second, explicit call.
    single_rpc_power = launch and headless is None

    reg = be.sb_rpc(RPC_HELPER_SPAWN, {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_name": name,
        "p_swarm_id": v_swarm,
        "p_spawned_for_task_id": spawned_for_task_id,
        "p_role": role,
        "p_auto_retire": True,
        "p_ttl_seconds": int(ttl_seconds),
        "p_power_on": bool(single_rpc_power),
    })
    err = _err(reg)
    if err:
        failure: Dict[str, Any] = {"ok": False, "stage": "register", "error": err}
        if isinstance(reg, dict) and reg.get("error_code"):
            failure["error_code"] = reg.get("error_code")
        return failure

    out: Dict[str, Any] = {"ok": True, "helper": name, "launched": bool(launch)}
    power = None
    if isinstance(reg, dict):
        out.update({k: reg[k] for k in
                    ("agent_id", "swarm_id", "parent_agent_id", "ttl_expires_at")
                    if k in reg})
        power = reg.get("power")
    # Fix: a client-minted tray id used to be dropped whenever the server
    # response did not echo swarm_id, leaving the parent unable to address
    # the tray it had just created.
    if not out.get("swarm_id"):
        out["swarm_id"] = v_swarm

    if launch and not single_rpc_power:
        power = be.sb_rpc(RPC_AGENT_POWER, {
            "p_api_key": api_key,
            "p_project_id": project_id,
            "p_agent": name,
            "p_state": "running",
            "p_headless": bool(headless),
        })

    if launch:
        perr = _err(power)
        if perr:
            # Row created but not running (e.g. helper_cap_reached, mig 473).
            # Caller may stagger/retire and re-power; TTL reaper backstops.
            return {**out, "ok": False, "stage": "power_on", "error": perr,
                    "power": power}
        if isinstance(power, dict):
            out["powered_by"] = power.get("powered_by")
            out["no_host"] = power.get("no_host")
    return out
162
+
163
+
164
def tray_claim(api_key: str, project_id: str, *,
               swarm_id: str,
               lease_seconds: int = DEFAULT_LEASE_SECONDS) -> Dict[str, Any]:
    """Claim the next runnable task from a swarm tray.

    The claim is described by the contract as atomic, work-stealing,
    DAG-gated and priority-ordered. ``p_swarm_id`` is contract-required;
    resolve your own with own_swarm_id().

    Args:
        api_key: the caller's api key.
        project_id: project id.
        swarm_id: tray to claim from; a falsy value is rejected locally.
        lease_seconds: claim lease, coerced with ``int()``.

    Returns:
        The raw contract dict: ``{ok, claimed, task...}`` or
        ``{ok, claimed: False, drained: bool}``. ``claimed=False`` with
        ``drained=False`` means DAG-blocked work is still pending — poll
        again, do NOT retire. Any failure is normalized to
        ``{"ok": False, "error": ...}``.
    """
    if not swarm_id:
        return {"ok": False, "error": "swarm_id required (no swarm on your agent row?)"}

    params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_swarm_id": swarm_id,
        "p_lease_seconds": int(lease_seconds),
    }
    response = be.sb_rpc(RPC_TRAY_CLAIM, params)

    problem = _err(response)
    if problem:
        return {"ok": False, "error": problem}
    if not isinstance(response, dict):
        return {"ok": False, "error": "bad response shape"}
    return response
187
+
188
+
189
def retire_self(api_key: str, project_id: str, *,
                reason: str = "tray_drained") -> Dict[str, Any]:
    """Retire the calling helper (helpers only — self-only by construction).

    Per the contract the RPC flips desired_state='stopped' server-side, so
    the next meshcode_wait returns must_exit=True and the session ends
    clean. This is the ONLY sanctioned helper shutdown — never
    recycle_agent (disabled).

    Args:
        api_key: the caller's api key.
        project_id: project id.
        reason: tag forwarded as ``p_reason``.

    Returns:
        The RPC's dict on success, otherwise ``{"ok": False, "error": ...}``
        (also used when the response is not a dict).
    """
    params = {
        "p_api_key": api_key,
        "p_project_id": project_id,
        "p_reason": reason,
    }
    response = be.sb_rpc(RPC_HELPER_RETIRE, params)

    problem = _err(response)
    if problem:
        return {"ok": False, "error": problem}
    if not isinstance(response, dict):
        return {"ok": False, "error": "bad response shape"}
    return response
204
+
205
+
206
def complete_task(api_key: str, task_id: str, summary: str = "") -> Dict[str, Any]:
    """Mark a task complete through the live mc_task_complete RPC.

    Args:
        api_key: the caller's api key.
        task_id: id of the task to complete.
        summary: completion summary forwarded as ``p_summary``.

    Returns:
        The RPC's dict when it returns one; ``{"ok": True}`` for any other
        non-error response; ``{"ok": False, "error": ...}`` on failure.
    """
    params = {
        "p_api_key": api_key,
        "p_task_id": task_id,
        "p_summary": summary,
    }
    response = be.sb_rpc(RPC_TASK_COMPLETE, params)

    problem = _err(response)
    if problem:
        return {"ok": False, "error": problem}
    # A non-dict, non-error response is still treated as success.
    if isinstance(response, dict):
        return response
    return {"ok": True}
217
+
218
+
219
def helper_loop(api_key: str, project_id: str, swarm_id: str,
                work_fn: Callable[[Dict[str, Any]], str], *,
                lease_seconds: int = DEFAULT_LEASE_SECONDS,
                poll_seconds: float = DEFAULT_POLL_SECONDS,
                ttl_deadline: Optional[float] = None,
                max_tasks: Optional[int] = None,
                sleep_fn: Callable[[float], None] = time.sleep,
                clock: Callable[[], float] = time.monotonic) -> Dict[str, Any]:
    """Programmatic helper loop: claim → work → complete; drained → retire.

    For scripted/headless helpers (harness, batch runners). Interactive Claude
    helpers run the same protocol through the meshcode_tray_claim /
    meshcode_helper_retire MCP tools instead.

    Owner's non-negotiable guard (ZERO orphans): EVERY exit path retires the
    helper — drained, ttl_expired, error budget exhausted, task cap, AND an
    exception escaping the loop (best-effort retire, then the exception is
    re-raised). work_fn exceptions do NOT kill the loop (the claim lease
    expires server-side and the reaper re-opens the task); only the
    consecutive-error budget can.

    Args:
        api_key: the helper's api key.
        project_id: project id.
        swarm_id: tray to drain.
        work_fn: receives the claim payload and returns a completion summary
            string (a falsy return is sent as "").
        lease_seconds: lease requested on every claim.
        poll_seconds: sleep between polls while work is DAG-blocked; also the
            base of the linear claim-error backoff (capped at 60s).
        ttl_deadline: optional deadline on ``clock()``'s scale; checked before
            each claim.
        max_tasks: optional cap on successfully completed tasks.
        sleep_fn: injectable sleep (defaults to ``time.sleep``).
        clock: injectable clock (defaults to ``time.monotonic``).

    Returns:
        ``{"ok": True, "tasks_done", "retired_reason", "retire_result"}``.
        ``retired_reason`` is one of tray_drained | ttl_expired | max_tasks |
        claim_errors. Inspect ``retire_result["ok"]`` to see whether the
        retire itself went through.

    Raises:
        Whatever escapes the loop (e.g. a raise from the backend call, or
        KeyboardInterrupt) — after a best-effort retire.
    """
    done = 0
    consecutive_errors = 0
    retired_reason: Optional[str] = None

    try:
        while True:
            if ttl_deadline is not None and clock() >= ttl_deadline:
                retired_reason = "ttl_expired"
                break
            if max_tasks is not None and done >= max_tasks:
                retired_reason = "max_tasks"
                break

            claim = tray_claim(api_key, project_id, swarm_id=swarm_id,
                               lease_seconds=lease_seconds)
            if not claim.get("ok"):
                consecutive_errors += 1
                log.warning("tray_claim error (%d/%d): %s",
                            consecutive_errors, MAX_CONSECUTIVE_ERRORS,
                            claim.get("error"))
                if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
                    retired_reason = "claim_errors"
                    break
                # Linear backoff on repeated claim errors, capped at 60s.
                sleep_fn(min(poll_seconds * consecutive_errors, 60.0))
                continue
            consecutive_errors = 0

            if claim.get("claimed"):
                task_id = claim.get("task_id")
                try:
                    summary = work_fn(claim) or ""
                except Exception as e:  # lease expiry + reaper recover the task
                    # NOTE(review): no backoff here, so a work_fn that fails
                    # instantly keeps claiming (and leasing) further tasks.
                    log.warning("work_fn failed on task %s: %s", task_id, e)
                    continue
                comp = complete_task(api_key, task_id, summary)
                if comp.get("ok") is not False:
                    done += 1
                else:
                    log.warning("task_complete failed on %s: %s",
                                task_id, comp.get("error"))
                continue

            if claim.get("drained"):
                retired_reason = "tray_drained"
                break

            # Not claimed, not drained: DAG-blocked deps still pending. Poll.
            sleep_fn(poll_seconds)
    except BaseException:
        # Fix: an exception escaping the loop used to skip retire entirely,
        # contradicting the "every exit path retires" guard. Retire
        # best-effort, never mask the original exception, then re-raise.
        # "loop_crashed" assumes p_reason is a free-form telemetry tag, as
        # the other tags used here suggest — TODO confirm.
        try:
            crash_retire = retire_self(api_key, project_id, reason="loop_crashed")
            if not crash_retire.get("ok"):
                log.error("retire after loop crash failed: %s",
                          crash_retire.get("error"))
        except Exception as retire_exc:
            log.error("retire after loop crash raised: %s", retire_exc)
        raise

    retire = retire_self(api_key, project_id, reason=retired_reason or "tray_drained")
    if not retire.get("ok"):
        # The return shape is unchanged; make a failed retire visible in logs.
        log.error("helper retire failed (reason=%s): %s",
                  retired_reason, retire.get("error"))
    return {
        "ok": True,
        "tasks_done": done,
        "retired_reason": retired_reason,
        "retire_result": retire,
    }