loki-mode 6.83.0 → 7.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +62 -11
- package/VERSION +1 -1
- package/agents/managed_registry.py +246 -0
- package/agents/types.json +330 -0
- package/autonomy/completion-council.sh +226 -0
- package/autonomy/loki +346 -15
- package/autonomy/run.sh +357 -1
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +235 -0
- package/docs/INSTALLATION.md +1 -1
- package/mcp/__init__.py +1 -1
- package/mcp/managed_tools.py +234 -0
- package/mcp/server.py +22 -0
- package/memory/managed_memory/__init__.py +9 -0
- package/memory/managed_memory/retrieve.py +237 -1
- package/package.json +4 -2
- package/providers/managed.py +789 -0
- package/skills/00-index.md +1 -0
- package/skills/memory.md +185 -0
|
@@ -0,0 +1,789 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Loki Managed Agents - Multiagent session orchestration (Phase 2 foundation).
|
|
3
|
+
|
|
4
|
+
This module is the SINGLE entry point for running Claude Managed Agents
|
|
5
|
+
multiagent sessions (callable_agents). It is used by Phase 3 (code-review
|
|
6
|
+
council) and Phase 4 (completion council) to replace the existing
|
|
7
|
+
CLI-invocation fan-out with a single managed session where each council
|
|
8
|
+
agent is a callable_agent.
|
|
9
|
+
|
|
10
|
+
Public API (consumed by autonomy/run.sh Phases 3 and 4 in Wave 2):
|
|
11
|
+
|
|
12
|
+
is_enabled() -> bool
|
|
13
|
+
True iff the parent flag LOKI_MANAGED_AGENTS is "true" AND the
|
|
14
|
+
umbrella flag LOKI_EXPERIMENTAL_MANAGED_AGENTS is "true" AND the
|
|
15
|
+
anthropic SDK is importable. Does NOT import anthropic eagerly at
|
|
16
|
+
module load; probes importability via importlib.util.find_spec.
|
|
17
|
+
|
|
18
|
+
run_council(agent_pool, context, timeout_s=300) -> CouncilResult
|
|
19
|
+
Orchestrates a multiagent session with the provided pool names.
|
|
20
|
+
Returns a CouncilResult with per-agent verdicts and any
|
|
21
|
+
tool-confirmation payloads. On any failure, logs a fallback event
|
|
22
|
+
and raises ManagedUnavailable.
|
|
23
|
+
|
|
24
|
+
run_completion_council(voters, context, timeout_s=180) -> VotingResult
|
|
25
|
+
Same pattern as run_council but shapes the response for the
|
|
26
|
+
completion-council use case (voting for STOP / CONTINUE).
|
|
27
|
+
|
|
28
|
+
resolve_agent_ids(pool_names) -> list[str]
|
|
29
|
+
Returns Managed Agent IDs from .loki/managed/agent_ids.json cache.
|
|
30
|
+
Lazily materializes missing IDs via agents.managed_registry on
|
|
31
|
+
first use. Never eager on startup.
|
|
32
|
+
|
|
33
|
+
Design constraints:
|
|
34
|
+
|
|
35
|
+
1. This file is one of TWO places allowed to import the anthropic SDK
|
|
36
|
+
(the other being memory/managed_memory/client.py). A CI grep
|
|
37
|
+
invariant enforces the allowlist.
|
|
38
|
+
2. Every SDK call is wrapped in a hard timeout. Per-call default is
|
|
39
|
+
10s; the multiagent session budget is the caller-supplied
|
|
40
|
+
timeout_s (default 300s council, 180s completion council).
|
|
41
|
+
3. SDK shape errors (AttributeError / TypeError) on the beta API are
|
|
42
|
+
caught and translated into ManagedUnavailable. Outer callers in
|
|
43
|
+
autonomy/run.sh translate ManagedUnavailable into "fall back to
|
|
44
|
+
CLI fan-out" without aborting the iteration.
|
|
45
|
+
4. Every failure mode emits a structured event to
|
|
46
|
+
.loki/managed/events.ndjson via memory.managed_memory.events.
|
|
47
|
+
|
|
48
|
+
Research-preview note:
|
|
49
|
+
The `callable_agents` multiagent-session surface is a research
|
|
50
|
+
preview. The base BETA_HEADER ("managed-agents-2026-04-01") covers
|
|
51
|
+
the Phase 3/4 surface we depend on. If a future SDK version requires
|
|
52
|
+
a distinct "research preview" beta tag, extend
|
|
53
|
+
memory/managed_memory/_beta.py to expose a second constant; do not
|
|
54
|
+
inline a second header here.
|
|
55
|
+
|
|
56
|
+
Honest caveat:
|
|
57
|
+
The multiagent code path has NOT been exercised against the live
|
|
58
|
+
Anthropic API in this repo's CI. Automated coverage relies on
|
|
59
|
+
FakeMultiagentSession in tests/managed/. A staging smoke test against
|
|
60
|
+
the real API is required before Phase 3/4 leaves preview.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
from __future__ import annotations
|
|
64
|
+
|
|
65
|
+
import importlib
|
|
66
|
+
import importlib.util
|
|
67
|
+
import json
|
|
68
|
+
import logging
|
|
69
|
+
import os
|
|
70
|
+
import threading
|
|
71
|
+
import time
|
|
72
|
+
from dataclasses import asdict, dataclass, field
|
|
73
|
+
from pathlib import Path
|
|
74
|
+
from typing import Any, Dict, List, Optional
|
|
75
|
+
|
|
76
|
+
# Cross-module event emitter. memory.managed_memory has no SDK import at
|
|
77
|
+
# module load time, so this import is safe even when flags are off.
|
|
78
|
+
from memory.managed_memory.events import emit_managed_event
|
|
79
|
+
|
|
80
|
+
# Centralized beta header. Reused so the two SDK-importing files stay in sync.
|
|
81
|
+
from memory.managed_memory._beta import BETA_HEADER
|
|
82
|
+
|
|
83
|
+
_LOG = logging.getLogger("loki.providers.managed")
|
|
84
|
+
|
|
85
|
+
# Per-SDK-call hard timeout. The overall multiagent session budget is
|
|
86
|
+
# caller-supplied via timeout_s.
|
|
87
|
+
_DEFAULT_CALL_TIMEOUT = 10.0
|
|
88
|
+
|
|
89
|
+
# Cache file for materialized Managed Agent IDs.
|
|
90
|
+
_CACHE_FILE_REL = ".loki/managed/agent_ids.json"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# Exceptions and result types
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ManagedUnavailable(Exception):
    """
    Signal that the managed-agents multiagent path cannot be used.

    Callers are expected to catch this and fall back to the CLI fan-out
    path rather than abort the iteration. It is reserved for availability
    problems only; real bugs must surface as their own exception types
    and propagate normally.
    """
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class AgentVerdict:
    """The response of one callable_agent within a council session."""

    # Managed Agent ID the response is attributed to.
    agent_id: str
    # Pool name that agent ID was resolved from.
    pool_name: str
    # Coarse verdict token, e.g. "APPROVE" / "REQUEST_CHANGES" / "STOP" / "CONTINUE".
    verdict: str
    # Free-form text accompanying the verdict.
    rationale: str = ""
    # Optional severity label: "critical" / "high" / "medium" / "low".
    severity: Optional[str] = None
    # Message payload the verdict was derived from; a fresh dict per instance.
    raw: Dict[str, Any] = field(default_factory=dict)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class ToolConfirmation:
    """A tool-use confirmation reported by the session (observability only)."""

    # Agent ID of the message that carried the confirmation.
    agent_id: str
    # Name of the tool the confirmation refers to.
    tool_name: str
    # Confirmation payload; a fresh dict per instance when omitted.
    payload: Dict[str, Any] = field(default_factory=dict)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass
class CouncilResult:
    """Aggregate outcome returned by run_council."""

    # Verdicts gathered from the council session.
    verdicts: List[AgentVerdict] = field(default_factory=list)
    # Tool confirmations collected from the session messages.
    tool_confirmations: List[ToolConfirmation] = field(default_factory=list)
    # Session identifier reported by the session payload, if any.
    session_id: Optional[str] = None
    # Wall-clock duration of the run in milliseconds.
    elapsed_ms: int = 0
    # True if the budget fired before all agents responded.
    partial: bool = False
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@dataclass
class VotingResult:
    """Aggregate outcome returned by run_completion_council."""

    # Votes gathered from the completion-council session.
    votes: List[AgentVerdict] = field(default_factory=list)
    # Winning verdict: "STOP" / "CONTINUE", or None on a tie.
    majority: Optional[str] = None
    # Session identifier reported by the session payload, if any.
    session_id: Optional[str] = None
    # Wall-clock duration of the run in milliseconds.
    elapsed_ms: int = 0
    # Partial-result flag copied from the session payload.
    partial: bool = False
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
# Flag handling and SDK availability probe
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _flag_true(name: str) -> bool:
|
|
157
|
+
return os.environ.get(name, "").strip().lower() == "true"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _flags_on() -> bool:
    """Report whether the parent flag and the umbrella flag are both "true"."""
    required_flags = ("LOKI_MANAGED_AGENTS", "LOKI_EXPERIMENTAL_MANAGED_AGENTS")
    # all() short-circuits in order, so the umbrella flag is only read
    # when the parent flag is on.
    return all(_flag_true(flag) for flag in required_flags)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _sdk_available() -> bool:
|
|
168
|
+
"""True if the anthropic SDK can be imported (without importing it)."""
|
|
169
|
+
try:
|
|
170
|
+
return importlib.util.find_spec("anthropic") is not None
|
|
171
|
+
except (ValueError, ImportError): # pragma: no cover - defensive
|
|
172
|
+
return False
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def is_enabled() -> bool:
    """
    Tell whether the managed-agents path may be used.

    True only when both feature flags are on AND the anthropic SDK can be
    located. The SDK is probed, never imported, and the probe is skipped
    entirely when the flags are off, so this check is cheap enough for
    hot paths such as the run.sh iteration loop.
    """
    return _flags_on() and _sdk_available()
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ---------------------------------------------------------------------------
|
|
188
|
+
# SDK client construction (deferred; only called inside flag-gated paths)
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
_client_lock = threading.Lock()
|
|
193
|
+
_cached_client: Optional[Any] = None # anthropic.Anthropic instance
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _build_client() -> Any:
    """
    Create a fresh anthropic client on demand.

    The anthropic import lives inside this function so that importing
    this module never pulls in the SDK. Every failure path first emits a
    "managed_agents_fallback" event and then raises ManagedUnavailable:

    - the SDK cannot be imported,
    - ANTHROPIC_API_KEY is unset or blank,
    - the client constructor raises.

    Returns:
        An anthropic.Anthropic instance configured with the per-call
        timeout and the shared beta header.

    Raises:
        ManagedUnavailable: on any of the failure paths listed above.
    """

    def _fallback(
        op: str, reason: str, message: str, detail: Optional[str] = None
    ) -> ManagedUnavailable:
        # Emit the structured fallback event, then hand back the exception
        # for the caller to raise at the failure site.
        event: Dict[str, Any] = {"op": op, "reason": reason}
        if detail is not None:
            event["detail"] = detail
        emit_managed_event("managed_agents_fallback", event)
        return ManagedUnavailable(message)

    # Deliberately lazy: keeps the top-level import of this module SDK-free.
    try:
        import anthropic  # noqa: WPS433 - deliberate lazy import
    except ImportError as e:
        raise _fallback(
            "client_import",
            "anthropic_not_installed",
            f"anthropic SDK not installed: {e}",
            detail=str(e),
        )

    api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
    if not api_key:
        raise _fallback(
            "client_import",
            "missing_api_key",
            "ANTHROPIC_API_KEY is not set",
        )

    try:
        return anthropic.Anthropic(
            api_key=api_key,
            timeout=_DEFAULT_CALL_TIMEOUT,
            default_headers={"anthropic-beta": BETA_HEADER},
        )
    except Exception as e:  # pragma: no cover - SDK init edge
        raise _fallback(
            "client_init",
            "client_ctor_failed",
            f"anthropic client construction failed: {e}",
            detail=str(e),
        )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _get_client() -> Any:
    """
    Return the shared anthropic client, building it on first use.

    The module-level cache is read and written under _client_lock. A
    ManagedUnavailable raised by _build_client propagates and leaves the
    cache empty, so the next call retries construction.
    """
    global _cached_client
    with _client_lock:
        client = _cached_client
        if client is None:
            client = _build_client()
            _cached_client = client
        return client
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _reset_client() -> None:
    """Test hook: forget the cached client so the next _get_client() rebuilds it."""
    global _cached_client
    with _client_lock:
        # Clearing under the lock keeps this consistent with _get_client.
        _cached_client = None
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ---------------------------------------------------------------------------
|
|
255
|
+
# Fake / dependency-injection hook for tests
|
|
256
|
+
# ---------------------------------------------------------------------------
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Tests in tests/managed/ inject a FakeMultiagentSession factory here so the
|
|
260
|
+
# real anthropic.beta.sessions.* path is bypassed. In production, this stays
|
|
261
|
+
# None and _session_factory() falls through to the real SDK.
|
|
262
|
+
_session_factory_override: Optional[Any] = None
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _set_session_factory_for_tests(factory: Optional[Any]) -> None:
|
|
266
|
+
"""
|
|
267
|
+
Test hook. Pass a callable (client, *, agent_ids, context, timeout_s)
|
|
268
|
+
that returns an object exposing .run() -> dict. Pass None to restore.
|
|
269
|
+
"""
|
|
270
|
+
global _session_factory_override
|
|
271
|
+
_session_factory_override = factory
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _session_factory(
    client: Any,
    *,
    agent_ids: List[str],
    context: Dict[str, Any],
    timeout_s: int,
) -> Any:
    """
    Build the multiagent session object for one run.

    Uses the test override when one is installed; otherwise wraps the SDK
    beta surface in _RealMultiagentSession. The indirection exists so the
    orchestration can be unit-tested without the real network path.

    Returns:
        An object exposing .run() -> dict.
    """
    factory = _session_factory_override
    if factory is None:
        # Production path: no override installed, use the real SDK adapter.
        factory = _RealMultiagentSession
    return factory(
        client, agent_ids=agent_ids, context=context, timeout_s=timeout_s
    )
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
class _RealMultiagentSession:
|
|
297
|
+
"""
|
|
298
|
+
Thin adapter over `client.beta.sessions.create(...)` (preview).
|
|
299
|
+
|
|
300
|
+
We deliberately keep this adapter tiny and tolerant of SDK shape
|
|
301
|
+
churn; any AttributeError/TypeError is converted to ManagedUnavailable.
|
|
302
|
+
"""
|
|
303
|
+
|
|
304
|
+
def __init__(
|
|
305
|
+
self,
|
|
306
|
+
client: Any,
|
|
307
|
+
*,
|
|
308
|
+
agent_ids: List[str],
|
|
309
|
+
context: Dict[str, Any],
|
|
310
|
+
timeout_s: int,
|
|
311
|
+
) -> None:
|
|
312
|
+
self._client = client
|
|
313
|
+
self._agent_ids = agent_ids
|
|
314
|
+
self._context = context
|
|
315
|
+
self._timeout_s = timeout_s
|
|
316
|
+
|
|
317
|
+
def run(self) -> Dict[str, Any]:
|
|
318
|
+
"""Execute the session. Returns a dict of session payload."""
|
|
319
|
+
beta = getattr(self._client, "beta", None)
|
|
320
|
+
if beta is None:
|
|
321
|
+
raise ManagedUnavailable("anthropic SDK missing `beta` namespace")
|
|
322
|
+
|
|
323
|
+
# SDK API under preview: exact attribute may be `sessions` or
|
|
324
|
+
# `agent_sessions`. Try both, fail-fast if neither.
|
|
325
|
+
sessions = getattr(beta, "sessions", None) or getattr(
|
|
326
|
+
beta, "agent_sessions", None
|
|
327
|
+
)
|
|
328
|
+
if sessions is None or not hasattr(sessions, "create"):
|
|
329
|
+
raise ManagedUnavailable(
|
|
330
|
+
"anthropic.beta.sessions.create not available in SDK"
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
try:
|
|
334
|
+
session = sessions.create(
|
|
335
|
+
callable_agents=[{"agent_id": aid} for aid in self._agent_ids],
|
|
336
|
+
context=self._context,
|
|
337
|
+
timeout=self._timeout_s,
|
|
338
|
+
)
|
|
339
|
+
except (AttributeError, TypeError) as e:
|
|
340
|
+
raise ManagedUnavailable(f"SDK session shape mismatch: {e}")
|
|
341
|
+
|
|
342
|
+
# Session objects can be streamed or returned whole depending on
|
|
343
|
+
# SDK version. We treat .messages / .output / .result as the
|
|
344
|
+
# first-wins path and stop there.
|
|
345
|
+
for attr in ("messages", "output", "result"):
|
|
346
|
+
got = getattr(session, attr, None)
|
|
347
|
+
if got is not None:
|
|
348
|
+
return {"raw": got, "session_id": getattr(session, "id", None)}
|
|
349
|
+
return {"raw": None, "session_id": getattr(session, "id", None)}
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
# Agent-ID resolution (cache-backed)
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _cache_path() -> Path:
    """Return the agent-ID cache path under LOKI_TARGET_DIR, or under the cwd if that is unset or empty."""
    root = os.environ.get("LOKI_TARGET_DIR")
    if not root:
        root = os.getcwd()
    return Path(root).joinpath(_CACHE_FILE_REL)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _load_agent_ids_cache() -> Dict[str, str]:
    """
    Load the pool_name -> agent_id cache from disk, best-effort.

    Returns:
        The cached mapping restricted to str -> str entries. An empty
        dict is returned when the file is missing, unreadable, not valid
        UTF-8, not valid JSON, or not a JSON object.
    """
    p = _cache_path()
    try:
        # Opening directly (no exists() pre-check) avoids a check-then-open
        # race; a missing file surfaces as FileNotFoundError, an OSError.
        with open(p, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and also the
        # UnicodeDecodeError raised for a cache file that is not UTF-8.
        return {}
    if not isinstance(data, dict):
        return {}
    # Only keep pool_name -> agent_id string entries.
    return {k: v for k, v in data.items() if isinstance(k, str) and isinstance(v, str)}
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def resolve_agent_ids(pool_names: List[str]) -> List[str]:
    """
    Return Managed Agent IDs for the requested pool names.

    Reads .loki/managed/agent_ids.json; for any pool name missing from
    that cache, defers to agents.managed_registry.materialize_agent(name)
    to obtain the ID. Each missing name is materialized at most once per
    call, even when it appears several times in ``pool_names``.

    NOTE: Lazy by design. Within this module it is only called from
    run_council / run_completion_council (i.e. inside flag-gated paths).

    Args:
        pool_names: pool names to resolve; duplicates are allowed.

    Returns:
        One agent ID per entry of ``pool_names``, in the same order.
        An empty input yields an empty list.

    Raises:
        ManagedUnavailable: when the registry cannot be imported, when
            materialization fails, or when it yields something other than
            a non-empty string. A "managed_agents_fallback" event is
            emitted first in each case.
    """
    if not pool_names:
        return []

    cache = _load_agent_ids_cache()
    # dict.fromkeys de-duplicates while preserving first-seen order, so a
    # repeated pool name does not trigger a second materialization.
    missing: List[str] = [n for n in dict.fromkeys(pool_names) if n not in cache]

    if missing:
        # Defer import so agents.managed_registry stays cold unless we
        # actually need to materialize.
        try:
            from agents import managed_registry
        except ImportError as e:
            emit_managed_event(
                "managed_agents_fallback",
                {
                    "op": "resolve_agent_ids",
                    "reason": "registry_import_failed",
                    "detail": str(e),
                },
            )
            raise ManagedUnavailable(
                f"agents.managed_registry unavailable: {e}"
            ) from e

        for name in missing:
            try:
                agent_id = managed_registry.materialize_agent(name)
            except Exception as e:
                emit_managed_event(
                    "managed_agents_fallback",
                    {
                        "op": "resolve_agent_ids",
                        "reason": "materialize_failed",
                        "pool_name": name,
                        "detail": str(e),
                    },
                )
                raise ManagedUnavailable(
                    f"materialize_agent({name!r}) failed: {e}"
                ) from e

            # The on-disk cache only ever holds string IDs; reject
            # anything else here instead of passing a bogus ID on to the
            # session call.
            if not isinstance(agent_id, str) or not agent_id:
                emit_managed_event(
                    "managed_agents_fallback",
                    {
                        "op": "resolve_agent_ids",
                        "reason": "materialize_invalid_id",
                        "pool_name": name,
                        "detail": repr(agent_id),
                    },
                )
                raise ManagedUnavailable(
                    f"materialize_agent({name!r}) returned an invalid id: "
                    f"{agent_id!r}"
                )
            cache[name] = agent_id

    return [cache[name] for name in pool_names]
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ---------------------------------------------------------------------------
|
|
437
|
+
# Council orchestration (used by Phase 3 + Phase 4)
|
|
438
|
+
# ---------------------------------------------------------------------------
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _run_session_with_budget(
    agent_ids: List[str],
    context: Dict[str, Any],
    timeout_s: int,
    op_name: str,
) -> Dict[str, Any]:
    """
    Run one multiagent session inside an overall time budget.

    The session's run() executes on a daemon worker thread while the
    caller waits on an Event for at most ``timeout_s`` seconds, so a
    pathological SDK call cannot block the caller forever. Per-call
    timeouts are configured on the client; this is the outer envelope.

    Args:
        agent_ids: Managed Agent IDs taking part in the session.
        context: context payload handed to the session factory.
        timeout_s: overall budget in seconds.
        op_name: operation label used in events, the thread name and
            error messages.

    Returns:
        {"result": <payload>, "partial": bool, "session_id": str|None}

    Raises:
        ManagedUnavailable: when the client or session cannot be built,
            the budget is exceeded, or the session run fails.
    """
    # A ManagedUnavailable from client construction propagates unchanged.
    client = _get_client()

    try:
        session = _session_factory(
            client, agent_ids=agent_ids, context=context, timeout_s=timeout_s
        )
    except (AttributeError, TypeError) as e:
        # Shape errors from the factory become ManagedUnavailable; a
        # ManagedUnavailable raised by the factory passes through as-is.
        emit_managed_event(
            "managed_agents_fallback",
            {"op": op_name, "reason": "session_shape_error", "detail": str(e)},
        )
        raise ManagedUnavailable(f"session factory shape error: {e}")

    # Emit a session-created event eagerly so operators can observe.
    emit_managed_event(
        "managed_session_created",
        {"op": op_name, "agent_count": len(agent_ids), "timeout_s": timeout_s},
    )

    outcome: Dict[str, Any] = {"payload": None, "error": None}
    finished_evt = threading.Event()

    def _worker() -> None:
        try:
            outcome["payload"] = session.run()
        except Exception as e:  # includes ManagedUnavailable
            # Record the failure; the waiting caller inspects it below.
            outcome["error"] = e
        finally:
            finished_evt.set()
            # Marker event emitted once the worker is done with the
            # session, on success and on failure alike.
            emit_managed_event(
                "managed_session_thread_idle",
                {"op": op_name},
            )

    worker = threading.Thread(
        target=_worker, name=f"managed-session-{op_name}", daemon=True
    )
    worker.start()
    emit_managed_event(
        "managed_session_thread_created",
        {"op": op_name, "agent_count": len(agent_ids)},
    )

    if not finished_evt.wait(timeout=float(timeout_s)):
        # Budget fired before the worker returned. The thread is a daemon
        # and is simply abandoned here.
        emit_managed_event(
            "managed_agents_fallback",
            {
                "op": op_name,
                "reason": "overall_budget_timeout",
                "timeout_s": timeout_s,
            },
        )
        raise ManagedUnavailable(
            f"{op_name}: overall budget {timeout_s}s exceeded"
        )

    failure = outcome["error"]
    if isinstance(failure, ManagedUnavailable):
        emit_managed_event(
            "managed_agents_fallback",
            {
                "op": op_name,
                "reason": "session_unavailable",
                "detail": str(failure),
            },
        )
        raise failure
    if failure is not None:
        # Any other runtime error is logged as a fallback event and then
        # translated into ManagedUnavailable.
        emit_managed_event(
            "managed_agents_fallback",
            {"op": op_name, "reason": "session_runtime_error", "detail": str(failure)},
        )
        raise ManagedUnavailable(f"{op_name}: {failure}")

    payload = outcome["payload"] or {}
    return {
        "result": payload.get("raw"),
        "partial": False,
        "session_id": payload.get("session_id"),
    }
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def _parse_agent_messages(raw: Any) -> List[Dict[str, Any]]:
|
|
554
|
+
"""
|
|
555
|
+
Best-effort extraction of a list of {agent_id, text, tool_confirmations}
|
|
556
|
+
from an SDK response. Tolerates missing fields.
|
|
557
|
+
"""
|
|
558
|
+
if raw is None:
|
|
559
|
+
return []
|
|
560
|
+
if isinstance(raw, list):
|
|
561
|
+
items = raw
|
|
562
|
+
elif isinstance(raw, dict) and "messages" in raw:
|
|
563
|
+
items = raw["messages"]
|
|
564
|
+
else:
|
|
565
|
+
items = [raw]
|
|
566
|
+
|
|
567
|
+
out: List[Dict[str, Any]] = []
|
|
568
|
+
for msg in items:
|
|
569
|
+
if isinstance(msg, dict):
|
|
570
|
+
out.append(
|
|
571
|
+
{
|
|
572
|
+
"agent_id": msg.get("agent_id") or msg.get("source_agent") or "",
|
|
573
|
+
"text": msg.get("text") or msg.get("content") or "",
|
|
574
|
+
"tool_confirmations": msg.get("tool_confirmations") or [],
|
|
575
|
+
"raw": msg,
|
|
576
|
+
}
|
|
577
|
+
)
|
|
578
|
+
else:
|
|
579
|
+
# Pydantic-ish object
|
|
580
|
+
get = lambda name, default=None: getattr(msg, name, default) # noqa: E731
|
|
581
|
+
out.append(
|
|
582
|
+
{
|
|
583
|
+
"agent_id": get("agent_id") or get("source_agent") or "",
|
|
584
|
+
"text": get("text") or get("content") or "",
|
|
585
|
+
"tool_confirmations": get("tool_confirmations") or [],
|
|
586
|
+
"raw": {"repr": str(msg)},
|
|
587
|
+
}
|
|
588
|
+
)
|
|
589
|
+
return out
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _verdict_from_text(text: str, default: str) -> str:
|
|
593
|
+
"""Heuristic extraction of a coarse verdict token from free-form text."""
|
|
594
|
+
t = (text or "").upper()
|
|
595
|
+
for token in ("APPROVE", "REQUEST_CHANGES", "REJECT", "STOP", "CONTINUE"):
|
|
596
|
+
if token in t:
|
|
597
|
+
return token
|
|
598
|
+
return default
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def run_council(
    agent_pool: List[str],
    context: Dict[str, Any],
    timeout_s: int = 300,
) -> CouncilResult:
    """
    Run a council multiagent session with the given agent pool names.

    Exactly one AgentVerdict is produced per distinct resolved agent ID.
    When an agent sends several messages, its most recent message
    supersedes the earlier ones. An agent that sent nothing gets an
    "ABSTAIN" verdict. Messages from agent IDs outside the pool produce
    no verdict.

    Args:
        agent_pool: pool names of the council members; must be non-empty.
        context: context payload passed to the session.
        timeout_s: overall session budget in seconds.

    Returns:
        A CouncilResult with the verdicts (responders in first-response
        order, then abstainers in pool order), the collected tool
        confirmations, the session ID and the elapsed time.

    Raises:
        ManagedUnavailable: when the path is disabled, the pool is empty,
            agent IDs cannot be resolved, or the session fails or times
            out. Fallback events are emitted by the failing step.
    """
    if not is_enabled():
        emit_managed_event(
            "managed_agents_fallback",
            {"op": "run_council", "reason": "flags_off_or_sdk_missing"},
        )
        raise ManagedUnavailable("managed agents path is not enabled")

    if not agent_pool:
        raise ManagedUnavailable("run_council requires a non-empty agent_pool")

    start = time.monotonic()
    # Both helpers raise ManagedUnavailable themselves; let it propagate.
    agent_ids = resolve_agent_ids(agent_pool)
    session_payload = _run_session_with_budget(
        agent_ids=agent_ids,
        context=context,
        timeout_s=timeout_s,
        op_name="run_council",
    )

    messages = _parse_agent_messages(session_payload["result"])
    id_to_pool = dict(zip(agent_ids, agent_pool))

    # Keyed by agent ID so each agent ends up with a single verdict.
    # Re-assigning an existing key keeps its original position, so
    # responders stay in first-response order.
    verdict_by_agent: Dict[str, AgentVerdict] = {}
    tool_confirmations: List[ToolConfirmation] = []

    for m in messages:
        aid = m["agent_id"] or ""
        if aid and aid in id_to_pool:
            verdict_by_agent[aid] = AgentVerdict(
                agent_id=aid,
                pool_name=id_to_pool[aid],
                verdict=_verdict_from_text(m["text"], default="ABSTAIN"),
                rationale=m["text"],
                raw=m.get("raw", {}),
            )
        # NOTE(review): confirmations are collected from every message,
        # whether or not its agent is in the pool (they are
        # observability-only) -- confirm this matches the intended scope.
        for tc in m.get("tool_confirmations") or []:
            if isinstance(tc, dict):
                tool_confirmations.append(
                    ToolConfirmation(
                        agent_id=aid,
                        tool_name=tc.get("tool_name", ""),
                        payload=tc,
                    )
                )

    # Iterating the mapping (not agent_ids) yields each distinct ID once,
    # so duplicate pool entries cannot create duplicate ABSTAIN verdicts.
    for aid, pool_name in id_to_pool.items():
        if aid not in verdict_by_agent:
            verdict_by_agent[aid] = AgentVerdict(
                agent_id=aid,
                pool_name=pool_name,
                verdict="ABSTAIN",
                rationale="no message from agent in session",
            )

    elapsed_ms = int((time.monotonic() - start) * 1000)
    return CouncilResult(
        verdicts=list(verdict_by_agent.values()),
        tool_confirmations=tool_confirmations,
        session_id=session_payload.get("session_id"),
        elapsed_ms=elapsed_ms,
        partial=session_payload.get("partial", False),
    )
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def run_completion_council(
    voters: List[str],
    context: Dict[str, Any],
    timeout_s: int = 180,
) -> VotingResult:
    """
    Run the completion-council multiagent session and tally the votes.

    Each distinct voter contributes exactly one vote. When a voter sends
    several messages, its most recent message supersedes the earlier
    ones, so a chatty voter cannot be counted more than once. A message
    without a recognizable verdict token counts as "CONTINUE"; a voter
    that sent nothing is recorded as "ABSTAIN".

    Args:
        voters: pool names of the voting agents; must be non-empty.
        context: context payload passed to the session.
        timeout_s: overall session budget in seconds.

    Returns:
        A VotingResult whose ``majority`` is "STOP" or "CONTINUE",
        computed over STOP/CONTINUE votes only, or None on a tie or when
        there are no such votes.

    Raises:
        ManagedUnavailable: when the path is disabled, ``voters`` is
            empty, agent IDs cannot be resolved, or the session fails or
            times out. Fallback events are emitted by the failing step.
    """
    if not is_enabled():
        emit_managed_event(
            "managed_agents_fallback",
            {"op": "run_completion_council", "reason": "flags_off_or_sdk_missing"},
        )
        raise ManagedUnavailable("managed agents path is not enabled")

    if not voters:
        raise ManagedUnavailable("run_completion_council requires non-empty voters")

    start = time.monotonic()
    # Both helpers raise ManagedUnavailable themselves; let it propagate.
    agent_ids = resolve_agent_ids(voters)
    session_payload = _run_session_with_budget(
        agent_ids=agent_ids,
        context=context,
        timeout_s=timeout_s,
        op_name="run_completion_council",
    )

    messages = _parse_agent_messages(session_payload["result"])
    id_to_pool = dict(zip(agent_ids, voters))

    # Keyed by agent ID so each voter holds a single ballot. Re-assigning
    # an existing key keeps its original position in the dict.
    vote_by_agent: Dict[str, AgentVerdict] = {}
    for m in messages:
        aid = m["agent_id"] or ""
        if aid and aid in id_to_pool:
            vote_by_agent[aid] = AgentVerdict(
                agent_id=aid,
                pool_name=id_to_pool[aid],
                verdict=_verdict_from_text(m["text"], default="CONTINUE"),
                rationale=m["text"],
                raw=m.get("raw", {}),
            )

    # Iterating the mapping yields each distinct voter ID exactly once.
    for aid, pool_name in id_to_pool.items():
        if aid not in vote_by_agent:
            vote_by_agent[aid] = AgentVerdict(
                agent_id=aid,
                pool_name=pool_name,
                verdict="ABSTAIN",
                rationale="no message from voter in session",
            )

    votes = list(vote_by_agent.values())

    # Majority across STOP/CONTINUE votes only. Tie (including 0-0) => None.
    stop_count = sum(1 for v in votes if v.verdict == "STOP")
    continue_count = sum(1 for v in votes if v.verdict == "CONTINUE")
    majority: Optional[str] = None
    if stop_count > continue_count:
        majority = "STOP"
    elif continue_count > stop_count:
        majority = "CONTINUE"

    elapsed_ms = int((time.monotonic() - start) * 1000)
    return VotingResult(
        votes=votes,
        majority=majority,
        session_id=session_payload.get("session_id"),
        elapsed_ms=elapsed_ms,
        partial=session_payload.get("partial", False),
    )
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
__all__ = [
|
|
780
|
+
"AgentVerdict",
|
|
781
|
+
"CouncilResult",
|
|
782
|
+
"ManagedUnavailable",
|
|
783
|
+
"ToolConfirmation",
|
|
784
|
+
"VotingResult",
|
|
785
|
+
"is_enabled",
|
|
786
|
+
"resolve_agent_ids",
|
|
787
|
+
"run_completion_council",
|
|
788
|
+
"run_council",
|
|
789
|
+
]
|