maestro-loop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maestro_loop/__init__.py +8 -0
- maestro_loop/controller.py +584 -0
- maestro_loop/py.typed +0 -0
- maestro_loop/retry.py +36 -0
- maestro_loop/scheduler.py +98 -0
- maestro_loop/states.py +48 -0
- maestro_loop-0.1.0.dist-info/METADATA +10 -0
- maestro_loop-0.1.0.dist-info/RECORD +9 -0
- maestro_loop-0.1.0.dist-info/WHEEL +4 -0
maestro_loop/controller.py
ADDED
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
"""AutoLoop controller: LLM-driven orchestration for autonomous execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
from pydantic_ai import Agent
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from maestro.resilience import async_retry, is_retryable_error # type: ignore[import-untyped]
|
|
13
|
+
from maestro.streaming import AgentStreamLogger # type: ignore[import-untyped]
|
|
14
|
+
from maestro_agents.animation import ThinkingAnimation
|
|
15
|
+
from maestro_agents.base import MaestroAgent
|
|
16
|
+
from maestro_agents.contract_validator import validate_contracts
|
|
17
|
+
from maestro_ssot.hub import SSOTHub
|
|
18
|
+
from maestro_ssot.models import ReqStatus
|
|
19
|
+
|
|
20
|
+
from .retry import RetryBudget
|
|
21
|
+
from .scheduler import LLMScheduler
|
|
22
|
+
from .states import ControllerAction, LoopState
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from maestro_harness.harness import Harness
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
console = Console()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class LoopResult(BaseModel):
    """Outcome reported by ``AutoLoop.run`` when the loop stops."""

    # State the loop ended in (the visible code only returns DONE or FAIL).
    state: LoopState
    # Requirement text the run was started with.
    requirement: str
    # Value of the loop's iteration counter when the result was produced.
    iterations: int
    # Human-readable explanation of why the loop stopped.
    message: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class LLMController:
    """Pick the loop's next action, either through an LLM or through fixed rules.

    When constructed with ``model="test"`` no LLM agent is created and
    :meth:`decide` answers from deterministic substring checks on the prompt,
    so test suites can run without a real model.
    """

    def __init__(self, model: str | object, agents: list[MaestroAgent]) -> None:
        self._agents = agents
        self._is_test = str(model) == "test"
        if self._is_test:
            # Rule-based mode: ``self._agent`` is deliberately never created.
            return
        self._agent = Agent(
            model,  # type: ignore[arg-type]
            output_type=ControllerAction,
            system_prompt=self._system_prompt(),
        )

    async def decide(
        self, prompt: str, timeout: float = 60.0
    ) -> ControllerAction:
        """Return the next action for the state described by *prompt*.

        In test mode the answer comes from :meth:`_test_decide`. Otherwise the
        LLM agent is run under ``async_retry`` with a per-attempt *timeout*
        (seconds). Any exception that escapes the retry wrapper is logged and
        converted into a ``DONE`` action, so this method does not raise
        ``Exception`` subclasses to its caller.
        """
        if self._is_test:
            return self._test_decide(prompt)

        import asyncio

        async def _attempt() -> ControllerAction:
            # One LLM round-trip, bounded by the caller-supplied timeout.
            run_result = await asyncio.wait_for(
                self._agent.run(prompt), timeout=timeout
            )
            return run_result.output

        try:
            # NOTE(review): retryable=(Exception,) looks like it marks every
            # exception as retryable — confirm against async_retry's contract.
            decided: ControllerAction = await async_retry(
                _attempt,
                max_retries=2,
                base_delay=2.0,
                retryable=(Exception,),
            )
        except Exception as exc:
            failure_msg = (
                "Controller LLM call failed after retries: %s"
                if is_retryable_error(exc)
                else "Controller LLM call failed (non-retryable): %s"
            )
            logger.warning(failure_msg, exc)
            # Fail-safe: report DONE so the loop can wind down.
            return ControllerAction(
                action="DONE",
                reasoning=f"Controller failed: {exc}",
            )
        return decided

    def _test_decide(self, prompt: str) -> ControllerAction:
        """Choose an action from literal markers found in *prompt*.

        The checks run in a fixed priority order: first iteration, in-progress
        section, pending section, then the completed section combined with the
        zero counters. The first match wins.
        """
        if "Iteration: 1 " in prompt:
            return ControllerAction(action="DECOMPOSE", reasoning="Test: first iteration")
        if "In-progress tasks" in prompt:
            return ControllerAction(action="EXECUTE", reasoning="Test: execute in-progress")
        if "Pending tasks" in prompt:
            return ControllerAction(action="ASSIGN", reasoning="Test: assign pending")
        if "Completed tasks" not in prompt:
            return ControllerAction(action="DONE", reasoning="Test: nothing to do")

        # A completed section exists; decide between finishing and verifying.
        counters_zero = all(
            marker in prompt
            for marker in ("Pending: 0", "In Progress: 0", "Blocked: 0")
        )
        if counters_zero:
            return ControllerAction(action="DONE", reasoning="Test: all tasks done")
        return ControllerAction(action="VERIFY", reasoning="Test: verify completed")

    @staticmethod
    def _system_prompt() -> str:
        """Return the fixed system prompt given to the controller LLM."""
        sentences = (
            "You are the Controller of a multi-agent software engineering system. ",
            "You observe the current state of tasks and decide the single next action. ",
            "Your decision must be one of: DECOMPOSE, ASSIGN, EXECUTE, VERIFY, ",
            "COMMIT, or DONE. Return your decision as structured JSON.",
        )
        return "".join(sentences)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class AutoLoop:
|
|
133
|
+
"""LLM-driven execution loop. The Controller decides each step dynamically.
|
|
134
|
+
|
|
135
|
+
Unlike the previous deterministic state machine (PLAN→ASSIGN→EXEC→VALID→...),
|
|
136
|
+
the Controller observes the full SSOT state and chooses the next action.
|
|
137
|
+
This allows the loop to skip unnecessary steps (e.g., ANSWER a greeting
|
|
138
|
+
directly without decomposition) or repeat steps when needed.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
def __init__(
    self,
    hub: SSOTHub,
    harness: Harness,
    agents: list[MaestroAgent],
    max_retries: int = 3,
    max_iterations: int = 20,
    model_map: dict[str, Any] | None = None,
    scheduler_model: str | object = "test",
    timeout_seconds: float = 600.0,
) -> None:
    """Wire up the loop's collaborators and limits.

    ``scheduler_model`` is used for both the scheduler and the controller;
    ``model_map`` holds optional per-role model overrides (see ``_model_for``).
    """
    # Collaborators supplied by the caller.
    self.hub = hub
    self.harness = harness
    self.agents = agents

    # Both LLM helpers share the same model; "test" selects rule-based mode
    # in the controller.
    self.scheduler = LLMScheduler(scheduler_model, agents)
    self.controller = LLMController(scheduler_model, agents)
    self._test_mode = str(scheduler_model) == "test"

    # Limits and per-run bookkeeping.
    self.retry_budget = RetryBudget(max_retries)
    self.max_iterations = max_iterations
    self.timeout_seconds = timeout_seconds
    self.model_map = model_map if model_map else {}
    self.iterations = 0
    self._history: list[ControllerAction] = []

    # Agents with dedicated roles are resolved once up front (None if absent).
    self._planning_agent = self._get_agent_by_role("planner")
    self._review_agent = self._get_agent_by_role("review")
|
|
166
|
+
|
|
167
|
+
# ------------------------------------------------------------------
|
|
168
|
+
# Public API
|
|
169
|
+
# ------------------------------------------------------------------
|
|
170
|
+
|
|
171
|
+
async def run(self, requirement: str) -> LoopResult:
|
|
172
|
+
"""Execute the autonomous loop until DONE or FAIL."""
|
|
173
|
+
import asyncio
|
|
174
|
+
|
|
175
|
+
self.hub.initialize(requirement)
|
|
176
|
+
self.iterations = 0
|
|
177
|
+
self.retry_budget.reset()
|
|
178
|
+
self._history.clear()
|
|
179
|
+
start_time = asyncio.get_event_loop().time()
|
|
180
|
+
|
|
181
|
+
while True:
|
|
182
|
+
elapsed = asyncio.get_event_loop().time() - start_time
|
|
183
|
+
if elapsed > self.timeout_seconds:
|
|
184
|
+
return LoopResult(
|
|
185
|
+
state=LoopState.FAIL,
|
|
186
|
+
requirement=requirement,
|
|
187
|
+
iterations=self.iterations,
|
|
188
|
+
message=f"Global timeout ({self.timeout_seconds}s) exceeded.",
|
|
189
|
+
)
|
|
190
|
+
self.iterations += 1
|
|
191
|
+
if self.iterations > self.max_iterations:
|
|
192
|
+
return LoopResult(
|
|
193
|
+
state=LoopState.FAIL,
|
|
194
|
+
requirement=requirement,
|
|
195
|
+
iterations=self.iterations,
|
|
196
|
+
message=f"Max iterations ({self.max_iterations}) exceeded.",
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Shortcut: if all non-root requirements are DONE, finish immediately
|
|
200
|
+
all_reqs = self.hub.list_all_requirements()
|
|
201
|
+
non_root = [r for r in all_reqs if r.parent_id is not None]
|
|
202
|
+
if non_root and all(r.status == ReqStatus.DONE for r in non_root):
|
|
203
|
+
return LoopResult(
|
|
204
|
+
state=LoopState.DONE,
|
|
205
|
+
requirement=requirement,
|
|
206
|
+
iterations=self.iterations,
|
|
207
|
+
message="All requirements completed.",
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
prompt = self._build_controller_prompt(requirement)
|
|
211
|
+
ctrl_anim = ThinkingAnimation(
|
|
212
|
+
"[bold blue]controller[/] deciding",
|
|
213
|
+
console=console,
|
|
214
|
+
)
|
|
215
|
+
ctrl_anim.start()
|
|
216
|
+
action = await self.controller.decide(prompt)
|
|
217
|
+
ctrl_anim.stop()
|
|
218
|
+
console.print(f"[dim]→ {action.action}: {action.reasoning}[/]")
|
|
219
|
+
self._history.append(action)
|
|
220
|
+
|
|
221
|
+
logger.info(
|
|
222
|
+
"Iteration %d: action=%s reason=%s",
|
|
223
|
+
self.iterations,
|
|
224
|
+
action.action,
|
|
225
|
+
action.reasoning,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
match action.action:
|
|
229
|
+
case "DECOMPOSE":
|
|
230
|
+
had_pending = await self._do_plan()
|
|
231
|
+
if not had_pending:
|
|
232
|
+
logger.info("Planner produced no sub-tasks (likely trivial)")
|
|
233
|
+
|
|
234
|
+
case "ASSIGN":
|
|
235
|
+
await self._do_assign()
|
|
236
|
+
|
|
237
|
+
case "EXECUTE":
|
|
238
|
+
await self._do_exec()
|
|
239
|
+
|
|
240
|
+
case "VERIFY":
|
|
241
|
+
ok, reasons = await self._do_valid()
|
|
242
|
+
if not ok:
|
|
243
|
+
logger.info("Validation failed: %s", reasons)
|
|
244
|
+
self._rollback()
|
|
245
|
+
if self.retry_budget.exhausted:
|
|
246
|
+
return LoopResult(
|
|
247
|
+
state=LoopState.FAIL,
|
|
248
|
+
requirement=requirement,
|
|
249
|
+
iterations=self.iterations,
|
|
250
|
+
message=(
|
|
251
|
+
f"Validation failed after {self.retry_budget.max_retries} "
|
|
252
|
+
f"retries: {reasons}"
|
|
253
|
+
),
|
|
254
|
+
)
|
|
255
|
+
self.retry_budget.consume()
|
|
256
|
+
|
|
257
|
+
case "COMMIT":
|
|
258
|
+
self._do_commit()
|
|
259
|
+
|
|
260
|
+
case "DONE":
|
|
261
|
+
all_reqs = self.hub.list_all_requirements()
|
|
262
|
+
non_root = [r for r in all_reqs if r.parent_id is not None]
|
|
263
|
+
if not non_root or all(r.status == ReqStatus.DONE for r in non_root):
|
|
264
|
+
return LoopResult(
|
|
265
|
+
state=LoopState.DONE,
|
|
266
|
+
requirement=requirement,
|
|
267
|
+
iterations=self.iterations,
|
|
268
|
+
message="All requirements completed.",
|
|
269
|
+
)
|
|
270
|
+
logger.warning("Controller chose DONE but tasks remain pending")
|
|
271
|
+
|
|
272
|
+
case _:
|
|
273
|
+
return LoopResult(
|
|
274
|
+
state=LoopState.FAIL,
|
|
275
|
+
requirement=requirement,
|
|
276
|
+
iterations=self.iterations,
|
|
277
|
+
message=f"Unknown controller action: {action.action}",
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# ------------------------------------------------------------------
|
|
281
|
+
# Action handlers (do the work, do NOT set state — Controller decides next step)
|
|
282
|
+
# ------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
async def _do_plan(self) -> bool:
    """Ask the planner agent to decompose the requirement tree into sub-tasks.

    When no planner agent is configured, the planning call is skipped and only
    the final check runs. Planner errors and exceptions are logged, not raised.

    Returns:
        True if at least one PENDING sub-requirement (``parent_id`` set)
        exists afterwards. The root requirement is excluded, matching how
        ``run`` and ``_do_assign`` treat it; otherwise a pending root would
        always report "planner produced work".
    """
    if self._planning_agent is not None:
        all_reqs = self.hub.list_all_requirements()
        root = next((r for r in all_reqs if r.parent_id is None), None)
        root_id = root.id if root else "unknown"
        existing = [r for r in all_reqs if r.parent_id is not None]
        agent_list = ", ".join(f"{a.role}({a.agent_id})" for a in self.agents)

        prompt = (
            "Decompose the requirement into actionable sub-tasks.\n\n"
            f"Root requirement ID (use this as parent_id for ALL sub-tasks): {root_id}\n"
            f"Available agents in the team: {agent_list}\n"
        )
        if existing:
            prompt += (
                f"Existing sub-requirements ({len(existing)}): "
                "do NOT create duplicates.\n"
            )
        prompt += (
            "Use the SSOT tools to add requirements and register contracts. "
            "Remember: every sub-task MUST have a parent_id."
        )
        try:
            sl = AgentStreamLogger()
            result = await sl.run(
                self._planning_agent,
                prompt,
                model=self._model_for("planner"),
            )
            if not result.success:
                logger.warning("PlanningAgent error: %s", result.error)
        except Exception as exc:
            # Best-effort: a failed planning call must not abort the loop.
            logger.warning("PlanningAgent exception: %s", exc)

    # Re-read the hub: the planner adds requirements through its tools.
    return any(
        r.parent_id is not None and r.status == ReqStatus.PENDING
        for r in self.hub.list_all_requirements()
    )
|
|
324
|
+
|
|
325
|
+
async def _do_assign(self) -> bool:
    """Claim each ready pending sub-task for the agent the scheduler picks.

    A task is skipped when it is the root requirement, when any dependency is
    missing or not DONE, when the scheduler returns no agent, or when the
    chosen agent is the planner.

    Returns True if at least one task was claimed.
    """
    assigned_any = False

    for candidate in self.hub.list_pending():
        # The root requirement (no parent) is never assigned, only sub-tasks.
        if candidate.parent_id is None:
            continue

        # Stop at the first dependency that is unknown or unfinished.
        unmet = False
        for dep_id in candidate.dependencies or ():
            dep = self.hub.get_requirement(dep_id)
            if dep is None or dep.status != ReqStatus.DONE:
                unmet = True
                break
        if unmet:
            continue

        chosen = await self.scheduler.select_best_agent(candidate, self.agents)
        # The planner only decomposes; it must not execute sub-tasks.
        if chosen is None or chosen.role == "planner":
            continue

        try:
            self.hub.claim_requirement(candidate.id, chosen.agent_id)
        except ValueError as exc:
            logger.debug("Could not claim %s: %s", candidate.id, exc)
        else:
            assigned_any = True

    return assigned_any
|
|
360
|
+
|
|
361
|
+
async def _do_exec(self) -> None:
    """Run the assigned agent for every in-progress requirement.

    Tasks whose assignee is not among ``self.agents`` are moved back to
    PENDING so they can be reassigned. Agent failures are logged and never
    raised, so one failing task does not stop the others.
    """
    from rich.markup import escape

    for task in self.hub.list_in_progress():
        agent = self._get_agent_by_id(task.assignee)
        if agent is None:
            logger.warning(
                "Agent %s not found, unclaiming %s for reassignment",
                task.assignee,
                task.id,
            )
            try:
                self.hub.update_requirement_status(
                    task.id, ReqStatus.PENDING, task.assignee or "system"
                )
            except ValueError as exc:
                # Same tolerance as _rollback: a rejected status change must
                # not abort the whole loop.
                logger.warning("Could not unclaim %s: %s", task.id, exc)
            continue

        prompt = (
            f"Requirement ID: {task.id}\n"
            f"Implement this requirement: {task.description}\n\n"
            "Use the SSOT tools to read contracts, write code via the harness, "
            "and update the requirement status to DONE when complete. "
            f"Call ssot_update_requirement_status with node_id='{task.id}' and status='DONE'."
        )
        try:
            sl = AgentStreamLogger()
            result = await sl.run(
                agent,
                prompt,
                model=self._model_for(agent.role),
            )
            if not result.success:
                logger.warning("Agent %s error: %s", agent.agent_id, result.error)
            # Output may be missing or not a string, and may contain brackets;
            # coerce, truncate, then escape so printing the preview cannot
            # fail or be parsed as Rich markup.
            preview = escape(str(result.output or "")[:200])
            console.print(f"[dim]Agent {agent.agent_id} output: {preview}[/]")
        except Exception as exc:
            logger.warning("Agent %s exception: %s", agent.agent_id, exc)
|
|
397
|
+
|
|
398
|
+
async def _do_valid(self) -> tuple[bool, list[str]]:
    """Run contract validation, the optional review agent, and the tests.

    All three checks always run; their failures are collected rather than
    short-circuited.

    Returns ``(passed, failures)`` where ``passed`` is True only when
    ``failures`` is empty.
    """
    contract_report = validate_contracts(self.hub)
    failures: list[str] = []
    if not contract_report.valid:
        failures.extend(
            f"Contract violation: {violation.message}"
            for violation in contract_report.violations
        )

    reviewer = self._review_agent
    if reviewer is not None:
        try:
            review = await AgentStreamLogger().run(
                reviewer,
                "Review the latest code changes and contracts for quality issues.",
                model=self._model_for("review"),
            )
            if not review.success:
                failures.append(f"Review failed: {review.error}")
        except Exception as exc:
            # A crashing reviewer counts as a failed validation, not a crash.
            failures.append(f"Review exception: {exc}")

    tests_ok = self._run_tests()
    if not tests_ok:
        failures.append("Tests failed")

    return not failures, failures
|
|
427
|
+
|
|
428
|
+
def _do_commit(self) -> None:
|
|
429
|
+
"""Tag a clean SSOT snapshot and clear the harness rollback point."""
|
|
430
|
+
self.hub.create_snapshot(f"Iteration {self.iterations} commit")
|
|
431
|
+
if hasattr(self.harness, "clear_snapshot"):
|
|
432
|
+
self.harness.clear_snapshot()
|
|
433
|
+
self.retry_budget.reset()
|
|
434
|
+
|
|
435
|
+
# ------------------------------------------------------------------
|
|
436
|
+
# Controller prompt builder
|
|
437
|
+
# ------------------------------------------------------------------
|
|
438
|
+
|
|
439
|
+
def _build_controller_prompt(self, requirement: str) -> str:
    """Render the current SSOT state as the prompt for the controller.

    The prompt contains the original requirement, iteration and retry
    counters, per-status task counts, a section per non-empty status list,
    the last five controller actions, the available agents, and the
    instruction block.

    The section headings and counter labels are matched literally by
    ``LLMController._test_decide``; rewording them changes test-mode
    behaviour.
    """
    all_reqs = self.hub.list_all_requirements()

    def with_status(status: ReqStatus) -> list[Any]:
        return [r for r in all_reqs if r.status == status]

    pending = with_status(ReqStatus.PENDING)
    in_progress = with_status(ReqStatus.IN_PROGRESS)
    done = with_status(ReqStatus.DONE)
    blocked = with_status(ReqStatus.BLOCKED)

    lines = [
        f"Original requirement: {requirement}",
        "",
        f"Iteration: {self.iterations} / {self.max_iterations}",
        f"Retry budget remaining: {self.retry_budget.remaining}",
        "",
        "## Current Task State",
        f" Pending: {len(pending)}",
        f" In Progress: {len(in_progress)}",
        f" Done: {len(done)}",
        f" Blocked: {len(blocked)}",
        "",
    ]

    if pending:
        lines.append("### Pending tasks")
        for r in pending:
            deps = (
                f" (depends on: {', '.join(r.dependencies)})"
                if r.dependencies
                else ""
            )
            lines.append(f" - [{r.id[:8]}] {r.description}{deps}")
        lines.append("")

    if in_progress:
        lines.append("### In-progress tasks")
        lines.extend(
            f" - [{r.id[:8]}] {r.description} (assigned to {r.assignee})"
            for r in in_progress
        )
        lines.append("")

    if done:
        lines.append("### Completed tasks")
        lines.extend(f" - [{r.id[:8]}] {r.description}" for r in done)
        lines.append("")

    if self._history:
        lines.append("### Recent actions")
        # Only the five most recent decisions are shown.
        lines.extend(f" - {h.action}: {h.reasoning}" for h in self._history[-5:])
        lines.append("")

    lines.append("### Available agents")
    lines.extend(f" - {a.role} ({a.agent_id})" for a in self.agents)
    lines.append("")

    lines.extend(
        [
            "## Instruction",
            "You are the Controller. Based on the current state, choose the next action.",
            "",
            "Available actions:",
            " DECOMPOSE — Use the planner to break requirements into sub-tasks.",
            " ASSIGN — Assign pending tasks to suitable agents.",
            " EXECUTE — Run claimed agents on their in-progress tasks.",
            " VERIFY — Validate contracts, tests, and code quality.",
            " COMMIT — Save a snapshot of the current successful state.",
            " DONE — All work is complete. Finish the loop.",
            "",
            "Guidelines:",
            # Each guideline is its own list item; a missing comma here would
            # silently glue two bullets onto one line.
            "- If there are pending tasks that need decomposition, choose DECOMPOSE.",
            "- If there are pending tasks ready to work on, choose ASSIGN.",
            "- If there are in-progress tasks, choose EXECUTE.",
            "- After execution, choose VERIFY to validate.",
            "- If validation passes, choose COMMIT then DONE.",
            "- If retry budget is 0 and problems remain, choose DONE anyway.",
        ]
    )

    return "\n".join(lines)
|
|
522
|
+
|
|
523
|
+
# ------------------------------------------------------------------
|
|
524
|
+
# Helpers
|
|
525
|
+
# ------------------------------------------------------------------
|
|
526
|
+
|
|
527
|
+
def _model_for(self, role: str) -> Any | None:
|
|
528
|
+
"""Return the model override for a given role, if any."""
|
|
529
|
+
return self.model_map.get(role)
|
|
530
|
+
|
|
531
|
+
def _get_agent_by_role(self, role: str) -> MaestroAgent | None:
|
|
532
|
+
for agent in self.agents:
|
|
533
|
+
if agent.role == role:
|
|
534
|
+
return agent
|
|
535
|
+
return None
|
|
536
|
+
|
|
537
|
+
def _get_agent_by_id(self, agent_id: str | None) -> MaestroAgent | None:
|
|
538
|
+
if agent_id is None:
|
|
539
|
+
return None
|
|
540
|
+
for agent in self.agents:
|
|
541
|
+
if agent.agent_id == agent_id:
|
|
542
|
+
return agent
|
|
543
|
+
return None
|
|
544
|
+
|
|
545
|
+
def _rollback(self) -> None:
    """Undo code changes via the harness and return in-progress tasks to PENDING."""
    harness = self.harness
    # Not every harness exposes rollback; call it only when present.
    if hasattr(harness, "rollback"):
        harness.rollback()

    for stale in self.hub.list_in_progress():
        actor = stale.assignee or "system"
        try:
            self.hub.update_requirement_status(stale.id, ReqStatus.PENDING, actor)
        except ValueError:
            # Best-effort reset: a rejected status change is skipped on purpose.
            continue
|
|
557
|
+
|
|
558
|
+
def _run_tests(self) -> bool:
|
|
559
|
+
"""Run test commands through the harness if a test agent is configured.
|
|
560
|
+
|
|
561
|
+
Returns True if tests pass or no test agent exists.
|
|
562
|
+
"""
|
|
563
|
+
test_agent = self._get_agent_by_role("test")
|
|
564
|
+
if test_agent is None:
|
|
565
|
+
return True
|
|
566
|
+
|
|
567
|
+
policy = self.harness.get_agent_policy(test_agent.agent_id)
|
|
568
|
+
if policy is None:
|
|
569
|
+
return True
|
|
570
|
+
|
|
571
|
+
allowed = policy.commands.allow
|
|
572
|
+
if any("pytest" in cmd for cmd in allowed):
|
|
573
|
+
from maestro_harness.validator import Action
|
|
574
|
+
|
|
575
|
+
action = Action(
|
|
576
|
+
agent_id=test_agent.agent_id,
|
|
577
|
+
action_type="COMMAND",
|
|
578
|
+
target=".",
|
|
579
|
+
command="pytest",
|
|
580
|
+
)
|
|
581
|
+
result = self.harness.execute_action(test_agent.agent_id, action)
|
|
582
|
+
return result.exit_code == 0
|
|
583
|
+
|
|
584
|
+
return True
|
maestro_loop/py.typed
ADDED
|
File without changes
|
maestro_loop/retry.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Retry budget manager for the Auto-Loop controller."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RetryBudget:
    """Counter for the retries the loop has used against a fixed maximum.

    ``max_retries`` attempts may be consumed before the budget reports itself
    exhausted; :meth:`reset` makes the full budget available again.
    """

    def __init__(self, max_retries: int = 3) -> None:
        self._attempts = 0
        self.max_retries = max_retries

    def consume(self) -> bool:
        """Use up one retry.

        Returns False (and changes nothing) when the budget is already
        exhausted, True otherwise.
        """
        if self.exhausted:
            return False
        self._attempts += 1
        return True

    @property
    def exhausted(self) -> bool:
        """Whether every retry has been used."""
        return self.max_retries <= self._attempts

    @property
    def remaining(self) -> int:
        """How many retries are left, never below zero."""
        left = self.max_retries - self._attempts
        return left if left > 0 else 0

    def reset(self) -> None:
        """Forget all consumed attempts."""
        self._attempts = 0
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""LLM-powered agent-task scheduler for the Auto-Loop controller."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from pydantic_ai import Agent
|
|
7
|
+
|
|
8
|
+
from maestro_agents.base import MaestroAgent
|
|
9
|
+
from maestro_ssot.models import RequirementNode
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ScheduleDecision(BaseModel):
    """Structured answer the scheduling LLM must return for one task."""

    # Role name the scheduler picked; matched against ``agent.role``.
    selected_role: str = Field(
        description="Role of the best agent for this task",
    )
    # Free-text justification for the pick.
    reason: str = Field(
        description="Why this agent is the best fit",
    )
    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence score (0-1)",
    )
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LLMScheduler:
    """Pick the agent that should work on a task by asking an LLM.

    A PydanticAI agent with ``ScheduleDecision`` as its output type reads the
    task description and the available roles and names one role.
    """

    def __init__(self, model: str | object, agents: list[MaestroAgent]) -> None:
        self._agents = agents
        self._agent = Agent(
            model,  # type: ignore[arg-type]
            output_type=ScheduleDecision,
            system_prompt=self._system_prompt(),
        )

    async def select_best_agent(
        self, task: RequirementNode, agents: list[MaestroAgent]
    ) -> MaestroAgent | None:
        """Return the agent from *agents* that the LLM selects for *task*.

        Returns None when *agents* is empty. If the LLM names a role nobody
        has, the first agent that is neither ``planner`` nor ``review`` is
        used; if there is none, None is returned.
        """
        if not agents:
            return None

        run_result = await self._agent.run(self._build_prompt(task, agents))
        decision: ScheduleDecision = run_result.output

        exact = next(
            (agent for agent in agents if agent.role == decision.selected_role),
            None,
        )
        if exact is not None:
            return exact

        # Fallback: unknown role from the LLM, take the first working agent.
        return next(
            (agent for agent in agents if agent.role not in ("planner", "review")),
            None,
        )

    @staticmethod
    def _system_prompt() -> str:
        """Return the fixed system prompt for the scheduling LLM."""
        sentences = (
            "You are a task scheduler for a multi-agent software engineering team. ",
            "Your job is to read a task description and pick exactly one agent role ",
            "that is best suited to execute it. Return your decision as structured JSON.",
        )
        return "".join(sentences)

    @staticmethod
    def _build_prompt(task: RequirementNode, agents: list[MaestroAgent]) -> str:
        """Render *task* and the candidate *agents* as the scheduling prompt."""
        out = ["## Task", f"Description: {task.description}"]

        if task.acceptance_criteria:
            out.append("Acceptance criteria:")
            out.extend(f" - {ac}" for ac in task.acceptance_criteria)

        out += ["", "## Available Agents"]
        for agent in agents:
            out.append(f"- role: {agent.role}")
            # Show a short, single-line excerpt of the agent's system prompt
            # when the agent exposes one.
            sp = agent.system_prompt() if hasattr(agent, "system_prompt") else None
            prompt_excerpt = sp[:120].replace("\n", " ") if sp else ""
            if prompt_excerpt:
                out.append(f" prompt: {prompt_excerpt}...")

        out += [
            "",
            "## Instruction",
            "Pick the single best agent role for this task.",
            "Consider the full semantic meaning, not just keyword matching.",
            "If a task involves both API logic and UI (e.g. 'login page with JWT'), "
            "favor the agent that owns the core business logic.",
        ]
        return "\n".join(out)
|
maestro_loop/states.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""LoopState enumeration and Controller action model for the Auto-Loop."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LoopState(StrEnum):
|
|
12
|
+
"""Deterministic states of the autonomous execution loop."""
|
|
13
|
+
|
|
14
|
+
PLAN = "PLAN"
|
|
15
|
+
ASSIGN = "ASSIGN"
|
|
16
|
+
EXEC = "EXEC"
|
|
17
|
+
VALID = "VALID"
|
|
18
|
+
COMMIT = "COMMIT"
|
|
19
|
+
CHECK = "CHECK"
|
|
20
|
+
DONE = "DONE"
|
|
21
|
+
FAIL = "FAIL"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
TERMINAL_STATES = {LoopState.DONE, LoopState.FAIL}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ControllerAction(BaseModel):
    """One decision returned by the LLM Controller.

    The controller looks at the current SSOT state and names the next action
    itself; there is no hard-coded transition table.
    """

    # The only action names the model accepts.
    action: Literal["DECOMPOSE", "ASSIGN", "EXECUTE", "VERIFY", "COMMIT", "DONE"] = Field(
        description="The next action to take in the execution loop",
    )
    # Optional targeting hints; both default to None.
    target_requirement_id: str | None = Field(
        default=None,
        description="ID of the requirement to target (for ASSIGN / EXECUTE)",
    )
    agent_role: str | None = Field(
        default=None,
        description="Role of the agent to use (for ASSIGN / EXECUTE)",
    )

    # Required free-text justification.
    reasoning: str = Field(
        description="Why this action was chosen",
    )
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: maestro-loop
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Auto-Loop Controller: deterministic state machine for autonomous execution
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: maestro-agents
|
|
7
|
+
Requires-Dist: maestro-harness
|
|
8
|
+
Requires-Dist: maestro-ssot
|
|
9
|
+
Requires-Dist: pydantic-ai>=0.5
|
|
10
|
+
Requires-Dist: pydantic>=2.10
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
maestro_loop/__init__.py,sha256=yIqDkpGjng1NQTR5mDzf9mjg11f2zFaGnJTfqhVyaUA,228
|
|
2
|
+
maestro_loop/controller.py,sha256=C6hnaSrsphfq6K41T-FlOe1PDbxFmjQtN53cR53nXbg,22884
|
|
3
|
+
maestro_loop/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
maestro_loop/retry.py,sha256=Ix9SdHMMBApR0_rapl-71t05Puizij6LGuiLIeBm5ag,1070
|
|
5
|
+
maestro_loop/scheduler.py,sha256=4EizHaUKXL5xFGXaoTgHj8JdgR7RIArkPTnELzfzL5s,3536
|
|
6
|
+
maestro_loop/states.py,sha256=967kdRnZbOydabx__TKH2vtxXbtF24X8HAoCRtYM6p8,1285
|
|
7
|
+
maestro_loop-0.1.0.dist-info/METADATA,sha256=loMnlj_Vp6YkyRC2Bm9Z2k0zF2boJN5K0LDB3elK8qY,315
|
|
8
|
+
maestro_loop-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
maestro_loop-0.1.0.dist-info/RECORD,,
|