rdc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gwd/__init__.py +69 -0
- gwd/__main__.py +70 -0
- gwd/classify.py +63 -0
- gwd/client.py +46 -0
- gwd/executor.py +165 -0
- gwd/orchestrator.py +224 -0
- gwd/planner.py +191 -0
- gwd/prompts.py +100 -0
- gwd/tools.py +363 -0
- gwd/types.py +98 -0
- gwd/verifier.py +158 -0
- rdc-0.1.0.dist-info/METADATA +298 -0
- rdc-0.1.0.dist-info/RECORD +96 -0
- rdc-0.1.0.dist-info/WHEEL +4 -0
- rdc-0.1.0.dist-info/entry_points.txt +4 -0
- rdc-0.1.0.dist-info/licenses/LICENSE +21 -0
- remote_dev_ctrl/__init__.py +3 -0
- remote_dev_ctrl/cli.py +1629 -0
- remote_dev_ctrl/llm.py +464 -0
- remote_dev_ctrl/mcp/__init__.py +8 -0
- remote_dev_ctrl/mcp/server.py +361 -0
- remote_dev_ctrl/scaffold.py +1058 -0
- remote_dev_ctrl/server/__init__.py +1 -0
- remote_dev_ctrl/server/actions.py +178 -0
- remote_dev_ctrl/server/agents/__init__.py +9 -0
- remote_dev_ctrl/server/agents/gwd_provider.py +117 -0
- remote_dev_ctrl/server/agents/manager.py +385 -0
- remote_dev_ctrl/server/agents/provider.py +89 -0
- remote_dev_ctrl/server/agents/terminal_provider.py +170 -0
- remote_dev_ctrl/server/agents/tools.py +615 -0
- remote_dev_ctrl/server/agents/web_provider.py +342 -0
- remote_dev_ctrl/server/app.py +8130 -0
- remote_dev_ctrl/server/audit.py +372 -0
- remote_dev_ctrl/server/auth.py +359 -0
- remote_dev_ctrl/server/browser.py +1518 -0
- remote_dev_ctrl/server/browser_use.py +189 -0
- remote_dev_ctrl/server/caddy.py +384 -0
- remote_dev_ctrl/server/channels/__init__.py +1 -0
- remote_dev_ctrl/server/channels/phone.py +796 -0
- remote_dev_ctrl/server/channels/telegram.py +877 -0
- remote_dev_ctrl/server/chrome.py +246 -0
- remote_dev_ctrl/server/collections_page.py +174 -0
- remote_dev_ctrl/server/config.py +363 -0
- remote_dev_ctrl/server/context_synthesizer.py +138 -0
- remote_dev_ctrl/server/conversation.py +268 -0
- remote_dev_ctrl/server/dashboard.py +1679 -0
- remote_dev_ctrl/server/db/__init__.py +87 -0
- remote_dev_ctrl/server/db/connection.py +107 -0
- remote_dev_ctrl/server/db/migrate.py +592 -0
- remote_dev_ctrl/server/db/migrations/logs/20260210000001_create_schema.sql +62 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260210000001_create_schema.sql +81 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260210000002_add_project_description.sql +7 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260210000003_runtime_state_tables.sql +53 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260212000004_add_collections.sql +21 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260213000005_add_agent_sessions.sql +18 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260222000006_browser_nullable_process_id.sql +48 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260222000007_add_recordings.sql +21 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260305000008_add_recipes.sql +17 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260312000009_add_project_tags.sql +5 -0
- remote_dev_ctrl/server/db/migrations/rdc/20260316000010_add_actions.sql +6 -0
- remote_dev_ctrl/server/db/migrations/tasks/20260210000001_create_schema.sql +48 -0
- remote_dev_ctrl/server/db/models.py +365 -0
- remote_dev_ctrl/server/db/repositories.py +1616 -0
- remote_dev_ctrl/server/debug_page.py +689 -0
- remote_dev_ctrl/server/event_store.py +346 -0
- remote_dev_ctrl/server/events/__init__.py +5 -0
- remote_dev_ctrl/server/events/bus.py +144 -0
- remote_dev_ctrl/server/intent.py +2665 -0
- remote_dev_ctrl/server/middleware.py +354 -0
- remote_dev_ctrl/server/orchestrator.py +297 -0
- remote_dev_ctrl/server/pinchtab.py +393 -0
- remote_dev_ctrl/server/ports.py +188 -0
- remote_dev_ctrl/server/process_discovery.py +521 -0
- remote_dev_ctrl/server/processes.py +951 -0
- remote_dev_ctrl/server/pty_relay.py +505 -0
- remote_dev_ctrl/server/queue/__init__.py +9 -0
- remote_dev_ctrl/server/recipes.py +153 -0
- remote_dev_ctrl/server/recording.py +203 -0
- remote_dev_ctrl/server/scrubber.py +200 -0
- remote_dev_ctrl/server/state_machine.py +983 -0
- remote_dev_ctrl/server/static/rrweb.min.js +19 -0
- remote_dev_ctrl/server/static/shared.css +698 -0
- remote_dev_ctrl/server/static/vendor/tailwind.js +65 -0
- remote_dev_ctrl/server/static/vendor/xstate.js +2 -0
- remote_dev_ctrl/server/static/vendor/xterm-addon-fit.js +2 -0
- remote_dev_ctrl/server/static/vendor/xterm-addon-unicode11.js +8 -0
- remote_dev_ctrl/server/static/vendor/xterm-addon-web-links.js +2 -0
- remote_dev_ctrl/server/static/vendor/xterm.css +209 -0
- remote_dev_ctrl/server/static/vendor/xterm.js +2 -0
- remote_dev_ctrl/server/streaming.py +191 -0
- remote_dev_ctrl/server/terminal.py +1200 -0
- remote_dev_ctrl/server/tts.py +310 -0
- remote_dev_ctrl/server/vault.py +229 -0
- remote_dev_ctrl/server/vnc.py +527 -0
- remote_dev_ctrl/server/voice_agent.py +169 -0
- remote_dev_ctrl/server/worker.py +880 -0
gwd/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""gwd — get work done. Native task executor.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
execute_task() — run a task against a project
|
|
5
|
+
classify_task() — determine if a task is simple or complex
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .types import TaskComplexity, ExecutionStep, OnStepCallback
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def execute_task(
|
|
12
|
+
task: str,
|
|
13
|
+
project_path: str,
|
|
14
|
+
project_context: dict | None = None,
|
|
15
|
+
client=None,
|
|
16
|
+
model: str | None = None,
|
|
17
|
+
on_step: OnStepCallback | None = None,
|
|
18
|
+
max_iterations: int = 50,
|
|
19
|
+
force_complexity: TaskComplexity | None = None,
|
|
20
|
+
) -> str:
|
|
21
|
+
"""Execute a task against a project directory.
|
|
22
|
+
|
|
23
|
+
Routes to SingleAgentExecutor for simple tasks or
|
|
24
|
+
MultiAgentOrchestrator for complex tasks.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
task: What to do.
|
|
28
|
+
project_path: Filesystem path to the project.
|
|
29
|
+
project_context: Optional dict with purpose, stack, conventions, etc.
|
|
30
|
+
client: Optional OpenAI-compatible client. Auto-created if None.
|
|
31
|
+
model: Optional model override. Auto-detected if None.
|
|
32
|
+
on_step: Optional callback for streaming execution steps.
|
|
33
|
+
max_iterations: Max tool-use iterations (for simple tasks).
|
|
34
|
+
force_complexity: Override automatic classification.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
Final result string.
|
|
38
|
+
"""
|
|
39
|
+
from .client import create_client, default_model
|
|
40
|
+
from .classify import classify_task as _classify
|
|
41
|
+
|
|
42
|
+
if client is None:
|
|
43
|
+
client = create_client()
|
|
44
|
+
if model is None:
|
|
45
|
+
model = default_model()
|
|
46
|
+
|
|
47
|
+
context = {"project_path": project_path}
|
|
48
|
+
if project_context:
|
|
49
|
+
context.update(project_context)
|
|
50
|
+
|
|
51
|
+
complexity = force_complexity or _classify(task)
|
|
52
|
+
|
|
53
|
+
if complexity == TaskComplexity.SIMPLE:
|
|
54
|
+
from .executor import SingleAgentExecutor
|
|
55
|
+
executor = SingleAgentExecutor(
|
|
56
|
+
client=client,
|
|
57
|
+
model=model,
|
|
58
|
+
context=context,
|
|
59
|
+
max_iterations=max_iterations,
|
|
60
|
+
)
|
|
61
|
+
return await executor.run(task, on_step=on_step)
|
|
62
|
+
else:
|
|
63
|
+
from .orchestrator import MultiAgentOrchestrator
|
|
64
|
+
orch = MultiAgentOrchestrator(
|
|
65
|
+
client=client,
|
|
66
|
+
model=model,
|
|
67
|
+
context=context,
|
|
68
|
+
)
|
|
69
|
+
return await orch.run(task, project_context=context, on_step=on_step)
|
gwd/__main__.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""CLI entry point: python -m gwd "task description" --project /path"""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .types import TaskComplexity, ExecutionStep
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main():
|
|
12
|
+
parser = argparse.ArgumentParser(
|
|
13
|
+
prog="gwd",
|
|
14
|
+
description="Get Work Done — native task executor",
|
|
15
|
+
)
|
|
16
|
+
parser.add_argument("task", help="Task description")
|
|
17
|
+
parser.add_argument("--project", "-p", default=".", help="Project directory (default: .)")
|
|
18
|
+
parser.add_argument("--model", "-m", help="Model override (e.g. gpt-4o, anthropic/claude-sonnet-4-20250514)")
|
|
19
|
+
parser.add_argument("--max-iterations", type=int, default=50, help="Max tool-use iterations")
|
|
20
|
+
parser.add_argument("--force-simple", action="store_true", help="Force simple (single-agent) execution")
|
|
21
|
+
parser.add_argument("--force-complex", action="store_true", help="Force complex (multi-agent) execution")
|
|
22
|
+
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
|
23
|
+
|
|
24
|
+
args = parser.parse_args()
|
|
25
|
+
|
|
26
|
+
if args.verbose:
|
|
27
|
+
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(name)s: %(message)s")
|
|
28
|
+
else:
|
|
29
|
+
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
|
30
|
+
|
|
31
|
+
force = None
|
|
32
|
+
if args.force_simple:
|
|
33
|
+
force = TaskComplexity.SIMPLE
|
|
34
|
+
elif args.force_complex:
|
|
35
|
+
force = TaskComplexity.COMPLEX
|
|
36
|
+
|
|
37
|
+
async def on_step(step: ExecutionStep):
|
|
38
|
+
if step.type == "text":
|
|
39
|
+
print(step.content)
|
|
40
|
+
elif step.type == "tool_call":
|
|
41
|
+
tool_info = step.tool_name
|
|
42
|
+
if step.tool_args:
|
|
43
|
+
brief = {k: (v[:60] + "..." if isinstance(v, str) and len(v) > 60 else v)
|
|
44
|
+
for k, v in step.tool_args.items()}
|
|
45
|
+
tool_info += f" {brief}"
|
|
46
|
+
print(f" -> {tool_info}")
|
|
47
|
+
elif step.type == "tool_result" and step.is_error:
|
|
48
|
+
print(f" !! {step.result[:200]}")
|
|
49
|
+
elif step.type == "error":
|
|
50
|
+
print(f"ERROR: {step.content}", file=sys.stderr)
|
|
51
|
+
elif step.type == "status" and args.verbose:
|
|
52
|
+
print(f"[{step.content}]")
|
|
53
|
+
|
|
54
|
+
from . import execute_task
|
|
55
|
+
|
|
56
|
+
result = asyncio.run(execute_task(
|
|
57
|
+
task=args.task,
|
|
58
|
+
project_path=args.project,
|
|
59
|
+
model=args.model,
|
|
60
|
+
on_step=on_step,
|
|
61
|
+
max_iterations=args.max_iterations,
|
|
62
|
+
force_complexity=force,
|
|
63
|
+
))
|
|
64
|
+
|
|
65
|
+
if not result.startswith("ERROR") and result != "Task completed":
|
|
66
|
+
print(f"\n--- Result ---\n{result}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
|
|
70
|
+
main()
|
gwd/classify.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Task complexity classifier.
|
|
2
|
+
|
|
3
|
+
Determines whether a task should be handled by a single agent (simple)
|
|
4
|
+
or the multi-agent orchestrator (complex).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
from .types import TaskComplexity
|
|
10
|
+
|
|
11
|
+
# Keywords that suggest complexity
|
|
12
|
+
COMPLEX_KEYWORDS = {
|
|
13
|
+
"refactor", "implement", "integrate", "migrate", "redesign",
|
|
14
|
+
"add endpoint", "add api", "add auth", "authentication",
|
|
15
|
+
"add feature", "build", "create system", "set up", "setup",
|
|
16
|
+
"convert", "rewrite", "architect",
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
# Keywords that suggest simplicity
|
|
20
|
+
SIMPLE_KEYWORDS = {
|
|
21
|
+
"list", "show", "print", "rename", "typo", "fix typo",
|
|
22
|
+
"change", "update", "add import", "remove", "delete",
|
|
23
|
+
"move", "copy", "format", "lint", "log",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def classify_task(description: str) -> TaskComplexity:
|
|
28
|
+
"""Classify a task as simple or complex.
|
|
29
|
+
|
|
30
|
+
Heuristic based on word count and keyword matching.
|
|
31
|
+
Default bias: SIMPLE.
|
|
32
|
+
"""
|
|
33
|
+
desc_lower = description.lower().strip()
|
|
34
|
+
words = desc_lower.split()
|
|
35
|
+
word_count = len(words)
|
|
36
|
+
|
|
37
|
+
# Very short tasks are simple
|
|
38
|
+
if word_count <= 8:
|
|
39
|
+
# Unless they contain complex keywords
|
|
40
|
+
for kw in COMPLEX_KEYWORDS:
|
|
41
|
+
if kw in desc_lower:
|
|
42
|
+
return TaskComplexity.COMPLEX
|
|
43
|
+
return TaskComplexity.SIMPLE
|
|
44
|
+
|
|
45
|
+
# Long tasks are complex
|
|
46
|
+
if word_count > 20:
|
|
47
|
+
# Unless they only contain simple keywords
|
|
48
|
+
has_complex = any(kw in desc_lower for kw in COMPLEX_KEYWORDS)
|
|
49
|
+
if has_complex:
|
|
50
|
+
return TaskComplexity.COMPLEX
|
|
51
|
+
# Even long tasks that are just verbose simple requests stay simple
|
|
52
|
+
has_simple = any(kw in desc_lower for kw in SIMPLE_KEYWORDS)
|
|
53
|
+
if has_simple and not has_complex:
|
|
54
|
+
return TaskComplexity.SIMPLE
|
|
55
|
+
return TaskComplexity.COMPLEX
|
|
56
|
+
|
|
57
|
+
# Medium length — check keywords
|
|
58
|
+
for kw in COMPLEX_KEYWORDS:
|
|
59
|
+
if kw in desc_lower:
|
|
60
|
+
return TaskComplexity.COMPLEX
|
|
61
|
+
|
|
62
|
+
# Default bias: simple
|
|
63
|
+
return TaskComplexity.SIMPLE
|
gwd/client.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""LLM client factory for gwd.
|
|
2
|
+
|
|
3
|
+
Creates an OpenAI-compatible client from environment variables.
|
|
4
|
+
Priority: OPENROUTER_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY > localhost Ollama.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from openai import OpenAI
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_client(api_key: str | None = None, base_url: str | None = None) -> OpenAI:
    """Create an OpenAI-compatible client.

    If api_key/base_url are provided, uses them directly.
    Otherwise auto-detects from environment variables in priority order:
    OPENROUTER_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY > local Ollama.
    """
    if api_key:
        return OpenAI(api_key=api_key, base_url=base_url)

    # Auto-detect from env
    or_key = os.getenv("OPENROUTER_API_KEY")
    if or_key:
        return OpenAI(api_key=or_key, base_url="https://openrouter.ai/api/v1")

    oai_key = os.getenv("OPENAI_API_KEY")
    if oai_key:
        return OpenAI(api_key=oai_key)

    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        # BUG FIX: previously the Anthropic key was sent to OpenRouter's
        # endpoint, which does not accept Anthropic keys. Use Anthropic's
        # own OpenAI-compatible endpoint instead.
        return OpenAI(api_key=anthropic_key, base_url="https://api.anthropic.com/v1")

    # Fallback to local Ollama
    return OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")


def default_model() -> str:
    """Pick the default model based on which env var is set.

    Must stay consistent with create_client(): OpenRouter expects
    provider-prefixed slugs ("anthropic/..."), while Anthropic's native
    endpoint expects bare model names.
    """
    if os.getenv("OPENROUTER_API_KEY"):
        return "anthropic/claude-sonnet-4-20250514"
    if os.getenv("OPENAI_API_KEY"):
        return "gpt-4o"
    if os.getenv("ANTHROPIC_API_KEY"):
        # Bare model name for Anthropic's own endpoint (no "anthropic/" prefix),
        # matching the base_url fix in create_client().
        return "claude-sonnet-4-20250514"
    return "qwen3.5"
|
gwd/executor.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""SingleAgentExecutor — core tool-use loop for gwd.
|
|
2
|
+
|
|
3
|
+
Mirrors WebNativeProvider.run() but decoupled from RDC. Uses OpenAI-compatible
|
|
4
|
+
client to drive a tool-use agentic loop.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from openai import OpenAI
|
|
13
|
+
|
|
14
|
+
from .prompts import executor_prompt
|
|
15
|
+
from .tools import AGENT_TOOLS, execute_tool
|
|
16
|
+
from .types import ExecutionStep, OnStepCallback
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SingleAgentExecutor:
|
|
22
|
+
"""Core tool-use loop executor."""
|
|
23
|
+
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
client: OpenAI,
|
|
27
|
+
model: str,
|
|
28
|
+
context: dict[str, Any] | None = None,
|
|
29
|
+
max_iterations: int = 50,
|
|
30
|
+
):
|
|
31
|
+
self.client = client
|
|
32
|
+
self.model = model
|
|
33
|
+
self.context = context or {}
|
|
34
|
+
self.max_iterations = max_iterations
|
|
35
|
+
self._cancelled = False
|
|
36
|
+
self._step_index = 0
|
|
37
|
+
|
|
38
|
+
async def _emit(
|
|
39
|
+
self,
|
|
40
|
+
on_step: OnStepCallback | None,
|
|
41
|
+
step_type: str,
|
|
42
|
+
subtask_id: str = "",
|
|
43
|
+
**kwargs,
|
|
44
|
+
) -> ExecutionStep:
|
|
45
|
+
step = ExecutionStep(
|
|
46
|
+
type=step_type,
|
|
47
|
+
step_index=self._step_index,
|
|
48
|
+
subtask_id=subtask_id,
|
|
49
|
+
**kwargs,
|
|
50
|
+
)
|
|
51
|
+
self._step_index += 1
|
|
52
|
+
if on_step:
|
|
53
|
+
try:
|
|
54
|
+
await on_step(step)
|
|
55
|
+
except Exception as e:
|
|
56
|
+
logger.warning(f"on_step callback error: {e}")
|
|
57
|
+
return step
|
|
58
|
+
|
|
59
|
+
async def run(
|
|
60
|
+
self,
|
|
61
|
+
task: str,
|
|
62
|
+
on_step: OnStepCallback | None = None,
|
|
63
|
+
subtask_id: str = "",
|
|
64
|
+
) -> str:
|
|
65
|
+
"""Run the tool-use loop until completion.
|
|
66
|
+
|
|
67
|
+
Each call starts with a completely fresh conversation (system + user task).
|
|
68
|
+
This follows the Ralph Wiggum Loop pattern — no carried-over context
|
|
69
|
+
between attempts, preventing drift and hallucination accumulation.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
task: The task description for the agent.
|
|
73
|
+
on_step: Optional callback for streaming execution steps.
|
|
74
|
+
subtask_id: Optional subtask ID for step attribution.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
Final text output from the agent.
|
|
78
|
+
"""
|
|
79
|
+
self._cancelled = False
|
|
80
|
+
self._step_index = 0
|
|
81
|
+
|
|
82
|
+
project_path = self.context.get("project_path", ".")
|
|
83
|
+
system = executor_prompt(self.context)
|
|
84
|
+
|
|
85
|
+
await self._emit(on_step, "status", subtask_id, content=f"Starting with model {self.model}")
|
|
86
|
+
|
|
87
|
+
messages: list[dict[str, Any]] = [
|
|
88
|
+
{"role": "system", "content": system},
|
|
89
|
+
{"role": "user", "content": task},
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
final_text = ""
|
|
93
|
+
|
|
94
|
+
for iteration in range(self.max_iterations):
|
|
95
|
+
if self._cancelled:
|
|
96
|
+
await self._emit(on_step, "status", subtask_id, content="Cancelled")
|
|
97
|
+
return "Task cancelled"
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
response = await asyncio.to_thread(
|
|
101
|
+
lambda: self.client.chat.completions.create(
|
|
102
|
+
model=self.model,
|
|
103
|
+
messages=messages,
|
|
104
|
+
tools=AGENT_TOOLS,
|
|
105
|
+
max_tokens=4096,
|
|
106
|
+
)
|
|
107
|
+
)
|
|
108
|
+
except Exception as e:
|
|
109
|
+
await self._emit(on_step, "error", subtask_id, content=str(e), is_error=True)
|
|
110
|
+
return f"LLM error: {e}"
|
|
111
|
+
|
|
112
|
+
choice = response.choices[0]
|
|
113
|
+
msg = choice.message
|
|
114
|
+
|
|
115
|
+
if msg.content:
|
|
116
|
+
final_text = msg.content
|
|
117
|
+
await self._emit(on_step, "text", subtask_id, content=msg.content)
|
|
118
|
+
|
|
119
|
+
if not msg.tool_calls:
|
|
120
|
+
await self._emit(on_step, "status", subtask_id, content="Completed")
|
|
121
|
+
return final_text or "Task completed"
|
|
122
|
+
|
|
123
|
+
messages.append(msg.model_dump())
|
|
124
|
+
|
|
125
|
+
for tc in msg.tool_calls:
|
|
126
|
+
tool_name = tc.function.name
|
|
127
|
+
try:
|
|
128
|
+
tool_args = json.loads(tc.function.arguments) if tc.function.arguments else {}
|
|
129
|
+
except (json.JSONDecodeError, TypeError):
|
|
130
|
+
tool_args = {}
|
|
131
|
+
|
|
132
|
+
await self._emit(
|
|
133
|
+
on_step, "tool_call", subtask_id,
|
|
134
|
+
tool_name=tool_name,
|
|
135
|
+
tool_args=tool_args,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
result_str, is_err = await execute_tool(tool_name, tool_args, project_path)
|
|
139
|
+
|
|
140
|
+
await self._emit(
|
|
141
|
+
on_step, "tool_result", subtask_id,
|
|
142
|
+
tool_name=tool_name,
|
|
143
|
+
result=result_str,
|
|
144
|
+
is_error=is_err,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
messages.append({
|
|
148
|
+
"role": "tool",
|
|
149
|
+
"tool_call_id": tc.id,
|
|
150
|
+
"content": result_str,
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
if choice.finish_reason == "stop":
|
|
154
|
+
await self._emit(on_step, "status", subtask_id, content="Completed")
|
|
155
|
+
return final_text or "Task completed"
|
|
156
|
+
|
|
157
|
+
await self._emit(
|
|
158
|
+
on_step, "error", subtask_id,
|
|
159
|
+
content=f"Reached maximum iterations ({self.max_iterations})",
|
|
160
|
+
is_error=True,
|
|
161
|
+
)
|
|
162
|
+
return f"Reached maximum iterations ({self.max_iterations})"
|
|
163
|
+
|
|
164
|
+
def cancel(self):
|
|
165
|
+
self._cancelled = True
|
gwd/orchestrator.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""MultiAgentOrchestrator — plan, execute, verify, review loop.
|
|
2
|
+
|
|
3
|
+
Uses the Ralph Wiggum Loop pattern (Geoffrey Huntley): each retry starts
|
|
4
|
+
with a completely fresh context. The agent discovers current state from
|
|
5
|
+
disk (git diff, file reads) rather than carrying forward a polluted
|
|
6
|
+
conversation history. This prevents drift and hallucination accumulation.
|
|
7
|
+
|
|
8
|
+
Flow:
|
|
9
|
+
Plan -> [wave1: subtasks in parallel] -> [wave2: ...] -> Summary
|
|
10
|
+
|
|
11
|
+
Per subtask (max 3 attempts):
|
|
12
|
+
Fresh executor -> Verify -> pass? done : wipe context, loop
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from openai import OpenAI
|
|
20
|
+
|
|
21
|
+
from .executor import SingleAgentExecutor
|
|
22
|
+
from .planner import PlannerAgent
|
|
23
|
+
from .types import (
|
|
24
|
+
ExecutionStep,
|
|
25
|
+
OnStepCallback,
|
|
26
|
+
Plan,
|
|
27
|
+
Subtask,
|
|
28
|
+
SubtaskStatus,
|
|
29
|
+
VerifyResult,
|
|
30
|
+
)
|
|
31
|
+
from .verifier import Verifier
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
MAX_SUBTASK_ATTEMPTS = 3
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class MultiAgentOrchestrator:
|
|
39
|
+
"""Orchestrates complex tasks: plan -> wave execution -> verify -> summary."""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
client: OpenAI,
|
|
44
|
+
model: str,
|
|
45
|
+
context: dict[str, Any] | None = None,
|
|
46
|
+
):
|
|
47
|
+
self.client = client
|
|
48
|
+
self.model = model
|
|
49
|
+
self.context = context or {}
|
|
50
|
+
self._cancelled = False
|
|
51
|
+
self._step_index = 0
|
|
52
|
+
|
|
53
|
+
async def _emit(self, on_step: OnStepCallback | None, step_type: str, **kwargs):
|
|
54
|
+
step = ExecutionStep(type=step_type, step_index=self._step_index, **kwargs)
|
|
55
|
+
self._step_index += 1
|
|
56
|
+
if on_step:
|
|
57
|
+
try:
|
|
58
|
+
await on_step(step)
|
|
59
|
+
except Exception:
|
|
60
|
+
pass
|
|
61
|
+
|
|
62
|
+
async def run(
|
|
63
|
+
self,
|
|
64
|
+
task: str,
|
|
65
|
+
project_context: dict[str, Any] | None = None,
|
|
66
|
+
on_step: OnStepCallback | None = None,
|
|
67
|
+
) -> str:
|
|
68
|
+
"""Execute a complex task via plan -> execute -> verify loop."""
|
|
69
|
+
self._cancelled = False
|
|
70
|
+
self._step_index = 0
|
|
71
|
+
|
|
72
|
+
context = dict(self.context)
|
|
73
|
+
if project_context:
|
|
74
|
+
context.update(project_context)
|
|
75
|
+
|
|
76
|
+
# Phase 1: Create plan
|
|
77
|
+
await self._emit(on_step, "status", content="Creating execution plan...")
|
|
78
|
+
|
|
79
|
+
planner = PlannerAgent(
|
|
80
|
+
client=self.client,
|
|
81
|
+
model=self.model,
|
|
82
|
+
context=context,
|
|
83
|
+
)
|
|
84
|
+
plan = await planner.create_plan(task, on_step=on_step)
|
|
85
|
+
|
|
86
|
+
await self._emit(
|
|
87
|
+
on_step, "text",
|
|
88
|
+
content=f"Plan: {plan.analysis}\nSubtasks: {len(plan.subtasks)}",
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
if not plan.subtasks:
|
|
92
|
+
return "No subtasks to execute"
|
|
93
|
+
|
|
94
|
+
# Phase 2: Execute waves
|
|
95
|
+
waves = plan.waves()
|
|
96
|
+
results: dict[str, str] = {}
|
|
97
|
+
|
|
98
|
+
for wave_idx, wave in enumerate(waves):
|
|
99
|
+
if self._cancelled:
|
|
100
|
+
return "Orchestration cancelled"
|
|
101
|
+
|
|
102
|
+
await self._emit(
|
|
103
|
+
on_step, "status",
|
|
104
|
+
content=f"Wave {wave_idx + 1}/{len(waves)}: {len(wave)} subtask(s)",
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Run subtasks in this wave concurrently
|
|
108
|
+
coros = [
|
|
109
|
+
self._execute_subtask(subtask, context, on_step)
|
|
110
|
+
for subtask in wave
|
|
111
|
+
]
|
|
112
|
+
wave_results = await asyncio.gather(*coros, return_exceptions=True)
|
|
113
|
+
|
|
114
|
+
for subtask, result in zip(wave, wave_results):
|
|
115
|
+
if isinstance(result, Exception):
|
|
116
|
+
subtask.status = SubtaskStatus.FAILED
|
|
117
|
+
subtask.result = str(result)
|
|
118
|
+
results[subtask.id] = f"FAILED: {result}"
|
|
119
|
+
else:
|
|
120
|
+
results[subtask.id] = result
|
|
121
|
+
|
|
122
|
+
# Phase 3: Summary
|
|
123
|
+
summary_parts = [f"Task: {task}", f"Plan: {plan.analysis}", ""]
|
|
124
|
+
for st in plan.subtasks:
|
|
125
|
+
status_icon = "+" if st.status == SubtaskStatus.PASSED else "-"
|
|
126
|
+
summary_parts.append(f" [{status_icon}] {st.id}. {st.description}")
|
|
127
|
+
if st.result:
|
|
128
|
+
result_preview = st.result[:200] + "..." if len(st.result) > 200 else st.result
|
|
129
|
+
summary_parts.append(f" {result_preview}")
|
|
130
|
+
|
|
131
|
+
summary = "\n".join(summary_parts)
|
|
132
|
+
await self._emit(on_step, "text", content=summary)
|
|
133
|
+
return summary
|
|
134
|
+
|
|
135
|
+
async def _execute_subtask(
|
|
136
|
+
self,
|
|
137
|
+
subtask: Subtask,
|
|
138
|
+
context: dict[str, Any],
|
|
139
|
+
on_step: OnStepCallback | None,
|
|
140
|
+
) -> str:
|
|
141
|
+
"""Execute a single subtask using the Ralph Wiggum Loop pattern.
|
|
142
|
+
|
|
143
|
+
Each attempt gets a completely fresh executor with no prior conversation
|
|
144
|
+
history. The agent discovers current project state from disk (git diff,
|
|
145
|
+
file reads) rather than inheriting a potentially polluted context.
|
|
146
|
+
|
|
147
|
+
On retry, only a short factual note about the attempt number is included
|
|
148
|
+
in the task prompt — no accumulated failure messages or suggestions that
|
|
149
|
+
could bias the agent toward the same broken approach.
|
|
150
|
+
"""
|
|
151
|
+
subtask.status = SubtaskStatus.RUNNING
|
|
152
|
+
await self._emit(
|
|
153
|
+
on_step, "status",
|
|
154
|
+
content=f"Subtask {subtask.id}: {subtask.description[:80]}",
|
|
155
|
+
subtask_id=subtask.id,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
verifier = Verifier(
|
|
159
|
+
client=self.client,
|
|
160
|
+
model=self.model,
|
|
161
|
+
context=context,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
last_verification: VerifyResult | None = None
|
|
165
|
+
|
|
166
|
+
for attempt in range(1, MAX_SUBTASK_ATTEMPTS + 1):
|
|
167
|
+
subtask.attempts = attempt
|
|
168
|
+
|
|
169
|
+
# Fresh executor each attempt — no carried-over conversation
|
|
170
|
+
executor = SingleAgentExecutor(
|
|
171
|
+
client=self.client,
|
|
172
|
+
model=self.model,
|
|
173
|
+
context=context,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Build task prompt: original description + minimal retry hint
|
|
177
|
+
if attempt == 1:
|
|
178
|
+
task_prompt = subtask.description
|
|
179
|
+
else:
|
|
180
|
+
# Only tell the agent it's a retry and to check current state.
|
|
181
|
+
# Don't feed back the previous error/suggestion — let it
|
|
182
|
+
# discover the actual state from disk with fresh eyes.
|
|
183
|
+
task_prompt = (
|
|
184
|
+
f"{subtask.description}\n\n"
|
|
185
|
+
f"Note: This is attempt {attempt}/{MAX_SUBTASK_ATTEMPTS}. "
|
|
186
|
+
f"A previous attempt may have made partial progress. "
|
|
187
|
+
f"Start by checking git diff and the current state of relevant files "
|
|
188
|
+
f"before making any changes."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
result = await executor.run(
|
|
192
|
+
task=task_prompt,
|
|
193
|
+
on_step=on_step,
|
|
194
|
+
subtask_id=subtask.id,
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
# Verify
|
|
198
|
+
last_verification = await verifier.verify(subtask, on_step=on_step)
|
|
199
|
+
|
|
200
|
+
if last_verification.passed:
|
|
201
|
+
subtask.status = SubtaskStatus.PASSED
|
|
202
|
+
subtask.result = result
|
|
203
|
+
await self._emit(
|
|
204
|
+
on_step, "status",
|
|
205
|
+
content=f"Subtask {subtask.id} passed (attempt {attempt})",
|
|
206
|
+
subtask_id=subtask.id,
|
|
207
|
+
)
|
|
208
|
+
return result
|
|
209
|
+
|
|
210
|
+
# Failed — log and loop with fresh context
|
|
211
|
+
await self._emit(
|
|
212
|
+
on_step, "status",
|
|
213
|
+
content=f"Subtask {subtask.id} failed verification (attempt {attempt}): {last_verification.output[:100]}",
|
|
214
|
+
subtask_id=subtask.id,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# All attempts exhausted
|
|
218
|
+
subtask.status = SubtaskStatus.FAILED
|
|
219
|
+
fail_msg = last_verification.output if last_verification else "unknown error"
|
|
220
|
+
subtask.result = f"Failed after {MAX_SUBTASK_ATTEMPTS} attempts: {fail_msg}"
|
|
221
|
+
return subtask.result
|
|
222
|
+
|
|
223
|
+
def cancel(self):
|
|
224
|
+
self._cancelled = True
|