autodevloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autodevloop/__init__.py +5 -0
- autodevloop/__main__.py +6 -0
- autodevloop/cli.py +233 -0
- autodevloop/config.py +165 -0
- autodevloop/engine.py +750 -0
- autodevloop/llm.py +259 -0
- autodevloop/prompts.py +342 -0
- autodevloop/py.typed +0 -0
- autodevloop/registry.py +37 -0
- autodevloop/reporting.py +127 -0
- autodevloop/testing.py +119 -0
- autodevloop/util.py +250 -0
- autodevloop/vcs.py +74 -0
- autodevloop/webapp.py +1184 -0
- autodevloop/yaml_compat.py +192 -0
- autodevloop-0.1.0.dist-info/METADATA +332 -0
- autodevloop-0.1.0.dist-info/RECORD +21 -0
- autodevloop-0.1.0.dist-info/WHEEL +5 -0
- autodevloop-0.1.0.dist-info/entry_points.txt +2 -0
- autodevloop-0.1.0.dist-info/licenses/LICENSE +21 -0
- autodevloop-0.1.0.dist-info/top_level.txt +1 -0
autodevloop/llm.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Provider-agnostic LLM CLI invocation with retry, backoff, and cost tracking.
|
|
2
|
+
|
|
3
|
+
Only the CLI command differs between providers (claude / codex / gemini); no
|
|
4
|
+
API keys are handled here. Users authenticate their CLI of choice beforehand.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import random
|
|
12
|
+
import re
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
import threading
|
|
16
|
+
import time
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Callable
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class LLMResult:
    """Outcome of one provider CLI invocation.

    ``text`` is the reply handed back to the caller and ``raw`` is the
    unparsed stdout it was taken from. Cost and token counts remain zero when
    the provider output does not report them.
    """

    text: str
    cost_usd: float = 0.0
    input_tokens: int = 0
    output_tokens: int = 0
    duration_s: float = 0.0  # seconds between process start and result
    raw: str = ""
    attempts: int = 1  # 1-based number of the attempt that succeeded
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class CallStats:
    """Running totals of provider usage, overall and broken down by label."""

    cost_usd: float = 0.0
    input_tokens: int = 0
    output_tokens: int = 0
    calls: int = 0
    by_label: dict[str, Any] = field(default_factory=dict)

    def add(self, label: str, result: LLMResult) -> None:
        """Fold one call's cost and token usage into both sets of totals."""
        spent = result.cost_usd
        tokens_in = result.input_tokens
        tokens_out = result.output_tokens

        # Grand totals across every label.
        self.cost_usd += spent
        self.input_tokens += tokens_in
        self.output_tokens += tokens_out
        self.calls += 1

        # Per-label totals; the bucket is created on first use of the label.
        if label not in self.by_label:
            self.by_label[label] = {"cost_usd": 0.0, "input_tokens": 0, "output_tokens": 0, "calls": 0}
        per_label = self.by_label[label]
        per_label["cost_usd"] += spent
        per_label["input_tokens"] += tokens_in
        per_label["output_tokens"] += tokens_out
        per_label["calls"] += 1
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TransientError(RuntimeError):
    """Provider failure that ``call`` retries with backoff instead of surfacing at once."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _split_command(command: str) -> list[str]:
    """Break a command string into argv parts, defaulting to ``["claude"]``.

    POSIX systems use ``shlex`` rules. On Windows a small regex keeps
    double- or single-quoted runs together and drops the quotes, which leaves
    backslashes in paths untouched.
    """
    if os.name != "nt":
        import shlex

        tokens = shlex.split(command)
    else:
        tokens = []
        for groups in re.findall(r'"([^"]+)"|\'([^\']+)\'|(\S+)', command):
            # Exactly one alternative matched; it is the only non-empty group.
            tokens.append(next(g for g in groups if g))
    return tokens if tokens else ["claude"]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def resolve_command(command: str) -> list[str]:
    """Split ``command`` and swap its executable for a concrete path when one is found.

    Resolution order:

    1. the executable read as a filesystem path (``~`` expanded) that names
       an existing file;
    2. a ``PATH`` lookup through ``shutil.which``;
    3. on Windows only, a few per-user install locations of the ``claude`` CLI.

    If none match, the split parts are returned unchanged and the caller's
    process launch reports the missing executable.
    """
    parts = _split_command(command)
    executable = parts[0]

    # Require a file, not mere existence: a directory that happens to share
    # the CLI's name (e.g. ./claude/) must not shadow the PATH lookup below.
    expanded = Path(executable).expanduser()
    if expanded.is_file():
        parts[0] = str(expanded.resolve())
        return parts

    resolved = shutil.which(executable)
    if resolved:
        parts[0] = resolved
        return parts

    if os.name == "nt" and executable.lower() in {"claude", "claude.exe", "claude.cmd", "claude.bat"}:
        home = Path.home()
        candidates = [
            home / ".local" / "bin" / "claude.exe",
            home / ".local" / "bin" / "claude.cmd",
            home / "AppData" / "Roaming" / "npm" / "claude.cmd",
            home / "AppData" / "Roaming" / "npm" / "claude.exe",
        ]
        for candidate in candidates:
            if candidate.is_file():
                parts[0] = str(candidate.resolve())
                return parts
    return parts
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _parse_claude_json(stdout: str) -> tuple[str, float, int, int]:
    """Extract ``(text, cost_usd, input_tokens, output_tokens)`` from CLI JSON output.

    Output that is not a JSON object is returned verbatim as the text with
    zero cost and tokens. Inside an object, the text comes from ``result`` or
    ``text``, the cost from ``total_cost_usd`` or ``cost_usd``, and the token
    counts from ``usage``; input tokens include cache reads and cache
    creation. Malformed pieces (a non-dict ``usage``, non-numeric values)
    count as zero rather than raising, so a slightly off payload does not
    discard an otherwise usable reply.
    """

    def as_float(value: object) -> float:
        try:
            return float(value or 0.0)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0

    def as_int(value: object) -> int:
        try:
            return int(value or 0)  # type: ignore[call-overload]
        except (TypeError, ValueError, OverflowError):
            return 0

    try:
        data = json.loads(stdout)
    except json.JSONDecodeError:
        return stdout, 0.0, 0, 0
    if not isinstance(data, dict):
        return stdout, 0.0, 0, 0

    text = data.get("result") or data.get("text") or ""
    cost = as_float(data.get("total_cost_usd") or data.get("cost_usd"))

    usage = data.get("usage")
    if not isinstance(usage, dict):
        usage = {}
    in_tok = sum(
        as_int(usage.get(key))
        for key in ("input_tokens", "cache_read_input_tokens", "cache_creation_input_tokens")
    )
    out_tok = as_int(usage.get("output_tokens"))

    # A non-string "result" (e.g. a nested object) is not a usable reply;
    # hand back the raw output instead.
    return (text if isinstance(text, str) else stdout), cost, in_tok, out_tok
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Lower-case substrings whose presence marks provider output as a retryable failure.
_TRANSIENT_PATTERNS = (
    "overloaded",
    "rate limit",
    "rate_limit",
    "429",
    "503",
    "502",
    "500",
    "timeout",
    "timed out",
    "connection reset",
    "temporarily",
    "try again",
    "econnreset",
    "etimedout",
    "service unavailable",
)


def _looks_transient(text: str) -> bool:
    """Return True when ``text`` contains any transient-failure marker, ignoring case."""
    haystack = text.lower()
    for marker in _TRANSIENT_PATTERNS:
        if marker in haystack:
            return True
    return False
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def call(
    profile: dict[str, Any],
    prompt: str,
    cwd: Path,
    *,
    label: str = "LLM",
    timeout: int = 1800,
    retries: int = 3,
    backoff_seconds: float = 5.0,
    debug_file: Path | None = None,
    on_status: Callable[[str], None] | None = None,
) -> LLMResult:
    """Invoke the configured provider CLI, retrying transient failures with backoff.

    At least one attempt is always made; ``retries`` is the total number of
    attempts, and values below 1 are treated as 1. Only ``TransientError`` is
    retried: any other exception from the invocation propagates immediately.
    Between attempts the wait is ``backoff_seconds`` doubled per attempt plus
    random jitter of up to ``backoff_seconds``, and ``on_status`` (when given)
    is told about the upcoming retry.

    Returns:
        The successful result, with ``attempts`` set to the 1-based attempt
        number that produced it.

    Raises:
        RuntimeError: when every attempt failed transiently; the last
            ``TransientError`` is attached as the cause.
    """
    # Normalise once so the loop bound, the status text and the final error
    # all agree on how many attempts there are.
    attempts = max(1, retries)
    last_exc: TransientError | None = None

    for attempt in range(1, attempts + 1):
        try:
            result = _invoke_once(
                profile, prompt, cwd, label=label, timeout=timeout,
                debug_file=debug_file, on_status=on_status,
            )
        except TransientError as exc:
            last_exc = exc  # keep a reference: ``exc`` is unbound after this clause
        else:
            result.attempts = attempt
            return result

        if attempt >= attempts:
            break
        # Exponential backoff with jitter; clamp so a negative
        # ``backoff_seconds`` cannot make ``time.sleep`` raise.
        delay = max(0.0, backoff_seconds * (2 ** (attempt - 1)) + random.uniform(0, backoff_seconds))
        if on_status:
            on_status(f"transient failure (attempt {attempt}/{attempts}); retrying in {int(delay)}s")
        time.sleep(delay)

    raise RuntimeError(
        f"{label}: provider call failed after {attempts} attempts. Last error:\n{last_exc}"
    ) from last_exc
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _invoke_once(
    profile: dict[str, Any],
    prompt: str,
    cwd: Path,
    *,
    label: str,
    timeout: int,
    debug_file: Path | None,
    on_status: Callable[[str], None] | None,
) -> LLMResult:
    """Run the provider CLI a single time and turn its output into an ``LLMResult``.

    The command line is built from ``profile``: ``command`` plus ``args``,
    then ``model_flag``/``model`` when a model is set, then ``extra_args``,
    and, for the profile named ``claude``, ``--debug-file`` when ``debug_file``
    is given. The prompt is written to the child's stdin unless ``prompt_via``
    is something other than ``"stdin"``, in which case it is appended as the
    final argument.

    Each pipe is serviced by exactly one thread (stdin writer, stdout reader,
    stderr reader) while this function waits on the process. Nothing else
    touches the pipes, so no stderr line can be lost to a competing reader
    and stdin is closed only after the prompt has been written.

    Raises:
        TransientError: on timeout, on a non-zero exit whose stderr looks
            transient, or on an empty reply accompanied by a transient signal.
        RuntimeError: when the executable cannot be found, or the provider
            exits non-zero for a reason that does not look transient.
    """
    command = resolve_command(str(profile.get("command") or "claude"))
    args = list(profile.get("args") or [])
    full = [*command, *args]
    model = profile.get("model")
    if model:
        full.extend([str(profile.get("model_flag") or "--model"), str(model)])
    full.extend(list(profile.get("extra_args") or []))
    if debug_file is not None and profile.get("name") == "claude":
        full.extend(["--debug-file", str(debug_file)])

    prompt_via = str(profile.get("prompt_via", "stdin"))
    use_stdin = prompt_via == "stdin"
    if not use_stdin:
        full.append(prompt)

    start = time.time()
    try:
        process = subprocess.Popen(
            full,
            cwd=str(cwd),
            stdin=subprocess.PIPE if use_stdin else subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            f"Provider command not found: {profile.get('command')!r}. Install the CLI "
            "and add it to PATH, or change the provider command in settings."
        ) from exc

    stdout_parts: list[str] = []
    stderr_lines: list[str] = []

    def drain_stdout() -> None:
        if process.stdout is None:
            return
        with process.stdout as stream:
            for line in stream:
                stdout_parts.append(line)

    def drain_stderr() -> None:
        if process.stderr is None:
            return
        with process.stderr as stream:
            for line in stream:
                stripped = line.rstrip()
                stderr_lines.append(stripped)
                if not stripped:
                    continue
                if on_status and any(kw in stripped.lower() for kw in ("write", "edit", "read", "tool", "error", "warn", "create")):
                    on_status(stripped[:160])

    def feed_stdin() -> None:
        stream = process.stdin
        if not use_stdin or stream is None:
            return
        try:
            stream.write(prompt)
        except (OSError, ValueError):
            # The child exited or closed its stdin early; its exit status and
            # stderr report the actual failure.
            pass
        finally:
            try:
                stream.close()
            except (OSError, ValueError):
                pass

    # No periodic heartbeat events: the dashboard shows a live per-agent timer
    # client-side. We only surface meaningful tool-activity lines via on_status.
    threads = [
        threading.Thread(target=drain_stdout, daemon=True),
        threading.Thread(target=drain_stderr, daemon=True),
        threading.Thread(target=feed_stdin, daemon=True),
    ]
    for thread in threads:
        thread.start()

    # Upper bound on how long to wait for each pipe thread after the process
    # is gone; a grandchild that inherited a pipe could otherwise keep a
    # reader blocked forever.
    drain_grace_s = 5.0
    try:
        # wait() is safe here despite the pipes: the reader threads keep them
        # drained, so the child cannot block on a full pipe buffer.
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()  # reap the killed child so it does not linger as a zombie
        raise TransientError(f"{label}: provider timed out after {timeout}s")
    finally:
        for thread in threads:
            thread.join(timeout=drain_grace_s)

    stdout = "".join(stdout_parts)
    stderr_text = "\n".join(stderr_lines)
    if process.returncode != 0:
        tail = "\n".join(stderr_lines[-20:])
        if _looks_transient(stderr_text):
            raise TransientError(f"{label}: exit {process.returncode}. {tail}")
        raise RuntimeError(f"{label}: provider exited with code {process.returncode}.\n{tail}")

    duration = time.time() - start
    if profile.get("output") == "claude-json":
        text, cost, in_tok, out_tok = _parse_claude_json(stdout)
    else:
        text, cost, in_tok, out_tok = stdout, 0.0, 0, 0

    if not text.strip() and _looks_transient(stderr_text):
        raise TransientError(f"{label}: empty result with transient signal")

    return LLMResult(
        text=text,
        cost_usd=cost,
        input_tokens=in_tok,
        output_tokens=out_tok,
        duration_s=duration,
        raw=stdout,
    )
|
autodevloop/prompts.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
"""Editable prompt templates.
|
|
2
|
+
|
|
3
|
+
Templates use ``{{placeholder}}`` markers so literal JSON braces in the body
|
|
4
|
+
stay untouched. Defaults are written into ``.autodev/prompts/templates`` on
|
|
5
|
+
first run; users (or the web settings page) can edit those files freely. The
|
|
6
|
+
fixed pipeline stays standardised while the prompt wording remains open for
|
|
7
|
+
the model to exercise judgement.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from .util import read_text, write_text
|
|
17
|
+
|
|
18
|
+
# Matches a ``{{name}}`` placeholder, allowing whitespace inside the braces;
# group 1 captures the name.
_PLACEHOLDER = re.compile(r"{{\s*(\w+)\s*}}")
|
|
19
|
+
|
|
20
|
+
# Built-in prompt bodies keyed by template name. ``ensure_templates`` writes
# each one to ``<name>.md`` when that file is missing, and ``load_template``
# falls back to them when the file on disk is blank. ``{{name}}`` markers are
# filled in by ``render``; single braces are literal JSON shown to the model.
DEFAULT_TEMPLATES: dict[str, str] = {
    "arch": """
You are AgentARCH, the founding architect for this project.

User goal:
{{goal}}

Extra architecture hints from the user (may be empty):
{{arch_hint}}

Choose a mainstream, well-supported technology stack, a clean directory
layout, a run strategy, a test strategy, and acceptance criteria. Favour
conventional, popular frameworks and project structures over exotic choices.
Keep the architecture general enough that future versions can evolve the
product without drifting from the user's goal.

Create or update docs/project_design.md in the working directory with the
chosen design. Then return a concise Markdown architecture report covering:
- Project type
- Tech stack (and why it is a mainstream choice)
- Directory layout
- Run instructions
- Test strategy and pass criteria
- How future versions should split work across agents
- Product boundaries: what to add only when it clearly serves the goal
""".strip(),
    "plan": """
You are AgentPLAN for version v{{version}} (phase: {{phase}}).

User goal:
{{goal}}

Architecture contract (stay consistent with this):
{{architecture}}

{{phase_guidance}}

Accepted feature backlog (pick from here when in the expand phase):
{{backlog}}

Previous iteration context:
{{previous}}

Current project context:
{{context}}

Decide what THIS version should deliver. If tests fail or there are real
bugs, fixing them comes first. Otherwise advance the product meaningfully.
You may dynamically choose how many development agents to use and what each
one does — split work only when it can be merged safely (ideally each agent
owns distinct files/areas).

Return ONLY JSON:
{
"version_goal": "...",
"acceptance_criteria": ["..."],
"dev_agents": [
{"name": "AgentDEV_BACKEND", "role": "backend/frontend/docs/...", "task": "...", "owns": ["path/glob", "..."]}
],
"test_focus": ["..."],
"risks": ["..."]
}
""".strip(),
    "dev": """
You are {{agent_name}} for version v{{version}}.

User goal:
{{goal}}

Architecture contract (do not violate the chosen stack/layout):
{{architecture}}

Version plan:
{{plan}}

Your specific task:
{{task}}

Files you own (prefer editing only these to avoid clobbering peers):
{{owns}}

Work only inside this workspace and produce runnable code. Preserve existing
working behaviour unless the plan says to change it. Keep the project aligned
with the user's goal; avoid unrelated features. Choose implementation details
appropriate to the project type. Update files directly, then end with:

SUMMARY:
Added: [...]
Changed: [...]
Fixed: [...]
Known issues: [...]
""".strip(),
    "doc": """
You are AgentDOC for version v{{version}}.

User goal:
{{goal}}

Version plan:
{{plan}}

Maintain documentation only. Update README.md and docs/project_design.md so
run instructions stay accurate (if a local server is required, do not claim
double-clicking the HTML works). Do not edit source code except embedded docs.
""".strip(),
    "test": """
You are AgentTEST for version v{{version}}.

User goal:
{{goal}}

Version plan:
{{plan}}

Detected built-in test candidates:
{{candidates}}

Current project context:
{{context}}

Decide the minimum credible test command(s) and what counts as pass. Prefer
existing project test/build commands; otherwise pick a built-in smoke marker
from the candidates.

Return ONLY JSON:
{
"commands": ["command or __builtin_marker__"],
"pass_criteria": ["..."],
"reason": "...",
"requires_manual_check": false
}
""".strip(),
    "review": """
You are AgentREVIEW for version v{{version}} (phase: {{phase}}).

User goal:
{{goal}}

Version plan:
{{plan}}

Test result:
{{test_result}}

Development agent summaries:
{{dev_summaries}}

Current project context:
{{context}}

Score strictly but fairly. Focus on runtime breakage, missing core
requirements, test gaps, maintainability, architecture consistency, and
whether the work drifts from the user goal. Also judge how complete the
ORIGINAL user goal now is (goal_met = the core requested product is fully
usable and feature-complete, not merely bug-free). Write a short
human-readable summary of what this version delivers.

SCALE (important): "score" and "goal_progress" are integers from 0 to 100.
Do NOT use a 0-10 scale and do NOT use a 0-1 fraction. Score guide:
90-100 production-ready; 80-89 solid with minor issues; 60-79 works but has
notable gaps; 40-59 partly working; 0-39 broken or far from the goal.
goal_progress is the percent of the ORIGINAL user goal that is now done.

Return ONLY JSON:
{
"score": 0,
"blocking": false,
"goal_met": false,
"goal_progress": 0,
"issues": ["..."],
"good_points": ["..."],
"feature_summary": "one or two sentences on what this version does",
"whats_new": ["concise bullet of what changed vs the previous version"],
"suggestions_for_next_version": ["..."]
}
""".strip(),
    "fix": """
You are AgentFIX for version v{{version}}, attempt {{attempt}}.

User goal:
{{goal}}

Original plan:
{{plan}}

Failing tests:
{{test_result}}

Review:
{{review}}

Fix only what is required to make this version usable and aligned with the
goal. Do not add unrelated new features in a fix pass. Update files directly.
""".strip(),
    "scout": """
You are AgentSCOUT for version v{{version}}. The core user goal is already
met, so propose genuinely valuable NEW features that extend the product into
adjacent territory a real user of this product would appreciate.

User goal (already satisfied):
{{goal}}

Latest review:
{{review}}

Existing backlog (avoid duplicates):
{{backlog}}

Current project context:
{{context}}

Return ONLY JSON with candidate features (do not implement anything yet):
{
"candidates": [
{"title": "...", "description": "...", "rationale": "why a user benefits"}
]
}
""".strip(),
    "evaluate": """
You are AgentEVALUATE, an independent product reviewer. Score each candidate
feature for whether it is worth building on top of this product.

User goal:
{{goal}}

Candidate features:
{{candidates}}

For each candidate give value (0-100), effort (low/medium/high), and a verdict.
A feature is "accepted" only when value >= {{threshold}} and it clearly serves
or sensibly extends the product. Reject vanity or unrelated features.

Return ONLY JSON:
{
"evaluations": [
{"title": "...", "value": 0, "effort": "low|medium|high", "accepted": false, "reason": "..."}
]
}
""".strip(),
    "goal_check": """
You are AgentGOALCHECK. Judge ONLY how complete the original user goal is.

User goal:
{{goal}}

Latest review:
{{review}}

Current project context:
{{context}}

"goal_progress" is an integer from 0 to 100 (percent of the original user goal
that is done). Do NOT use a 0-10 scale or a 0-1 fraction.

Return ONLY JSON:
{
"goal_met": false,
"goal_progress": 0,
"missing_for_goal": ["..."],
"reason": "..."
}
""".strip(),
}
|
|
283
|
+
|
|
284
|
+
# Every built-in template name, in the order DEFAULT_TEMPLATES declares them.
TEMPLATE_NAMES = list(DEFAULT_TEMPLATES)
|
|
285
|
+
|
|
286
|
+
# Per-template tokens that must survive any user edit. The wording around them
# may be rewritten freely (in any language), but dropping one breaks the
# pipeline, so the web settings page refuses to save a template without them.
#   - ``{{placeholder}}`` entries: context the engine substitutes in.
#   - bare-word entries: JSON keys the engine reads back out of the reply.
REQUIRED_TOKENS: dict[str, list[str]] = {
    "arch": [
        "{{goal}}",
        "{{arch_hint}}",
    ],
    "plan": [
        "{{version}}",
        "{{goal}}",
        "{{phase}}",
        "{{architecture}}",
        "{{context}}",
        "version_goal",
        "dev_agents",
    ],
    "dev": [
        "{{agent_name}}",
        "{{version}}",
        "{{goal}}",
        "{{plan}}",
        "{{task}}",
    ],
    "doc": [
        "{{version}}",
        "{{goal}}",
        "{{plan}}",
    ],
    "test": [
        "{{version}}",
        "{{goal}}",
        "{{candidates}}",
        "{{context}}",
        "commands",
    ],
    "review": [
        "{{version}}",
        "{{goal}}",
        "{{plan}}",
        "{{test_result}}",
        "{{context}}",
        "score",
        "goal_met",
        "goal_progress",
        "feature_summary",
        "whats_new",
    ],
    "fix": [
        "{{version}}",
        "{{goal}}",
        "{{plan}}",
        "{{test_result}}",
        "{{review}}",
    ],
    "scout": [
        "{{goal}}",
        "{{review}}",
        "{{context}}",
        "candidates",
    ],
    "evaluate": [
        "{{goal}}",
        "{{candidates}}",
        "{{threshold}}",
        "evaluations",
        "value",
        "accepted",
    ],
    "goal_check": [
        "{{goal}}",
        "{{review}}",
        "{{context}}",
        "goal_met",
        "goal_progress",
    ],
}
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def validate_template(name: str, body: str) -> list[str]:
    """Return the required tokens missing from ``body`` (an empty list means valid).

    Placeholder tokens (``{{name}}``) are recognised with the same pattern
    ``render`` substitutes with, so a spelling such as ``{{ name }}`` counts
    as present; otherwise a template that renders correctly would be rejected.
    Bare-word tokens are JSON keys and are matched as plain substrings.
    Template names with no entry in ``REQUIRED_TOKENS`` have no requirements.
    """
    text = body or ""
    placeholders_present = {match.group(1) for match in _PLACEHOLDER.finditer(text)}

    missing: list[str] = []
    for token in REQUIRED_TOKENS.get(name, []):
        if token.startswith("{{") and token.endswith("}}"):
            found = token[2:-2].strip() in placeholders_present
        else:
            found = token in text
        if not found:
            missing.append(token)
    return missing
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def templates_dir(app_dir: Path) -> Path:
    """Return the ``prompts/templates`` directory beneath ``app_dir``."""
    return app_dir.joinpath("prompts", "templates")
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def ensure_templates(app_dir: Path) -> None:
    """Create the templates directory and write every default template file that is missing."""
    target_dir = templates_dir(app_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    for template_name in DEFAULT_TEMPLATES:
        destination = target_dir / f"{template_name}.md"
        if destination.exists():
            # Existing files are left alone: they may hold user edits.
            continue
        write_text(destination, DEFAULT_TEMPLATES[template_name] + "\n")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def load_template(app_dir: Path, name: str) -> str:
    """Return the stored template ``name``, or its built-in default when the stored text is blank.

    A name with neither stored text nor a built-in default yields ``""``.
    """
    stored = read_text(templates_dir(app_dir) / f"{name}.md")
    if stored.strip():
        return stored
    return DEFAULT_TEMPLATES.get(name, "")
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def render(template: str, values: dict[str, Any]) -> str:
    """Fill every ``{{name}}`` placeholder in ``template`` and strip the result.

    Each placeholder becomes ``str(values[name])``; a name absent from
    ``values`` renders as an empty string.
    """
    filled = _PLACEHOLDER.sub(lambda found: str(values.get(found.group(1), "")), template)
    return filled.strip()
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def render_template(app_dir: Path, name: str, values: dict[str, Any]) -> str:
    """Load template ``name`` for ``app_dir`` and render it with ``values``."""
    template = load_template(app_dir, name)
    return render(template, values)
|
autodevloop/py.typed
ADDED
|
File without changes
|
autodevloop/registry.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""A small registry of known project directories, stored in the user home.
|
|
2
|
+
|
|
3
|
+
Lets the web dashboard list every project that was started via the CLI or the
|
|
4
|
+
web UI, regardless of where it lives on disk.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .util import load_json, now_text, save_json
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def registry_path() -> Path:
    """Return the registry file location, ``~/.autodevloop/registry.json``."""
    return Path.home().joinpath(".autodevloop", "registry.json")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load() -> list[dict[str, Any]]:
    """Return the registered project entries, dropping any that are not dicts.

    A registry whose top level is not a dict yields an empty list.
    """
    data = load_json(registry_path(), {"projects": []})
    if not isinstance(data, dict):
        return []
    return [entry for entry in data.get("projects", []) if isinstance(entry, dict)]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def register(root: Path, name: str = "") -> None:
    """Record ``root`` as the newest project, replacing any earlier entry for the same directory.

    ``name`` defaults to the directory's own name. The saved registry keeps
    at most 100 entries, newest first.
    """
    root = root.resolve()
    key = str(root)
    existing = load()
    fresh = {"dir": key, "name": name or root.name, "registered_at": now_text()}
    others = [project for project in existing if project.get("dir") != key]
    save_json(registry_path(), {"projects": [fresh, *others][:100]}, stamp=False)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def remove(root: Path) -> None:
    """Delete the entry for ``root`` (if any) from the registry and save the rest."""
    key = str(root.resolve())
    remaining = []
    for project in load():
        if project.get("dir") != key:
            remaining.append(project)
    save_json(registry_path(), {"projects": remaining}, stamp=False)
|