borisxdave 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boris.py +672 -0
- borisxdave-0.2.0.dist-info/METADATA +6 -0
- borisxdave-0.2.0.dist-info/RECORD +12 -0
- borisxdave-0.2.0.dist-info/WHEEL +5 -0
- borisxdave-0.2.0.dist-info/entry_points.txt +2 -0
- borisxdave-0.2.0.dist-info/top_level.txt +7 -0
- config.py +12 -0
- engine.py +684 -0
- git_manager.py +248 -0
- planner.py +161 -0
- prompts.py +687 -0
- state.py +103 -0
prompts.py
ADDED
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
"""Boris prompts - planning and prompt crafting (merged from planner + crafter)."""
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
|
|
11
|
+
# Force unbuffered stdout for real-time output on Windows
|
|
12
|
+
os.environ.setdefault("PYTHONUNBUFFERED", "1")
|
|
13
|
+
|
|
14
|
+
from state import Milestone, Plan, UIMilestone, UIPlan
|
|
15
|
+
|
|
16
|
+
IS_WINDOWS = sys.platform == "win32"
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Directories (relative to Boris install dir)
|
|
20
|
+
_BORIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
21
|
+
_PLANS_DIR = os.path.join(_BORIS_DIR, "plans")
|
|
22
|
+
_BORIS_PROMPT_PATH = os.path.join(_BORIS_DIR, "boris_prompt.md")
|
|
23
|
+
|
|
24
|
+
# Resolve claude command to full path so shell=False works on Windows (.cmd files)
|
|
25
|
+
CLAUDE_CMD = shutil.which("claude") or "claude"
|
|
26
|
+
|
|
27
|
+
# Cached Boris system prompt
|
|
28
|
+
_boris_prompt_cache = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _load_boris_prompt() -> str:
    """Return the text of boris_prompt.md, reading it from disk at most once.

    The stripped file contents are stored in the module-level
    ``_boris_prompt_cache``. When the file does not exist a warning is logged
    and an empty string is cached instead, so later calls do not retry.
    """
    global _boris_prompt_cache

    if _boris_prompt_cache is None:
        try:
            with open(_BORIS_PROMPT_PATH, "r", encoding="utf-8") as prompt_file:
                text = prompt_file.read().strip()
        except FileNotFoundError:
            logger.warning("boris_prompt.md not found at %s", _BORIS_PROMPT_PATH)
            text = ""
        else:
            logger.debug("Loaded Boris prompt: %d chars", len(text))
        _boris_prompt_cache = text

    return _boris_prompt_cache
|
|
44
|
+
|
|
45
|
+
# Regex to strip terminal escape sequences. Three forms are recognised:
#   * CSI sequences (ESC [ params intermediates final), including
#     private-mode ones such as ESC[?25l that carry a "?" parameter;
#   * OSC sequences (ESC ] ... terminated by BEL or ESC \), e.g. window titles;
#   * remaining two-character escapes (ESC followed by one byte in @-Z or \-_).
_ANSI_RE = re.compile(
    r"\x1b(?:"
    r"\[[0-?]*[ -/]*[@-~]"
    r"|\][^\x07\x1b]*(?:\x07|\x1b\\)"
    r"|[@-Z\\-_]"
    r")"
)


def _clean_output(text: str) -> str:
    """Strip ANSI escape sequences from *text* for safe embedding in prompts.

    Args:
        text: Raw captured output, possibly containing colour codes, cursor
            control sequences or terminal-title sequences.

    Returns:
        *text* with every sequence matched by ``_ANSI_RE`` removed; all other
        characters are left untouched.
    """
    return _ANSI_RE.sub("", text)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _list_project_files(project_dir: str, max_files: int = 500) -> str:
    """Walk project_dir and return a formatted, deterministic file listing.

    Caps output at *max_files* entries to keep prompts within Claude CLI limits.
    Directories named in the skip set and any directory starting with "." are
    not descended into. Both directories and files are visited in sorted
    order, so the listing (and which entries survive truncation) does not
    depend on the order the operating system returns directory entries.

    Args:
        project_dir: Root directory to list.
        max_files: Maximum number of file entries to include.

    Returns:
        One line per file (paths relative to *project_dir*, each prefixed with
        a space), followed by a truncation note when more files exist than
        were listed, or "(empty project)" when no files were found.
    """
    skip_dirs = {".git", "__pycache__", "node_modules", ".boris", ".venv", "venv"}
    file_list = []
    truncated = False

    for root, dirs, files in os.walk(project_dir):
        # Prune in place so os.walk does not descend into skipped/hidden dirs;
        # sorting fixes the traversal order.
        dirs[:] = sorted(
            d for d in dirs if d not in skip_dirs and not d.startswith(".")
        )
        rel_root = os.path.relpath(root, project_dir)
        for fname in sorted(files):
            if len(file_list) >= max_files:
                truncated = True
                break
            if rel_root == ".":
                file_list.append(fname)
            else:
                file_list.append(os.path.join(rel_root, fname))
        if truncated:
            break

    if not file_list:
        return "(empty project)"

    result = "\n".join(f" {f}" for f in file_list)
    if truncated:
        result += f"\n ... (truncated — showing {max_files} of many files)"
    return result
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# --- Planning (from planner.py) ---
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def create_plan(task: str, project_dir: str) -> Plan:
    """Break a task into milestones using Claude CLI.

    The prompt (task, project directory and a capped file listing) is piped to
    ``claude -p`` on stdin. The CLI runs in a worker thread so that a spinner
    can be shown on stdout while waiting. The reply is expected to contain a
    JSON array of milestone objects, optionally wrapped in a markdown code
    fence or in an object with a "milestones" key.

    Args:
        task: Description of the work to plan.
        project_dir: Directory whose files are listed in the prompt.

    Returns:
        A Plan with one Milestone per array entry. A markdown copy of the plan
        is also written via ``_save_plan_markdown``.

    Raises:
        RuntimeError: If the CLI is missing, cannot be started, times out,
            exits non-zero, or its reply cannot be parsed as a JSON array of
            milestones.
        KeyError: If a milestone object lacks "id", "title" or "description".
    """
    # Imported locally because the module does not import these at file level.
    import threading
    import time

    file_listing = _list_project_files(project_dir)

    prompt = (
        "You are a technical project planner. Break this task into ordered milestones.\n"
        "Each milestone must be a concrete, buildable unit that DaveLoop can execute independently.\n"
        "Order by dependency: foundation first, then features that depend on it.\n\n"
        f"Task: {task}\n"
        f"Project directory: {project_dir}\n"
        f"Existing files:\n{file_listing}\n\n"
        "Return ONLY a JSON array (no other text) of milestones:\n"
        "[\n"
        " {\n"
        ' "id": "M1",\n'
        ' "title": "Short title",\n'
        ' "description": "Detailed description of what to build",\n'
        ' "depends_on": [],\n'
        ' "acceptance_criteria": ["criterion 1", "criterion 2"],\n'
        ' "files_to_create": ["file1.py"],\n'
        ' "files_to_modify": []\n'
        " }\n"
        "]"
    )

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"

    print(" [Boris] Planning phase - Claude is thinking...", flush=True)
    print(flush=True)

    # Filled in by the worker thread. "exc" carries any exception raised while
    # launching or waiting for the CLI, so it is reported from this thread
    # instead of being lost inside the worker.
    plan_result = {"stdout": "", "returncode": None, "error": None, "exc": None}

    def _run_claude():
        try:
            # Use shell=False for reliable stdin piping on Windows.
            # shell=True routes through cmd.exe which can mangle special chars.
            result = subprocess.run(
                [CLAUDE_CMD, "-p", "--no-session-persistence"],
                input=prompt,
                capture_output=True,
                timeout=600,
                env=env,
                encoding="utf-8",
                errors="replace",
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # OSError covers FileNotFoundError as well as e.g. PermissionError.
            plan_result["exc"] = exc
            return
        plan_result["stdout"] = result.stdout
        plan_result["returncode"] = result.returncode
        if result.returncode != 0:
            # Claude CLI may write errors to either stream.
            plan_result["error"] = result.stderr.strip() or result.stdout.strip()[:500]

    thread = threading.Thread(target=_run_claude, daemon=True)
    thread.start()

    # Heartbeat: redraw the spinner about every 2 seconds. join() with a
    # timeout returns as soon as the worker finishes, so there is no extra
    # wait after the CLI exits.
    spinner = ["|", "/", "-", "\\"]
    started = time.monotonic()
    tick = 0
    while thread.is_alive():
        elapsed = int(time.monotonic() - started)
        sys.stdout.write(
            f"\r [Boris] {spinner[tick % len(spinner)]} Claude is planning... ({elapsed}s)"
        )
        sys.stdout.flush()
        thread.join(timeout=2)
        tick += 1

    # Clear spinner line
    sys.stdout.write("\r" + " " * 60 + "\r")
    sys.stdout.flush()

    # Handle errors
    failure = plan_result["exc"]
    if isinstance(failure, subprocess.TimeoutExpired):
        raise RuntimeError("Claude CLI timed out while generating plan") from failure
    if isinstance(failure, FileNotFoundError):
        raise RuntimeError(
            f"'{CLAUDE_CMD}' command not found. Is Claude CLI installed?"
        ) from failure
    if failure is not None:
        raise RuntimeError(f"Claude CLI could not be started: {failure}") from failure
    if plan_result["returncode"] != 0:
        raise RuntimeError(
            f"Claude CLI failed (exit {plan_result['returncode']}): {plan_result['error']}"
        )

    response = plan_result["stdout"].strip()
    logger.debug("Claude raw response: %s", response[:500])

    print(" [Boris] Plan received! Parsing milestones...", flush=True)

    # Show milestone IDs as we find them in the raw reply
    for id_match in re.finditer(r'"id"\s*:\s*"(M\d+)"', response):
        print(f" [Boris] Found milestone {id_match.group(1)}", flush=True)
    print(flush=True)

    # Extract JSON from response - handle markdown code blocks and surrounding text
    json_match = re.search(r"```(?:json)?\s*\n(.*?)\n```", response, re.DOTALL)
    if json_match:
        response = json_match.group(1).strip()
    else:
        # Try to find raw JSON array
        array_match = re.search(r"\[.*\]", response, re.DOTALL)
        if array_match:
            response = array_match.group(0).strip()

    try:
        milestones_data = json.loads(response)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Failed to parse Claude's plan response as JSON: {e}") from e

    # Tolerate an object wrapper, matching what create_ui_plan accepts.
    if isinstance(milestones_data, dict):
        milestones_data = milestones_data.get("milestones")
    if not isinstance(milestones_data, list):
        raise RuntimeError(
            "Failed to parse Claude's plan response as JSON: expected an array of milestones"
        )

    milestones = [
        Milestone(
            id=m["id"],
            title=m["title"],
            description=m["description"],
            depends_on=m.get("depends_on", []),
            acceptance_criteria=m.get("acceptance_criteria", []),
            files_to_create=m.get("files_to_create", []),
            files_to_modify=m.get("files_to_modify", []),
        )
        for m in milestones_data
    ]

    plan = Plan(task=task, milestones=milestones)

    # Save plan as markdown
    plan_path = _save_plan_markdown(plan, task)
    logger.info("Plan saved to %s", plan_path)

    print(f" [Boris] Plan saved to {plan_path}", flush=True)

    return plan
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _save_plan_markdown(plan: Plan, task: str) -> str:
    """Write *plan* as a human-readable markdown report and return its path.

    The plans directory is created when missing and the file is named
    ``plan_<YYYYmmdd_HHMMSS>.md`` from the current local time. The report has
    a header (task, creation time, milestone count) followed by one section
    per milestone, each closed by a horizontal rule.
    """
    os.makedirs(_PLANS_DIR, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = os.path.join(_PLANS_DIR, f"plan_{stamp}.md")

    doc = [
        "# Boris Plan",
        "",
        f"**Task:** {task}",
        f"**Created:** {plan.created_at}",
        f"**Milestones:** {len(plan.milestones)}",
        "",
        "---",
        "",
    ]

    for milestone in plan.milestones:
        doc += [
            f"## {milestone.id}: {milestone.title}",
            "",
            f"{milestone.description}",
            "",
        ]

        if milestone.depends_on:
            doc += [f"**Depends on:** {', '.join(milestone.depends_on)}", ""]

        doc.append("**Acceptance Criteria:**")
        doc.extend(f"- {criterion}" for criterion in milestone.acceptance_criteria)
        doc.append("")

        # The two file lines are optional; the section always ends with a
        # blank line, a rule and another blank line.
        if milestone.files_to_create:
            doc.append(f"**Files to create:** {', '.join(milestone.files_to_create)}")
        if milestone.files_to_modify:
            doc.append(f"**Files to modify:** {', '.join(milestone.files_to_modify)}")
        doc += ["", "---", ""]

    with open(filepath, "w", encoding="utf-8") as out:
        out.write("\n".join(doc))

    return filepath
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# --- Prompt Crafting (from crafter.py) ---
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def craft_prompt(milestone: Milestone, plan: Plan, project_dir: str) -> str:
    """Assemble the single prompt DaveLoop receives to build one milestone.

    The prompt leads with the scope lock and the milestone spec (files,
    description, acceptance criteria, verification), then appends background
    context: the overall task, completed milestones, the current file listing
    and the milestones this one depends on. It ends with a scope reminder.

    Returns:
        The prompt text, sections joined by newlines.
    """
    label = f"{milestone.id}: {milestone.title}"

    # SCOPE LOCK - state up front exactly what is to be built
    out = [
        f"# YOUR TASK: {milestone.id} - {milestone.title}",
        "",
        f"You are building ONE milestone only: **{label}**",
        "Do NOT build the entire project. Do NOT build other milestones.",
        f"Build ONLY what is described below for {milestone.id}.",
        "",
    ]

    # Explicit file scope
    touchable = (milestone.files_to_create or []) + (milestone.files_to_modify or [])
    if touchable:
        out += [
            f"**Files you may touch:** {', '.join(touchable)}",
            "**Do NOT create or modify any other files.**",
            "",
        ]

    # What to build
    out += ["## What to Build", milestone.description, ""]

    if milestone.files_to_create:
        out.append("## Files to Create")
        out.extend(f"- {path}" for path in milestone.files_to_create)
        out.append("")

    if milestone.files_to_modify:
        out.append("## Files to Modify")
        out.extend(f"- {path}" for path in milestone.files_to_modify)
        out.append("")

    # Acceptance criteria come right after the spec, before any context
    out.append("## Acceptance Criteria")
    out.extend(f"- {item}" for item in milestone.acceptance_criteria)
    out.append("")

    # Verification repeats the criteria as checks
    out += ["## Verification", "After implementation, verify by:"]
    out.extend(f"- {item}" for item in milestone.acceptance_criteria)
    out += ["When complete, ensure the code runs without errors.", ""]

    # Context goes AFTER the task spec so DaveLoop reads the task first
    out += [
        "---",
        "## Background Context (for reference only - do NOT build all of this)",
        f"Overall project: {plan.task}",
        f"Project directory: {project_dir}",
        "",
    ]

    # What already exists
    finished = [m for m in plan.milestones if m.status == "completed"]
    if finished:
        out.append("### Completed Milestones")
        for done in finished:
            done_files = done.files_to_create + done.files_to_modify
            shown = ", ".join(done_files) if done_files else "(no specific files)"
            out.append(f"- {done.id}: {done.title} - Files: {shown}")
        out.append("")

    # Current file listing
    out += ["### Current Project Files", _list_project_files(project_dir), ""]

    # Integration points: one bullet per dependency that exists in the plan
    if milestone.depends_on:
        out.append("### Integration Points")
        by_id = {}
        for candidate in plan.milestones:
            # setdefault keeps the first milestone seen for each id
            by_id.setdefault(candidate.id, candidate)
        for dep_id in milestone.depends_on:
            dep = by_id.get(dep_id)
            if dep:
                created = ", ".join(dep.files_to_create) if dep.files_to_create else "none"
                out.append(
                    f"- {dep.id}: {dep.title} - "
                    f"Files: {created}. "
                    "Integrate with this existing code."
                )
        out.append("")

    # Final reminder
    out += ["---", f"**REMINDER: Build ONLY {label}. Nothing else.**"]

    return "\n".join(out)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def craft_correction(
    output: str,
    milestone: Milestone,
    plan: Plan,
    reason: str,
    project_dir: "str | None" = None,
) -> str:
    """Build a correction prompt for DaveLoop after a failed or off-plan attempt.

    Args:
        output: Raw output of the previous attempt; ANSI codes are stripped
            and only the last 200 lines are embedded.
        milestone: The milestone being retried.
        plan: The overall plan; its task text is used as project context.
        reason: Why the previous attempt was rejected.
        project_dir: Optional project directory to mention in the context
            section. When omitted, no "Project directory" line is written
            (previously the task text was printed there by mistake).

    Returns:
        The correction prompt, sections joined by newlines.
    """
    sections = [
        f"## Correction Required: {milestone.title}",
        "",
        f"The previous attempt to build '{milestone.title}' had issues.",
        f"**Problem:** {reason}",
        "",
    ]

    # Output tail (last 200 lines), cleaned of ANSI codes. Slicing with -200
    # already returns the whole list when it is shorter than that.
    tail = _clean_output(output).strip().splitlines()[-200:]
    sections += [
        "## Output from Previous Attempt (last 200 lines)",
        "```",
        "\n".join(tail),
        "```",
        "",
        "Please fix the issues and complete the milestone.",
        "",
    ]

    # Project context
    sections.append("## Project Context")
    sections.append(f"You are working on: {plan.task}")
    if project_dir:
        sections.append(f"Project directory: {project_dir}")
    sections.append("")

    # Current Milestone
    sections += [f"## Current Milestone: {milestone.title}", milestone.description, ""]

    # Acceptance Criteria
    sections.append("## Acceptance Criteria")
    sections.extend(f"- {criterion}" for criterion in milestone.acceptance_criteria)
    sections.append("")

    # Boundaries
    sections += [
        "## Boundaries",
        "Do NOT modify files outside this milestone's scope.",
        "Do NOT refactor or 'improve' existing working code.",
        f"Focus ONLY on: {milestone.title}",
        "",
    ]

    # Verification
    sections.append("## Verification")
    sections.extend(f"- {criterion}" for criterion in milestone.acceptance_criteria)
    sections.append("When complete, ensure the code runs without errors.")

    return "\n".join(sections)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# --- UI Testing & Polish (DaveLoop v1.4) ---
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def create_ui_plan(task: str, project_dir: str) -> UIPlan:
    """Create UI testing milestones using Claude CLI.

    Claude receives the original task and a capped file listing and decides
    the project type and test tool itself. The CLI runs in a worker thread so
    a spinner can be shown on stdout while waiting.

    The reply may be a JSON object with "project_type", "test_tool" and
    "milestones", or a bare JSON array of milestones (then the defaults "web"
    and "playwright" are used). It may be wrapped in a markdown code fence or
    surrounded by other text.

    Args:
        task: The original task the project was built for.
        project_dir: Directory whose files are listed in the prompt.

    Returns:
        A UIPlan holding the project type, test tool and one UIMilestone per
        milestone entry.

    Raises:
        RuntimeError: If the CLI is missing, cannot be started, times out,
            exits non-zero, or its reply cannot be parsed as a JSON object or
            array.
        KeyError: If a milestone object lacks "id", "title" or "description".
    """
    # Imported locally because the module does not import these at file level.
    import threading
    import time

    file_listing = _list_project_files(project_dir)

    prompt = (
        "You are a UI/QA testing expert. This project was just built and needs UI testing and polish.\n\n"
        f"Original task: {task}\n"
        f"Current files:\n{file_listing}\n\n"
        "First, determine:\n"
        "- project_type: \"web\", \"android\", \"ios\", or \"cross-platform\"\n"
        "- test_tool: \"playwright\" (for web) or \"maestro\" (for Android/iOS)\n\n"
        "Then create ordered UI testing milestones. Each milestone tests ONE user flow or UI aspect.\n"
        "Focus on:\n"
        "1. Core user flows (login, main feature, navigation)\n"
        "2. Visual polish (alignment, spacing, responsive/adaptive)\n"
        "3. Edge cases (empty states, error states, loading states)\n"
        "4. Accessibility (contrast, labels, screen reader)\n\n"
        "Return ONLY a JSON object (no other text):\n"
        "{\n"
        ' "project_type": "web",\n'
        ' "test_tool": "playwright",\n'
        ' "milestones": [\n'
        " {\n"
        ' "id": "UI1",\n'
        ' "title": "Test Login Flow",\n'
        ' "description": "Test the login flow...",\n'
        ' "test_commands": ["npx playwright test login.spec.ts"],\n'
        ' "acceptance_criteria": ["Login form displays correctly", "Can enter credentials and submit"]\n'
        " }\n"
        " ]\n"
        "}"
    )

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"

    print(" [Boris] UI Planning phase - Claude is thinking...", flush=True)
    print(flush=True)

    # Filled in by the worker thread. "exc" carries any exception raised while
    # launching or waiting for the CLI, so it is reported from this thread
    # instead of being lost inside the worker.
    plan_result = {"stdout": "", "returncode": None, "error": None, "exc": None}

    def _run_claude():
        try:
            result = subprocess.run(
                [CLAUDE_CMD, "-p", "--no-session-persistence"],
                input=prompt,
                capture_output=True,
                timeout=600,
                env=env,
                encoding="utf-8",
                errors="replace",
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # OSError covers FileNotFoundError as well as e.g. PermissionError.
            plan_result["exc"] = exc
            return
        plan_result["stdout"] = result.stdout
        plan_result["returncode"] = result.returncode
        if result.returncode != 0:
            # Claude CLI may write errors to either stream.
            plan_result["error"] = result.stderr.strip() or result.stdout.strip()[:500]

    thread = threading.Thread(target=_run_claude, daemon=True)
    thread.start()

    # Redraw the spinner about every 2 seconds; join() with a timeout returns
    # as soon as the worker finishes.
    spinner = ["|", "/", "-", "\\"]
    started = time.monotonic()
    tick = 0
    while thread.is_alive():
        elapsed = int(time.monotonic() - started)
        sys.stdout.write(
            f"\r [Boris] {spinner[tick % len(spinner)]} Claude is creating UI plan... ({elapsed}s)"
        )
        sys.stdout.flush()
        thread.join(timeout=2)
        tick += 1

    sys.stdout.write("\r" + " " * 60 + "\r")
    sys.stdout.flush()

    failure = plan_result["exc"]
    if isinstance(failure, subprocess.TimeoutExpired):
        raise RuntimeError("Claude CLI timed out while generating UI plan") from failure
    if isinstance(failure, FileNotFoundError):
        raise RuntimeError(
            f"'{CLAUDE_CMD}' command not found. Is Claude CLI installed?"
        ) from failure
    if failure is not None:
        raise RuntimeError(f"Claude CLI could not be started: {failure}") from failure
    if plan_result["returncode"] != 0:
        raise RuntimeError(
            f"Claude CLI failed (exit {plan_result['returncode']}): {plan_result['error']}"
        )

    response = plan_result["stdout"].strip()
    logger.debug("Claude UI plan raw response: %s", response[:500])

    print(" [Boris] UI Plan received! Parsing...", flush=True)

    # Extract JSON from response. A fenced block wins; otherwise try the whole
    # reply, then the outermost {...} and [...] spans, earliest-starting first.
    json_match = re.search(r"```(?:json)?\s*\n(.*?)\n```", response, re.DOTALL)
    if json_match:
        candidates = [json_match.group(1).strip()]
    else:
        candidates = [response]
        spans = [
            found
            for found in (
                re.search(pattern, response, re.DOTALL)
                for pattern in (r"\{.*\}", r"\[.*\]")
            )
            if found
        ]
        spans.sort(key=lambda found: found.start())
        candidates.extend(found.group(0).strip() for found in spans)

    parse_error = None
    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError as e:
            parse_error = e
        else:
            break
    else:
        raise RuntimeError(
            f"Failed to parse Claude's UI plan response as JSON: {parse_error}"
        ) from parse_error

    # Claude tells us what the project is and what tool to use. A bare array
    # carries only milestones, so the plan-level fields fall back to defaults.
    if isinstance(data, dict):
        plan_fields = data
        milestones_data = data.get("milestones", [])
    elif isinstance(data, list):
        plan_fields = {}
        milestones_data = data
    else:
        raise RuntimeError(
            "Failed to parse Claude's UI plan response as JSON: expected an object or array"
        )

    project_type = plan_fields.get("project_type", "web")
    test_tool = plan_fields.get("test_tool", "playwright")

    print(f" [Boris] Project type: {project_type}, tool: {test_tool}", flush=True)

    ui_milestones = [
        UIMilestone(
            id=m["id"],
            title=m["title"],
            description=m["description"],
            test_tool=m.get("test_tool", test_tool),
            test_commands=m.get("test_commands", []),
            acceptance_criteria=m.get("acceptance_criteria", []),
        )
        for m in milestones_data
    ]

    for um in ui_milestones:
        print(f" [Boris] {um.id}: {um.title}", flush=True)
    print(flush=True)

    return UIPlan(
        project_type=project_type,
        test_tool=test_tool,
        milestones=ui_milestones,
    )
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def craft_ui_prompt(ui_milestone: UIMilestone, ui_plan: UIPlan, plan: Plan, project_dir: str) -> str:
    """Build a prompt for DaveLoop in UI Tester Mode.

    Boris keeps it simple: scope the task, tell DaveLoop it's in UI test mode,
    and let DaveLoop figure out the tooling.

    The tool label is matched case-insensitively against the known tools
    ("playwright", "maestro"). An unrecognised tool name is shown as given
    rather than being relabelled as Maestro; an empty value falls back to
    "Maestro".

    Args:
        ui_milestone: The UI milestone to test.
        ui_plan: Supplies the test tool and project type.
        plan: Supplies the overall task text.
        project_dir: Project directory, also used for the file listing.

    Returns:
        The prompt text, sections joined by newlines.
    """
    raw_tool = str(ui_plan.test_tool or "").strip()
    known_tools = {"playwright": "Playwright", "maestro": "Maestro"}
    tool_name = known_tools.get(raw_tool.lower(), raw_tool or "Maestro")

    # MODE DECLARATION - short and clear
    sections = [
        "# MODE: UI TESTER (DaveLoop v1.4)",
        f"# Tool: {tool_name}",
        "",
        "You are in UI TESTER MODE. You do NOT build new features.",
        "You TEST existing UI, FIND issues, and FIX visual/UX problems.",
        "",
    ]

    # Task scope
    sections += [
        f"## Your Task: {ui_milestone.id} - {ui_milestone.title}",
        "",
        ui_milestone.description,
        "",
    ]

    # Test commands (if the plan specified any)
    if ui_milestone.test_commands:
        sections.append("## Test Commands")
        sections.extend(f"- `{cmd}`" for cmd in ui_milestone.test_commands)
        sections.append("")

    # Acceptance criteria
    sections.append("## Acceptance Criteria")
    sections.extend(f"- {criterion}" for criterion in ui_milestone.acceptance_criteria)
    sections.append("")

    # Reporting markers so Boris can parse issues/fixes from output
    sections += [
        "## Reporting",
        "Report issues as: `ISSUE FOUND: <description>`",
        "Report fixes as: `FIX APPLIED: <description>`",
        "",
    ]

    # Brief context
    sections += [
        "---",
        f"Project: {plan.task}",
        f"Directory: {project_dir}",
        f"Type: {ui_plan.project_type}",
        "",
    ]

    # File listing so DaveLoop knows what exists
    sections += ["### Current Files", _list_project_files(project_dir), ""]

    sections.append(f"**Do NOT build new features. Test {ui_milestone.id} only.**")

    return "\n".join(sections)
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def craft_ui_correction(output: str, ui_milestone: UIMilestone, ui_plan: UIPlan, reason: str) -> str:
    """Build a correction prompt for DaveLoop in UI Tester Mode after an off-plan or failed attempt.

    The tool label is matched case-insensitively against the known tools
    ("playwright", "maestro"). An unrecognised tool name is shown as given
    rather than being relabelled as Maestro; an empty value falls back to
    "Maestro".

    Args:
        output: Raw output of the previous attempt; ANSI codes are stripped
            and only the last 200 lines are embedded.
        ui_milestone: The UI milestone being retried.
        ui_plan: Supplies the test tool.
        reason: Why the previous attempt was rejected.

    Returns:
        The correction prompt, sections joined by newlines.
    """
    raw_tool = str(ui_plan.test_tool or "").strip()
    known_tools = {"playwright": "Playwright", "maestro": "Maestro"}
    tool_name = known_tools.get(raw_tool.lower(), raw_tool or "Maestro")

    sections = [
        "# MODE: UI TESTER (DaveLoop v1.4) - CORRECTION",
        f"# Tool: {tool_name}",
        "",
        f"## Correction Required: {ui_milestone.title}",
        f"**Problem:** {reason}",
        "",
    ]

    # Output tail. Slicing with -200 already returns the whole list when it
    # is shorter than that.
    tail = _clean_output(output).strip().splitlines()[-200:]
    sections += [
        "## Previous Output (last 200 lines)",
        "```",
        "\n".join(tail),
        "```",
        "",
    ]

    sections += [
        f"Fix the issues and complete {ui_milestone.id}: {ui_milestone.title}",
        ui_milestone.description,
        "",
    ]

    sections.append("## Acceptance Criteria")
    sections.extend(f"- {criterion}" for criterion in ui_milestone.acceptance_criteria)
    sections.append("")

    sections += [
        "Report issues: `ISSUE FOUND: <description>`",
        "Report fixes: `FIX APPLIED: <description>`",
        "",
        "**Do NOT build new features. UI testing only.**",
    ]

    return "\n".join(sections)
|