borisxdave 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boris.py +435 -6
- boris_prompt_data.py +194 -0
- {borisxdave-0.3.0.dist-info → borisxdave-0.3.2.dist-info}/METADATA +1 -1
- borisxdave-0.3.2.dist-info/RECORD +14 -0
- {borisxdave-0.3.0.dist-info → borisxdave-0.3.2.dist-info}/top_level.txt +2 -0
- engine.py +212 -14
- file_lock.py +123 -0
- prompts.py +31 -7
- state.py +80 -1
- borisxdave-0.3.0.dist-info/RECORD +0 -12
- {borisxdave-0.3.0.dist-info → borisxdave-0.3.2.dist-info}/WHEEL +0 -0
- {borisxdave-0.3.0.dist-info → borisxdave-0.3.2.dist-info}/entry_points.txt +0 -0
boris_prompt_data.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Boris prompt embedded as a Python string for reliable pip installation."""
|
|
2
|
+
|
|
3
|
+
BORIS_PROMPT = r"""# Boris - Project Manager Orchestrator
|
|
4
|
+
|
|
5
|
+
Boris is a **project manager**. He plans, delegates, and verifies. DaveLoop is the builder.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Boris's Job
|
|
10
|
+
|
|
11
|
+
1. **Plan** - Break the user's task into ordered milestones
|
|
12
|
+
2. **Craft** - Write precise, context-rich prompts for each milestone
|
|
13
|
+
3. **Delegate** - Spawn DaveLoop with the crafted prompt
|
|
14
|
+
4. **Verify** - Check DaveLoop's output against acceptance criteria
|
|
15
|
+
5. **Manage Git** - Initialize git, stage changes, and commit once the user's request is fully built
|
|
16
|
+
6. **Repeat** - Move to next milestone until project is done
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## How Boris Writes Prompts for DaveLoop
|
|
21
|
+
|
|
22
|
+
This is the most critical part. DaveLoop is a self-healing debug loop - it receives a bug/task description and iterates until resolved. Boris must give DaveLoop everything it needs in ONE prompt.
|
|
23
|
+
|
|
24
|
+
### Every DaveLoop prompt MUST include:
|
|
25
|
+
|
|
26
|
+
1. **What the project is** - High-level description so DaveLoop understands context
|
|
27
|
+
2. **What already exists** - Exact files and modules from completed milestones
|
|
28
|
+
3. **What to build NOW** - The specific milestone spec, detailed and unambiguous
|
|
29
|
+
4. **How it integrates** - Which existing files to import from, which functions to call
|
|
30
|
+
5. **Acceptance criteria** - Concrete, testable criteria DaveLoop can verify
|
|
31
|
+
6. **Boundaries** - What NOT to touch (files from other milestones)
|
|
32
|
+
7. **Verification steps** - Exact commands to prove the milestone works
|
|
33
|
+
|
|
34
|
+
### Prompt quality rules:
|
|
35
|
+
|
|
36
|
+
- **Be specific, not vague** - "Create a Flask app with /api/users GET endpoint returning JSON" not "build a backend"
|
|
37
|
+
- **Name files explicitly** - "Create src/routes/users.py" not "create the routes"
|
|
38
|
+
- **Name functions explicitly** - "Implement get_users() that queries the User model" not "add user functionality"
|
|
39
|
+
- **Describe data flow** - "The frontend calls /api/users, which calls db.get_all_users(), which returns List[User]"
|
|
40
|
+
- **Include test commands** - "Verify with: python -m pytest tests/test_users.py -v"
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## How Boris Checks DaveLoop's Work
|
|
45
|
+
|
|
46
|
+
After DaveLoop finishes, Boris checks:
|
|
47
|
+
|
|
48
|
+
1. **Did DaveLoop report [DAVELOOP:RESOLVED]?** - If yes, likely success
|
|
49
|
+
2. **Did DaveLoop's exit code = 0?** - If not, something crashed
|
|
50
|
+
3. **Do the acceptance criteria pass?** - Boris can ask Claude to analyze the output
|
|
51
|
+
4. **Did DaveLoop stay in scope?** - No scope creep into other milestones
|
|
52
|
+
|
|
53
|
+
### Verdicts:
|
|
54
|
+
- **RESOLVED** - Milestone done, commit and move on
|
|
55
|
+
- **OFF_PLAN** - DaveLoop built the wrong thing, send correction prompt
|
|
56
|
+
- **FAILED** - DaveLoop couldn't finish, retry or skip
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## How Boris Monitors DaveLoop in Real-Time
|
|
61
|
+
|
|
62
|
+
Boris doesn't just fire-and-forget. He watches DaveLoop's output line by line as it streams.
|
|
63
|
+
|
|
64
|
+
### Reasoning Block = Boris Check-in
|
|
65
|
+
|
|
66
|
+
DaveLoop outputs structured reasoning blocks (KNOWN/UNKNOWN/HYPOTHESIS/NEXT/WHY) before every action. Each reasoning block triggers a **Boris check-in** - Boris reports what DaveLoop accomplished since the last reasoning block and what he's about to do next:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
[Boris] === DaveLoop Check-in #3 ===
|
|
70
|
+
[Boris] Done so far:
|
|
71
|
+
[Boris] - Created models.py
|
|
72
|
+
[Boris] - Created config.py
|
|
73
|
+
[Boris] - Ran tests: pytest tests/ -v
|
|
74
|
+
[Boris] Knows: Database models created, need seed data next
|
|
75
|
+
[Boris] Thinking: Seed data should include sample products and users
|
|
76
|
+
[Boris] Next: Create seed_data.py with 10 sample products
|
|
77
|
+
[Boris] ===========================
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Boris tracks every file write, edit, bash command, and test result between reasoning blocks. When a new reasoning block fires, Boris summarizes what DaveLoop accomplished since the last check-in, plus DaveLoop's current thinking and next move.
|
|
81
|
+
|
|
82
|
+
When DaveLoop finishes, Boris prints a full run summary of all tracked actions.
|
|
83
|
+
|
|
84
|
+
### Off-Rail Detection and Text Interrupt
|
|
85
|
+
|
|
86
|
+
Boris watches for signs that DaveLoop is going off-rail:
|
|
87
|
+
- **Wrong files** - DaveLoop creating/modifying files outside the milestone's allowed list
|
|
88
|
+
- **Scope creep** - DaveLoop mentioning "build the entire project" or "implement all milestones"
|
|
89
|
+
- **Wrong milestone** - DaveLoop referencing other milestone IDs (M2, M3) while building M1
|
|
90
|
+
|
|
91
|
+
When Boris detects off-rail behavior, he sends a **text interrupt** to DaveLoop's stdin:
|
|
92
|
+
```
|
|
93
|
+
[Boris INTERRUPT] wait - you are creating orders.py which is outside the scope of M1.
|
|
94
|
+
Only touch: models.py, config.py. Focus on M1: Project Setup only.
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
DaveLoop supports text interrupts (wait/pause/add/done) and will process Boris's correction mid-run.
|
|
98
|
+
|
|
99
|
+
### Interrupt Limits
|
|
100
|
+
|
|
101
|
+
Boris sends a maximum of 3 interrupts per DaveLoop run. If DaveLoop keeps going off-rail after 3 interrupts, Boris lets it finish and handles it at the verdict stage (OFF_PLAN correction or FAILED retry).
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## How Boris Handles Failures
|
|
106
|
+
|
|
107
|
+
1. **First failure** - Retry with the same prompt (DaveLoop might just need another iteration)
|
|
108
|
+
2. **Off-plan work** - Send correction prompt explaining what went wrong and what's expected
|
|
109
|
+
3. **Repeated failure** - Skip milestone, log warning, continue with next milestone
|
|
110
|
+
4. **Never get stuck** - Boris always moves forward. Skip and warn, don't loop forever.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## How Boris Manages Git
|
|
115
|
+
|
|
116
|
+
After each RESOLVED milestone:
|
|
117
|
+
1. `git add -A` in the project directory
|
|
118
|
+
2. `git commit -m "feat(milestone-{id}): {title}"`
|
|
119
|
+
3. `git push` if remote is configured
|
|
120
|
+
|
|
121
|
+
On completion: final commit + push with "chore: Boris orchestration complete"
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Boris's State
|
|
126
|
+
|
|
127
|
+
Boris saves progress after every milestone to `.boris/state.json` so he can resume if interrupted. The state tracks:
|
|
128
|
+
- The full plan
|
|
129
|
+
- Which milestones are completed/skipped/pending
|
|
130
|
+
- Current milestone index
|
|
131
|
+
- Retry counts
|
|
132
|
+
- Timestamps
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## How Boris Exits
|
|
137
|
+
|
|
138
|
+
Boris always exits cleanly with a proper summary and exit code.
|
|
139
|
+
|
|
140
|
+
### Exit Codes:
|
|
141
|
+
- **0** - All milestones completed successfully
|
|
142
|
+
- **1** - Some milestones were skipped or failed
|
|
143
|
+
- **130** - Interrupted by user (Ctrl+C), state saved for resume
|
|
144
|
+
|
|
145
|
+
### Summary Report:
|
|
146
|
+
|
|
147
|
+
When Boris finishes (all milestones processed), he generates a **summary markdown file** at `plans/summary_YYYYMMDD_HHMMSS.md` containing:
|
|
148
|
+
- The original task description
|
|
149
|
+
- Total milestones: completed, skipped, failed
|
|
150
|
+
- Per-milestone breakdown: status, title, files created/modified
|
|
151
|
+
- Timestamps: start time, end time, total duration
|
|
152
|
+
- Skipped milestones: reasons why they were skipped
|
|
153
|
+
|
|
154
|
+
This summary is Boris's final deliverable - a complete record of what was built, what was skipped, and why.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Phase 2: UI Testing & Polish (DaveLoop v1.4)
|
|
159
|
+
|
|
160
|
+
After all structural milestones are completed, Boris enters the UI Testing & Polish phase.
|
|
161
|
+
|
|
162
|
+
### How It Works:
|
|
163
|
+
1. Boris asks Claude to create UI testing milestones (Claude already knows the project - it just built it)
|
|
164
|
+
2. Claude decides the project type and test tool (Playwright for web, Maestro for mobile)
|
|
165
|
+
3. Boris shifts DaveLoop to UI Tester Mode (v1.4) - same DaveLoop, different orders
|
|
166
|
+
4. DaveLoop tests UI flows, finds issues, fixes them
|
|
167
|
+
5. Boris verifies each UI milestone with UI-specific verdicts
|
|
168
|
+
|
|
169
|
+
### DaveLoop v1.4 - UI Tester Mode:
|
|
170
|
+
- Does NOT build new features
|
|
171
|
+
- Tests existing UI flows with Playwright/Maestro
|
|
172
|
+
- Reports issues: `ISSUE FOUND: <description>`
|
|
173
|
+
- Applies fixes: `FIX APPLIED: <description>`
|
|
174
|
+
- Captures screenshots for visual verification
|
|
175
|
+
|
|
176
|
+
Boris doesn't teach DaveLoop how to use Playwright or Maestro. Boris scopes the task, ships DaveLoop off, and DaveLoop handles the rest.
|
|
177
|
+
|
|
178
|
+
### Skip UI Testing:
|
|
179
|
+
Use `--skip-ui` flag to skip the UI testing phase entirely.
|
|
180
|
+
|
|
181
|
+
### Resume Support:
|
|
182
|
+
If interrupted during UI testing, `boris -r -d <project>` resumes directly into the UI phase.
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Boris's Personality
|
|
187
|
+
|
|
188
|
+
Boris is methodical, relentless, and focused:
|
|
189
|
+
- He does not write code. He manages.
|
|
190
|
+
- He does not discuss. He acts.
|
|
191
|
+
- He does not get stuck. He moves forward.
|
|
192
|
+
- He trusts DaveLoop to build. He verifies the results.
|
|
193
|
+
- He keeps perfect records (state, logs, plan markdown, summary report).
|
|
194
|
+
"""
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
boris.py,sha256=W5k3eNJzhHSKIAtzvwmTYHX7PBCaxxLwRzdAize6Xy8,63235
|
|
2
|
+
boris_prompt_data.py,sha256=ZBvWMrQOBrl07cNFzgeGumJ54cYg0Be9RSSnK6a3YQY,7940
|
|
3
|
+
config.py,sha256=KfFKyCGasdm1yBvIRFv-ykzA_oRo-zu1Euu9YC7V1Cg,324
|
|
4
|
+
engine.py,sha256=Pdu0i4XrNxiU246EV8MjXvYp9CBvuJWGLA18QMIYvFM,37468
|
|
5
|
+
file_lock.py,sha256=1YriAAayVy8YFe7JFuGIloiJWWvN2FSY0Ry1sB043Sc,4823
|
|
6
|
+
git_manager.py,sha256=BuuTT4naPb5-jLhOik1xHM2ztzuKvJ_bnecZmlYgwFs,8493
|
|
7
|
+
planner.py,sha256=UrU--kBvzvyD1gOVxIn-kdbJiu8tt4rcowsln66WkGw,5670
|
|
8
|
+
prompts.py,sha256=-eSwZ-oTBR12Wx4Md57sVF816T9vHEFlMsvT4zMkwOg,35187
|
|
9
|
+
state.py,sha256=2DCPlcM7SBlCkwWvcnIabltcduv74W46FZ7DxKurWkw,5752
|
|
10
|
+
borisxdave-0.3.2.dist-info/METADATA,sha256=NxpmhoGuCnJCHbNc7bkV03dllb6AAGu-JacgT_JJIjk,133
|
|
11
|
+
borisxdave-0.3.2.dist-info/WHEEL,sha256=hPN0AlP2dZM_3ZJZWP4WooepkmU9wzjGgCLCeFjkHLA,92
|
|
12
|
+
borisxdave-0.3.2.dist-info/entry_points.txt,sha256=a6FLWgxiQjGMJIRSV5sDxaaaaQchunm04ZuzX8N7-6I,61
|
|
13
|
+
borisxdave-0.3.2.dist-info/top_level.txt,sha256=C3fTm1vt0QEQyJtvSZiFiOvmR4d0hWmmr6hujJqFrQE,82
|
|
14
|
+
borisxdave-0.3.2.dist-info/RECORD,,
|
engine.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Boris engine - execution and monitoring (merged from executor + monitor)."""
|
|
2
2
|
import concurrent.futures
|
|
3
3
|
import enum
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
import os
|
|
6
7
|
import re
|
|
@@ -8,8 +9,10 @@ import shutil
|
|
|
8
9
|
import subprocess
|
|
9
10
|
import sys
|
|
10
11
|
import tempfile
|
|
12
|
+
import time
|
|
11
13
|
from dataclasses import dataclass
|
|
12
14
|
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
13
16
|
from typing import Optional
|
|
14
17
|
|
|
15
18
|
# Force unbuffered stdout for real-time output on Windows
|
|
@@ -62,6 +65,49 @@ def _clean_output(text: str) -> str:
|
|
|
62
65
|
return text
|
|
63
66
|
|
|
64
67
|
|
|
68
|
+
# --- Worker Status (B7: Swarm Dashboard) ---
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _write_worker_status(project_dir: str, milestone_id: str, status: dict):
    """Write one worker's status to .boris/workers/<milestone_id>.json for the swarm dashboard.

    Best-effort: the dashboard is informational only, so filesystem and
    serialization failures are swallowed instead of interrupting the run.

    Args:
        project_dir: Project root; the status directory is created beneath it.
        milestone_id: Used as the stem of the status file name.
        status: JSON-serializable status fields. The dict itself is left
            untouched; an "updated_at" timestamp (time.time()) is added to
            the copy that gets written.
    """
    # Stamp a copy so the caller's dict is not mutated as a side effect.
    payload = dict(status)
    payload["updated_at"] = time.time()

    tmp_file = None
    try:
        # Serialize first: a non-serializable value then fails before any file is touched.
        text = json.dumps(payload, indent=2)
        status_dir = Path(project_dir) / ".boris" / "workers"
        status_dir.mkdir(parents=True, exist_ok=True)
        status_file = status_dir / f"{milestone_id}.json"
        # Write to a sibling temp file and swap it in, so a concurrent reader
        # never sees a half-written status file. The ".tmp" suffix keeps the
        # temp file out of "*.json" globs.
        tmp_file = status_dir / f"{milestone_id}.json.tmp"
        tmp_file.write_text(text, encoding="utf-8")
        os.replace(tmp_file, status_file)
    except (OSError, TypeError, ValueError):
        # Non-critical: dashboard is informational only. Drop any leftover temp file.
        if tmp_file is not None:
            try:
                tmp_file.unlink()
            except OSError:
                pass
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def read_worker_statuses(project_dir: str) -> dict:
    """Read all worker status files from .boris/workers/.

    Args:
        project_dir: Project root containing the .boris/workers/ directory.

    Returns:
        {milestone_id: parsed_json} where milestone_id is the file stem.
        Empty when the directory does not exist. Files that cannot be read,
        decoded or parsed are skipped.
    """
    statuses = {}
    status_dir = Path(project_dir) / ".boris" / "workers"
    if not status_dir.exists():
        return statuses
    for status_file in status_dir.glob("*.json"):
        try:
            statuses[status_file.stem] = json.loads(
                status_file.read_text(encoding="utf-8")
            )
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
            # so a corrupt or partially written file is skipped, not fatal.
            continue
    return statuses
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def clear_worker_statuses(project_dir: str):
    """Remove all worker status files (call after a batch completes).

    Deletes every "*.json" file directly under .boris/workers/ in
    ``project_dir``. Does nothing when that directory is missing. Deletion
    is best-effort: a file that cannot be removed is left in place.

    Args:
        project_dir: Project root containing the .boris/workers/ directory.
    """
    status_dir = Path(project_dir) / ".boris" / "workers"
    if not status_dir.exists():
        return
    for status_file in status_dir.glob("*.json"):
        try:
            status_file.unlink()
        except OSError:
            # Includes FileNotFoundError (an OSError subclass), e.g. when the
            # file disappeared between the glob and the unlink.
            pass
|
|
109
|
+
|
|
110
|
+
|
|
65
111
|
# --- Execution (from executor.py) ---
|
|
66
112
|
|
|
67
113
|
|
|
@@ -246,6 +292,17 @@ def run(prompt: str, project_dir: str, max_iterations: int = None,
|
|
|
246
292
|
|
|
247
293
|
print(f" [Boris] Spawning DaveLoop: max_iter={max_iter}, project={project_dir}", flush=True)
|
|
248
294
|
logger.info("Spawning DaveLoop: max_iter=%d, project=%s", max_iter, project_dir)
|
|
295
|
+
|
|
296
|
+
# Write initial worker status for dashboard (B7)
|
|
297
|
+
if milestone:
|
|
298
|
+
_write_worker_status(project_dir, milestone.id, {
|
|
299
|
+
"milestone_id": milestone.id,
|
|
300
|
+
"title": milestone.title,
|
|
301
|
+
"state": "starting",
|
|
302
|
+
"started_at": time.time(),
|
|
303
|
+
"actions": 0,
|
|
304
|
+
"interrupts": 0,
|
|
305
|
+
})
|
|
249
306
|
logger.debug("Prompt length: %d chars", len(prompt))
|
|
250
307
|
|
|
251
308
|
# Boris's own log for this execution
|
|
@@ -280,6 +337,14 @@ def run(prompt: str, project_dir: str, max_iterations: int = None,
|
|
|
280
337
|
all_accomplishments = [] # cumulative for the whole run
|
|
281
338
|
interrupt_count = 0
|
|
282
339
|
MAX_INTERRUPTS = 3 # After 3 interrupts, let DaveLoop finish and fail at verdict
|
|
340
|
+
# Off-rail detection is suppressed during prompt echo phase.
|
|
341
|
+
# DaveLoop echoes/processes the prompt at startup, which contains sibling
|
|
342
|
+
# milestone IDs (from the PARALLEL EXECUTION WARNING section). Without this
|
|
343
|
+
# guard, _check_off_rail() fires false positives on the prompt's own text.
|
|
344
|
+
# We suppress until DaveLoop starts actual work (first reasoning block) or
|
|
345
|
+
# after a generous line threshold.
|
|
346
|
+
offrail_active = False
|
|
347
|
+
OFFRAIL_WARMUP_LINES = 80 # Lines before off-rail activates even without reasoning
|
|
283
348
|
|
|
284
349
|
for raw_line in process.stdout:
|
|
285
350
|
line = raw_line.decode("utf-8", errors="replace")
|
|
@@ -298,10 +363,17 @@ def run(prompt: str, project_dir: str, max_iterations: int = None,
|
|
|
298
363
|
accomplishments.append(acc)
|
|
299
364
|
all_accomplishments.append(acc)
|
|
300
365
|
|
|
366
|
+
# Activate off-rail detection after warmup threshold (prompt echo complete)
|
|
367
|
+
if not offrail_active and len(output_lines) >= OFFRAIL_WARMUP_LINES:
|
|
368
|
+
offrail_active = True
|
|
369
|
+
|
|
301
370
|
# --- Reasoning block detection ---
|
|
302
371
|
if "REASONING" in clean and ("===" in clean or "---" in clean or "KNOWN" in clean):
|
|
303
372
|
in_reasoning = True
|
|
304
373
|
reasoning_lines = []
|
|
374
|
+
# First reasoning block means DaveLoop is doing real work - activate off-rail
|
|
375
|
+
if not offrail_active:
|
|
376
|
+
offrail_active = True
|
|
305
377
|
continue
|
|
306
378
|
|
|
307
379
|
if in_reasoning:
|
|
@@ -317,14 +389,26 @@ def run(prompt: str, project_dir: str, max_iterations: int = None,
|
|
|
317
389
|
reasoning[key] = rl.split(":", 1)[1].strip()
|
|
318
390
|
if reasoning:
|
|
319
391
|
_boris_commentary(reasoning, reasoning_count, accomplishments)
|
|
392
|
+
# Update worker status for dashboard (B7)
|
|
393
|
+
if milestone:
|
|
394
|
+
_write_worker_status(project_dir, milestone.id, {
|
|
395
|
+
"milestone_id": milestone.id,
|
|
396
|
+
"title": milestone.title,
|
|
397
|
+
"state": "working",
|
|
398
|
+
"started_at": time.time(),
|
|
399
|
+
"reasoning_blocks": reasoning_count,
|
|
400
|
+
"actions": len(all_accomplishments),
|
|
401
|
+
"interrupts": interrupt_count,
|
|
402
|
+
"last_action": all_accomplishments[-1] if all_accomplishments else None,
|
|
403
|
+
})
|
|
320
404
|
# Reset per-block accomplishments, keep cumulative
|
|
321
405
|
accomplishments = []
|
|
322
406
|
reasoning_lines = []
|
|
323
407
|
else:
|
|
324
408
|
reasoning_lines.append(clean)
|
|
325
409
|
|
|
326
|
-
# --- Off-rail detection ---
|
|
327
|
-
if milestone and interrupt_count < MAX_INTERRUPTS:
|
|
410
|
+
# --- Off-rail detection (suppressed during prompt echo phase) ---
|
|
411
|
+
if milestone and interrupt_count < MAX_INTERRUPTS and offrail_active:
|
|
328
412
|
interrupt_msg = _check_off_rail(clean, milestone)
|
|
329
413
|
if interrupt_msg:
|
|
330
414
|
_send_interrupt(process, interrupt_msg, boris_log)
|
|
@@ -332,12 +416,29 @@ def run(prompt: str, project_dir: str, max_iterations: int = None,
|
|
|
332
416
|
if interrupt_count >= MAX_INTERRUPTS:
|
|
333
417
|
warn = (
|
|
334
418
|
f"[Boris] Sent {MAX_INTERRUPTS} interrupts. "
|
|
335
|
-
f"DaveLoop keeps going off-rail.
|
|
419
|
+
f"DaveLoop keeps going off-rail. Terminating process."
|
|
336
420
|
)
|
|
337
421
|
print(f"\n {warn}\n", flush=True)
|
|
338
|
-
logger.warning(
|
|
422
|
+
logger.warning("Terminating DaveLoop process after %d ignored interrupts", MAX_INTERRUPTS)
|
|
339
423
|
if boris_log:
|
|
340
424
|
boris_log.write(f"\n{warn}\n")
|
|
425
|
+
# Hard kill: terminate the process since interrupts are being ignored
|
|
426
|
+
process.terminate()
|
|
427
|
+
try:
|
|
428
|
+
process.wait(timeout=10)
|
|
429
|
+
except subprocess.TimeoutExpired:
|
|
430
|
+
process.kill()
|
|
431
|
+
process.wait(timeout=5)
|
|
432
|
+
output = "".join(output_lines)
|
|
433
|
+
boris_log.write(f"\n=== DaveLoop FORCE KILLED after {MAX_INTERRUPTS} ignored interrupts ===\n")
|
|
434
|
+
boris_log.close()
|
|
435
|
+
boris_log = None # prevent double-close in finally
|
|
436
|
+
return ExecutionResult(
|
|
437
|
+
output=output,
|
|
438
|
+
exit_code=-1,
|
|
439
|
+
resolved=False,
|
|
440
|
+
log_path=log_path,
|
|
441
|
+
)
|
|
341
442
|
|
|
342
443
|
process.wait()
|
|
343
444
|
output = "".join(output_lines)
|
|
@@ -414,28 +515,125 @@ def _setup_log(project_dir: str) -> str:
|
|
|
414
515
|
return os.path.join(_LOGS_DIR, f"boris_exec_{timestamp}.log")
|
|
415
516
|
|
|
416
517
|
|
|
417
|
-
def
|
|
518
|
+
def _create_worktree(project_dir: str, milestone_id: str) -> Optional[tuple]:
    """Create a git worktree on a new branch for one milestone.

    Runs ``git worktree add -b boris/<milestone_id> <path>`` inside
    ``project_dir``; the worktree lives at
    ``<project_dir>/.boris_worktree_<milestone_id>``.

    Args:
        project_dir: Directory git is run from; also the parent of the worktree.
        milestone_id: Used in both the worktree directory name and the branch name.

    Returns:
        (worktree_path, branch_name) on success, or None when git exits
        non-zero, times out (30s) or cannot be launched. Failures are logged
        as warnings, never raised.
    """
    worktree_path = os.path.join(project_dir, f".boris_worktree_{milestone_id}")
    branch_name = f"boris/{milestone_id}"
    try:
        result = subprocess.run(
            ["git", "worktree", "add", "-b", branch_name, worktree_path],
            cwd=project_dir, capture_output=True, timeout=30,
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("Worktree creation error for %s: %s", milestone_id, e)
        return None

    if result.returncode != 0:
        logger.warning("Failed to create worktree for %s: %s", milestone_id, result.stderr.strip())
        return None

    logger.info("Created worktree for %s at %s", milestone_id, worktree_path)
    return (worktree_path, branch_name)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def _merge_worktree(project_dir: str, worktree_path: str, branch_name: str, milestone_id: str) -> bool:
    """Merge a milestone's worktree branch into the current branch, then clean up.

    Args:
        project_dir: Directory the git commands are run from.
        worktree_path: Worktree directory handed to _cleanup_worktree afterwards.
        branch_name: Branch to merge; also handed to _cleanup_worktree.
        milestone_id: Used in the merge commit message and in log messages.

    Returns:
        True when ``git merge`` exits 0. False when it exits non-zero (the
        merge is then aborted) or when running git fails or times out.

    Note:
        _cleanup_worktree runs in every case, including a failed merge.
        NOTE(review): confirm that discarding the branch after a failed
        merge is intended.
    """
    merge_cmd = [
        "git", "merge", branch_name, "--no-edit", "-m",
        f"Merge boris/{milestone_id} worktree back",
    ]
    try:
        outcome = subprocess.run(
            merge_cmd,
            cwd=project_dir, capture_output=True, timeout=60,
            encoding="utf-8", errors="replace",
        )
        if outcome.returncode == 0:
            return True

        logger.warning("Merge failed for %s: %s", milestone_id, outcome.stderr.strip())
        # Back out of the half-finished merge so the working tree is usable again.
        subprocess.run(["git", "merge", "--abort"], cwd=project_dir,
                       capture_output=True, timeout=10)
        return False
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("Merge error for %s: %s", milestone_id, e)
        return False
    finally:
        _cleanup_worktree(project_dir, worktree_path, branch_name)
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def _cleanup_worktree(project_dir: str, worktree_path: str, branch_name: str):
    """Remove a git worktree and its branch (best-effort).

    Runs ``git worktree remove <path> --force`` and then
    ``git branch -D <branch>`` from ``project_dir``. Each step is attempted
    independently. Failures never raise; they are logged at debug level so
    that a leftover worktree or branch can be diagnosed.

    Args:
        project_dir: Directory the git commands are run from.
        worktree_path: Worktree directory to remove.
        branch_name: Branch to force-delete.
    """
    steps = (
        (["git", "worktree", "remove", worktree_path, "--force"], 30),
        (["git", "branch", "-D", branch_name], 10),
    )
    for cmd, limit in steps:
        try:
            done = subprocess.run(cmd, cwd=project_dir, capture_output=True, timeout=limit)
        except (subprocess.SubprocessError, OSError) as exc:
            logger.debug("Worktree cleanup command %s failed: %s", cmd, exc)
            continue
        if done.returncode != 0:
            # Output is captured as bytes here; decode leniently for the log only.
            detail = (done.stderr or b"").decode("utf-8", errors="replace").strip()
            logger.debug("Worktree cleanup command %s exited %d: %s",
                         cmd, done.returncode, detail)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
def run_parallel(tasks: list, project_dir: str, max_iterations: int = None,
                 isolation: str = "none") -> list:
    """Run multiple DaveLoop instances in parallel using ThreadPoolExecutor.

    Args:
        tasks: List of (prompt, milestone) tuples.
        project_dir: Working directory for the project.
        max_iterations: Max DaveLoop iterations per milestone.
        isolation: Isolation strategy - "none" (shared dir), "worktree" (git worktrees).
            Worktrees are only used when there is more than one task. A task
            whose worktree cannot be created falls back to the shared dir.

    Returns:
        List of (milestone, ExecutionResult) tuples, one per input task, in
        completion order. Empty when ``tasks`` is empty.
    """
    results = []
    if not tasks:
        # ThreadPoolExecutor rejects max_workers=0, so there is nothing to schedule.
        return results

    # milestone_id -> (worktree_path, branch_name). Stays empty without worktree
    # isolation, in which case every task runs directly in project_dir.
    worktree_map = {}
    if isolation == "worktree" and len(tasks) > 1:
        for _prompt, milestone in tasks:
            wt = _create_worktree(project_dir, milestone.id)
            if wt:
                worktree_map[milestone.id] = wt
            else:
                logger.warning("Worktree failed for %s, falling back to shared dir", milestone.id)

    # Worktrees not yet merged or removed; whatever is left is cleaned up in
    # the finally block if a worker raises.
    unhandled = dict(worktree_map)

    def _run_one(prompt_milestone):
        prompt, milestone = prompt_milestone
        wt_info = worktree_map.get(milestone.id)
        work_dir = wt_info[0] if wt_info else project_dir
        result = run(prompt, work_dir, max_iterations, milestone=milestone)
        return (milestone, result)

    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(tasks)) as executor:
            futures = [executor.submit(_run_one, t) for t in tasks]
            for future in concurrent.futures.as_completed(futures):
                milestone, result = future.result()
                wt_info = unhandled.pop(milestone.id, None)
                if wt_info and result.resolved:
                    # Merge worktree back if it was used (the merge also cleans up).
                    wt_path, branch = wt_info
                    merge_ok = _merge_worktree(project_dir, wt_path, branch, milestone.id)
                    if not merge_ok:
                        # NOTE(review): the result is still reported as resolved
                        # even though its changes were not merged.
                        print(f" [Boris] WARNING: Merge conflict for {milestone.id} worktree", flush=True)
                        logger.warning("Worktree merge conflict for %s", milestone.id)
                elif wt_info:
                    # Failed milestone - just clean up worktree
                    _cleanup_worktree(project_dir, wt_info[0], wt_info[1])
                results.append((milestone, result))
    finally:
        # Only non-empty when an exception interrupted the loop above.
        for wt_path, branch in unhandled.values():
            _cleanup_worktree(project_dir, wt_path, branch)

    return results
|
|
441
639
|
|
file_lock.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""File-level locking for parallel swarm workers.
|
|
2
|
+
|
|
3
|
+
Prevents parallel DaveLoop agents from corrupting shared files by providing
|
|
4
|
+
file-level locks via atomic file creation. Works on both Windows and Unix.
|
|
5
|
+
"""
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import time
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FileLockManager:
    """Manages file-level locks for parallel swarm workers.

    Lock state is stored in .boris/locks/ in the project directory.
    Each lock is a file created atomically (exclusive-create mode) holding a
    JSON record: owner (milestone ID), timestamp, file path.

    Locks are advisory: they only coordinate code that goes through this
    class. There is no stale-lock detection; a lock whose holder died stays
    until release_all() or cleanup() removes it.
    """

    def __init__(self, project_dir: str):
        """Create the lock directory under ``project_dir`` if it is missing."""
        self.lock_dir = Path(project_dir) / ".boris" / "locks"
        self.lock_dir.mkdir(parents=True, exist_ok=True)

    def _lock_path(self, filepath: str) -> Path:
        """Get the lock file path for a given source file.

        The normalized path is flattened into a single file name. Path
        separators and ":" (Windows drive letters) become underscores so the
        name is always a plain file inside the lock directory.
        """
        safe_name = os.path.normpath(filepath)
        for ch in (os.sep, "/", "\\", ":"):
            safe_name = safe_name.replace(ch, "_")
        return self.lock_dir / f"{safe_name}.lock"

    @staticmethod
    def _read_lock(lock_path: Path):
        """Return the lock record as a dict, or None if unreadable or malformed."""
        try:
            data = json.loads(lock_path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers JSON and UTF-8 decoding errors.
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _remove(lock_path: Path):
        """Delete a lock file, ignoring the case where it is already gone."""
        try:
            lock_path.unlink()
        except FileNotFoundError:
            pass

    @contextmanager
    def lock_file(self, filepath: str, owner: str, timeout: int = 30):
        """Acquire a lock on a file. Blocks until available or timeout.

        Args:
            filepath: The file to lock (relative or absolute path).
            owner: Identifier for the lock owner (e.g. milestone ID).
            timeout: Max seconds to wait for the lock.

        Raises:
            TimeoutError: If the lock cannot be acquired within timeout.
        """
        lock_path = self._lock_path(filepath)
        # Monotonic clock: the wait is not affected by wall-clock adjustments.
        deadline = time.monotonic() + timeout

        while True:
            try:
                # Atomic create-or-fail: 'x' mode fails if file exists
                handle = open(lock_path, "x", encoding="utf-8")
            except FileExistsError:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    # Read who holds the lock for better error messages
                    holder = self._read_lock(lock_path)
                    holder_info = (
                        f" (held by {holder.get('owner', 'unknown')})" if holder is not None else ""
                    )
                    raise TimeoutError(
                        f"Could not acquire lock on {filepath}{holder_info} "
                        f"after {timeout}s"
                    )
                time.sleep(min(0.5, remaining))
                continue

            try:
                with handle:
                    handle.write(json.dumps({
                        "owner": owner,
                        "file": filepath,
                        "time": time.time(),
                    }))
            except BaseException:
                # The lock file exists but its record could not be written:
                # remove it so a failed acquisition does not block other workers.
                self._remove(lock_path)
                raise
            break

        try:
            yield
        finally:
            self._remove(lock_path)

    def get_locked_files(self) -> dict:
        """Return dict of currently locked files and their owners.

        Keys are the "file" field of each lock record (falling back to the
        lock file name with underscores turned back into separators); values
        are the "owner" field, or "unknown". Unreadable records are skipped.
        """
        locks = {}
        for entry in self.lock_dir.glob("*.lock"):
            data = self._read_lock(entry)
            if data is None:
                continue
            original_file = data.get("file", entry.stem.replace("_", os.sep))
            locks[original_file] = data.get("owner", "unknown")
        return locks

    def is_locked(self, filepath: str) -> bool:
        """Check if a file is currently locked (its lock file exists)."""
        return self._lock_path(filepath).exists()

    def lock_owner(self, filepath: str) -> str:
        """Return the owner of the lock on a file, or None if unlocked.

        None is also returned when the lock record cannot be read or has no
        "owner" field.
        """
        lock_path = self._lock_path(filepath)
        if not lock_path.exists():
            return None
        data = self._read_lock(lock_path)
        return data.get("owner") if data is not None else None

    def release_all(self, owner: str):
        """Release all locks held by a specific owner (milestone cleanup)."""
        for entry in self.lock_dir.glob("*.lock"):
            data = self._read_lock(entry)
            if data is None or data.get("owner") != owner:
                continue
            try:
                entry.unlink()
            except OSError:
                # Best-effort: already gone or not removable.
                pass

    def cleanup(self):
        """Remove all lock files (use after all workers complete)."""
        for entry in self.lock_dir.glob("*.lock"):
            try:
                entry.unlink()
            except OSError:
                pass
|