pdd-cli 0.0.90__py3-none-any.whl → 0.0.121__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +38 -6
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +506 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +537 -0
- pdd/agentic_common.py +533 -770
- pdd/agentic_crash.py +2 -1
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +582 -0
- pdd/agentic_fix.py +118 -3
- pdd/agentic_update.py +27 -9
- pdd/agentic_verify.py +3 -2
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +63 -53
- pdd/auto_include.py +236 -3
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +195 -23
- pdd/cmd_test_main.py +345 -197
- pdd/code_generator.py +4 -2
- pdd/code_generator_main.py +118 -32
- pdd/commands/__init__.py +6 -0
- pdd/commands/analysis.py +113 -48
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +358 -0
- pdd/commands/fix.py +155 -114
- pdd/commands/generate.py +5 -0
- pdd/commands/maintenance.py +3 -2
- pdd/commands/misc.py +8 -0
- pdd/commands/modify.py +225 -163
- pdd/commands/sessions.py +284 -0
- pdd/commands/utility.py +12 -7
- pdd/construct_paths.py +334 -32
- pdd/context_generator_main.py +167 -170
- pdd/continue_generation.py +6 -3
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +44 -7
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +68 -20
- pdd/core/errors.py +4 -0
- pdd/core/remote_session.py +61 -0
- pdd/crash_main.py +219 -23
- pdd/data/llm_model.csv +4 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +208 -34
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +291 -38
- pdd/fix_main.py +208 -6
- pdd/fix_verification_errors_loop.py +235 -26
- pdd/fix_verification_main.py +269 -83
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +46 -5
- pdd/generate_test.py +212 -151
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +309 -20
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +7 -5
- pdd/insert_includes.py +2 -1
- pdd/llm_invoke.py +531 -97
- pdd/load_prompt_template.py +15 -34
- pdd/operation_log.py +342 -0
- pdd/path_resolution.py +140 -0
- pdd/postprocess.py +122 -97
- pdd/preprocess.py +68 -12
- pdd/preprocess_main.py +33 -1
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +140 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
- pdd/prompts/agentic_update_LLM.prompt +192 -338
- pdd/prompts/auto_include_LLM.prompt +22 -0
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +571 -14
- pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
- pdd/prompts/generate_test_LLM.prompt +19 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +366 -0
- pdd/prompts/insert_includes_LLM.prompt +262 -252
- pdd/prompts/prompt_code_diff_LLM.prompt +123 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/remote_session.py +876 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1347 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +217 -0
- pdd/server/token_counter.py +222 -0
- pdd/summarize_directory.py +236 -237
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +329 -47
- pdd/sync_main.py +272 -28
- pdd/sync_orchestration.py +289 -211
- pdd/sync_order.py +304 -0
- pdd/template_expander.py +161 -0
- pdd/templates/architecture/architecture_json.prompt +41 -46
- pdd/trace.py +1 -1
- pdd/track_cost.py +0 -13
- pdd/unfinished_prompt.py +2 -1
- pdd/update_main.py +68 -26
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +15 -10
- pdd_cli-0.0.121.dist-info/RECORD +229 -0
- pdd_cli-0.0.90.dist-info/RECORD +0 -153
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/agentic_e2e_fix_orchestrator.py (new file)
@@ -0,0 +1,582 @@
```python
from __future__ import annotations

import hashlib
import os
import subprocess
import sys
import time
import json
from datetime import datetime
from pathlib import Path
from typing import List, Tuple, Dict, Any, Optional, Set

from rich.console import Console

from .agentic_common import (
    run_agentic_task,
    load_workflow_state,
    save_workflow_state,
    clear_workflow_state,
    DEFAULT_MAX_RETRIES,
)
from .load_prompt_template import load_prompt_template

# Constants
STEP_NAMES = {
    1: "unit_tests",
    2: "e2e_tests",
    3: "root_cause",
    4: "fix_e2e_tests",
    5: "identify_devunits",
    6: "create_unit_tests",
    7: "verify_tests",
    8: "run_pdd_fix",
    9: "verify_all",
}

STEP_DESCRIPTIONS = {
    1: "Running unit tests from issue",
    2: "Running e2e tests",
    3: "Analyzing root cause",
    4: "Fixing e2e tests",
    5: "Identifying dev units",
    6: "Creating unit tests",
    7: "Verifying tests detect bugs",
    8: "Running pdd fix",
    9: "Final verification",
}

# Per-step timeouts for the 9-step agentic e2e fix workflow
E2E_FIX_STEP_TIMEOUTS: Dict[int, float] = {
    1: 340.0,   # Run unit tests from issue, pdd fix failures
    2: 240.0,   # Run e2e tests, check completion (early exit)
    3: 340.0,   # Root cause analysis (code vs test vs both)
    4: 340.0,   # Fix e2e tests if needed
    5: 340.0,   # Identify dev units involved in failures
    6: 600.0,   # Create/append unit tests for dev units (Complex)
    7: 600.0,   # Verify unit tests detect bugs (Complex)
    8: 1000.0,  # Run pdd fix on failing dev units (Most Complex - multiple LLM calls)
    9: 240.0,   # Final verification, loop control
}

console = Console()


def _get_state_dir(cwd: Path) -> Path:
    """Returns the state directory .pdd/e2e-fix-state/ relative to git root."""
    # Simple heuristic: look for .git, otherwise use cwd
    d = cwd.resolve()
    root = d
    while d != d.parent:
        if (d / ".git").exists():
            root = d
            break
        d = d.parent

    state_dir = root / ".pdd" / "e2e-fix-state"
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir


def _parse_changed_files(output: str) -> List[str]:
    """Parses FILES_CREATED and FILES_MODIFIED from agent output."""
    files = []
    for line in output.splitlines():
        if line.startswith("FILES_CREATED:") or line.startswith("FILES_MODIFIED:"):
            # Extract content after colon
            content = line.split(":", 1)[1].strip()
            if content:
                # Split by comma and strip
                paths = [p.strip() for p in content.split(",") if p.strip()]
                files.extend(paths)
    return files


def _parse_dev_units(output: str) -> str:
    """Parses DEV_UNITS_IDENTIFIED from output."""
    for line in output.splitlines():
        if line.startswith("DEV_UNITS_IDENTIFIED:"):
            return line.split(":", 1)[1].strip()
    return ""


def _update_dev_unit_states(output: str, current_states: Dict[str, Any], identified_units_str: str) -> Dict[str, Any]:
    """Updates dev unit states based on Step 8 output."""
    identified_units = [u.strip() for u in identified_units_str.split(",") if u.strip()]

    # Initialize if not present
    for unit in identified_units:
        if unit not in current_states:
            current_states[unit] = {"fixed": False, "fix_attempts": 0}
        current_states[unit]["fix_attempts"] += 1

    # Parse results from output
    # Heuristic: look for "unit_name: FIXED" or "unit_name: Failed"
    # This depends on the LLM following instructions in Step 8 prompt.
    for line in output.splitlines():
        for unit in identified_units:
            if unit in line:
                if "FIXED" in line:
                    current_states[unit]["fixed"] = True
                elif "Failed" in line or "FAIL" in line:
                    current_states[unit]["fixed"] = False

    return current_states


def _check_staleness(state: Dict[str, Any], cwd: Path) -> None:
    """Checks if files have changed since state was saved."""
    last_saved_str = state.get("last_saved_at")
    if not last_saved_str:
        return

    try:
        last_saved = datetime.fromisoformat(last_saved_str)
    except ValueError:
        return

    changed_files = state.get("changed_files", [])
    stale = False

    for file_path in changed_files:
        p = cwd / file_path
        if not p.exists():
            console.print(f"[yellow]Warning: File '{file_path}' from previous state is missing.[/yellow]")
            continue

        # Check mtime
        mtime = datetime.fromtimestamp(p.stat().st_mtime)
        if mtime > last_saved:
            stale = True
            break

    if stale:
        console.print("[yellow]Warning: Codebase may have changed since last run. Consider --no-resume for fresh start.[/yellow]")


def _get_modified_and_untracked(cwd: Path) -> Set[str]:
    """Returns set of modified tracked files plus untracked files."""
    files: Set[str] = set()

    # Get modified tracked files
    result = subprocess.run(
        ["git", "diff", "--name-only", "HEAD"],
        cwd=cwd,
        capture_output=True,
        text=True
    )
    if result.returncode == 0:
        files.update(f for f in result.stdout.strip().split("\n") if f)

    # Get untracked files
    result = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard"],
        cwd=cwd,
        capture_output=True,
        text=True
    )
    if result.returncode == 0:
        files.update(f for f in result.stdout.strip().split("\n") if f)

    return files


def _get_file_hashes(cwd: Path) -> Dict[str, Optional[str]]:
    """
    Returns {filepath: md5_hash} for all modified and untracked files.

    If a file is deleted or unreadable, stores None for that file.
    """
    hashes: Dict[str, Optional[str]] = {}
    for filepath in _get_modified_and_untracked(cwd):
        path = cwd / filepath
        if path.exists() and path.is_file():
            try:
                hashes[filepath] = hashlib.md5(path.read_bytes()).hexdigest()
            except (IOError, OSError):
                hashes[filepath] = None
        else:
            hashes[filepath] = None  # Deleted or not a file
    return hashes


def _commit_and_push(
    cwd: Path,
    issue_number: int,
    issue_title: str,
    initial_file_hashes: Dict[str, Optional[str]],
    quiet: bool = False
) -> Tuple[bool, str]:
    """
    Commits only files that changed during the workflow and pushes.

    Uses hash comparison to detect actual content changes, avoiding
    staging pre-existing modified/untracked files.

    The PR was already created by `pdd bug`, so pushing
    automatically updates it.

    Args:
        cwd: Working directory
        issue_number: GitHub issue number
        issue_title: Issue title for commit message
        initial_file_hashes: File hashes from before workflow started
        quiet: Suppress output

    Returns:
        (success, message)
    """
    # Get current file hashes
    current_hashes = _get_file_hashes(cwd)

    # Find files that changed during workflow
    files_to_commit: List[str] = []
    for filepath, current_hash in current_hashes.items():
        if filepath not in initial_file_hashes:
            # New file created during workflow
            files_to_commit.append(filepath)
        elif initial_file_hashes[filepath] != current_hash:
            # Content changed during workflow
            files_to_commit.append(filepath)

    if not files_to_commit:
        return True, "No changes to commit"

    # Stage only workflow-changed files
    for filepath in files_to_commit:
        stage_result = subprocess.run(
            ["git", "add", filepath],
            cwd=cwd,
            capture_output=True,
            text=True
        )
        if stage_result.returncode != 0:
            return False, f"Failed to stage {filepath}: {stage_result.stderr}"

    # Commit with message referencing issue
    commit_msg = f"fix: {issue_title}\n\nFixes #{issue_number}"
    commit_result = subprocess.run(
        ["git", "commit", "-m", commit_msg],
        cwd=cwd,
        capture_output=True,
        text=True
    )
    if commit_result.returncode != 0:
        return False, f"Failed to commit: {commit_result.stderr}"

    # Push to remote (branch already exists from pdd bug)
    push_result = subprocess.run(
        ["git", "push"],
        cwd=cwd,
        capture_output=True,
        text=True
    )

    if push_result.returncode == 0:
        return True, f"Committed and pushed {len(files_to_commit)} file(s)"
    else:
        return False, f"Push failed: {push_result.stderr}"


def run_agentic_e2e_fix_orchestrator(
    issue_url: str,
    issue_content: str,
    repo_owner: str,
    repo_name: str,
    issue_number: int,
    issue_author: str,
    issue_title: str,
    *,
    cwd: Path,
    timeout_adder: float = 0.0,
    max_cycles: int = 5,
    resume: bool = True,
    verbose: bool = False,
    quiet: bool = False,
    use_github_state: bool = True
) -> Tuple[bool, str, float, str, List[str]]:
    """
    Orchestrator for the 9-step agentic e2e fix workflow.

    Returns:
        Tuple[bool, str, float, str, List[str]]:
            (success, final_message, total_cost, model_used, changed_files)
    """
    state_dir = _get_state_dir(cwd)
    workflow_name = "e2e_fix"

    # Initialize state variables
    current_cycle = 0
    last_completed_step = 0
    step_outputs: Dict[str, str] = {}
    total_cost = 0.0
    model_used = "unknown"
    changed_files: List[str] = []
    dev_unit_states: Dict[str, Any] = {}
    github_comment_id: Optional[int] = None

    # Resume Logic
    if resume:
        loaded_state, gh_id = load_workflow_state(
            cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state
        )
        if loaded_state:
            console.print(f"[blue]Resuming from cycle {loaded_state.get('current_cycle', 1)} step {loaded_state.get('last_completed_step', 0)}...[/blue]")
            current_cycle = loaded_state.get("current_cycle", 0)
            last_completed_step = loaded_state.get("last_completed_step", 0)
            step_outputs = loaded_state.get("step_outputs", {})
            total_cost = loaded_state.get("total_cost", 0.0)
            model_used = loaded_state.get("model_used", "unknown")
            changed_files = loaded_state.get("changed_files", [])
            dev_unit_states = loaded_state.get("dev_unit_states", {})
            github_comment_id = gh_id

            _check_staleness(loaded_state, cwd)

            # If we finished a cycle but didn't exit, prepare for next cycle
            if last_completed_step >= 9:
                current_cycle += 1
                last_completed_step = 0
                step_outputs = {}  # Clear outputs for new cycle
        else:
            # No state found, start fresh
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
    else:
        clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)

    console.print(f"Fixing e2e tests for issue #{issue_number}: \"{issue_title}\"")

    # Snapshot file state before workflow (for hash-based commit detection)
    initial_file_hashes = _get_file_hashes(cwd)

    success = False
    final_message = ""

    try:
        # Outer Loop
        if current_cycle == 0:
            current_cycle = 1

        while current_cycle <= max_cycles:
            console.print(f"\n[bold cyan][Cycle {current_cycle}/{max_cycles}] Starting fix cycle...[/bold cyan]")

            # Inner Loop (Steps 1-9)
            for step_num in range(1, 10):
                if step_num <= last_completed_step:
                    continue  # Skip already completed steps in this cycle

                step_name = STEP_NAMES[step_num]
                description = STEP_DESCRIPTIONS[step_num]

                console.print(f"[bold][Step {step_num}/9] {description}...[/bold]")

                # 1. Load Prompt
                template_name = f"agentic_e2e_fix_step{step_num}_{step_name}_LLM"
                prompt_template = load_prompt_template(template_name)
                if not prompt_template:
                    raise ValueError(f"Could not load prompt template: {template_name}")

                # 2. Prepare Context
                context = {
                    "issue_url": issue_url,
                    "repo_owner": repo_owner,
                    "repo_name": repo_name,
                    "issue_number": issue_number,
                    "cycle_number": current_cycle,
                    "max_cycles": max_cycles,
                    "issue_content": issue_content,
                }

                # Add previous step outputs
                for prev_step in range(1, step_num):
                    key = f"step{prev_step}_output"
                    context[key] = step_outputs.get(str(prev_step), "")

                # Derived variables for specific steps
                if step_num >= 6:
                    s5_out = step_outputs.get("5", "")
                    context["dev_units_identified"] = _parse_dev_units(s5_out)

                if step_num == 8:
                    s5_out = step_outputs.get("5", "")
                    context["failing_dev_units"] = _parse_dev_units(s5_out)

                if step_num == 9:
                    context["next_cycle"] = current_cycle + 1

                formatted_prompt = prompt_template.format(**context)

                # 3. Run Task
                base_timeout = E2E_FIX_STEP_TIMEOUTS.get(step_num, 340.0)
                timeout = base_timeout + timeout_adder

                step_success, step_output, step_cost, step_model = run_agentic_task(
                    instruction=formatted_prompt,
                    cwd=cwd,
                    verbose=verbose,
                    quiet=quiet,
                    timeout=timeout,
                    label=f"cycle{current_cycle}_step{step_num}",
                    max_retries=DEFAULT_MAX_RETRIES,
                )

                # 4. Store Output & Accumulate
                # Only mark step completed if it succeeded; failed steps get "FAILED:" prefix
                # and last_completed_step stays at previous step (ensures resume re-runs failed step)
                if step_success:
                    step_outputs[str(step_num)] = step_output
                    last_completed_step = step_num
                else:
                    step_outputs[str(step_num)] = f"FAILED: {step_output}"
                    # Don't update last_completed_step - keep it at previous value

                total_cost += step_cost
                model_used = step_model if step_model else model_used

                # Parse changed files
                new_files = _parse_changed_files(step_output)
                for f in new_files:
                    if f not in changed_files:
                        changed_files.append(f)

                # Parse dev unit states (Step 8)
                if step_num == 8:
                    s5_out = step_outputs.get("5", "")
                    dev_units_str = _parse_dev_units(s5_out)
                    dev_unit_states = _update_dev_unit_states(step_output, dev_unit_states, dev_units_str)

                # Print brief result
                if step_success:
                    console.print(f" -> Step {step_num} complete. Cost: ${step_cost:.4f}")
                else:
                    console.print(f" -> Step {step_num} [red]failed[/red]. Cost: ${step_cost:.4f}")

                # 5. Save State
                state_data = {
                    "workflow": workflow_name,
                    "issue_url": issue_url,
                    "issue_number": issue_number,
                    "current_cycle": current_cycle,
                    "last_completed_step": last_completed_step,
                    "step_outputs": step_outputs.copy(),  # Copy to avoid shared reference
                    "dev_unit_states": dev_unit_states.copy(),  # Copy to avoid shared reference
                    "total_cost": total_cost,
                    "model_used": model_used,
                    "changed_files": changed_files.copy(),  # Copy to avoid shared reference
                    "last_saved_at": datetime.now().isoformat(),
                    "github_comment_id": github_comment_id
                }

                new_gh_id = save_workflow_state(
                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
                )
                if new_gh_id:
                    github_comment_id = new_gh_id

                # Check Early Exit (Step 2)
                if step_num == 2 and "ALL_TESTS_PASS" in step_output:
                    console.print("[green]ALL_TESTS_PASS detected in Step 2. Exiting loop.[/green]")
                    success = True
                    final_message = "All tests passed during e2e check."
                    break

                # Check Loop Control (Step 9)
                if step_num == 9:
                    if "ALL_TESTS_PASS" in step_output:
                        console.print("[green]ALL_TESTS_PASS detected in Step 9.[/green]")
                        success = True
                        final_message = "All tests passed after fixes."
                        break
                    elif "MAX_CYCLES_REACHED" in step_output:
                        console.print("[yellow]MAX_CYCLES_REACHED detected in Step 9.[/yellow]")
                    elif "CONTINUE_CYCLE" not in step_output:
                        console.print("[yellow]Warning: No loop control token found in Step 9. Defaulting to CONTINUE_CYCLE.[/yellow]")

            if success:
                break

            # Prepare for next cycle
            current_cycle += 1
            last_completed_step = 0
            step_outputs = {}  # Clear outputs for next cycle

            state_data["current_cycle"] = current_cycle
            state_data["last_completed_step"] = 0
            state_data["step_outputs"] = {}
            state_data["last_saved_at"] = datetime.now().isoformat()

            if current_cycle <= max_cycles:
                save_workflow_state(
                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
                )

        if success:
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
            console.print("\n[bold green]E2E fix complete[/bold green]")
            console.print(f" Total cost: ${total_cost:.4f}")
            console.print(f" Cycles used: {current_cycle if current_cycle <= max_cycles else max_cycles}/{max_cycles}")
            console.print(f" Files changed: {', '.join(changed_files)}")
            fixed_units = [u for u, s in dev_unit_states.items() if s.get("fixed")]
            console.print(f" Dev units fixed: {', '.join(fixed_units)}")

            # Commit and push changes to update the existing PR
            commit_success, commit_message = _commit_and_push(
                cwd=cwd,
                issue_number=issue_number,
                issue_title=issue_title,
                initial_file_hashes=initial_file_hashes,
                quiet=quiet
            )
            if commit_success:
                console.print(f" [green]{commit_message}[/green]")
            else:
                console.print(f" [yellow]Warning: {commit_message}[/yellow]")

            return True, final_message, total_cost, model_used, changed_files
        else:
            final_message = f"Max cycles ({max_cycles}) reached without all tests passing"
            console.print("\n[bold red]E2E fix incomplete (max cycles reached)[/bold red]")
            console.print(f" Total cost: ${total_cost:.4f}")
            remaining = [u for u, s in dev_unit_states.items() if not s.get("fixed")]
            console.print(f" Remaining failures: {', '.join(remaining)}")
            return False, final_message, total_cost, model_used, changed_files

    except KeyboardInterrupt:
        console.print("\n[bold red]Interrupted by user. Saving state...[/bold red]")
        state_data = {
            "workflow": workflow_name,
            "issue_url": issue_url,
            "issue_number": issue_number,
            "current_cycle": current_cycle,
            "last_completed_step": last_completed_step,
            "step_outputs": step_outputs,
            "dev_unit_states": dev_unit_states,
            "total_cost": total_cost,
            "model_used": model_used,
            "changed_files": changed_files,
            "last_saved_at": datetime.now().isoformat(),
            "github_comment_id": github_comment_id
        }
        save_workflow_state(
            cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
        )
        raise

    except Exception as e:
        console.print(f"\n[bold red]Fatal error: {e}[/bold red]")
        try:
            state_data = {
                "workflow": workflow_name,
                "issue_url": issue_url,
                "issue_number": issue_number,
                "current_cycle": current_cycle,
                "last_completed_step": last_completed_step,
                "step_outputs": step_outputs,
                "dev_unit_states": dev_unit_states,
                "total_cost": total_cost,
                "model_used": model_used,
                "changed_files": changed_files,
                "last_saved_at": datetime.now().isoformat(),
                "github_comment_id": github_comment_id
            }
            save_workflow_state(
                cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
            )
        except Exception:
            pass
        return False, f"Stopped at cycle {current_cycle} step {last_completed_step}: {str(e)}", total_cost, model_used, changed_files
```
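For orientation, a minimal sketch of how this orchestrator might be invoked. Only the keyword-only signature and the five-element return tuple come from the code above; the issue metadata values (URL, owner, number, titles) are hypothetical placeholders, not taken from the package.

```python
from pathlib import Path
from pdd.agentic_e2e_fix_orchestrator import run_agentic_e2e_fix_orchestrator

# Hypothetical issue metadata; in a real run these would come from the GitHub issue being fixed.
success, message, cost, model, changed = run_agentic_e2e_fix_orchestrator(
    "https://github.com/example/repo/issues/42",   # issue_url
    "E2E test suite fails after upgrade",          # issue_content
    "example",                                     # repo_owner
    "repo",                                        # repo_name
    42,                                            # issue_number
    "octocat",                                     # issue_author
    "E2E failures",                                # issue_title
    cwd=Path("."),       # keyword-only, per the signature above
    max_cycles=3,
    resume=True,
    verbose=False,
)
print(f"success={success}, cost=${cost:.4f}, model={model}")
print("changed files:", changed)
```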