pdd-cli 0.0.90-py3-none-any.whl → 0.0.121-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. pdd/__init__.py +38 -6
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +506 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +537 -0
  6. pdd/agentic_common.py +533 -770
  7. pdd/agentic_crash.py +2 -1
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +582 -0
  10. pdd/agentic_fix.py +118 -3
  11. pdd/agentic_update.py +27 -9
  12. pdd/agentic_verify.py +3 -2
  13. pdd/architecture_sync.py +565 -0
  14. pdd/auth_service.py +210 -0
  15. pdd/auto_deps_main.py +63 -53
  16. pdd/auto_include.py +236 -3
  17. pdd/auto_update.py +125 -47
  18. pdd/bug_main.py +195 -23
  19. pdd/cmd_test_main.py +345 -197
  20. pdd/code_generator.py +4 -2
  21. pdd/code_generator_main.py +118 -32
  22. pdd/commands/__init__.py +6 -0
  23. pdd/commands/analysis.py +113 -48
  24. pdd/commands/auth.py +309 -0
  25. pdd/commands/connect.py +358 -0
  26. pdd/commands/fix.py +155 -114
  27. pdd/commands/generate.py +5 -0
  28. pdd/commands/maintenance.py +3 -2
  29. pdd/commands/misc.py +8 -0
  30. pdd/commands/modify.py +225 -163
  31. pdd/commands/sessions.py +284 -0
  32. pdd/commands/utility.py +12 -7
  33. pdd/construct_paths.py +334 -32
  34. pdd/context_generator_main.py +167 -170
  35. pdd/continue_generation.py +6 -3
  36. pdd/core/__init__.py +33 -0
  37. pdd/core/cli.py +44 -7
  38. pdd/core/cloud.py +237 -0
  39. pdd/core/dump.py +68 -20
  40. pdd/core/errors.py +4 -0
  41. pdd/core/remote_session.py +61 -0
  42. pdd/crash_main.py +219 -23
  43. pdd/data/llm_model.csv +4 -4
  44. pdd/docs/prompting_guide.md +864 -0
  45. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  46. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  47. pdd/fix_code_loop.py +208 -34
  48. pdd/fix_code_module_errors.py +6 -2
  49. pdd/fix_error_loop.py +291 -38
  50. pdd/fix_main.py +208 -6
  51. pdd/fix_verification_errors_loop.py +235 -26
  52. pdd/fix_verification_main.py +269 -83
  53. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  54. pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
  55. pdd/frontend/dist/index.html +376 -0
  56. pdd/frontend/dist/logo.svg +33 -0
  57. pdd/generate_output_paths.py +46 -5
  58. pdd/generate_test.py +212 -151
  59. pdd/get_comment.py +19 -44
  60. pdd/get_extension.py +8 -9
  61. pdd/get_jwt_token.py +309 -20
  62. pdd/get_language.py +8 -7
  63. pdd/get_run_command.py +7 -5
  64. pdd/insert_includes.py +2 -1
  65. pdd/llm_invoke.py +531 -97
  66. pdd/load_prompt_template.py +15 -34
  67. pdd/operation_log.py +342 -0
  68. pdd/path_resolution.py +140 -0
  69. pdd/postprocess.py +122 -97
  70. pdd/preprocess.py +68 -12
  71. pdd/preprocess_main.py +33 -1
  72. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  73. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  74. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  75. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  76. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  77. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  78. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  79. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  80. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  81. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  82. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  83. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  84. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +140 -0
  85. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  86. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  87. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  88. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  89. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  90. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  91. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  92. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  93. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  94. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  95. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  96. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  97. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  98. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  99. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  100. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  101. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  102. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  103. pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
  104. pdd/prompts/agentic_update_LLM.prompt +192 -338
  105. pdd/prompts/auto_include_LLM.prompt +22 -0
  106. pdd/prompts/change_LLM.prompt +3093 -1
  107. pdd/prompts/detect_change_LLM.prompt +571 -14
  108. pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
  109. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
  110. pdd/prompts/generate_test_LLM.prompt +19 -1
  111. pdd/prompts/generate_test_from_example_LLM.prompt +366 -0
  112. pdd/prompts/insert_includes_LLM.prompt +262 -252
  113. pdd/prompts/prompt_code_diff_LLM.prompt +123 -0
  114. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  115. pdd/remote_session.py +876 -0
  116. pdd/server/__init__.py +52 -0
  117. pdd/server/app.py +335 -0
  118. pdd/server/click_executor.py +587 -0
  119. pdd/server/executor.py +338 -0
  120. pdd/server/jobs.py +661 -0
  121. pdd/server/models.py +241 -0
  122. pdd/server/routes/__init__.py +31 -0
  123. pdd/server/routes/architecture.py +451 -0
  124. pdd/server/routes/auth.py +364 -0
  125. pdd/server/routes/commands.py +929 -0
  126. pdd/server/routes/config.py +42 -0
  127. pdd/server/routes/files.py +603 -0
  128. pdd/server/routes/prompts.py +1347 -0
  129. pdd/server/routes/websocket.py +473 -0
  130. pdd/server/security.py +243 -0
  131. pdd/server/terminal_spawner.py +217 -0
  132. pdd/server/token_counter.py +222 -0
  133. pdd/summarize_directory.py +236 -237
  134. pdd/sync_animation.py +8 -4
  135. pdd/sync_determine_operation.py +329 -47
  136. pdd/sync_main.py +272 -28
  137. pdd/sync_orchestration.py +289 -211
  138. pdd/sync_order.py +304 -0
  139. pdd/template_expander.py +161 -0
  140. pdd/templates/architecture/architecture_json.prompt +41 -46
  141. pdd/trace.py +1 -1
  142. pdd/track_cost.py +0 -13
  143. pdd/unfinished_prompt.py +2 -1
  144. pdd/update_main.py +68 -26
  145. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +15 -10
  146. pdd_cli-0.0.121.dist-info/RECORD +229 -0
  147. pdd_cli-0.0.90.dist-info/RECORD +0 -153
  148. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
  149. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
  150. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
  151. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/agentic_e2e_fix_orchestrator.py
@@ -0,0 +1,582 @@
+from __future__ import annotations
+
+import hashlib
+import os
+import subprocess
+import sys
+import time
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import List, Tuple, Dict, Any, Optional, Set
+
+from rich.console import Console
+
+from .agentic_common import (
+    run_agentic_task,
+    load_workflow_state,
+    save_workflow_state,
+    clear_workflow_state,
+    DEFAULT_MAX_RETRIES,
+)
+from .load_prompt_template import load_prompt_template
+
+# Constants
+STEP_NAMES = {
+    1: "unit_tests",
+    2: "e2e_tests",
+    3: "root_cause",
+    4: "fix_e2e_tests",
+    5: "identify_devunits",
+    6: "create_unit_tests",
+    7: "verify_tests",
+    8: "run_pdd_fix",
+    9: "verify_all",
+}
+
+STEP_DESCRIPTIONS = {
+    1: "Running unit tests from issue",
+    2: "Running e2e tests",
+    3: "Analyzing root cause",
+    4: "Fixing e2e tests",
+    5: "Identifying dev units",
+    6: "Creating unit tests",
+    7: "Verifying tests detect bugs",
+    8: "Running pdd fix",
+    9: "Final verification",
+}
+
+# Per-step timeouts for the 9-step agentic e2e fix workflow
+E2E_FIX_STEP_TIMEOUTS: Dict[int, float] = {
+    1: 340.0,  # Run unit tests from issue, pdd fix failures
+    2: 240.0,  # Run e2e tests, check completion (early exit)
+    3: 340.0,  # Root cause analysis (code vs test vs both)
+    4: 340.0,  # Fix e2e tests if needed
+    5: 340.0,  # Identify dev units involved in failures
+    6: 600.0,  # Create/append unit tests for dev units (Complex)
+    7: 600.0,  # Verify unit tests detect bugs (Complex)
+    8: 1000.0,  # Run pdd fix on failing dev units (Most Complex - multiple LLM calls)
+    9: 240.0,  # Final verification, loop control
+}
+
+console = Console()
+
+def _get_state_dir(cwd: Path) -> Path:
+    """Returns the state directory .pdd/e2e-fix-state/ relative to git root."""
+    # Simple heuristic: look for .git, otherwise use cwd
+    d = cwd.resolve()
+    root = d
+    while d != d.parent:
+        if (d / ".git").exists():
+            root = d
+            break
+        d = d.parent
+
+    state_dir = root / ".pdd" / "e2e-fix-state"
+    state_dir.mkdir(parents=True, exist_ok=True)
+    return state_dir
+
+def _parse_changed_files(output: str) -> List[str]:
+    """Parses FILES_CREATED and FILES_MODIFIED from agent output."""
+    files = []
+    for line in output.splitlines():
+        if line.startswith("FILES_CREATED:") or line.startswith("FILES_MODIFIED:"):
+            # Extract content after colon
+            content = line.split(":", 1)[1].strip()
+            if content:
+                # Split by comma and strip
+                paths = [p.strip() for p in content.split(",") if p.strip()]
+                files.extend(paths)
+    return files
+
+def _parse_dev_units(output: str) -> str:
+    """Parses DEV_UNITS_IDENTIFIED from output."""
+    for line in output.splitlines():
+        if line.startswith("DEV_UNITS_IDENTIFIED:"):
+            return line.split(":", 1)[1].strip()
+    return ""
+
+def _update_dev_unit_states(output: str, current_states: Dict[str, Any], identified_units_str: str) -> Dict[str, Any]:
+    """Updates dev unit states based on Step 8 output."""
+    identified_units = [u.strip() for u in identified_units_str.split(",") if u.strip()]
+
+    # Initialize if not present
+    for unit in identified_units:
+        if unit not in current_states:
+            current_states[unit] = {"fixed": False, "fix_attempts": 0}
+        current_states[unit]["fix_attempts"] += 1
+
+    # Parse results from output
+    # Heuristic: look for "unit_name: FIXED" or "unit_name: Failed"
+    # This depends on the LLM following instructions in Step 8 prompt.
+    for line in output.splitlines():
+        for unit in identified_units:
+            if unit in line:
+                if "FIXED" in line:
+                    current_states[unit]["fixed"] = True
+                elif "Failed" in line or "FAIL" in line:
+                    current_states[unit]["fixed"] = False
+
+    return current_states
+
+def _check_staleness(state: Dict[str, Any], cwd: Path) -> None:
+    """Checks if files have changed since state was saved."""
+    last_saved_str = state.get("last_saved_at")
+    if not last_saved_str:
+        return
+
+    try:
+        last_saved = datetime.fromisoformat(last_saved_str)
+    except ValueError:
+        return
+
+    changed_files = state.get("changed_files", [])
+    stale = False
+
+    for file_path in changed_files:
+        p = cwd / file_path
+        if not p.exists():
+            console.print(f"[yellow]Warning: File '{file_path}' from previous state is missing.[/yellow]")
+            continue
+
+        # Check mtime
+        mtime = datetime.fromtimestamp(p.stat().st_mtime)
+        if mtime > last_saved:
+            stale = True
+            break
+
+    if stale:
+        console.print("[yellow]Warning: Codebase may have changed since last run. Consider --no-resume for fresh start.[/yellow]")
+
+
+def _get_modified_and_untracked(cwd: Path) -> Set[str]:
+    """Returns set of modified tracked files plus untracked files."""
+    files: Set[str] = set()
+
+    # Get modified tracked files
+    result = subprocess.run(
+        ["git", "diff", "--name-only", "HEAD"],
+        cwd=cwd,
+        capture_output=True,
+        text=True
+    )
+    if result.returncode == 0:
+        files.update(f for f in result.stdout.strip().split("\n") if f)
+
+    # Get untracked files
+    result = subprocess.run(
+        ["git", "ls-files", "--others", "--exclude-standard"],
+        cwd=cwd,
+        capture_output=True,
+        text=True
+    )
+    if result.returncode == 0:
+        files.update(f for f in result.stdout.strip().split("\n") if f)
+
+    return files
+
+
+def _get_file_hashes(cwd: Path) -> Dict[str, Optional[str]]:
+    """
+    Returns {filepath: md5_hash} for all modified and untracked files.
+
+    If a file is deleted or unreadable, stores None for that file.
+    """
+    hashes: Dict[str, Optional[str]] = {}
+    for filepath in _get_modified_and_untracked(cwd):
+        path = cwd / filepath
+        if path.exists() and path.is_file():
+            try:
+                hashes[filepath] = hashlib.md5(path.read_bytes()).hexdigest()
+            except (IOError, OSError):
+                hashes[filepath] = None
+        else:
+            hashes[filepath] = None  # Deleted or not a file
+    return hashes
+
+
+def _commit_and_push(
+    cwd: Path,
+    issue_number: int,
+    issue_title: str,
+    initial_file_hashes: Dict[str, Optional[str]],
+    quiet: bool = False
+) -> Tuple[bool, str]:
+    """
+    Commits only files that changed during the workflow and pushes.
+
+    Uses hash comparison to detect actual content changes, avoiding
+    staging pre-existing modified/untracked files.
+
+    The PR was already created by `pdd bug`, so pushing
+    automatically updates it.
+
+    Args:
+        cwd: Working directory
+        issue_number: GitHub issue number
+        issue_title: Issue title for commit message
+        initial_file_hashes: File hashes from before workflow started
+        quiet: Suppress output
+
+    Returns:
+        (success, message)
+    """
+    # Get current file hashes
+    current_hashes = _get_file_hashes(cwd)
+
+    # Find files that changed during workflow
+    files_to_commit: List[str] = []
+    for filepath, current_hash in current_hashes.items():
+        if filepath not in initial_file_hashes:
+            # New file created during workflow
+            files_to_commit.append(filepath)
+        elif initial_file_hashes[filepath] != current_hash:
+            # Content changed during workflow
+            files_to_commit.append(filepath)
+
+    if not files_to_commit:
+        return True, "No changes to commit"
+
+    # Stage only workflow-changed files
+    for filepath in files_to_commit:
+        stage_result = subprocess.run(
+            ["git", "add", filepath],
+            cwd=cwd,
+            capture_output=True,
+            text=True
+        )
+        if stage_result.returncode != 0:
+            return False, f"Failed to stage {filepath}: {stage_result.stderr}"
+
+    # Commit with message referencing issue
+    commit_msg = f"fix: {issue_title}\n\nFixes #{issue_number}"
+    commit_result = subprocess.run(
+        ["git", "commit", "-m", commit_msg],
+        cwd=cwd,
+        capture_output=True,
+        text=True
+    )
+    if commit_result.returncode != 0:
+        return False, f"Failed to commit: {commit_result.stderr}"
+
+    # Push to remote (branch already exists from pdd bug)
+    push_result = subprocess.run(
+        ["git", "push"],
+        cwd=cwd,
+        capture_output=True,
+        text=True
+    )
+
+    if push_result.returncode == 0:
+        return True, f"Committed and pushed {len(files_to_commit)} file(s)"
+    else:
+        return False, f"Push failed: {push_result.stderr}"
+
+
+def run_agentic_e2e_fix_orchestrator(
+    issue_url: str,
+    issue_content: str,
+    repo_owner: str,
+    repo_name: str,
+    issue_number: int,
+    issue_author: str,
+    issue_title: str,
+    *,
+    cwd: Path,
+    timeout_adder: float = 0.0,
+    max_cycles: int = 5,
+    resume: bool = True,
+    verbose: bool = False,
+    quiet: bool = False,
+    use_github_state: bool = True
+) -> Tuple[bool, str, float, str, List[str]]:
+    """
+    Orchestrator for the 9-step agentic e2e fix workflow.
+
+    Returns:
+        Tuple[bool, str, float, str, List[str]]:
+            (success, final_message, total_cost, model_used, changed_files)
+    """
+    state_dir = _get_state_dir(cwd)
+    workflow_name = "e2e_fix"
+
+    # Initialize state variables
+    current_cycle = 0
+    last_completed_step = 0
+    step_outputs: Dict[str, str] = {}
+    total_cost = 0.0
+    model_used = "unknown"
+    changed_files: List[str] = []
+    dev_unit_states: Dict[str, Any] = {}
+    github_comment_id: Optional[int] = None
+
+    # Resume Logic
+    if resume:
+        loaded_state, gh_id = load_workflow_state(
+            cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state
+        )
+        if loaded_state:
+            console.print(f"[blue]Resuming from cycle {loaded_state.get('current_cycle', 1)} step {loaded_state.get('last_completed_step', 0)}...[/blue]")
+            current_cycle = loaded_state.get("current_cycle", 0)
+            last_completed_step = loaded_state.get("last_completed_step", 0)
+            step_outputs = loaded_state.get("step_outputs", {})
+            total_cost = loaded_state.get("total_cost", 0.0)
+            model_used = loaded_state.get("model_used", "unknown")
+            changed_files = loaded_state.get("changed_files", [])
+            dev_unit_states = loaded_state.get("dev_unit_states", {})
+            github_comment_id = gh_id
+
+            _check_staleness(loaded_state, cwd)
+
+            # If we finished a cycle but didn't exit, prepare for next cycle
+            if last_completed_step >= 9:
+                current_cycle += 1
+                last_completed_step = 0
+                step_outputs = {}  # Clear outputs for new cycle
+        else:
+            # No state found, start fresh
+            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
+    else:
+        clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
+
+    console.print(f"Fixing e2e tests for issue #{issue_number}: \"{issue_title}\"")
+
+    # Snapshot file state before workflow (for hash-based commit detection)
+    initial_file_hashes = _get_file_hashes(cwd)
+
+    success = False
+    final_message = ""
+
+    try:
+        # Outer Loop
+        if current_cycle == 0:
+            current_cycle = 1
+
+        while current_cycle <= max_cycles:
+            console.print(f"\n[bold cyan][Cycle {current_cycle}/{max_cycles}] Starting fix cycle...[/bold cyan]")
+
+            # Inner Loop (Steps 1-9)
+            for step_num in range(1, 10):
+                if step_num <= last_completed_step:
+                    continue  # Skip already completed steps in this cycle
+
+                step_name = STEP_NAMES[step_num]
+                description = STEP_DESCRIPTIONS[step_num]
+
+                console.print(f"[bold][Step {step_num}/9] {description}...[/bold]")
+
+                # 1. Load Prompt
+                template_name = f"agentic_e2e_fix_step{step_num}_{step_name}_LLM"
+                prompt_template = load_prompt_template(template_name)
+                if not prompt_template:
+                    raise ValueError(f"Could not load prompt template: {template_name}")
+
+                # 2. Prepare Context
+                context = {
+                    "issue_url": issue_url,
+                    "repo_owner": repo_owner,
+                    "repo_name": repo_name,
+                    "issue_number": issue_number,
+                    "cycle_number": current_cycle,
+                    "max_cycles": max_cycles,
+                    "issue_content": issue_content,
+                }
+
+                # Add previous step outputs
+                for prev_step in range(1, step_num):
+                    key = f"step{prev_step}_output"
+                    context[key] = step_outputs.get(str(prev_step), "")
+
+                # Derived variables for specific steps
+                if step_num >= 6:
+                    s5_out = step_outputs.get("5", "")
+                    context["dev_units_identified"] = _parse_dev_units(s5_out)
+
+                if step_num == 8:
+                    s5_out = step_outputs.get("5", "")
+                    context["failing_dev_units"] = _parse_dev_units(s5_out)
+
+                if step_num == 9:
+                    context["next_cycle"] = current_cycle + 1
+
+                formatted_prompt = prompt_template.format(**context)
+
+                # 3. Run Task
+                base_timeout = E2E_FIX_STEP_TIMEOUTS.get(step_num, 340.0)
+                timeout = base_timeout + timeout_adder
+
+                step_success, step_output, step_cost, step_model = run_agentic_task(
+                    instruction=formatted_prompt,
+                    cwd=cwd,
+                    verbose=verbose,
+                    quiet=quiet,
+                    timeout=timeout,
+                    label=f"cycle{current_cycle}_step{step_num}",
+                    max_retries=DEFAULT_MAX_RETRIES,
+                )
+
+                # 4. Store Output & Accumulate
+                # Only mark step completed if it succeeded; failed steps get "FAILED:" prefix
+                # and last_completed_step stays at previous step (ensures resume re-runs failed step)
+                if step_success:
+                    step_outputs[str(step_num)] = step_output
+                    last_completed_step = step_num
+                else:
+                    step_outputs[str(step_num)] = f"FAILED: {step_output}"
+                    # Don't update last_completed_step - keep it at previous value
+
+                total_cost += step_cost
+                model_used = step_model if step_model else model_used
+
+                # Parse changed files
+                new_files = _parse_changed_files(step_output)
+                for f in new_files:
+                    if f not in changed_files:
+                        changed_files.append(f)
+
+                # Parse dev unit states (Step 8)
+                if step_num == 8:
+                    s5_out = step_outputs.get("5", "")
+                    dev_units_str = _parse_dev_units(s5_out)
+                    dev_unit_states = _update_dev_unit_states(step_output, dev_unit_states, dev_units_str)
+
+                # Print brief result
+                if step_success:
+                    console.print(f" -> Step {step_num} complete. Cost: ${step_cost:.4f}")
+                else:
+                    console.print(f" -> Step {step_num} [red]failed[/red]. Cost: ${step_cost:.4f}")
+
+                # 5. Save State
+                state_data = {
+                    "workflow": workflow_name,
+                    "issue_url": issue_url,
+                    "issue_number": issue_number,
+                    "current_cycle": current_cycle,
+                    "last_completed_step": last_completed_step,
+                    "step_outputs": step_outputs.copy(),  # Copy to avoid shared reference
+                    "dev_unit_states": dev_unit_states.copy(),  # Copy to avoid shared reference
+                    "total_cost": total_cost,
+                    "model_used": model_used,
+                    "changed_files": changed_files.copy(),  # Copy to avoid shared reference
+                    "last_saved_at": datetime.now().isoformat(),
+                    "github_comment_id": github_comment_id
+                }
+
+                new_gh_id = save_workflow_state(
+                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
+                )
+                if new_gh_id:
+                    github_comment_id = new_gh_id
+
+                # Check Early Exit (Step 2)
+                if step_num == 2 and "ALL_TESTS_PASS" in step_output:
+                    console.print("[green]ALL_TESTS_PASS detected in Step 2. Exiting loop.[/green]")
+                    success = True
+                    final_message = "All tests passed during e2e check."
+                    break
+
+                # Check Loop Control (Step 9)
+                if step_num == 9:
+                    if "ALL_TESTS_PASS" in step_output:
+                        console.print("[green]ALL_TESTS_PASS detected in Step 9.[/green]")
+                        success = True
+                        final_message = "All tests passed after fixes."
+                        break
+                    elif "MAX_CYCLES_REACHED" in step_output:
+                        console.print("[yellow]MAX_CYCLES_REACHED detected in Step 9.[/yellow]")
+                    elif "CONTINUE_CYCLE" not in step_output:
+                        console.print("[yellow]Warning: No loop control token found in Step 9. Defaulting to CONTINUE_CYCLE.[/yellow]")
+
+            if success:
+                break
+
+            # Prepare for next cycle
+            current_cycle += 1
+            last_completed_step = 0
+            step_outputs = {}  # Clear outputs for next cycle
+
+            state_data["current_cycle"] = current_cycle
+            state_data["last_completed_step"] = 0
+            state_data["step_outputs"] = {}
+            state_data["last_saved_at"] = datetime.now().isoformat()
+
+            if current_cycle <= max_cycles:
+                save_workflow_state(
+                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
+                )
+
+        if success:
+            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
+            console.print("\n[bold green]E2E fix complete[/bold green]")
+            console.print(f" Total cost: ${total_cost:.4f}")
+            console.print(f" Cycles used: {current_cycle if current_cycle <= max_cycles else max_cycles}/{max_cycles}")
+            console.print(f" Files changed: {', '.join(changed_files)}")
+            fixed_units = [u for u, s in dev_unit_states.items() if s.get("fixed")]
+            console.print(f" Dev units fixed: {', '.join(fixed_units)}")
+
+            # Commit and push changes to update the existing PR
+            commit_success, commit_message = _commit_and_push(
+                cwd=cwd,
+                issue_number=issue_number,
+                issue_title=issue_title,
+                initial_file_hashes=initial_file_hashes,
+                quiet=quiet
+            )
+            if commit_success:
+                console.print(f" [green]{commit_message}[/green]")
+            else:
+                console.print(f" [yellow]Warning: {commit_message}[/yellow]")
+
+            return True, final_message, total_cost, model_used, changed_files
+        else:
+            final_message = f"Max cycles ({max_cycles}) reached without all tests passing"
+            console.print("\n[bold red]E2E fix incomplete (max cycles reached)[/bold red]")
+            console.print(f" Total cost: ${total_cost:.4f}")
+            remaining = [u for u, s in dev_unit_states.items() if not s.get("fixed")]
+            console.print(f" Remaining failures: {', '.join(remaining)}")
+            return False, final_message, total_cost, model_used, changed_files
+
+    except KeyboardInterrupt:
+        console.print("\n[bold red]Interrupted by user. Saving state...[/bold red]")
+        state_data = {
+            "workflow": workflow_name,
+            "issue_url": issue_url,
+            "issue_number": issue_number,
+            "current_cycle": current_cycle,
+            "last_completed_step": last_completed_step,
+            "step_outputs": step_outputs,
+            "dev_unit_states": dev_unit_states,
+            "total_cost": total_cost,
+            "model_used": model_used,
+            "changed_files": changed_files,
+            "last_saved_at": datetime.now().isoformat(),
+            "github_comment_id": github_comment_id
+        }
+        save_workflow_state(
+            cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
+        )
+        raise
+
+    except Exception as e:
+        console.print(f"\n[bold red]Fatal error: {e}[/bold red]")
+        try:
+            state_data = {
+                "workflow": workflow_name,
+                "issue_url": issue_url,
+                "issue_number": issue_number,
+                "current_cycle": current_cycle,
+                "last_completed_step": last_completed_step,
+                "step_outputs": step_outputs,
+                "dev_unit_states": dev_unit_states,
+                "total_cost": total_cost,
+                "model_used": model_used,
+                "changed_files": changed_files,
+                "last_saved_at": datetime.now().isoformat(),
+                "github_comment_id": github_comment_id
+            }
+            save_workflow_state(
+                cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
+            )
+        except Exception:
+            pass
+        return False, f"Stopped at cycle {current_cycle} step {last_completed_step}: {str(e)}", total_cost, model_used, changed_files