pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/agentic_fix.py ADDED
@@ -0,0 +1,1294 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import difflib
9
+ import tempfile
10
+ from pathlib import Path
11
+ from typing import Tuple, List, Optional, Dict
12
+ from rich.console import Console
13
+
14
+ from .get_language import get_language # Detects language from file extension (e.g., ".py" -> "python")
15
+ from .get_run_command import get_run_command_for_file # Gets run command for a file based on extension
16
+ from .llm_invoke import _load_model_data # Loads provider/model metadata from llm_model.csv
17
+ from .load_prompt_template import load_prompt_template # Loads prompt templates by name
18
+ from .agentic_langtest import default_verify_cmd_for # Provides a default verify command (per language)
19
+
20
+ console = Console()
21
+
22
+ # Provider selection order. The code will try agents in this sequence if keys/CLIs are present.
23
+ AGENT_PROVIDER_PREFERENCE = ["anthropic", "google", "openai"]
24
+
25
+ # Logging level selection; defaults to "quiet" under pytest, else "normal"
26
+ _env_level = os.getenv("PDD_AGENTIC_LOGLEVEL")
27
+ if _env_level is None and os.getenv("PYTEST_CURRENT_TEST"):
28
+ _env_level = "quiet"
29
+ _LOGLEVEL = (_env_level or "normal").strip().lower()
30
+ _IS_QUIET = _LOGLEVEL == "quiet"
31
+ _IS_VERBOSE = _LOGLEVEL == "verbose"
32
+
33
+ # Tunable knobs via env
34
+ _AGENT_COST_PER_CALL = float(os.getenv("PDD_AGENTIC_COST_PER_CALL", "0.02")) # estimated cost accounting
35
+ _AGENT_CALL_TIMEOUT = int(os.getenv("PDD_AGENTIC_TIMEOUT", "240")) # timeout (s) for each agent call
36
+ _VERIFY_TIMEOUT = int(os.getenv("PDD_AGENTIC_VERIFY_TIMEOUT", "120")) # timeout (s) for local verification step
37
+ _MAX_LOG_LINES = int(os.getenv("PDD_AGENTIC_MAX_LOG_LINES", "200")) # preview head truncation for logs
38
+
39
+ # When verification mode is "auto", we may run agent-supplied TESTCMD blocks (if emitted)
40
+ _AGENT_TESTCMD_ALLOWED = os.getenv("PDD_AGENTIC_AGENT_TESTCMD", "1") != "0"
41
+
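# A minimal sketch of how these knobs can be set, assuming they are exported before
# this module is imported (the values below are arbitrary examples, not defaults):
import os
os.environ.setdefault("PDD_AGENTIC_TIMEOUT", "600")         # allow longer agent calls
os.environ.setdefault("PDD_AGENTIC_VERIFY_TIMEOUT", "300")  # allow longer local verification
os.environ.setdefault("PDD_AGENTIC_LOGLEVEL", "verbose")    # print previews and diffs
os.environ.setdefault("PDD_AGENTIC_AGENT_TESTCMD", "0")     # ignore agent-supplied TESTCMD blocks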
42
+ def _print(msg: str, *, force: bool = False) -> None:
43
+ """Centralized print helper using Rich; suppressed in quiet mode unless force=True."""
44
+ if not _IS_QUIET or force:
45
+ console.print(msg)
46
+
47
+ def _info(msg: str) -> None:
48
+ """Informational log (respects quiet mode)."""
49
+ _print(msg)
50
+
51
+ def _always(msg: str) -> None:
52
+ """Always print (respects quiet mode toggle via _print)."""
53
+ _print(msg)
54
+
55
+ def _verbose(msg: str) -> None:
56
+ """Verbose-only print (print only when _IS_VERBOSE is True)."""
57
+ if _IS_VERBOSE:
58
+ console.print(msg)
59
+
60
+
61
+ def _detect_suspicious_files(cwd: Path, context: str = "") -> List[Path]:
62
+ """
63
+ Detect suspicious single-character files (like C, E, T) in a directory.
64
+
65
+ This is a diagnostic function to help identify when/where these files are created.
66
+ Issue #186: Empty files named C, E, T (first letters of Code, Example, Test)
67
+ have been appearing during agentic operations.
68
+
69
+ Args:
70
+ cwd: Directory to scan
71
+ context: Description of what operation just ran (for logging)
72
+
73
+ Returns:
74
+ List of suspicious file paths found
75
+ """
76
+ suspicious: List[Path] = []
77
+ try:
78
+ for f in cwd.iterdir():
79
+ if f.is_file() and len(f.name) <= 2 and not f.name.startswith('.'):
80
+ suspicious.append(f)
81
+
82
+ if suspicious:
83
+ import datetime
84
+ timestamp = datetime.datetime.now().isoformat()
85
+ _always(f"[bold red]⚠️ SUSPICIOUS FILES DETECTED (Issue #186)[/bold red]")
86
+ _always(f"[red]Timestamp: {timestamp}[/red]")
87
+ _always(f"[red]Context: {context}[/red]")
88
+ _always(f"[red]Directory: {cwd}[/red]")
89
+ for sf in suspicious:
90
+ try:
91
+ size = sf.stat().st_size
92
+ _always(f"[red] - {sf.name} (size: {size} bytes)[/red]")
93
+ except Exception:
94
+ _always(f"[red] - {sf.name} (could not stat)[/red]")
95
+
96
+ # Also log to a file for persistence
97
+ log_file = Path.home() / ".pdd" / "suspicious_files.log"
98
+ log_file.parent.mkdir(parents=True, exist_ok=True)
99
+ with open(log_file, "a") as lf:
100
+ lf.write(f"\n{'='*60}\n")
101
+ lf.write(f"Timestamp: {timestamp}\n")
102
+ lf.write(f"Context: {context}\n")
103
+ lf.write(f"Directory: {cwd}\n")
104
+ lf.write(f"CWD at detection: {Path.cwd()}\n")
105
+ for sf in suspicious:
106
+ try:
107
+ size = sf.stat().st_size
108
+ lf.write(f" - {sf.name} (size: {size} bytes)\n")
109
+ except Exception as e:
110
+ lf.write(f" - {sf.name} (error: {e})\n")
111
+ # Log stack trace to help identify caller
112
+ import traceback
113
+ lf.write("Stack trace:\n")
114
+ lf.write("".join(traceback.format_stack()[-10:]))
115
+ lf.write("\n")
116
+ except Exception as e:
117
+ _verbose(f"[yellow]Could not scan for suspicious files: {e}[/yellow]")
118
+
119
+ return suspicious
120
+
121
+
122
+ def _begin_marker(path: Path) -> str:
123
+ """Marker that must wrap the BEGIN of a corrected file block emitted by the agent."""
124
+ return f"<<<BEGIN_FILE:{path}>>>"
125
+
126
+ def _end_marker(path: Path) -> str:
127
+ """Marker that must wrap the END of a corrected file block emitted by the agent."""
128
+ return f"<<<END_FILE:{path}>>>"
129
+
130
+ def get_agent_command(provider: str, instruction_file: Path) -> List[str]:
131
+ """
132
+ Return a base CLI command for a provider when using the generic runner.
133
+ Note: Anthropic/Google are handled by specialized variant runners, so this often returns [].
134
+ """
135
+ p = provider.lower()
136
+ if p == "anthropic":
137
+ return []
138
+ if p == "google":
139
+ return []
140
+ if p == "openai":
141
+ return ["codex", "exec", "--skip-git-repo-check"]
142
+ return []
143
+
144
+ def find_llm_csv_path() -> Optional[Path]:
145
+ """Look for .pdd/llm_model.csv in $HOME first, then in project cwd."""
146
+ home_path = Path.home() / ".pdd" / "llm_model.csv"
147
+ project_path = Path.cwd() / ".pdd" / "llm_model.csv"
148
+ if home_path.is_file():
149
+ return home_path
150
+ if project_path.is_file():
151
+ return project_path
152
+ return None
153
+
154
+ def _print_head(label: str, text: str, max_lines: int = _MAX_LOG_LINES) -> None:
155
+ """
156
+ Print only the first N lines of a long blob with a label.
157
+ Active in verbose mode; keeps console noise manageable.
158
+ """
159
+ if not _IS_VERBOSE:
160
+ return
161
+ lines = (text or "").splitlines()
162
+ head = "\n".join(lines[:max_lines])
163
+ tail = "" if len(lines) <= max_lines else f"\n... (truncated, total {len(lines)} lines)"
164
+ console.print(f"[bold cyan]{label}[/bold cyan]\n{head}{tail}")
165
+
166
+ def _print_diff(old: str, new: str, path: Path) -> None:
167
+ """Show unified diff for a changed file (verbose mode only)."""
168
+ if not _IS_VERBOSE:
169
+ return
170
+ old_lines = old.splitlines(keepends=True)
171
+ new_lines = new.splitlines(keepends=True)
172
+ diff = list(difflib.unified_diff(old_lines, new_lines, fromfile=f"{path} (before)", tofile=f"{path} (after)"))
173
+ if not diff:
174
+ console.print("[yellow]No diff in code file after this agent attempt.[/yellow]")
175
+ return
176
+ text = "".join(diff)
177
+ _print_head("Unified diff (first lines)", text)
178
+
179
+ def _normalize_code_text(body: str) -> str:
180
+ """
181
+ Normalize agent-emitted file content:
182
+ - remove a single leading newline if present
183
+ - ensure exactly one trailing newline
184
+ """
185
+ if body.startswith("\n"):
186
+ body = body[1:]
187
+ body = body.rstrip("\n") + "\n"
188
+ return body
189
+
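# Quick illustration of the normalization above (example strings only):
assert _normalize_code_text("\nprint('hi')\n\n\n") == "print('hi')\n"  # one leading newline dropped, single trailing newline kept
assert _normalize_code_text("x = 1") == "x = 1\n"                      # trailing newline added when missing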
190
+ # Regex for many <<<BEGIN_FILE:path>>> ... <<<END_FILE:path>>> blocks in a single output
191
+ _MULTI_FILE_BLOCK_RE = re.compile(
192
+ r"<<<BEGIN_FILE:(.*?)>>>(.*?)<<<END_FILE:\1>>>",
193
+ re.DOTALL,
194
+ )
195
+
196
+
197
+ def _is_suspicious_path(path: str) -> bool:
198
+ """
199
+ Reject paths that look like LLM artifacts or template variables.
200
+
201
+ This defends against:
202
+ - Single/double character filenames (e.g., 'C', 'E', 'T' from agent misbehavior)
203
+ - Template variables like {path}, {code_abs} captured by regex
204
+ - Other LLM-generated garbage patterns
205
+
206
+ Returns True if the path should be rejected.
207
+ """
208
+ if not path:
209
+ return True
210
+ # Get the basename for validation
211
+ base_name = Path(path).name
212
+ # Reject single or double character filenames (too short to be legitimate)
213
+ if len(base_name) <= 2:
214
+ return True
215
+ # Reject template variable patterns like {path}, {code_abs}
216
+ if '{' in base_name or '}' in base_name:
217
+ return True
218
+ # Reject paths that are just dots like "..", "..."
219
+ if base_name.strip('.') == '':
220
+ return True
221
+ return False
222
+
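# Illustrative checks for the path filter above (example values, not taken from any run):
assert _is_suspicious_path("C") is True             # single-character artifact (Issue #186)
assert _is_suspicious_path("{code_abs}") is True    # unexpanded template variable
assert _is_suspicious_path("...") is True           # dots-only name
assert _is_suspicious_path("src/calc.py") is False  # ordinary project path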
223
+
224
+ def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
225
+ """
226
+ Parse stdout/stderr blobs and collect all emitted file blocks into {path: content}.
227
+ Returns an empty dict if none found.
228
+
229
+ Note: Suspicious paths (single-char, template variables) are rejected to prevent
230
+ LLM artifacts from being written to disk.
231
+ """
232
+ out: Dict[str, str] = {}
233
+ for blob in blobs:
234
+ if not blob:
235
+ continue
236
+ for m in _MULTI_FILE_BLOCK_RE.finditer(blob):
237
+ path = (m.group(1) or "").strip()
238
+ body = m.group(2) or ""
239
+ if path and body != "":
240
+ if _is_suspicious_path(path):
241
+ _info(f"[yellow]Skipping suspicious path from LLM output: {path!r}[/yellow]")
242
+ continue
243
+ out[path] = body
244
+ return out
245
+
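# A minimal sketch of the marker protocol parsed above; the agent output below is
# illustrative, not taken from a real run. Well-formed blocks are collected by path,
# and suspicious paths are dropped.
sample_blob = (
    "Some agent chatter...\n"
    "<<<BEGIN_FILE:src/calc.py>>>\n"
    "def add(a, b):\n"
    "    return a + b\n"
    "<<<END_FILE:src/calc.py>>>\n"
    "<<<BEGIN_FILE:C>>>junk<<<END_FILE:C>>>\n"  # rejected: single-character name
)
files = _extract_files_from_output(sample_blob)
assert list(files) == ["src/calc.py"]           # only the legitimate block survives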
246
+ # Regex for an optional agent-supplied test command block
247
+ _TESTCMD_RE = re.compile(
248
+ r"<<<BEGIN_TESTCMD>>>\s*(.*?)\s*<<<END_TESTCMD>>>",
249
+ re.DOTALL,
250
+ )
251
+
252
+ def _extract_testcmd(*blobs: str) -> Optional[str]:
253
+ """Return the single agent-supplied TESTCMD (if present), else None."""
254
+ for blob in blobs:
255
+ if not blob:
256
+ continue
257
+ m = _TESTCMD_RE.search(blob)
258
+ if m:
259
+ cmd = (m.group(1) or "").strip()
260
+ if cmd:
261
+ return cmd
262
+ return None
263
+
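# Small sketch of the optional TESTCMD block, assuming the agent chose to emit one
# (the command shown is an example):
blob = "analysis...\n<<<BEGIN_TESTCMD>>>\npytest -q tests/test_calc.py\n<<<END_TESTCMD>>>\n"
assert _extract_testcmd(blob) == "pytest -q tests/test_calc.py"
assert _extract_testcmd("no block here") is None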
264
+ def _extract_corrected_from_output(stdout: str, stderr: str, code_path: Path) -> Optional[str]:
265
+ """
266
+ Single-file fallback extraction: search for the corrected content block that
267
+ specifically targets the primary code file, using various path forms
268
+ (absolute path, real path, relative path, basename).
269
+ Returns the last match, or None if not found.
270
+ """
271
+ resolved = code_path.resolve()
272
+ abs_path = str(resolved)
273
+ real_path = str(Path(abs_path).resolve())
274
+ rel_path = str(code_path)
275
+ just_name = code_path.name
276
+
277
+ def _pattern_for(path_str: str) -> re.Pattern:
278
+ begin = re.escape(f"<<<BEGIN_FILE:{path_str}>>>")
279
+ end = re.escape(f"<<<END_FILE:{path_str}>>>")
280
+ return re.compile(begin + r"(.*?)" + end, re.DOTALL)
281
+
282
+ candidates = [
283
+ _pattern_for(abs_path),
284
+ _pattern_for(real_path),
285
+ _pattern_for(rel_path),
286
+ _pattern_for(just_name),
287
+ ]
288
+
289
+ matches: List[str] = []
290
+ for blob in [stdout or "", stderr or ""]:
291
+ for pat in candidates:
292
+ for m in pat.finditer(blob):
293
+ body = m.group(1) or ""
294
+ if body != "":
295
+ matches.append(body)
296
+
297
+ if not matches:
298
+ return None
299
+
300
+ # Filter out obvious placeholder template mistakes
301
+ placeholder_token = "FULL CORRECTED FILE CONTENT HERE"
302
+ filtered = [b for b in matches if placeholder_token.lower() not in b.lower()]
303
+ return filtered[-1] if filtered else matches[-1]
304
+
305
+ # Code fence (```python ... ```) fallback for providers that sometimes omit markers (e.g., Gemini)
306
+ _CODE_FENCE_RE = re.compile(r"```(?:python)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)
307
+
308
+ def _extract_python_code_block(*blobs: str) -> Optional[str]:
309
+ """Return the last fenced Python code block found in given blobs, or None."""
310
+ candidates: List[str] = []
311
+ for blob in blobs:
312
+ if not blob:
313
+ continue
314
+ for match in _CODE_FENCE_RE.findall(blob):
315
+ block = match or ""
316
+ if block != "":
317
+ candidates.append(block)
318
+ if not candidates:
319
+ return None
320
+ block = candidates[-1]
321
+ return block if block.endswith("\n") else (block + "\n")
322
+
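# Sketch of the code-fence fallback above: the last fenced block wins and a trailing
# newline is guaranteed (the chatter text is illustrative):
chatter = "Here you go:\n```python\nx = 1\n```\nRevised:\n```python\nx = 2\n```"
assert _extract_python_code_block(chatter) == "x = 2\n"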
323
+ def _sanitized_env_common() -> dict:
324
+ """
325
+ Build a deterministic, non-interactive env for subprocess calls:
326
+ - disable colors/TTY features
327
+ - provide small default terminal size
328
+ - mark as CI
329
+ """
330
+ env = os.environ.copy()
331
+ env["TERM"] = "dumb"
332
+ env["CI"] = "1"
333
+ env["NO_COLOR"] = "1"
334
+ env["CLICOLOR"] = "0"
335
+ env["CLICOLOR_FORCE"] = "0"
336
+ env["FORCE_COLOR"] = "0"
337
+ env["SHELL"] = "/bin/sh"
338
+ env["COLUMNS"] = env.get("COLUMNS", "80")
339
+ env["LINES"] = env.get("LINES", "40")
340
+ return env
341
+
342
+ def _sanitized_env_for_anthropic(use_cli_auth: bool = False) -> dict:
343
+ """
344
+ Like _sanitized_env_common, plus:
345
+ - optionally remove ANTHROPIC_API_KEY to force subscription auth via Claude CLI
346
+ """
347
+ env = _sanitized_env_common()
348
+ if use_cli_auth:
349
+ # Remove API key so Claude CLI uses subscription auth instead
350
+ env.pop("ANTHROPIC_API_KEY", None)
351
+ return env
352
+
353
+ def _sanitized_env_for_openai() -> dict:
354
+ """
355
+ Like _sanitized_env_common, plus:
356
+ - strip completion-related env vars that can affect behavior
357
+ - set OpenAI CLI no-tty/no-color flags
358
+ """
359
+ env = _sanitized_env_common()
360
+ for k in list(env.keys()):
361
+ if k.startswith("COMP_") or k in ("BASH_COMPLETION", "BASH_COMPLETION_COMPAT_DIR", "BASH_VERSION", "BASH", "ZDOTDIR", "ZSH_NAME", "ZSH_VERSION"):
362
+ env.pop(k, None)
363
+ env["DISABLE_AUTO_COMPLETE"] = "1"
364
+ env["OPENAI_CLI_NO_TTY"] = "1"
365
+ env["OPENAI_CLI_NO_COLOR"] = "1"
366
+ return env
367
+
368
+ def _run_cli(cmd: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
369
+ """
370
+ Generic subprocess runner for arbitrary CLI commands.
371
+ Captures stdout/stderr, returns CompletedProcess without raising on non-zero exit.
372
+ """
373
+ return subprocess.run(
374
+ cmd,
375
+ capture_output=True,
376
+ text=True,
377
+ check=False,
378
+ timeout=timeout,
379
+ cwd=str(cwd),
380
+ )
381
+
382
+ def _run_cli_args_openai(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
383
+ """Subprocess runner for OpenAI commands with OpenAI-specific sanitized env."""
384
+ return subprocess.run(
385
+ args,
386
+ capture_output=True,
387
+ text=True,
388
+ check=False,
389
+ timeout=timeout,
390
+ cwd=str(cwd),
391
+ env=_sanitized_env_for_openai(),
392
+ )
393
+
394
+ def _run_openai_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
395
+ """
396
+ Try several OpenAI CLI variants to improve robustness.
397
+ Returns the first attempt that yields output or succeeds.
398
+
399
+ NOTE: Agents need write access to modify files in agentic mode,
400
+ so we do not restrict the sandbox.
401
+ """
402
+ # Write prompt to a unique temp file to avoid race conditions in concurrent execution
403
+ with tempfile.NamedTemporaryFile(
404
+ mode='w',
405
+ suffix='.txt',
406
+ prefix='.agentic_prompt_',
407
+ dir=cwd,
408
+ delete=False,
409
+ encoding='utf-8'
410
+ ) as f:
411
+ f.write(prompt_text)
412
+ prompt_file = Path(f.name)
413
+
414
+ try:
415
+ # Agentic instruction that tells Codex to read the prompt file and fix
416
+ agentic_instruction = (
417
+ f"Read the file {prompt_file} for instructions on what to fix. "
418
+ "You have full file access to explore and modify files as needed. "
419
+ "After reading the instructions, fix the failing tests."
420
+ )
421
+
422
+ variants = [
423
+ ["codex", "exec", agentic_instruction],
424
+ ["codex", "exec", "--skip-git-repo-check", agentic_instruction],
425
+ ]
426
+ per_attempt = 300
427
+ last = None
428
+ for args in variants:
429
+ try:
430
+ _verbose(f"[cyan]OpenAI variant ({label}): {' '.join(args[:-1])} ...[/cyan]")
431
+ last = _run_cli_args_openai(args, cwd, per_attempt)
432
+ if (last.stdout or last.stderr) or last.returncode == 0:
433
+ return last
434
+ except subprocess.TimeoutExpired:
435
+ _info(f"[yellow]OpenAI variant timed out: {' '.join(args[:-1])} ...[/yellow]")
436
+ continue
437
+ if last is None:
438
+ return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
439
+ return last
440
+ finally:
441
+ prompt_file.unlink(missing_ok=True)
442
+
443
+ def _run_cli_args_anthropic(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
444
+ """Subprocess runner for Anthropic commands with subscription auth (removes API key)."""
445
+ return subprocess.run(
446
+ args,
447
+ capture_output=True,
448
+ text=True,
449
+ check=False,
450
+ timeout=timeout,
451
+ cwd=str(cwd),
452
+ env=_sanitized_env_for_anthropic(use_cli_auth=True),
453
+ )
454
+
455
+ def _run_anthropic_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
456
+ """
457
+ Anthropic CLI runner in agentic mode (without -p flag).
458
+
459
+ NOTE: We do NOT use -p (print mode) because it prevents file tool access.
460
+ Instead, we write the prompt to a file and let Claude read it in agentic mode.
461
+ """
462
+ # Write prompt to a unique temp file to avoid race conditions in concurrent execution
463
+ with tempfile.NamedTemporaryFile(
464
+ mode='w',
465
+ suffix='.txt',
466
+ prefix='.agentic_prompt_',
467
+ dir=cwd,
468
+ delete=False,
469
+ encoding='utf-8'
470
+ ) as f:
471
+ f.write(prompt_text)
472
+ prompt_file = Path(f.name)
473
+
474
+ try:
475
+ # Agentic instruction that tells Claude to read the prompt file and fix
476
+ agentic_instruction = (
477
+ f"Read the file {prompt_file} for instructions on what to fix. "
478
+ "You have full file access to explore and modify files as needed. "
479
+ "After reading the instructions, fix the failing tests."
480
+ )
481
+
482
+ variants = [
483
+ ["claude", "--dangerously-skip-permissions", agentic_instruction],
484
+ ]
485
+ per_attempt = 300
486
+ last: Optional[subprocess.CompletedProcess] = None
487
+ for args in variants:
488
+ try:
489
+ _verbose(f"[cyan]Anthropic variant ({label}): {' '.join(args[:-1])} ...[/cyan]")
490
+ last = _run_cli_args_anthropic(args, cwd, per_attempt)
491
+ if last.stdout or last.stderr or last.returncode == 0:
492
+ return last
493
+ except subprocess.TimeoutExpired:
494
+ _info(f"[yellow]Anthropic variant timed out: {' '.join(args[:-1])} ...[/yellow]")
495
+ continue
496
+ if last is None:
497
+ return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
498
+ return last
499
+ finally:
500
+ prompt_file.unlink(missing_ok=True)
501
+ # Issue #186: Scan for suspicious files after Anthropic agent runs
502
+ _detect_suspicious_files(cwd, f"After _run_anthropic_variants ({label})")
503
+ # Also scan project root in case agent created files there
504
+ project_root = Path.cwd()
505
+ if project_root != cwd:
506
+ _detect_suspicious_files(project_root, f"After _run_anthropic_variants ({label}) - project root")
507
+
508
+ def _run_cli_args_google(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
509
+ """Subprocess runner for Google commands with common sanitized env."""
510
+ return subprocess.run(
511
+ args,
512
+ capture_output=True,
513
+ text=True,
514
+ check=False,
515
+ timeout=timeout,
516
+ cwd=str(cwd),
517
+ env=_sanitized_env_common(),
518
+ )
519
+
520
+ def _run_google_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
521
+ """
522
+ Google CLI runner in agentic mode (without -p flag).
523
+
524
+ NOTE: We do NOT use -p (pipe mode) because it may prevent tool access.
525
+ Instead, we write the prompt to a file and let Gemini read it in agentic mode.
526
+ """
527
+ # Write prompt to a unique temp file to avoid race conditions in concurrent execution
528
+ with tempfile.NamedTemporaryFile(
529
+ mode='w',
530
+ suffix='.txt',
531
+ prefix='.agentic_prompt_',
532
+ dir=cwd,
533
+ delete=False,
534
+ encoding='utf-8'
535
+ ) as f:
536
+ f.write(prompt_text)
537
+ prompt_file = Path(f.name)
538
+
539
+ try:
540
+ # Agentic instruction that tells Gemini to read the prompt file and fix
541
+ agentic_instruction = (
542
+ f"Read the file {prompt_file} for instructions on what to fix. "
543
+ "You have full file access to explore and modify files as needed. "
544
+ "After reading the instructions, fix the failing tests."
545
+ )
546
+
547
+ variants = [
548
+ ["gemini", agentic_instruction],
549
+ ]
550
+ per_attempt = 300
551
+ last = None
552
+ for args in variants:
553
+ try:
554
+ _verbose(f"[cyan]Google variant ({label}): {' '.join(args)} ...[/cyan]")
555
+ last = _run_cli_args_google(args, cwd, per_attempt)
556
+ if (last.stdout or last.stderr) or last.returncode == 0:
557
+ return last
558
+ except subprocess.TimeoutExpired:
559
+ _info(f"[yellow]Google variant timed out: {' '.join(args)} ...[/yellow]")
560
+ continue
561
+ if last is None:
562
+ return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
563
+ return last
564
+ finally:
565
+ prompt_file.unlink(missing_ok=True)
566
+ # Issue #186: Scan for suspicious files after Google agent runs
567
+ _detect_suspicious_files(cwd, f"After _run_google_variants ({label})")
568
+ # Also scan project root in case agent created files there
569
+ project_root = Path.cwd()
570
+ if project_root != cwd:
571
+ _detect_suspicious_files(project_root, f"After _run_google_variants ({label}) - project root")
572
+
573
+ def _run_testcmd(cmd: str, cwd: Path) -> bool:
574
+ """
575
+ Execute an agent-supplied TESTCMD locally via bash -lc "<cmd>".
576
+ Return True on exit code 0, else False. Captures and previews output (verbose).
577
+ """
578
+ _info(f"[cyan]Executing agent-supplied test command:[/cyan] {cmd}")
579
+ proc = subprocess.run(
580
+ ["bash", "-lc", cmd],
581
+ capture_output=True,
582
+ text=True,
583
+ check=False,
584
+ timeout=_VERIFY_TIMEOUT,
585
+ cwd=str(cwd),
586
+ )
587
+ _print_head("testcmd stdout", proc.stdout or "")
588
+ _print_head("testcmd stderr", proc.stderr or "")
589
+ return proc.returncode == 0
590
+
591
+ def _verify_and_log(unit_test_file: str, cwd: Path, *, verify_cmd: Optional[str], enabled: bool) -> bool:
592
+ """
593
+ Standard local verification gate:
594
+ - If disabled, return True immediately (skip verification).
595
+ - If verify_cmd exists: format placeholders and run it via _run_testcmd.
596
+ - Else: run the file directly using the appropriate interpreter for its language.
597
+ Returns True iff the executed command exits 0.
598
+ """
599
+ if not enabled:
600
+ return True
601
+ if verify_cmd:
602
+ cmd = verify_cmd.replace("{test}", str(Path(unit_test_file).resolve())).replace("{cwd}", str(cwd))
603
+ return _run_testcmd(cmd, cwd)
604
+ # Get language-appropriate run command from language_format.csv
605
+ run_cmd = get_run_command_for_file(str(Path(unit_test_file).resolve()))
606
+ if run_cmd:
607
+ return _run_testcmd(run_cmd, cwd)
608
+ # Fallback: try running with Python if no run command found
609
+ verify = subprocess.run(
610
+ [sys.executable, str(Path(unit_test_file).resolve())],
611
+ capture_output=True,
612
+ text=True,
613
+ check=False,
614
+ timeout=_VERIFY_TIMEOUT,
615
+ cwd=str(cwd),
616
+ )
617
+ _print_head("verify stdout", verify.stdout or "")
618
+ _print_head("verify stderr", verify.stderr or "")
619
+ return verify.returncode == 0
620
+
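# Sketch of the placeholder substitution performed by the verification gate above;
# the command string is an example, not a shipped default:
example_cmd = "cd {cwd} && pytest -q {test}"
expanded = example_cmd.replace("{test}", "/repo/tests/test_calc.py").replace("{cwd}", "/repo")
assert expanded == "cd /repo && pytest -q /repo/tests/test_calc.py"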
621
+ def _safe_is_subpath(child: Path, parent: Path) -> bool:
622
+ """
623
+ True if 'child' resolves under 'parent' (prevents writes outside project root).
624
+ """
625
+ try:
626
+ child.resolve().relative_to(parent.resolve())
627
+ return True
628
+ except Exception:
629
+ return False
630
+
631
+ # Suffixes we strip when mapping "foo_fixed.py" -> "foo.py"
632
+ _COMMON_FIXED_SUFFIXES = ("_fixed", ".fixed", "-fixed")
633
+
634
+ def _strip_common_suffixes(name: str) -> str:
635
+ """Remove a known fixed-suffix from a basename (before extension), if present."""
636
+ base, ext = os.path.splitext(name)
637
+ for suf in _COMMON_FIXED_SUFFIXES:
638
+ if base.endswith(suf):
639
+ base = base[: -len(suf)]
640
+ break
641
+ return base + ext
642
+
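# Examples of the fixed-suffix mapping above (illustrative file names):
assert _strip_common_suffixes("calc_fixed.py") == "calc.py"
assert _strip_common_suffixes("calc.fixed.py") == "calc.py"
assert _strip_common_suffixes("calc.py") == "calc.py"  # unchanged when no suffix present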
643
+ def _find_existing_by_basename(project_root: Path, basename: str) -> Optional[Path]:
644
+ """Search the project tree for the first file whose name matches 'basename'."""
645
+ try:
646
+ for p in project_root.rglob(basename):
647
+ if p.is_file():
648
+ return p.resolve()
649
+ except Exception:
650
+ return None
651
+ return None
652
+
653
+ def _normalize_target_path(
654
+ emitted_path: str,
655
+ project_root: Path,
656
+ primary_code_path: Path,
657
+ allow_new: bool,
658
+ ) -> Optional[Path]:
659
+ """
660
+ Resolve an emitted path to a safe file path we should write:
661
+ - reject suspicious paths (single-char, template variables)
662
+ - make path absolute under project root
663
+ - allow direct match, primary-file match (with/without _fixed), or basename search
664
+ - create new files only if allow_new is True
665
+ """
666
+ # Early rejection of suspicious paths (defense against LLM artifacts)
667
+ if _is_suspicious_path(emitted_path):
668
+ _info(f"[yellow]Skipping suspicious path: {emitted_path!r}[/yellow]")
669
+ return None
670
+
671
+ p = Path(emitted_path)
672
+ if not p.is_absolute():
673
+ p = (project_root / emitted_path).resolve()
674
+ if not _safe_is_subpath(p, project_root):
675
+ _info(f"[yellow]Skipping write outside project root: {p}[/yellow]")
676
+ return None
677
+ if p.exists():
678
+ return p
679
+ emitted_base = Path(emitted_path).name
680
+ primary_base = primary_code_path.name
681
+ if emitted_base == primary_base:
682
+ return primary_code_path
683
+ if _strip_common_suffixes(emitted_base) == primary_base:
684
+ return primary_code_path
685
+ existing = _find_existing_by_basename(project_root, emitted_base)
686
+ if existing:
687
+ return existing
688
+ if not allow_new:
689
+ _info(f"[yellow]Skipping creation of new file (in-place only): {p}[/yellow]")
690
+ return None
691
+ return p
692
+
693
+ def _apply_file_map(
694
+ file_map: Dict[str, str],
695
+ project_root: Path,
696
+ primary_code_path: Path,
697
+ allow_new: bool,
698
+ ) -> List[Path]:
699
+ """
700
+ Apply a {emitted_path -> content} mapping to disk:
701
+ - resolve a safe target path
702
+ - normalize content
703
+ - write file and print unified diff (verbose)
704
+ Returns a list of the written Paths.
705
+ """
706
+ applied: List[Path] = []
707
+ for emitted, body in file_map.items():
708
+ target = _normalize_target_path(emitted, project_root, primary_code_path, allow_new)
709
+ if target is None:
710
+ continue
711
+ body_to_write = _normalize_code_text(body)
712
+ old = ""
713
+ if target.exists():
714
+ try:
715
+ old = target.read_text(encoding="utf-8")
716
+ except Exception:
717
+ old = ""
718
+ target.parent.mkdir(parents=True, exist_ok=True)
719
+ target.write_text(body_to_write, encoding="utf-8")
720
+ _print_diff(old, body_to_write, target)
721
+ applied.append(target)
722
+ return applied
723
+
724
+ def _post_apply_verify_or_testcmd(
725
+ provider: str,
726
+ unit_test_file: str,
727
+ cwd: Path,
728
+ *,
729
+ verify_cmd: Optional[str],
730
+ verify_enabled: bool,
731
+ stdout: str,
732
+ stderr: str,
733
+ ) -> bool:
734
+ """
735
+ After applying changes, run standard verification.
736
+ If it fails and TESTCMDs are allowed, try running the agent-supplied TESTCMD.
737
+ Return True iff any verification path succeeds.
738
+ """
739
+ # 1) If standard verification is enabled, use it
740
+ if _verify_and_log(unit_test_file, cwd, verify_cmd=verify_cmd, enabled=verify_enabled):
741
+ return True
742
+ # 2) Otherwise (or if disabled/failed) try agent-supplied TESTCMD if allowed
743
+ if _AGENT_TESTCMD_ALLOWED:
744
+ testcmd = _extract_testcmd(stdout or "", stderr or "")
745
+ if testcmd:
746
+ return _run_testcmd(testcmd, cwd)
747
+ return False
748
+
749
+ def _snapshot_mtimes(root: Path) -> Dict[Path, float]:
750
+ """Record mtimes of all files in root."""
751
+ snapshot = {}
752
+ try:
753
+ for p in root.rglob("*"):
754
+ if ".git" in p.parts or "__pycache__" in p.parts:
755
+ continue
756
+ if p.is_file():
757
+ snapshot[p] = p.stat().st_mtime
758
+ except Exception:
759
+ pass
760
+ return snapshot
761
+
762
+ def _detect_mtime_changes(root: Path, snapshot: Dict[Path, float]) -> List[str]:
763
+ """Return list of changed/new file paths."""
764
+ changes = []
765
+ try:
766
+ for p in root.rglob("*"):
767
+ if ".git" in p.parts or "__pycache__" in p.parts:
768
+ continue
769
+ if p.is_file():
770
+ if p not in snapshot:
771
+ changes.append(str(p))
772
+ elif p.stat().st_mtime != snapshot[p]:
773
+ changes.append(str(p))
774
+ except Exception:
775
+ pass
776
+ return changes
777
+
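# Minimal sketch of the snapshot/compare pair above, assuming some intermediate step
# (an agent run, for example) may touch files in between; the path is a placeholder:
before = _snapshot_mtimes(Path("."))
# ... run an agent or any other step that may modify the tree ...
touched = _detect_mtime_changes(Path("."), before)
for changed_path in touched:
    print("changed or new:", changed_path)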
778
+ def _try_harvest_then_verify(
779
+ provider: str,
780
+ code_path: Path,
781
+ unit_test_file: str,
782
+ code_snapshot: str,
783
+ prompt_content: str,
784
+ test_content: str,
785
+ error_content: str,
786
+ cwd: Path,
787
+ *,
788
+ verify_cmd: Optional[str],
789
+ verify_enabled: bool,
790
+ changed_files: List[str],
791
+ ) -> bool:
792
+ """
793
+ Strict, fast path:
794
+ - Ask agent to ONLY emit corrected file blocks (and optionally TESTCMD).
795
+ - Apply emitted results deterministically.
796
+ - Verify locally.
797
+ """
798
+ harvest_prompt_template = load_prompt_template("agentic_fix_harvest_only_LLM")
799
+ if not harvest_prompt_template:
800
+ _info("[yellow]Failed to load harvest-only agent prompt template.[/yellow]")
801
+ return False
802
+
803
+ harvest_instr = harvest_prompt_template.format(
804
+ code_abs=str(code_path),
805
+ test_abs=str(Path(unit_test_file).resolve()),
806
+ begin=_begin_marker(code_path),
807
+ end=_end_marker(code_path),
808
+ code_content=code_snapshot,
809
+ prompt_content=prompt_content,
810
+ test_content=test_content,
811
+ error_content=error_content,
812
+ verify_cmd=verify_cmd or "No verification command provided.",
813
+ )
814
+ harvest_file = Path("agentic_fix_harvest.txt")
815
+ harvest_file.write_text(harvest_instr, encoding="utf-8")
816
+ _info(f"[cyan]Executing {provider.capitalize()} with harvest-only instructions: {harvest_file.resolve()}[/cyan]")
817
+ _print_head("Harvest-only instruction preview", harvest_instr)
818
+
819
+ # Snapshot mtimes before agent run
820
+ mtime_snapshot = _snapshot_mtimes(cwd)
821
+
822
+ try:
823
+ # Provider-specific variant runners with shorter time budgets
824
+ if provider == "openai":
825
+ res = _run_openai_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
826
+ elif provider == "anthropic":
827
+ res = _run_anthropic_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
828
+ elif provider == "google":
829
+ res = _run_google_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
830
+ else:
831
+ res = _run_cli(get_agent_command(provider, harvest_file), cwd, max(60, _AGENT_CALL_TIMEOUT // 2))
832
+ except subprocess.TimeoutExpired:
833
+ _info(f"[yellow]{provider.capitalize()} harvest-only attempt timed out.[/yellow]")
834
+ try:
835
+ harvest_file.unlink()
836
+ except Exception:
837
+ pass
838
+ return False
839
+
840
+ _print_head(f"{provider.capitalize()} harvest stdout", res.stdout or "")
841
+ _print_head(f"{provider.capitalize()} harvest stderr", res.stderr or "")
842
+
843
+ # Detect direct changes by agent
844
+ direct_changes = _detect_mtime_changes(cwd, mtime_snapshot)
845
+ changed_files.extend(direct_changes)
846
+
847
+ allow_new = True
848
+
849
+ # Prefer multi-file blocks; else try single-file; else Gemini code-fence fallback
850
+ multi = _extract_files_from_output(res.stdout or "", res.stderr or "")
851
+ if multi:
852
+ _info("[cyan]Applying multi-file harvest from agent output...[/cyan]")
853
+ applied = _apply_file_map(multi, cwd, code_path, allow_new)
854
+ changed_files.extend([str(p) for p in applied])
855
+ ok = _post_apply_verify_or_testcmd(
856
+ provider, unit_test_file, cwd,
857
+ verify_cmd=verify_cmd, verify_enabled=verify_enabled,
858
+ stdout=res.stdout or "", stderr=res.stderr or ""
859
+ )
860
+ try:
861
+ harvest_file.unlink()
862
+ except Exception:
863
+ pass
864
+ return ok
865
+
866
+ harvested_single = _extract_corrected_from_output(res.stdout or "", res.stderr or "", code_path.resolve())
867
+ if harvested_single is None:
868
+ if provider == "google":
869
+ code_block = _extract_python_code_block(res.stdout or "", res.stderr or "")
870
+ if code_block:
871
+ _info("[cyan]No markers found, but detected a Python code block from Google. Applying it...[/cyan]")
872
+ body_to_write = _normalize_code_text(code_block)
873
+ code_path.write_text(body_to_write, encoding="utf-8")
874
+ changed_files.append(str(code_path))
875
+ newest = code_path.read_text(encoding="utf-8")
876
+ _print_diff(code_snapshot, newest, code_path)
877
+ ok = _post_apply_verify_or_testcmd(
878
+ provider, unit_test_file, cwd,
879
+ verify_cmd=verify_cmd, verify_enabled=verify_enabled,
880
+ stdout=res.stdout or "", stderr=res.stderr or ""
881
+ )
882
+ try:
883
+ harvest_file.unlink()
884
+ except Exception:
885
+ pass
886
+ return ok
887
+
888
+ # If no output blocks, but direct changes occurred, we should verify
889
+ if direct_changes:
890
+ _info("[cyan]No output markers found, but detected file changes. Verifying...[/cyan]")
891
+ ok = _post_apply_verify_or_testcmd(
892
+ provider, unit_test_file, cwd,
893
+ verify_cmd=verify_cmd, verify_enabled=verify_enabled,
894
+ stdout=res.stdout or "", stderr=res.stderr or ""
895
+ )
896
+ try:
897
+ harvest_file.unlink()
898
+ except Exception:
899
+ pass
900
+ return ok
901
+
902
+ _info("[yellow]Harvest-only attempt did not include the required markers.[/yellow]")
903
+ try:
904
+ harvest_file.unlink()
905
+ except Exception:
906
+ pass
907
+ return False
908
+
909
+ _info("[cyan]Applying harvested corrected file (single)...[/cyan]")
910
+ body_to_write = _normalize_code_text(harvested_single)
911
+ code_path.write_text(body_to_write, encoding="utf-8")
912
+ changed_files.append(str(code_path))
913
+ newest = code_path.read_text(encoding="utf-8")
914
+ _print_diff(code_snapshot, newest, code_path)
915
+
916
+ ok = _post_apply_verify_or_testcmd(
917
+ provider, unit_test_file, cwd,
918
+ verify_cmd=verify_cmd, verify_enabled=verify_enabled,
919
+ stdout=res.stdout or "", stderr=res.stderr or ""
920
+ )
921
+ try:
922
+ harvest_file.unlink()
923
+ except Exception:
924
+ pass
925
+ return ok
926
+
927
+ def run_agentic_fix(
928
+ prompt_file: str,
929
+ code_file: str,
930
+ unit_test_file: str,
931
+ error_log_file: str,
932
+ verify_cmd: Optional[str] = None,
933
+ cwd: Optional[Path] = None,
934
+ *,
935
+ verbose: bool = False,
936
+ quiet: bool = False,
937
+ ) -> Tuple[bool, str, float, str, List[str]]:
938
+ """
939
+ Main entrypoint for agentic fallback:
940
+ - Prepares inputs and prompt (with code/tests/error log)
941
+ - Optionally preflight-populates error log if empty (so agent sees failures)
942
+ - Tries providers in preference order: harvest-first, then primary attempt
943
+ - Applies changes locally and verifies locally
944
+ - Returns (success, message, est_cost, used_model, changed_files)
945
+ """
946
+ global _IS_VERBOSE, _IS_QUIET
947
+ if verbose:
948
+ _IS_VERBOSE = True
949
+ _IS_QUIET = False
950
+ elif quiet:
951
+ _IS_QUIET = True
952
+ _IS_VERBOSE = False
953
+
954
+ _always("[bold yellow]Standard fix failed. Initiating agentic fallback (AGENT-ONLY)...[/bold yellow]")
955
+
956
+ instruction_file: Optional[Path] = None
957
+ est_cost: float = 0.0
958
+ used_model: str = "agentic-cli"
959
+ changed_files: List[str] = [] # Track all files changed by agents
960
+
961
+ try:
962
+ # Use explicit cwd if provided, otherwise fall back to current directory
963
+ working_dir = Path(cwd) if cwd else Path.cwd()
964
+ _info(f"[cyan]Project root (cwd): {working_dir}[/cyan]")
965
+
966
+ # Load provider table and filter to those with API keys present in the environment
967
+ csv_path = find_llm_csv_path()
968
+ model_df = _load_model_data(csv_path)
969
+
970
+ available_agents: List[str] = []
971
+ present_keys: List[str] = []
972
+ seen = set()
973
+
974
+ for provider in AGENT_PROVIDER_PREFERENCE:
975
+ provider_df = model_df[model_df["provider"].str.lower() == provider]
976
+ if provider_df.empty:
977
+ continue
978
+ api_key_name = provider_df.iloc[0]["api_key"]
979
+ if not api_key_name:
980
+ continue
981
+ # Check CLI availability first (subscription auth), then API key
982
+ has_cli_auth = provider == "anthropic" and shutil.which("claude")
983
+ has_api_key = os.getenv(api_key_name) or (provider == "google" and os.getenv("GEMINI_API_KEY"))
984
+ if has_cli_auth or has_api_key:
985
+ if has_cli_auth:
986
+ present_keys.append("claude-cli-auth")
987
+ else:
988
+ present_keys.append(api_key_name or ("GEMINI_API_KEY" if provider == "google" else ""))
989
+ if provider not in seen:
990
+ available_agents.append(provider)
991
+ seen.add(provider)
992
+
993
+ _info(f"[cyan]Env API keys present (names only): {', '.join([k for k in present_keys if k]) or 'none'}[/cyan]")
994
+ if not available_agents:
995
+ return False, "No configured agent API keys found in environment.", est_cost, used_model, changed_files
996
+
997
+ _info(f"[cyan]Available agents found: {', '.join(available_agents)}[/cyan]")
998
+
999
+ # Read input artifacts that feed into the prompt
1000
+ prompt_content = Path(prompt_file).read_text(encoding="utf-8")
1001
+
1002
+ # Resolve relative paths against working_dir, not Path.cwd()
1003
+ code_path_input = Path(code_file)
1004
+ if not code_path_input.is_absolute():
1005
+ code_path = (working_dir / code_path_input).resolve()
1006
+ else:
1007
+ code_path = code_path_input.resolve()
1008
+
1009
+ test_path_input = Path(unit_test_file)
1010
+ if not test_path_input.is_absolute():
1011
+ test_path = (working_dir / test_path_input).resolve()
1012
+ else:
1013
+ test_path = test_path_input.resolve()
1014
+
1015
+ orig_code = code_path.read_text(encoding="utf-8")
1016
+ orig_test = test_path.read_text(encoding="utf-8")
1017
+ test_content = orig_test # Alias for prompt template compatibility
1018
+
1019
+ # Read error log if it exists, otherwise we'll populate it via preflight
1020
+ error_log_path = Path(error_log_file)
1021
+ error_content = error_log_path.read_text(encoding="utf-8") if error_log_path.exists() else ""
1022
+
1023
+ # --- Preflight: populate error_content if empty so the agent sees fresh failures ---
1024
+ # This makes run_agentic_fix self-sufficient even if the caller forgot to write the error log.
1025
+ # Also detect useless content patterns like empty XML tags (e.g., "<history></history>")
1026
+ def _is_useless_error_content(content: str) -> bool:
1027
+ """Check if error content is empty or useless (e.g., empty XML tags)."""
1028
+ stripped = (content or "").strip()
1029
+ if not stripped:
1030
+ return True
1031
+ # Detect empty XML-like tags with no actual error content
1032
+ import re
1033
+ # Remove all XML-like empty tags and whitespace
1034
+ cleaned = re.sub(r"<[^>]+>\s*</[^>]+>", "", stripped).strip()
1035
+ if not cleaned:
1036
+ return True
1037
+ # Check if content lacks any traceback or error keywords
1038
+ error_indicators = ["Error", "Exception", "Traceback", "failed", "FAILED", "error:"]
1039
+ return not any(ind in content for ind in error_indicators)
1040
+
1041
+ if _is_useless_error_content(error_content):
1042
+ try:
1043
+ lang = get_language(os.path.splitext(code_path)[1])
1044
+ pre_cmd = os.getenv("PDD_AGENTIC_VERIFY_CMD") or default_verify_cmd_for(lang, unit_test_file)
1045
+ if pre_cmd:
1046
+ pre_cmd = pre_cmd.replace("{test}", str(Path(unit_test_file).resolve())).replace("{cwd}", str(working_dir))
1047
+ pre = subprocess.run(
1048
+ ["bash", "-lc", pre_cmd],
1049
+ capture_output=True,
1050
+ text=True,
1051
+ check=False,
1052
+ timeout=_VERIFY_TIMEOUT,
1053
+ cwd=str(working_dir),
1054
+ )
1055
+ else:
1056
+ # Use language-appropriate run command from language_format.csv
1057
+ run_cmd = get_run_command_for_file(str(Path(unit_test_file).resolve()))
1058
+ if run_cmd:
1059
+ pre = subprocess.run(
1060
+ ["bash", "-lc", run_cmd],
1061
+ capture_output=True,
1062
+ text=True,
1063
+ check=False,
1064
+ timeout=_VERIFY_TIMEOUT,
1065
+ cwd=str(working_dir),
1066
+ )
1067
+ else:
1068
+ # Fallback: run directly with Python interpreter
1069
+ pre = subprocess.run(
1070
+ [sys.executable, str(Path(unit_test_file).resolve())],
1071
+ capture_output=True,
1072
+ text=True,
1073
+ check=False,
1074
+ timeout=_VERIFY_TIMEOUT,
1075
+ cwd=str(working_dir),
1076
+ )
1077
+ error_content = (pre.stdout or "") + "\n" + (pre.stderr or "")
1078
+ try:
1079
+ Path(error_log_file).write_text(error_content, encoding="utf-8")
1080
+ except Exception:
1081
+ pass
1082
+ _print_head("preflight verify stdout", pre.stdout or "")
1083
+ _print_head("preflight verify stderr", pre.stderr or "")
1084
+ except Exception as e:
1085
+ _info(f"[yellow]Preflight verification failed: {e}. Proceeding with empty error log.[/yellow]")
1086
+ # --- End preflight ---
1087
+
1088
+ # Compute verification policy and command
1089
+ ext = code_path.suffix.lower()
1090
+ is_python = ext == ".py"
1091
+
1092
+ env_verify = os.getenv("PDD_AGENTIC_VERIFY", None) # "auto"/"0"/"1"/None
1093
+ verify_force = os.getenv("PDD_AGENTIC_VERIFY_FORCE", "0") == "1"
1094
+
1095
+ # If verify_cmd arg is provided, it overrides env var and default
1096
+ if verify_cmd is None:
1097
+ verify_cmd = os.getenv("PDD_AGENTIC_VERIFY_CMD", None)
1098
+
1099
+ if verify_cmd is None:
1100
+ verify_cmd = default_verify_cmd_for(get_language(os.path.splitext(code_path)[1]), unit_test_file)
1101
+
1102
+ # Load primary prompt template
1103
+ primary_prompt_template = load_prompt_template("agentic_fix_primary_LLM")
1104
+ if not primary_prompt_template:
1105
+ return False, "Failed to load primary agent prompt template.", est_cost, used_model, changed_files
1106
+
1107
+ # Fill primary instruction (includes code/tests/error/markers/verify_cmd hint)
1108
+ primary_instr = primary_prompt_template.format(
1109
+ code_abs=str(code_path),
1110
+ test_abs=str(Path(unit_test_file).resolve()),
1111
+ begin=_begin_marker(code_path),
1112
+ end=_end_marker(code_path),
1113
+ prompt_content=prompt_content,
1114
+ code_content=orig_code,
1115
+ test_content=test_content,
1116
+ error_content=error_content,
1117
+ verify_cmd=verify_cmd or "No verification command provided.",
1118
+ )
1119
+ instruction_file = working_dir / "agentic_fix_instructions.txt"
1120
+ instruction_file.write_text(primary_instr, encoding="utf-8")
1121
+ _info(f"[cyan]Instruction file: {instruction_file.resolve()} ({instruction_file.stat().st_size} bytes)[/cyan]")
1122
+ _print_head("Instruction preview", primary_instr)
1123
+
1124
+ # Decide verification enablement
1125
+ if verify_force:
1126
+ verify_enabled = True
1127
+ # If a verification command is present (from user or defaults), ALWAYS enable verification.
1128
+ elif verify_cmd:
1129
+ verify_enabled = True
1130
+ else:
1131
+ if env_verify is None:
1132
+ # AUTO mode: if not explicitly disabled, allow agent-supplied TESTCMD
1133
+ verify_enabled = True
1134
+ elif env_verify.lower() == "auto":
1135
+ verify_enabled = False
1136
+ else:
1137
+ verify_enabled = (env_verify != "0")
1138
+
1139
+ allow_new = True # allow creating new support files when the agent emits them
1140
+
1141
+ # Try each available agent in order
1142
+ for provider in available_agents:
1143
+ used_model = f"agentic-{provider}"
1144
+ cmd = get_agent_command(provider, instruction_file)
1145
+ binary = (cmd[0] if cmd else {"anthropic": "claude", "google": "gemini", "openai": "codex"}.get(provider, ""))
1146
+ cli_path = shutil.which(binary) or "NOT-IN-PATH"
1147
+ _info(f"[cyan]Attempting fix with {provider.capitalize()} agent...[/cyan]")
1148
+ if _IS_VERBOSE:
1149
+ _verbose(f"[cyan]CLI binary: {binary} -> {cli_path}[/cyan]")
1150
+ if cmd:
1151
+ _verbose(f"Executing (cwd={working_dir}): {' '.join(cmd)}")
1152
+
1153
+ # Skip if the provider CLI is not available on PATH
1154
+ if cli_path == "NOT-IN-PATH":
1155
+ _info(f"[yellow]Skipping {provider.capitalize()} (CLI '{binary}' not found in PATH).[/yellow]")
1156
+ continue
1157
+
1158
+ # PRIMARY-FIRST: Try the full agent approach first (allows exploration, debugging)
1159
+ _info(f"[cyan]Trying primary approach with {provider.capitalize()}...[/cyan]")
1160
+ est_cost += _AGENT_COST_PER_CALL
1161
+
1162
+ # Snapshot mtimes before agent run
1163
+ mtime_snapshot = _snapshot_mtimes(working_dir)
1164
+
1165
+ try:
1166
+ if provider == "openai":
1167
+ res = _run_openai_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
1168
+ elif provider == "anthropic":
1169
+ res = _run_anthropic_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
1170
+ elif provider == "google":
1171
+ res = _run_google_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
1172
+ else:
1173
+ res = _run_cli(cmd, working_dir, _AGENT_CALL_TIMEOUT)
1174
+ except subprocess.TimeoutExpired:
1175
+ _info(f"[yellow]{provider.capitalize()} agent timed out after {_AGENT_CALL_TIMEOUT}s. Trying next...[/yellow]")
1176
+ continue
1177
+
1178
+ _print_head(f"{provider.capitalize()} stdout", res.stdout or "")
1179
+ _print_head(f"{provider.capitalize()} stderr", res.stderr or "")
1180
+
1181
+ # Detect direct changes by agent
1182
+ direct_changes = _detect_mtime_changes(working_dir, mtime_snapshot)
1183
+ changed_files.extend(direct_changes)
1184
+
1185
+ # Parse emitted changes (multi-file preferred)
1186
+ multi = _extract_files_from_output(res.stdout or "", res.stderr or "")
1187
+ if multi:
1188
+ _info("[cyan]Detected multi-file corrected content (primary attempt). Applying...[/cyan]")
1189
+ applied = _apply_file_map(multi, working_dir, code_path, allow_new)
1190
+ changed_files.extend([str(p) for p in applied])
1191
+ else:
1192
+ # Single-file fallback or Gemini code fence
1193
+ harvested = _extract_corrected_from_output(res.stdout or "", res.stderr or "", code_path.resolve())
1194
+ if harvested is not None:
1195
+ _info("[cyan]Detected corrected file content in agent output (primary attempt). Applying patch...[/cyan]")
1196
+ body_to_write = _normalize_code_text(harvested)
1197
+ code_path.write_text(body_to_write, encoding="utf-8")
1198
+ changed_files.append(str(code_path))
1199
+ elif provider == "google":
1200
+ code_block = _extract_python_code_block(res.stdout or "", res.stderr or "")
1201
+ if code_block:
1202
+ _info("[cyan]Detected a Python code block from Google (no markers). Applying patch...[/cyan]")
1203
+ body_to_write = _normalize_code_text(code_block)
1204
+ code_path.write_text(body_to_write, encoding="utf-8")
1205
+ changed_files.append(str(code_path))
1206
+
1207
+ # Show diff (verbose) and decide whether to verify
1208
+ new_code = code_path.read_text(encoding="utf-8")
1209
+ new_test = test_path.read_text(encoding="utf-8")
1210
+ _print_diff(orig_code, new_code, code_path)
1211
+ if new_test != orig_test:
1212
+ _print_diff(orig_test, new_test, test_path)
1213
+ if str(test_path) not in changed_files:
1214
+ changed_files.append(str(test_path))
1215
+
1216
+ # Proceed to verify if: agent returned 0, OR either file changed, OR markers found, OR direct changes
1217
+ code_changed = new_code != orig_code
1218
+ test_changed = new_test != orig_test
1219
+ proceed_to_verify = (res.returncode == 0) or code_changed or test_changed or bool(multi) or bool(direct_changes)
1220
+ if proceed_to_verify:
1221
+ ok = _post_apply_verify_or_testcmd(
1222
+ provider, unit_test_file, working_dir,
1223
+ verify_cmd=verify_cmd, verify_enabled=verify_enabled,
1224
+ stdout=res.stdout or "", stderr=res.stderr or ""
1225
+ )
1226
+ if ok:
1227
+ _always(f"[bold green]{provider.capitalize()} agent completed successfully and tests passed.[/bold green]")
1228
+ try:
1229
+ instruction_file.unlink()
1230
+ except Exception:
1231
+ pass
1232
+ return True, f"Agentic fix successful with {provider.capitalize()}.", est_cost, used_model, changed_files
1233
+
1234
+ # PRIMARY FAILED - Try harvest as a quick fallback before moving to next provider
1235
+ if provider in ("google", "openai", "anthropic"):
1236
+ _info("[yellow]Primary attempt did not pass; trying harvest fallback...[/yellow]")
1237
+ est_cost += _AGENT_COST_PER_CALL
1238
+ try:
1239
+ if _try_harvest_then_verify(
1240
+ provider,
1241
+ code_path,
1242
+ unit_test_file,
1243
+ orig_code,
1244
+ prompt_content,
1245
+ test_content,
1246
+ error_content,
1247
+ working_dir,
1248
+ verify_cmd=verify_cmd,
1249
+ verify_enabled=verify_enabled,
1250
+ changed_files=changed_files,
1251
+ ):
1252
+ try:
1253
+ instruction_file.unlink()
1254
+ except Exception:
1255
+ pass
1256
+ return True, f"Agentic fix successful with {provider.capitalize()} (harvest fallback).", est_cost, used_model, changed_files
1257
+ except subprocess.TimeoutExpired:
1258
+ _info(f"[yellow]{provider.capitalize()} harvest fallback timed out.[/yellow]")
1259
+
1260
+ # Prepare for next iteration/provider: update baseline code snapshot
1261
+ orig_code = new_code
1262
+ _info(f"[yellow]{provider.capitalize()} attempt did not yield a passing test. Trying next...[/yellow]")
1263
+
1264
+ # No providers managed to pass verification
1265
+ try:
1266
+ if instruction_file and instruction_file.exists():
1267
+ instruction_file.unlink()
1268
+ except Exception:
1269
+ pass
1270
+ return False, "All agents failed to produce a passing fix (no local fallback).", est_cost, used_model, changed_files
1271
+
1272
+ except FileNotFoundError as e:
1273
+ # Common failure: provider CLI not installed/in PATH, or missing input files
1274
+ msg = f"A required file or command was not found: {e}. Is the agent CLI installed and in your PATH?"
1275
+ _always(f"[bold red]Error:[/bold red] {msg}")
1276
+ try:
1277
+ if instruction_file and instruction_file.exists():
1278
+ instruction_file.unlink()
1279
+ except Exception:
1280
+ pass
1281
+ return False, msg, 0.0, "agentic-cli", changed_files
1282
+ except Exception as e:
1283
+ # Safety net for any unexpected runtime error
1284
+ _always(f"[bold red]An unexpected error occurred during agentic fix:[/bold red] {e}")
1285
+ try:
1286
+ if instruction_file and instruction_file.exists():
1287
+ instruction_file.unlink()
1288
+ except Exception:
1289
+ pass
1290
+ return False, str(e), 0.0, "agentic-cli", changed_files
1291
+
1292
+ # Back-compat public alias for tests/consumers
1293
+ # Expose the harvest function under a stable name used by earlier code/tests.
1294
+ try_harvest_then_verify = _try_harvest_then_verify
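# A minimal calling sketch for run_agentic_fix, assuming the four input files exist and
# at least one provider CLI (claude / gemini / codex) is installed; the file names and
# verify command below are placeholders, not defaults shipped with pdd.
success, message, est_cost, used_model, changed = run_agentic_fix(
    prompt_file="prompts/calc_python.prompt",
    code_file="src/calc.py",
    unit_test_file="tests/test_calc.py",
    error_log_file="logs/fix_errors.log",
    verify_cmd="pytest -q {test}",  # optional; {test} and {cwd} are expanded before running
    verbose=True,
)
print(used_model, f"${est_cost:.2f}", message)
for path in changed:
    print("touched:", path)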