pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/agentic_fix.py
ADDED
|
@@ -0,0 +1,1294 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import difflib
|
|
9
|
+
import tempfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Tuple, List, Optional, Dict
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
|
|
14
|
+
from .get_language import get_language # Detects language from file extension (e.g., ".py" -> "python")
|
|
15
|
+
from .get_run_command import get_run_command_for_file # Gets run command for a file based on extension
|
|
16
|
+
from .llm_invoke import _load_model_data # Loads provider/model metadata from llm_model.csv
|
|
17
|
+
from .load_prompt_template import load_prompt_template # Loads prompt templates by name
|
|
18
|
+
from .agentic_langtest import default_verify_cmd_for # Provides a default verify command (per language)
|
|
19
|
+
|
|
20
|
+
# Shared Rich console for all module output.
console = Console()

# Provider selection order. The code will try agents in this sequence if keys/CLIs are present.
AGENT_PROVIDER_PREFERENCE = ["anthropic", "google", "openai"]

# Logging level selection; defaults to "quiet" under pytest, else "normal".
# PDD_AGENTIC_LOGLEVEL overrides; recognized values are "quiet" and "verbose"
# (anything else behaves as "normal").
_env_level = os.getenv("PDD_AGENTIC_LOGLEVEL")
if _env_level is None and os.getenv("PYTEST_CURRENT_TEST"):
    _env_level = "quiet"
_LOGLEVEL = (_env_level or "normal").strip().lower()
_IS_QUIET = _LOGLEVEL == "quiet"
_IS_VERBOSE = _LOGLEVEL == "verbose"

# Tunable knobs via env
_AGENT_COST_PER_CALL = float(os.getenv("PDD_AGENTIC_COST_PER_CALL", "0.02"))  # estimated cost accounting
_AGENT_CALL_TIMEOUT = int(os.getenv("PDD_AGENTIC_TIMEOUT", "240"))  # timeout (s) for each agent call
_VERIFY_TIMEOUT = int(os.getenv("PDD_AGENTIC_VERIFY_TIMEOUT", "120"))  # timeout (s) for local verification step
_MAX_LOG_LINES = int(os.getenv("PDD_AGENTIC_MAX_LOG_LINES", "200"))  # preview head truncation for logs

# When verification mode is "auto", we may run agent-supplied TESTCMD blocks (if emitted)
_AGENT_TESTCMD_ALLOWED = os.getenv("PDD_AGENTIC_AGENT_TESTCMD", "1") != "0"
|
|
41
|
+
|
|
42
|
+
def _print(msg: str, *, force: bool = False) -> None:
    """Emit *msg* through the shared Rich console.

    Output is suppressed while quiet logging is active, unless force=True.
    """
    if _IS_QUIET and not force:
        return
    console.print(msg)
|
|
46
|
+
|
|
47
|
+
def _info(msg: str) -> None:
    """Log an informational message; suppressed in quiet mode."""
    _print(msg)
|
|
50
|
+
|
|
51
|
+
def _always(msg: str) -> None:
    """Log a message meant for every log level.

    Despite the name, output still flows through _print without force=True,
    so it is suppressed in quiet mode.
    """
    _print(msg)
|
|
54
|
+
|
|
55
|
+
def _verbose(msg: str) -> None:
    """Print *msg* only when verbose logging is enabled."""
    if not _IS_VERBOSE:
        return
    console.print(msg)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _detect_suspicious_files(cwd: Path, context: str = "") -> List[Path]:
    """
    Detect suspicious single-character files (like C, E, T) in a directory.

    This is a diagnostic function to help identify when/where these files are
    created. Issue #186: Empty files named C, E, T (first letters of Code,
    Example, Test) have been appearing during agentic operations.

    Args:
        cwd: Directory to scan
        context: Description of what operation just ran (for logging)

    Returns:
        List of suspicious file paths found. Scanning errors are swallowed
        (logged only in verbose mode) so diagnostics never break the caller.
    """
    suspicious: List[Path] = []
    try:
        for f in cwd.iterdir():
            # Short, non-hidden filenames (<= 2 chars) are the artifact signature.
            if f.is_file() and len(f.name) <= 2 and not f.name.startswith('.'):
                suspicious.append(f)

        if suspicious:
            import datetime
            timestamp = datetime.datetime.now().isoformat()
            _always(f"[bold red]⚠️ SUSPICIOUS FILES DETECTED (Issue #186)[/bold red]")
            _always(f"[red]Timestamp: {timestamp}[/red]")
            _always(f"[red]Context: {context}[/red]")
            _always(f"[red]Directory: {cwd}[/red]")
            for sf in suspicious:
                try:
                    size = sf.stat().st_size
                    _always(f"[red] - {sf.name} (size: {size} bytes)[/red]")
                except Exception:
                    _always(f"[red] - {sf.name} (could not stat)[/red]")

            # Also log to a file for persistence
            log_file = Path.home() / ".pdd" / "suspicious_files.log"
            log_file.parent.mkdir(parents=True, exist_ok=True)
            with open(log_file, "a") as lf:
                lf.write(f"\n{'='*60}\n")
                lf.write(f"Timestamp: {timestamp}\n")
                lf.write(f"Context: {context}\n")
                lf.write(f"Directory: {cwd}\n")
                lf.write(f"CWD at detection: {Path.cwd()}\n")
                for sf in suspicious:
                    try:
                        size = sf.stat().st_size
                        lf.write(f" - {sf.name} (size: {size} bytes)\n")
                    except Exception as e:
                        lf.write(f" - {sf.name} (error: {e})\n")
                # Log stack trace to help identify caller.
                # BUGFIX: previously wrote traceback.format_stack()[-10:][0] — a
                # single frame (the 10th-from-last) — and called format_stack()
                # twice. Capture the stack once and write the last 10 frames.
                import traceback
                stack = traceback.format_stack()
                lf.write("Stack trace:\n")
                lf.write("".join(stack[-10:]) if stack else "N/A")
                lf.write("\n")
    except Exception as e:
        _verbose(f"[yellow]Could not scan for suspicious files: {e}[/yellow]")

    return suspicious
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _begin_marker(path: Path) -> str:
|
|
123
|
+
"""Marker that must wrap the BEGIN of a corrected file block emitted by the agent."""
|
|
124
|
+
return f"<<<BEGIN_FILE:{path}>>>"
|
|
125
|
+
|
|
126
|
+
def _end_marker(path: Path) -> str:
|
|
127
|
+
"""Marker that must wrap the END of a corrected file block emitted by the agent."""
|
|
128
|
+
return f"<<<END_FILE:{path}>>>"
|
|
129
|
+
|
|
130
|
+
def get_agent_command(provider: str, instruction_file: Path) -> List[str]:
    """
    Return a base CLI command for a provider when using the generic runner.

    Anthropic and Google are handled by specialized variant runners, so they
    (and any unrecognized provider) map to an empty command; only OpenAI has
    a generic-runner command. instruction_file is accepted for interface
    symmetry and is not used by the current mapping.
    """
    base_commands: Dict[str, List[str]] = {
        "openai": ["codex", "exec", "--skip-git-repo-check"],
    }
    # Copy so callers never mutate the shared mapping's list.
    return list(base_commands.get(provider.lower(), []))
|
|
143
|
+
|
|
144
|
+
def find_llm_csv_path() -> Optional[Path]:
    """Locate .pdd/llm_model.csv, preferring $HOME over the project cwd."""
    for base in (Path.home(), Path.cwd()):
        candidate = base / ".pdd" / "llm_model.csv"
        if candidate.is_file():
            return candidate
    return None
|
|
153
|
+
|
|
154
|
+
def _print_head(label: str, text: str, max_lines: int = _MAX_LOG_LINES) -> None:
    """Print at most *max_lines* lines of *text* under a bold label.

    Active only in verbose mode; truncation keeps console noise manageable.
    """
    if not _IS_VERBOSE:
        return
    all_lines = (text or "").splitlines()
    shown = "\n".join(all_lines[:max_lines])
    suffix = "" if len(all_lines) <= max_lines else f"\n... (truncated, total {len(all_lines)} lines)"
    console.print(f"[bold cyan]{label}[/bold cyan]\n{shown}{suffix}")
|
|
165
|
+
|
|
166
|
+
def _print_diff(old: str, new: str, path: Path) -> None:
    """Render a unified diff between the old and new file content (verbose only)."""
    if not _IS_VERBOSE:
        return
    delta = list(
        difflib.unified_diff(
            old.splitlines(keepends=True),
            new.splitlines(keepends=True),
            fromfile=f"{path} (before)",
            tofile=f"{path} (after)",
        )
    )
    if not delta:
        console.print("[yellow]No diff in code file after this agent attempt.[/yellow]")
        return
    _print_head("Unified diff (first lines)", "".join(delta))
|
|
178
|
+
|
|
179
|
+
def _normalize_code_text(body: str) -> str:
|
|
180
|
+
"""
|
|
181
|
+
Normalize agent-emitted file content:
|
|
182
|
+
- remove a single leading newline if present
|
|
183
|
+
- ensure exactly one trailing newline
|
|
184
|
+
"""
|
|
185
|
+
if body.startswith("\n"):
|
|
186
|
+
body = body[1:]
|
|
187
|
+
body = body.rstrip("\n") + "\n"
|
|
188
|
+
return body
|
|
189
|
+
|
|
190
|
+
# Regex for many <<<BEGIN_FILE:path>>> ... <<<END_FILE:path>>> blocks in a single
# output. The backreference \1 forces the END marker's path to match the BEGIN
# marker's path; DOTALL lets the (non-greedy) body span multiple lines.
_MULTI_FILE_BLOCK_RE = re.compile(
    r"<<<BEGIN_FILE:(.*?)>>>(.*?)<<<END_FILE:\1>>>",
    re.DOTALL,
)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _is_suspicious_path(path: str) -> bool:
|
|
198
|
+
"""
|
|
199
|
+
Reject paths that look like LLM artifacts or template variables.
|
|
200
|
+
|
|
201
|
+
This defends against:
|
|
202
|
+
- Single/double character filenames (e.g., 'C', 'E', 'T' from agent misbehavior)
|
|
203
|
+
- Template variables like {path}, {code_abs} captured by regex
|
|
204
|
+
- Other LLM-generated garbage patterns
|
|
205
|
+
|
|
206
|
+
Returns True if the path should be rejected.
|
|
207
|
+
"""
|
|
208
|
+
if not path:
|
|
209
|
+
return True
|
|
210
|
+
# Get the basename for validation
|
|
211
|
+
base_name = Path(path).name
|
|
212
|
+
# Reject single or double character filenames (too short to be legitimate)
|
|
213
|
+
if len(base_name) <= 2:
|
|
214
|
+
return True
|
|
215
|
+
# Reject template variable patterns like {path}, {code_abs}
|
|
216
|
+
if '{' in base_name or '}' in base_name:
|
|
217
|
+
return True
|
|
218
|
+
# Reject paths that are just dots like "..", "..."
|
|
219
|
+
if base_name.strip('.') == '':
|
|
220
|
+
return True
|
|
221
|
+
return False
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
    """
    Collect every BEGIN_FILE/END_FILE block found in the given blobs.

    Returns a {path: content} mapping (later blocks win on duplicate paths),
    or an empty dict when nothing was found. Suspicious paths (single-char
    names, template variables) are skipped so LLM artifacts never reach disk.
    """
    collected: Dict[str, str] = {}
    for blob in blobs:
        if not blob:
            continue
        for match in _MULTI_FILE_BLOCK_RE.finditer(blob):
            path = (match.group(1) or "").strip()
            body = match.group(2) or ""
            if not path or body == "":
                continue
            if _is_suspicious_path(path):
                _info(f"[yellow]Skipping suspicious path from LLM output: {path!r}[/yellow]")
                continue
            collected[path] = body
    return collected
|
|
245
|
+
|
|
246
|
+
# Regex for an optional agent-supplied test command block. The \s* on both
# sides strips surrounding whitespace inside the markers at match time.
_TESTCMD_RE = re.compile(
    r"<<<BEGIN_TESTCMD>>>\s*(.*?)\s*<<<END_TESTCMD>>>",
    re.DOTALL,
)
|
|
251
|
+
|
|
252
|
+
def _extract_testcmd(*blobs: str) -> Optional[str]:
    """Return the first non-empty agent-supplied TESTCMD in *blobs*, else None."""
    for blob in blobs:
        if not blob:
            continue
        match = _TESTCMD_RE.search(blob)
        if match is None:
            continue
        cmd = (match.group(1) or "").strip()
        if cmd:
            return cmd
    return None
|
|
263
|
+
|
|
264
|
+
def _extract_corrected_from_output(stdout: str, stderr: str, code_path: Path) -> Optional[str]:
|
|
265
|
+
"""
|
|
266
|
+
Single-file fallback extraction: search for the corrected content block that
|
|
267
|
+
specifically targets the primary code file, using various path forms
|
|
268
|
+
(absolute path, real path, relative path, basename).
|
|
269
|
+
Returns the last match, or None if not found.
|
|
270
|
+
"""
|
|
271
|
+
resolved = code_path.resolve()
|
|
272
|
+
abs_path = str(resolved)
|
|
273
|
+
real_path = str(Path(abs_path).resolve())
|
|
274
|
+
rel_path = str(code_path)
|
|
275
|
+
just_name = code_path.name
|
|
276
|
+
|
|
277
|
+
def _pattern_for(path_str: str) -> re.Pattern:
|
|
278
|
+
begin = re.escape(f"<<<BEGIN_FILE:{path_str}>>>")
|
|
279
|
+
end = re.escape(f"<<<END_FILE:{path_str}>>>")
|
|
280
|
+
return re.compile(begin + r"(.*?)" + end, re.DOTALL)
|
|
281
|
+
|
|
282
|
+
candidates = [
|
|
283
|
+
_pattern_for(abs_path),
|
|
284
|
+
_pattern_for(real_path),
|
|
285
|
+
_pattern_for(rel_path),
|
|
286
|
+
_pattern_for(just_name),
|
|
287
|
+
]
|
|
288
|
+
|
|
289
|
+
matches: List[str] = []
|
|
290
|
+
for blob in [stdout or "", stderr or ""]:
|
|
291
|
+
for pat in candidates:
|
|
292
|
+
for m in pat.finditer(blob):
|
|
293
|
+
body = m.group(1) or ""
|
|
294
|
+
if body != "":
|
|
295
|
+
matches.append(body)
|
|
296
|
+
|
|
297
|
+
if not matches:
|
|
298
|
+
return None
|
|
299
|
+
|
|
300
|
+
# Filter out obvious placeholder template mistakes
|
|
301
|
+
placeholder_token = "FULL CORRECTED FILE CONTENT HERE"
|
|
302
|
+
filtered = [b for b in matches if placeholder_token.lower() not in b.lower()]
|
|
303
|
+
return filtered[-1] if filtered else matches[-1]
|
|
304
|
+
|
|
305
|
+
# Code fence (```python ... ```) fallback for providers that sometimes omit
# the BEGIN/END file markers (e.g., Gemini). The language tag is optional and
# matched case-insensitively.
_CODE_FENCE_RE = re.compile(r"```(?:python)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)
|
|
307
|
+
|
|
308
|
+
def _extract_python_code_block(*blobs: str) -> Optional[str]:
    """Return the last fenced Python block in *blobs*, newline-terminated, or None."""
    found: List[str] = []
    for blob in blobs:
        if not blob:
            continue
        found.extend(block for block in _CODE_FENCE_RE.findall(blob) if block)
    if not found:
        return None
    last = found[-1]
    return last if last.endswith("\n") else last + "\n"
|
|
322
|
+
|
|
323
|
+
def _sanitized_env_common() -> dict:
|
|
324
|
+
"""
|
|
325
|
+
Build a deterministic, non-interactive env for subprocess calls:
|
|
326
|
+
- disable colors/TTY features
|
|
327
|
+
- provide small default terminal size
|
|
328
|
+
- mark as CI
|
|
329
|
+
"""
|
|
330
|
+
env = os.environ.copy()
|
|
331
|
+
env["TERM"] = "dumb"
|
|
332
|
+
env["CI"] = "1"
|
|
333
|
+
env["NO_COLOR"] = "1"
|
|
334
|
+
env["CLICOLOR"] = "0"
|
|
335
|
+
env["CLICOLOR_FORCE"] = "0"
|
|
336
|
+
env["FORCE_COLOR"] = "0"
|
|
337
|
+
env["SHELL"] = "/bin/sh"
|
|
338
|
+
env["COLUMNS"] = env.get("COLUMNS", "80")
|
|
339
|
+
env["LINES"] = env.get("LINES", "40")
|
|
340
|
+
return env
|
|
341
|
+
|
|
342
|
+
def _sanitized_env_for_anthropic(use_cli_auth: bool = False) -> dict:
    """
    Common sanitized env, optionally without ANTHROPIC_API_KEY.

    Dropping the API key forces the Claude CLI to fall back to
    subscription-based authentication.
    """
    env = _sanitized_env_common()
    if use_cli_auth:
        env.pop("ANTHROPIC_API_KEY", None)
    return env
|
|
352
|
+
|
|
353
|
+
def _sanitized_env_for_openai() -> dict:
    """
    Common sanitized env plus OpenAI-specific hardening: strips shell
    completion machinery that can alter CLI behavior and sets the OpenAI
    CLI's no-TTY / no-color flags.
    """
    env = _sanitized_env_common()
    shell_vars = {
        "BASH_COMPLETION", "BASH_COMPLETION_COMPAT_DIR", "BASH_VERSION",
        "BASH", "ZDOTDIR", "ZSH_NAME", "ZSH_VERSION",
    }
    for key in list(env):
        if key.startswith("COMP_") or key in shell_vars:
            env.pop(key, None)
    env["DISABLE_AUTO_COMPLETE"] = "1"
    env["OPENAI_CLI_NO_TTY"] = "1"
    env["OPENAI_CLI_NO_COLOR"] = "1"
    return env
|
|
367
|
+
|
|
368
|
+
def _run_cli(cmd: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
|
|
369
|
+
"""
|
|
370
|
+
Generic subprocess runner for arbitrary CLI commands.
|
|
371
|
+
Captures stdout/stderr, returns CompletedProcess without raising on non-zero exit.
|
|
372
|
+
"""
|
|
373
|
+
return subprocess.run(
|
|
374
|
+
cmd,
|
|
375
|
+
capture_output=True,
|
|
376
|
+
text=True,
|
|
377
|
+
check=False,
|
|
378
|
+
timeout=timeout,
|
|
379
|
+
cwd=str(cwd),
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
def _run_cli_args_openai(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
    """Run an OpenAI CLI command under the OpenAI-specific sanitized environment."""
    return subprocess.run(
        args,
        cwd=str(cwd),
        env=_sanitized_env_for_openai(),
        timeout=timeout,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
393
|
+
|
|
394
|
+
def _run_openai_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
    """
    Try several OpenAI CLI variants to improve robustness.
    Returns the first attempt that yields output or succeeds.

    NOTE: Agents need write access to modify files in agentic mode,
    so we do not restrict the sandbox.

    Args:
        prompt_text: Full instruction prompt; written to a temp file the agent reads.
        cwd: Working directory for the agent subprocess (temp file is created here).
        total_timeout: NOTE(review): currently unused — each variant runs with a
            fixed 300 s cap (per_attempt below); confirm intended semantics.
        label: Short tag identifying this invocation in log lines.

    Returns:
        CompletedProcess of the first variant that produced any output or exited 0;
        a synthetic returncode-124 ("timeout") result if every variant timed out.
    """
    # Write prompt to a unique temp file to avoid race conditions in concurrent execution
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.txt',
        prefix='.agentic_prompt_',
        dir=cwd,
        delete=False,
        encoding='utf-8'
    ) as f:
        f.write(prompt_text)
        prompt_file = Path(f.name)

    try:
        # Agentic instruction that tells Codex to read the prompt file and fix
        agentic_instruction = (
            f"Read the file {prompt_file} for instructions on what to fix. "
            "You have full file access to explore and modify files as needed. "
            "After reading the instructions, fix the failing tests."
        )

        variants = [
            ["codex", "exec", agentic_instruction],
            ["codex", "exec", "--skip-git-repo-check", agentic_instruction],
        ]
        per_attempt = 300  # per-variant timeout (seconds)
        last: Optional[subprocess.CompletedProcess] = None
        for args in variants:
            try:
                # args[:-1] keeps the (long) instruction text out of the log line.
                _verbose(f"[cyan]OpenAI variant ({label}): {' '.join(args[:-1])} ...[/cyan]")
                last = _run_cli_args_openai(args, cwd, per_attempt)
                # Any output, or a clean exit, counts as a usable attempt.
                if (last.stdout or last.stderr) or last.returncode == 0:
                    return last
            except subprocess.TimeoutExpired:
                _info(f"[yellow]OpenAI variant timed out: {' '.join(args[:-1])} ...[/yellow]")
                continue
        if last is None:
            # Every variant timed out; 124 is the conventional timeout exit code.
            return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
        return last
    finally:
        # Always remove the temp prompt file, even on timeout or error.
        prompt_file.unlink(missing_ok=True)
|
|
442
|
+
|
|
443
|
+
def _run_cli_args_anthropic(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
    """Run an Anthropic CLI command; the env drops ANTHROPIC_API_KEY so the
    Claude CLI authenticates via subscription instead of the API key."""
    return subprocess.run(
        args,
        cwd=str(cwd),
        env=_sanitized_env_for_anthropic(use_cli_auth=True),
        timeout=timeout,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
454
|
+
|
|
455
|
+
def _run_anthropic_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
    """
    Anthropic CLI runner in agentic mode (without -p flag).

    NOTE: We do NOT use -p (print mode) because it prevents file tool access.
    Instead, we write the prompt to a file and let Claude read it in agentic mode.

    Args:
        prompt_text: Full instruction prompt; written to a temp file the agent reads.
        cwd: Working directory for the agent subprocess (temp file is created here).
        total_timeout: NOTE(review): currently unused — each variant runs with a
            fixed 300 s cap (per_attempt below); confirm intended semantics.
        label: Short tag identifying this invocation in log lines.

    Returns:
        CompletedProcess of the first variant that produced any output or exited 0;
        a synthetic returncode-124 ("timeout") result if every variant timed out.
    """
    # Write prompt to a unique temp file to avoid race conditions in concurrent execution
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.txt',
        prefix='.agentic_prompt_',
        dir=cwd,
        delete=False,
        encoding='utf-8'
    ) as f:
        f.write(prompt_text)
        prompt_file = Path(f.name)

    try:
        # Agentic instruction that tells Claude to read the prompt file and fix
        agentic_instruction = (
            f"Read the file {prompt_file} for instructions on what to fix. "
            "You have full file access to explore and modify files as needed. "
            "After reading the instructions, fix the failing tests."
        )

        variants = [
            ["claude", "--dangerously-skip-permissions", agentic_instruction],
        ]
        per_attempt = 300  # per-variant timeout (seconds)
        last: Optional[subprocess.CompletedProcess] = None
        for args in variants:
            try:
                # args[:-1] keeps the (long) instruction text out of the log line.
                _verbose(f"[cyan]Anthropic variant ({label}): {' '.join(args[:-1])} ...[/cyan]")
                last = _run_cli_args_anthropic(args, cwd, per_attempt)
                # Any output, or a clean exit, counts as a usable attempt.
                if last.stdout or last.stderr or last.returncode == 0:
                    return last
            except subprocess.TimeoutExpired:
                _info(f"[yellow]Anthropic variant timed out: {' '.join(args[:-1])} ...[/yellow]")
                continue
        if last is None:
            # Every variant timed out; 124 is the conventional timeout exit code.
            return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
        return last
    finally:
        # Always remove the temp prompt file, even on timeout or error.
        prompt_file.unlink(missing_ok=True)
        # Issue #186: Scan for suspicious files after Anthropic agent runs
        _detect_suspicious_files(cwd, f"After _run_anthropic_variants ({label})")
        # Also scan project root in case agent created files there
        project_root = Path.cwd()
        if project_root != cwd:
            _detect_suspicious_files(project_root, f"After _run_anthropic_variants ({label}) - project root")
|
|
507
|
+
|
|
508
|
+
def _run_cli_args_google(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
    """Run a Google CLI command under the common sanitized environment."""
    return subprocess.run(
        args,
        cwd=str(cwd),
        env=_sanitized_env_common(),
        timeout=timeout,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
519
|
+
|
|
520
|
+
def _run_google_variants(prompt_text: str, cwd: Path, total_timeout: int, label: str) -> subprocess.CompletedProcess:
    """
    Google CLI runner in agentic mode (without -p flag).

    NOTE: We do NOT use -p (pipe mode) because it may prevent tool access.
    Instead, we write the prompt to a file and let Gemini read it in agentic mode.

    Args:
        prompt_text: Full instruction text for the agent; written to a temp file.
        cwd: Directory the agent runs in; temp prompt file is created here.
        total_timeout: NOTE(review): currently unused — each attempt uses the
            hard-coded per_attempt budget below. Confirm whether this should
            cap the overall loop.
        label: Human-readable tag used only in log messages.

    Returns:
        The CompletedProcess of the first variant that produced output or
        exited 0; a synthetic 124 "timeout" result if every variant timed out.
    """
    # Write prompt to a unique temp file to avoid race conditions in concurrent execution
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.txt',
        prefix='.agentic_prompt_',
        dir=cwd,
        delete=False,
        encoding='utf-8'
    ) as f:
        f.write(prompt_text)
        prompt_file = Path(f.name)

    try:
        # Agentic instruction that tells Gemini to read the prompt file and fix
        agentic_instruction = (
            f"Read the file {prompt_file} for instructions on what to fix. "
            "You have full file access to explore and modify files as needed. "
            "After reading the instructions, fix the failing tests."
        )

        # Single variant today; kept as a list so more CLI arg shapes can be added.
        variants = [
            ["gemini", agentic_instruction],
        ]
        per_attempt = 300  # seconds per variant attempt
        last = None  # type: Optional[subprocess.CompletedProcess]
        for args in variants:
            try:
                _verbose(f"[cyan]Google variant ({label}): {' '.join(args)} ...[/cyan]")
                last = _run_cli_args_google(args, cwd, per_attempt)
                # Any output (even on failure) or a clean exit ends the loop early.
                if (last.stdout or last.stderr) or last.returncode == 0:
                    return last
            except subprocess.TimeoutExpired:
                _info(f"[yellow]Google variant timed out: {' '.join(args)} ...[/yellow]")
                continue
        if last is None:
            # Every variant timed out: synthesize a 124 (conventional timeout code) result.
            return subprocess.CompletedProcess(variants[-1], 124, stdout="", stderr="timeout")
        return last
    finally:
        # Cleanup and scans run on every exit path (returns above included).
        prompt_file.unlink(missing_ok=True)
        # Issue #186: Scan for suspicious files after Google agent runs
        _detect_suspicious_files(cwd, f"After _run_google_variants ({label})")
        # Also scan project root in case agent created files there
        project_root = Path.cwd()
        if project_root != cwd:
            _detect_suspicious_files(project_root, f"After _run_google_variants ({label}) - project root")
|
|
572
|
+
|
|
573
|
+
def _run_testcmd(cmd: str, cwd: Path) -> bool:
    """
    Execute an agent-supplied TESTCMD locally via bash -lc "<cmd>".

    Args:
        cmd: Shell command string supplied by the agent.
        cwd: Directory in which to run the command.

    Returns:
        True on exit code 0; False on non-zero exit or timeout.
        Captures and previews output (verbose).
    """
    _info(f"[cyan]Executing agent-supplied test command:[/cyan] {cmd}")
    try:
        proc = subprocess.run(
            ["bash", "-lc", cmd],
            capture_output=True,
            text=True,
            check=False,
            timeout=_VERIFY_TIMEOUT,
            cwd=str(cwd),
        )
    except subprocess.TimeoutExpired:
        # Callers (e.g. _post_apply_verify_or_testcmd) do not catch
        # TimeoutExpired, so a hung TESTCMD would previously crash the whole
        # agentic flow. Treat a timeout as a failed verification instead.
        _info(f"[yellow]Agent-supplied test command timed out after {_VERIFY_TIMEOUT}s.[/yellow]")
        return False
    _print_head("testcmd stdout", proc.stdout or "")
    _print_head("testcmd stderr", proc.stderr or "")
    return proc.returncode == 0
|
|
590
|
+
|
|
591
|
+
def _verify_and_log(unit_test_file: str, cwd: Path, *, verify_cmd: Optional[str], enabled: bool) -> bool:
    """
    Standard local verification gate:
    - If disabled, return True immediately (skip verification).
    - If verify_cmd exists: format placeholders and run it via _run_testcmd.
    - Else: run the file directly using the appropriate interpreter for its language.

    Args:
        unit_test_file: Path to the test file; substituted for "{test}".
        cwd: Working directory; substituted for "{cwd}".
        verify_cmd: Optional shell command template with {test}/{cwd} placeholders.
        enabled: When False, verification is skipped and treated as success.

    Returns:
        True iff the executed command exits 0 (or verification is disabled).
        A timed-out fallback run counts as failure rather than raising.
    """
    if not enabled:
        return True
    if verify_cmd:
        cmd = verify_cmd.replace("{test}", str(Path(unit_test_file).resolve())).replace("{cwd}", str(cwd))
        return _run_testcmd(cmd, cwd)
    # Get language-appropriate run command from language_format.csv
    run_cmd = get_run_command_for_file(str(Path(unit_test_file).resolve()))
    if run_cmd:
        return _run_testcmd(run_cmd, cwd)
    # Fallback: try running with Python if no run command found
    try:
        verify = subprocess.run(
            [sys.executable, str(Path(unit_test_file).resolve())],
            capture_output=True,
            text=True,
            check=False,
            timeout=_VERIFY_TIMEOUT,
            cwd=str(cwd),
        )
    except subprocess.TimeoutExpired:
        # A hung test file should read as a failed verification, not an
        # uncaught exception that aborts the agentic flow.
        _info(f"[yellow]Direct verification run timed out after {_VERIFY_TIMEOUT}s.[/yellow]")
        return False
    _print_head("verify stdout", verify.stdout or "")
    _print_head("verify stderr", verify.stderr or "")
    return verify.returncode == 0
|
|
620
|
+
|
|
621
|
+
def _safe_is_subpath(child: Path, parent: Path) -> bool:
|
|
622
|
+
"""
|
|
623
|
+
True if 'child' resolves under 'parent' (prevents writes outside project root).
|
|
624
|
+
"""
|
|
625
|
+
try:
|
|
626
|
+
child.resolve().relative_to(parent.resolve())
|
|
627
|
+
return True
|
|
628
|
+
except Exception:
|
|
629
|
+
return False
|
|
630
|
+
|
|
631
|
+
# Suffixes we strip when mapping "foo_fixed.py" -> "foo.py"
|
|
632
|
+
_COMMON_FIXED_SUFFIXES = ("_fixed", ".fixed", "-fixed")
|
|
633
|
+
|
|
634
|
+
def _strip_common_suffixes(name: str) -> str:
|
|
635
|
+
"""Remove a known fixed-suffix from a basename (before extension), if present."""
|
|
636
|
+
base, ext = os.path.splitext(name)
|
|
637
|
+
for suf in _COMMON_FIXED_SUFFIXES:
|
|
638
|
+
if base.endswith(suf):
|
|
639
|
+
base = base[: -len(suf)]
|
|
640
|
+
break
|
|
641
|
+
return base + ext
|
|
642
|
+
|
|
643
|
+
def _find_existing_by_basename(project_root: Path, basename: str) -> Optional[Path]:
|
|
644
|
+
"""Search the project tree for the first file whose name matches 'basename'."""
|
|
645
|
+
try:
|
|
646
|
+
for p in project_root.rglob(basename):
|
|
647
|
+
if p.is_file():
|
|
648
|
+
return p.resolve()
|
|
649
|
+
except Exception:
|
|
650
|
+
return None
|
|
651
|
+
return None
|
|
652
|
+
|
|
653
|
+
def _normalize_target_path(
    emitted_path: str,
    project_root: Path,
    primary_code_path: Path,
    allow_new: bool,
) -> Optional[Path]:
    """
    Resolve an emitted path to a safe file path we should write:
    - reject suspicious paths (single-char, template variables)
    - make path absolute under project root
    - allow direct match, primary-file match (with/without _fixed), or basename search
    - create new files only if allow_new is True
    """
    # Early rejection of suspicious paths (defense against LLM artifacts)
    if _is_suspicious_path(emitted_path):
        _info(f"[yellow]Skipping suspicious path: {emitted_path!r}[/yellow]")
        return None

    candidate = Path(emitted_path)
    if not candidate.is_absolute():
        candidate = (project_root / emitted_path).resolve()

    # Never write outside the project tree.
    if not _safe_is_subpath(candidate, project_root):
        _info(f"[yellow]Skipping write outside project root: {candidate}[/yellow]")
        return None

    # An existing file is always an acceptable target.
    if candidate.exists():
        return candidate

    # Map "foo_fixed.py"-style names back onto the primary code file.
    emitted_base = Path(emitted_path).name
    primary_base = primary_code_path.name
    if emitted_base == primary_base or _strip_common_suffixes(emitted_base) == primary_base:
        return primary_code_path

    # Otherwise look for any existing file elsewhere with the same basename.
    existing = _find_existing_by_basename(project_root, emitted_base)
    if existing:
        return existing

    if not allow_new:
        _info(f"[yellow]Skipping creation of new file (in-place only): {candidate}[/yellow]")
        return None
    return candidate
|
|
692
|
+
|
|
693
|
+
def _apply_file_map(
    file_map: Dict[str, str],
    project_root: Path,
    primary_code_path: Path,
    allow_new: bool,
) -> List[Path]:
    """
    Apply a {emitted_path -> content} mapping to disk:
    - resolve a safe target path
    - normalize content
    - write file and print unified diff (verbose)
    Returns a list of the written Paths.
    """
    written: List[Path] = []
    for emitted_name, new_body in file_map.items():
        destination = _normalize_target_path(emitted_name, project_root, primary_code_path, allow_new)
        if destination is None:
            # Rejected (suspicious / outside root / new file disallowed).
            continue
        normalized = _normalize_code_text(new_body)
        previous = ""
        if destination.exists():
            try:
                previous = destination.read_text(encoding="utf-8")
            except Exception:
                # Unreadable old content: diff against empty.
                previous = ""
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(normalized, encoding="utf-8")
        _print_diff(previous, normalized, destination)
        written.append(destination)
    return written
|
|
723
|
+
|
|
724
|
+
def _post_apply_verify_or_testcmd(
    provider: str,
    unit_test_file: str,
    cwd: Path,
    *,
    verify_cmd: Optional[str],
    verify_enabled: bool,
    stdout: str,
    stderr: str,
) -> bool:
    """
    After applying changes, run standard verification.
    If it fails and TESTCMDs are allowed, try running the agent-supplied TESTCMD.
    Return True iff any verification path succeeds.
    """
    # 1) If standard verification is enabled, use it
    if _verify_and_log(unit_test_file, cwd, verify_cmd=verify_cmd, enabled=verify_enabled):
        return True
    # 2) Otherwise (or if disabled/failed) try agent-supplied TESTCMD if allowed
    if not _AGENT_TESTCMD_ALLOWED:
        return False
    testcmd = _extract_testcmd(stdout or "", stderr or "")
    if not testcmd:
        return False
    return _run_testcmd(testcmd, cwd)
|
|
748
|
+
|
|
749
|
+
def _snapshot_mtimes(root: Path) -> Dict[Path, float]:
|
|
750
|
+
"""Record mtimes of all files in root."""
|
|
751
|
+
snapshot = {}
|
|
752
|
+
try:
|
|
753
|
+
for p in root.rglob("*"):
|
|
754
|
+
if ".git" in p.parts or "__pycache__" in p.parts:
|
|
755
|
+
continue
|
|
756
|
+
if p.is_file():
|
|
757
|
+
snapshot[p] = p.stat().st_mtime
|
|
758
|
+
except Exception:
|
|
759
|
+
pass
|
|
760
|
+
return snapshot
|
|
761
|
+
|
|
762
|
+
def _detect_mtime_changes(root: Path, snapshot: Dict[Path, float]) -> List[str]:
|
|
763
|
+
"""Return list of changed/new file paths."""
|
|
764
|
+
changes = []
|
|
765
|
+
try:
|
|
766
|
+
for p in root.rglob("*"):
|
|
767
|
+
if ".git" in p.parts or "__pycache__" in p.parts:
|
|
768
|
+
continue
|
|
769
|
+
if p.is_file():
|
|
770
|
+
if p not in snapshot:
|
|
771
|
+
changes.append(str(p))
|
|
772
|
+
elif p.stat().st_mtime != snapshot[p]:
|
|
773
|
+
changes.append(str(p))
|
|
774
|
+
except Exception:
|
|
775
|
+
pass
|
|
776
|
+
return changes
|
|
777
|
+
|
|
778
|
+
def _try_harvest_then_verify(
    provider: str,
    code_path: Path,
    unit_test_file: str,
    code_snapshot: str,
    prompt_content: str,
    test_content: str,
    error_content: str,
    cwd: Path,
    *,
    verify_cmd: Optional[str],
    verify_enabled: bool,
    changed_files: List[str],
) -> bool:
    """
    Strict, fast path:
    - Ask agent to ONLY emit corrected file blocks (and optionally TESTCMD).
    - Apply emitted results deterministically.
    - Verify locally.

    Args:
        provider: Agent provider key ("openai" / "anthropic" / "google" / other).
        code_path: Primary code file being fixed.
        unit_test_file: Test file path used by verification.
        code_snapshot: Code content before the agent ran (for diffs).
        prompt_content / test_content / error_content: Inputs formatted into the prompt.
        cwd: Directory the agent and verification run in.
        verify_cmd: Optional verification command template.
        verify_enabled: Whether standard verification is enabled.
        changed_files: Mutated in place — paths of files written/changed are appended.

    Returns:
        True iff harvested output was applied and verification passed.
    """
    harvest_prompt_template = load_prompt_template("agentic_fix_harvest_only_LLM")
    if not harvest_prompt_template:
        _info("[yellow]Failed to load harvest-only agent prompt template.[/yellow]")
        return False

    # Fill the harvest-only prompt with code/tests/error and marker strings.
    harvest_instr = harvest_prompt_template.format(
        code_abs=str(code_path),
        test_abs=str(Path(unit_test_file).resolve()),
        begin=_begin_marker(code_path),
        end=_end_marker(code_path),
        code_content=code_snapshot,
        prompt_content=prompt_content,
        test_content=test_content,
        error_content=error_content,
        verify_cmd=verify_cmd or "No verification command provided.",
    )
    # NOTE(review): this path is relative, so the file lands in the process's
    # current working directory, not in 'cwd' — confirm that is intended.
    harvest_file = Path("agentic_fix_harvest.txt")
    harvest_file.write_text(harvest_instr, encoding="utf-8")
    _info(f"[cyan]Executing {provider.capitalize()} with harvest-only instructions: {harvest_file.resolve()}[/cyan]")
    _print_head("Harvest-only instruction preview", harvest_instr)

    # Snapshot mtimes before agent run
    mtime_snapshot = _snapshot_mtimes(cwd)

    try:
        # Provider-specific variant runners with shorter time budgets
        if provider == "openai":
            res = _run_openai_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
        elif provider == "anthropic":
            res = _run_anthropic_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
        elif provider == "google":
            res = _run_google_variants(harvest_instr, cwd, max(60, _AGENT_CALL_TIMEOUT // 3), "harvest")
        else:
            res = _run_cli(get_agent_command(provider, harvest_file), cwd, max(60, _AGENT_CALL_TIMEOUT // 2))
    except subprocess.TimeoutExpired:
        _info(f"[yellow]{provider.capitalize()} harvest-only attempt timed out.[/yellow]")
        try:
            harvest_file.unlink()
        except Exception:
            pass
        return False

    _print_head(f"{provider.capitalize()} harvest stdout", res.stdout or "")
    _print_head(f"{provider.capitalize()} harvest stderr", res.stderr or "")

    # Detect direct changes by agent
    direct_changes = _detect_mtime_changes(cwd, mtime_snapshot)
    changed_files.extend(direct_changes)

    # Harvested output is allowed to create new support files.
    allow_new = True

    # Prefer multi-file blocks; else try single-file; else Gemini code-fence fallback
    multi = _extract_files_from_output(res.stdout or "", res.stderr or "")
    if multi:
        _info("[cyan]Applying multi-file harvest from agent output...[/cyan]")
        applied = _apply_file_map(multi, cwd, code_path, allow_new)
        changed_files.extend([str(p) for p in applied])
        ok = _post_apply_verify_or_testcmd(
            provider, unit_test_file, cwd,
            verify_cmd=verify_cmd, verify_enabled=verify_enabled,
            stdout=res.stdout or "", stderr=res.stderr or ""
        )
        try:
            harvest_file.unlink()
        except Exception:
            pass
        return ok

    harvested_single = _extract_corrected_from_output(res.stdout or "", res.stderr or "", code_path.resolve())
    if harvested_single is None:
        # No marker-delimited single file either; try provider-specific fallback.
        if provider == "google":
            code_block = _extract_python_code_block(res.stdout or "", res.stderr or "")
            if code_block:
                _info("[cyan]No markers found, but detected a Python code block from Google. Applying it...[/cyan]")
                body_to_write = _normalize_code_text(code_block)
                code_path.write_text(body_to_write, encoding="utf-8")
                changed_files.append(str(code_path))
                newest = code_path.read_text(encoding="utf-8")
                _print_diff(code_snapshot, newest, code_path)
                ok = _post_apply_verify_or_testcmd(
                    provider, unit_test_file, cwd,
                    verify_cmd=verify_cmd, verify_enabled=verify_enabled,
                    stdout=res.stdout or "", stderr=res.stderr or ""
                )
                try:
                    harvest_file.unlink()
                except Exception:
                    pass
                return ok

        # If no output blocks, but direct changes occurred, we should verify
        if direct_changes:
            _info("[cyan]No output markers found, but detected file changes. Verifying...[/cyan]")
            ok = _post_apply_verify_or_testcmd(
                provider, unit_test_file, cwd,
                verify_cmd=verify_cmd, verify_enabled=verify_enabled,
                stdout=res.stdout or "", stderr=res.stderr or ""
            )
            try:
                harvest_file.unlink()
            except Exception:
                pass
            return ok

        # Nothing emitted and nothing changed on disk: harvest failed.
        _info("[yellow]Harvest-only attempt did not include the required markers.[/yellow]")
        try:
            harvest_file.unlink()
        except Exception:
            pass
        return False

    # Marker-delimited single-file harvest: overwrite the primary code file.
    _info("[cyan]Applying harvested corrected file (single)...[/cyan]")
    body_to_write = _normalize_code_text(harvested_single)
    code_path.write_text(body_to_write, encoding="utf-8")
    changed_files.append(str(code_path))
    newest = code_path.read_text(encoding="utf-8")
    _print_diff(code_snapshot, newest, code_path)

    ok = _post_apply_verify_or_testcmd(
        provider, unit_test_file, cwd,
        verify_cmd=verify_cmd, verify_enabled=verify_enabled,
        stdout=res.stdout or "", stderr=res.stderr or ""
    )
    try:
        harvest_file.unlink()
    except Exception:
        pass
    return ok
|
|
926
|
+
|
|
927
|
+
def run_agentic_fix(
|
|
928
|
+
prompt_file: str,
|
|
929
|
+
code_file: str,
|
|
930
|
+
unit_test_file: str,
|
|
931
|
+
error_log_file: str,
|
|
932
|
+
verify_cmd: Optional[str] = None,
|
|
933
|
+
cwd: Optional[Path] = None,
|
|
934
|
+
*,
|
|
935
|
+
verbose: bool = False,
|
|
936
|
+
quiet: bool = False,
|
|
937
|
+
) -> Tuple[bool, str, float, str, List[str]]:
|
|
938
|
+
"""
|
|
939
|
+
Main entrypoint for agentic fallback:
|
|
940
|
+
- Prepares inputs and prompt (with code/tests/error log)
|
|
941
|
+
- Optionally preflight-populates error log if empty (so agent sees failures)
|
|
942
|
+
- Tries providers in preference order: harvest-first, then primary attempt
|
|
943
|
+
- Applies changes locally and verifies locally
|
|
944
|
+
- Returns (success, message, est_cost, used_model, changed_files)
|
|
945
|
+
"""
|
|
946
|
+
global _IS_VERBOSE, _IS_QUIET
|
|
947
|
+
if verbose:
|
|
948
|
+
_IS_VERBOSE = True
|
|
949
|
+
_IS_QUIET = False
|
|
950
|
+
elif quiet:
|
|
951
|
+
_IS_QUIET = True
|
|
952
|
+
_IS_VERBOSE = False
|
|
953
|
+
|
|
954
|
+
_always("[bold yellow]Standard fix failed. Initiating agentic fallback (AGENT-ONLY)...[/bold yellow]")
|
|
955
|
+
|
|
956
|
+
instruction_file: Optional[Path] = None
|
|
957
|
+
est_cost: float = 0.0
|
|
958
|
+
used_model: str = "agentic-cli"
|
|
959
|
+
changed_files: List[str] = [] # Track all files changed by agents
|
|
960
|
+
|
|
961
|
+
try:
|
|
962
|
+
# Use explicit cwd if provided, otherwise fall back to current directory
|
|
963
|
+
working_dir = Path(cwd) if cwd else Path.cwd()
|
|
964
|
+
_info(f"[cyan]Project root (cwd): {working_dir}[/cyan]")
|
|
965
|
+
|
|
966
|
+
# Load provider table and filter to those with API keys present in the environment
|
|
967
|
+
csv_path = find_llm_csv_path()
|
|
968
|
+
model_df = _load_model_data(csv_path)
|
|
969
|
+
|
|
970
|
+
available_agents: List[str] = []
|
|
971
|
+
present_keys: List[str] = []
|
|
972
|
+
seen = set()
|
|
973
|
+
|
|
974
|
+
for provider in AGENT_PROVIDER_PREFERENCE:
|
|
975
|
+
provider_df = model_df[model_df["provider"].str.lower() == provider]
|
|
976
|
+
if provider_df.empty:
|
|
977
|
+
continue
|
|
978
|
+
api_key_name = provider_df.iloc[0]["api_key"]
|
|
979
|
+
if not api_key_name:
|
|
980
|
+
continue
|
|
981
|
+
# Check CLI availability first (subscription auth), then API key
|
|
982
|
+
has_cli_auth = provider == "anthropic" and shutil.which("claude")
|
|
983
|
+
has_api_key = os.getenv(api_key_name) or (provider == "google" and os.getenv("GEMINI_API_KEY"))
|
|
984
|
+
if has_cli_auth or has_api_key:
|
|
985
|
+
if has_cli_auth:
|
|
986
|
+
present_keys.append("claude-cli-auth")
|
|
987
|
+
else:
|
|
988
|
+
present_keys.append(api_key_name or ("GEMINI_API_KEY" if provider == "google" else ""))
|
|
989
|
+
if provider not in seen:
|
|
990
|
+
available_agents.append(provider)
|
|
991
|
+
seen.add(provider)
|
|
992
|
+
|
|
993
|
+
_info(f"[cyan]Env API keys present (names only): {', '.join([k for k in present_keys if k]) or 'none'}[/cyan]")
|
|
994
|
+
if not available_agents:
|
|
995
|
+
return False, "No configured agent API keys found in environment.", est_cost, used_model, changed_files
|
|
996
|
+
|
|
997
|
+
_info(f"[cyan]Available agents found: {', '.join(available_agents)}[/cyan]")
|
|
998
|
+
|
|
999
|
+
# Read input artifacts that feed into the prompt
|
|
1000
|
+
prompt_content = Path(prompt_file).read_text(encoding="utf-8")
|
|
1001
|
+
|
|
1002
|
+
# Resolve relative paths against working_dir, not Path.cwd()
|
|
1003
|
+
code_path_input = Path(code_file)
|
|
1004
|
+
if not code_path_input.is_absolute():
|
|
1005
|
+
code_path = (working_dir / code_path_input).resolve()
|
|
1006
|
+
else:
|
|
1007
|
+
code_path = code_path_input.resolve()
|
|
1008
|
+
|
|
1009
|
+
test_path_input = Path(unit_test_file)
|
|
1010
|
+
if not test_path_input.is_absolute():
|
|
1011
|
+
test_path = (working_dir / test_path_input).resolve()
|
|
1012
|
+
else:
|
|
1013
|
+
test_path = test_path_input.resolve()
|
|
1014
|
+
|
|
1015
|
+
orig_code = code_path.read_text(encoding="utf-8")
|
|
1016
|
+
orig_test = test_path.read_text(encoding="utf-8")
|
|
1017
|
+
test_content = orig_test # Alias for prompt template compatibility
|
|
1018
|
+
|
|
1019
|
+
# Read error log if it exists, otherwise we'll populate it via preflight
|
|
1020
|
+
error_log_path = Path(error_log_file)
|
|
1021
|
+
error_content = error_log_path.read_text(encoding="utf-8") if error_log_path.exists() else ""
|
|
1022
|
+
|
|
1023
|
+
# --- Preflight: populate error_content if empty so the agent sees fresh failures ---
|
|
1024
|
+
# This makes run_agentic_fix self-sufficient even if the caller forgot to write the error log.
|
|
1025
|
+
# Also detect useless content patterns like empty XML tags (e.g., "<history></history>")
|
|
1026
|
+
def _is_useless_error_content(content: str) -> bool:
|
|
1027
|
+
"""Check if error content is empty or useless (e.g., empty XML tags)."""
|
|
1028
|
+
stripped = (content or "").strip()
|
|
1029
|
+
if not stripped:
|
|
1030
|
+
return True
|
|
1031
|
+
# Detect empty XML-like tags with no actual error content
|
|
1032
|
+
import re
|
|
1033
|
+
# Remove all XML-like empty tags and whitespace
|
|
1034
|
+
cleaned = re.sub(r"<[^>]+>\s*</[^>]+>", "", stripped).strip()
|
|
1035
|
+
if not cleaned:
|
|
1036
|
+
return True
|
|
1037
|
+
# Check if content lacks any traceback or error keywords
|
|
1038
|
+
error_indicators = ["Error", "Exception", "Traceback", "failed", "FAILED", "error:"]
|
|
1039
|
+
return not any(ind in content for ind in error_indicators)
|
|
1040
|
+
|
|
1041
|
+
if _is_useless_error_content(error_content):
|
|
1042
|
+
try:
|
|
1043
|
+
lang = get_language(os.path.splitext(code_path)[1])
|
|
1044
|
+
pre_cmd = os.getenv("PDD_AGENTIC_VERIFY_CMD") or default_verify_cmd_for(lang, unit_test_file)
|
|
1045
|
+
if pre_cmd:
|
|
1046
|
+
pre_cmd = pre_cmd.replace("{test}", str(Path(unit_test_file).resolve())).replace("{cwd}", str(working_dir))
|
|
1047
|
+
pre = subprocess.run(
|
|
1048
|
+
["bash", "-lc", pre_cmd],
|
|
1049
|
+
capture_output=True,
|
|
1050
|
+
text=True,
|
|
1051
|
+
check=False,
|
|
1052
|
+
timeout=_VERIFY_TIMEOUT,
|
|
1053
|
+
cwd=str(working_dir),
|
|
1054
|
+
)
|
|
1055
|
+
else:
|
|
1056
|
+
# Use language-appropriate run command from language_format.csv
|
|
1057
|
+
run_cmd = get_run_command_for_file(str(Path(unit_test_file).resolve()))
|
|
1058
|
+
if run_cmd:
|
|
1059
|
+
pre = subprocess.run(
|
|
1060
|
+
["bash", "-lc", run_cmd],
|
|
1061
|
+
capture_output=True,
|
|
1062
|
+
text=True,
|
|
1063
|
+
check=False,
|
|
1064
|
+
timeout=_VERIFY_TIMEOUT,
|
|
1065
|
+
cwd=str(working_dir),
|
|
1066
|
+
)
|
|
1067
|
+
else:
|
|
1068
|
+
# Fallback: run directly with Python interpreter
|
|
1069
|
+
pre = subprocess.run(
|
|
1070
|
+
[sys.executable, str(Path(unit_test_file).resolve())],
|
|
1071
|
+
capture_output=True,
|
|
1072
|
+
text=True,
|
|
1073
|
+
check=False,
|
|
1074
|
+
timeout=_VERIFY_TIMEOUT,
|
|
1075
|
+
cwd=str(working_dir),
|
|
1076
|
+
)
|
|
1077
|
+
error_content = (pre.stdout or "") + "\n" + (pre.stderr or "")
|
|
1078
|
+
try:
|
|
1079
|
+
Path(error_log_file).write_text(error_content, encoding="utf-8")
|
|
1080
|
+
except Exception:
|
|
1081
|
+
pass
|
|
1082
|
+
_print_head("preflight verify stdout", pre.stdout or "")
|
|
1083
|
+
_print_head("preflight verify stderr", pre.stderr or "")
|
|
1084
|
+
except Exception as e:
|
|
1085
|
+
_info(f"[yellow]Preflight verification failed: {e}. Proceeding with empty error log.[/yellow]")
|
|
1086
|
+
# --- End preflight ---
|
|
1087
|
+
|
|
1088
|
+
# Compute verification policy and command
|
|
1089
|
+
ext = code_path.suffix.lower()
|
|
1090
|
+
is_python = ext == ".py"
|
|
1091
|
+
|
|
1092
|
+
env_verify = os.getenv("PDD_AGENTIC_VERIFY", None) # "auto"/"0"/"1"/None
|
|
1093
|
+
verify_force = os.getenv("PDD_AGENTIC_VERIFY_FORCE", "0") == "1"
|
|
1094
|
+
|
|
1095
|
+
# If verify_cmd arg is provided, it overrides env var and default
|
|
1096
|
+
if verify_cmd is None:
|
|
1097
|
+
verify_cmd = os.getenv("PDD_AGENTIC_VERIFY_CMD", None)
|
|
1098
|
+
|
|
1099
|
+
if verify_cmd is None:
|
|
1100
|
+
verify_cmd = default_verify_cmd_for(get_language(os.path.splitext(code_path)[1]), unit_test_file)
|
|
1101
|
+
|
|
1102
|
+
# Load primary prompt template
|
|
1103
|
+
primary_prompt_template = load_prompt_template("agentic_fix_primary_LLM")
|
|
1104
|
+
if not primary_prompt_template:
|
|
1105
|
+
return False, "Failed to load primary agent prompt template.", est_cost, used_model, changed_files
|
|
1106
|
+
|
|
1107
|
+
# Fill primary instruction (includes code/tests/error/markers/verify_cmd hint)
|
|
1108
|
+
primary_instr = primary_prompt_template.format(
|
|
1109
|
+
code_abs=str(code_path),
|
|
1110
|
+
test_abs=str(Path(unit_test_file).resolve()),
|
|
1111
|
+
begin=_begin_marker(code_path),
|
|
1112
|
+
end=_end_marker(code_path),
|
|
1113
|
+
prompt_content=prompt_content,
|
|
1114
|
+
code_content=orig_code,
|
|
1115
|
+
test_content=test_content,
|
|
1116
|
+
error_content=error_content,
|
|
1117
|
+
verify_cmd=verify_cmd or "No verification command provided.",
|
|
1118
|
+
)
|
|
1119
|
+
instruction_file = working_dir / "agentic_fix_instructions.txt"
|
|
1120
|
+
instruction_file.write_text(primary_instr, encoding="utf-8")
|
|
1121
|
+
_info(f"[cyan]Instruction file: {instruction_file.resolve()} ({instruction_file.stat().st_size} bytes)[/cyan]")
|
|
1122
|
+
_print_head("Instruction preview", primary_instr)
|
|
1123
|
+
|
|
1124
|
+
# Decide verification enablement
|
|
1125
|
+
if verify_force:
|
|
1126
|
+
verify_enabled = True
|
|
1127
|
+
# If a verification command is present (from user or defaults), ALWAYS enable verification.
|
|
1128
|
+
elif verify_cmd:
|
|
1129
|
+
verify_enabled = True
|
|
1130
|
+
else:
|
|
1131
|
+
if env_verify is None:
|
|
1132
|
+
# AUTO mode: if not explicitly disabled, allow agent-supplied TESTCMD
|
|
1133
|
+
verify_enabled = True
|
|
1134
|
+
elif env_verify.lower() == "auto":
|
|
1135
|
+
verify_enabled = False
|
|
1136
|
+
else:
|
|
1137
|
+
verify_enabled = (env_verify != "0")
|
|
1138
|
+
|
|
1139
|
+
allow_new = True # allow creating new support files when the agent emits them
|
|
1140
|
+
|
|
1141
|
+
# Try each available agent in order
|
|
1142
|
+
for provider in available_agents:
|
|
1143
|
+
used_model = f"agentic-{provider}"
|
|
1144
|
+
cmd = get_agent_command(provider, instruction_file)
|
|
1145
|
+
binary = (cmd[0] if cmd else {"anthropic": "claude", "google": "gemini", "openai": "codex"}.get(provider, ""))
|
|
1146
|
+
cli_path = shutil.which(binary) or "NOT-IN-PATH"
|
|
1147
|
+
_info(f"[cyan]Attempting fix with {provider.capitalize()} agent...[/cyan]")
|
|
1148
|
+
if _IS_VERBOSE:
|
|
1149
|
+
_verbose(f"[cyan]CLI binary: {binary} -> {cli_path}[/cyan]")
|
|
1150
|
+
if cmd:
|
|
1151
|
+
_verbose(f"Executing (cwd={working_dir}): {' '.join(cmd)}")
|
|
1152
|
+
|
|
1153
|
+
# Skip if the provider CLI is not available on PATH
|
|
1154
|
+
if cli_path == "NOT-IN-PATH":
|
|
1155
|
+
_info(f"[yellow]Skipping {provider.capitalize()} (CLI '{binary}' not found in PATH).[/yellow]")
|
|
1156
|
+
continue
|
|
1157
|
+
|
|
1158
|
+
# PRIMARY-FIRST: Try the full agent approach first (allows exploration, debugging)
|
|
1159
|
+
_info(f"[cyan]Trying primary approach with {provider.capitalize()}...[/cyan]")
|
|
1160
|
+
est_cost += _AGENT_COST_PER_CALL
|
|
1161
|
+
|
|
1162
|
+
# Snapshot mtimes before agent run
|
|
1163
|
+
mtime_snapshot = _snapshot_mtimes(working_dir)
|
|
1164
|
+
|
|
1165
|
+
try:
|
|
1166
|
+
if provider == "openai":
|
|
1167
|
+
res = _run_openai_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
|
|
1168
|
+
elif provider == "anthropic":
|
|
1169
|
+
res = _run_anthropic_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
|
|
1170
|
+
elif provider == "google":
|
|
1171
|
+
res = _run_google_variants(primary_instr, working_dir, max(30, _AGENT_CALL_TIMEOUT // 2), "primary")
|
|
1172
|
+
else:
|
|
1173
|
+
res = _run_cli(cmd, working_dir, _AGENT_CALL_TIMEOUT)
|
|
1174
|
+
except subprocess.TimeoutExpired:
|
|
1175
|
+
_info(f"[yellow]{provider.capitalize()} agent timed out after {_AGENT_CALL_TIMEOUT}s. Trying next...[/yellow]")
|
|
1176
|
+
continue
|
|
1177
|
+
|
|
1178
|
+
_print_head(f"{provider.capitalize()} stdout", res.stdout or "")
|
|
1179
|
+
_print_head(f"{provider.capitalize()} stderr", res.stderr or "")
|
|
1180
|
+
|
|
1181
|
+
# Detect direct changes by agent
|
|
1182
|
+
direct_changes = _detect_mtime_changes(working_dir, mtime_snapshot)
|
|
1183
|
+
changed_files.extend(direct_changes)
|
|
1184
|
+
|
|
1185
|
+
# Parse emitted changes (multi-file preferred)
|
|
1186
|
+
multi = _extract_files_from_output(res.stdout or "", res.stderr or "")
|
|
1187
|
+
if multi:
|
|
1188
|
+
_info("[cyan]Detected multi-file corrected content (primary attempt). Applying...[/cyan]")
|
|
1189
|
+
applied = _apply_file_map(multi, working_dir, code_path, allow_new)
|
|
1190
|
+
changed_files.extend([str(p) for p in applied])
|
|
1191
|
+
else:
|
|
1192
|
+
# Single-file fallback or Gemini code fence
|
|
1193
|
+
harvested = _extract_corrected_from_output(res.stdout or "", res.stderr or "", code_path.resolve())
|
|
1194
|
+
if harvested is not None:
|
|
1195
|
+
_info("[cyan]Detected corrected file content in agent output (primary attempt). Applying patch...[/cyan]")
|
|
1196
|
+
body_to_write = _normalize_code_text(harvested)
|
|
1197
|
+
code_path.write_text(body_to_write, encoding="utf-8")
|
|
1198
|
+
changed_files.append(str(code_path))
|
|
1199
|
+
elif provider == "google":
|
|
1200
|
+
code_block = _extract_python_code_block(res.stdout or "", res.stderr or "")
|
|
1201
|
+
if code_block:
|
|
1202
|
+
_info("[cyan]Detected a Python code block from Google (no markers). Applying patch...[/cyan]")
|
|
1203
|
+
body_to_write = _normalize_code_text(code_block)
|
|
1204
|
+
code_path.write_text(body_to_write, encoding="utf-8")
|
|
1205
|
+
changed_files.append(str(code_path))
|
|
1206
|
+
|
|
1207
|
+
# Show diff (verbose) and decide whether to verify
|
|
1208
|
+
new_code = code_path.read_text(encoding="utf-8")
|
|
1209
|
+
new_test = test_path.read_text(encoding="utf-8")
|
|
1210
|
+
_print_diff(orig_code, new_code, code_path)
|
|
1211
|
+
if new_test != orig_test:
|
|
1212
|
+
_print_diff(orig_test, new_test, test_path)
|
|
1213
|
+
if str(test_path) not in changed_files:
|
|
1214
|
+
changed_files.append(str(test_path))
|
|
1215
|
+
|
|
1216
|
+
# Proceed to verify if: agent returned 0, OR either file changed, OR markers found, OR direct changes
|
|
1217
|
+
code_changed = new_code != orig_code
|
|
1218
|
+
test_changed = new_test != orig_test
|
|
1219
|
+
proceed_to_verify = (res.returncode == 0) or code_changed or test_changed or bool(multi) or bool(direct_changes)
|
|
1220
|
+
if proceed_to_verify:
|
|
1221
|
+
ok = _post_apply_verify_or_testcmd(
|
|
1222
|
+
provider, unit_test_file, working_dir,
|
|
1223
|
+
verify_cmd=verify_cmd, verify_enabled=verify_enabled,
|
|
1224
|
+
stdout=res.stdout or "", stderr=res.stderr or ""
|
|
1225
|
+
)
|
|
1226
|
+
if ok:
|
|
1227
|
+
_always(f"[bold green]{provider.capitalize()} agent completed successfully and tests passed.[/bold green]")
|
|
1228
|
+
try:
|
|
1229
|
+
instruction_file.unlink()
|
|
1230
|
+
except Exception:
|
|
1231
|
+
pass
|
|
1232
|
+
return True, f"Agentic fix successful with {provider.capitalize()}.", est_cost, used_model, changed_files
|
|
1233
|
+
|
|
1234
|
+
# PRIMARY FAILED - Try harvest as a quick fallback before moving to next provider
|
|
1235
|
+
if provider in ("google", "openai", "anthropic"):
|
|
1236
|
+
_info("[yellow]Primary attempt did not pass; trying harvest fallback...[/yellow]")
|
|
1237
|
+
est_cost += _AGENT_COST_PER_CALL
|
|
1238
|
+
try:
|
|
1239
|
+
if _try_harvest_then_verify(
|
|
1240
|
+
provider,
|
|
1241
|
+
code_path,
|
|
1242
|
+
unit_test_file,
|
|
1243
|
+
orig_code,
|
|
1244
|
+
prompt_content,
|
|
1245
|
+
test_content,
|
|
1246
|
+
error_content,
|
|
1247
|
+
working_dir,
|
|
1248
|
+
verify_cmd=verify_cmd,
|
|
1249
|
+
verify_enabled=verify_enabled,
|
|
1250
|
+
changed_files=changed_files,
|
|
1251
|
+
):
|
|
1252
|
+
try:
|
|
1253
|
+
instruction_file.unlink()
|
|
1254
|
+
except Exception:
|
|
1255
|
+
pass
|
|
1256
|
+
return True, f"Agentic fix successful with {provider.capitalize()} (harvest fallback).", est_cost, used_model, changed_files
|
|
1257
|
+
except subprocess.TimeoutExpired:
|
|
1258
|
+
_info(f"[yellow]{provider.capitalize()} harvest fallback timed out.[/yellow]")
|
|
1259
|
+
|
|
1260
|
+
# Prepare for next iteration/provider: update baseline code snapshot
|
|
1261
|
+
orig_code = new_code
|
|
1262
|
+
_info(f"[yellow]{provider.capitalize()} attempt did not yield a passing test. Trying next...[/yellow]")
|
|
1263
|
+
|
|
1264
|
+
# No providers managed to pass verification
|
|
1265
|
+
try:
|
|
1266
|
+
if instruction_file and instruction_file.exists():
|
|
1267
|
+
instruction_file.unlink()
|
|
1268
|
+
except Exception:
|
|
1269
|
+
pass
|
|
1270
|
+
return False, "All agents failed to produce a passing fix (no local fallback).", est_cost, used_model, changed_files
|
|
1271
|
+
|
|
1272
|
+
except FileNotFoundError as e:
|
|
1273
|
+
# Common failure: provider CLI not installed/in PATH, or missing input files
|
|
1274
|
+
msg = f"A required file or command was not found: {e}. Is the agent CLI installed and in your PATH?"
|
|
1275
|
+
_always(f"[bold red]Error:[/bold red] {msg}")
|
|
1276
|
+
try:
|
|
1277
|
+
if instruction_file and instruction_file.exists():
|
|
1278
|
+
instruction_file.unlink()
|
|
1279
|
+
except Exception:
|
|
1280
|
+
pass
|
|
1281
|
+
return False, msg, 0.0, "agentic-cli", changed_files
|
|
1282
|
+
except Exception as e:
|
|
1283
|
+
# Safety net for any unexpected runtime error
|
|
1284
|
+
_always(f"[bold red]An unexpected error occurred during agentic fix:[/bold red] {e}")
|
|
1285
|
+
try:
|
|
1286
|
+
if instruction_file and instruction_file.exists():
|
|
1287
|
+
instruction_file.unlink()
|
|
1288
|
+
except Exception:
|
|
1289
|
+
pass
|
|
1290
|
+
return False, str(e), 0.0, "agentic-cli", changed_files
|
|
1291
|
+
|
|
1292
|
+
# Backward-compatibility public alias for tests/consumers.
# Earlier code and external tests imported the harvest-then-verify helper
# under this stable public name; keep it bound to the private implementation
# so those imports continue to work.
try_harvest_then_verify = _try_harvest_then_verify
|