pdd-cli 0.0.90__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +38 -6
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +521 -786
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +118 -3
- pdd/agentic_update.py +25 -8
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +63 -53
- pdd/auto_include.py +185 -3
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +195 -23
- pdd/cmd_test_main.py +345 -197
- pdd/code_generator.py +4 -2
- pdd/code_generator_main.py +118 -32
- pdd/commands/__init__.py +6 -0
- pdd/commands/analysis.py +87 -29
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +136 -113
- pdd/commands/maintenance.py +3 -2
- pdd/commands/misc.py +8 -0
- pdd/commands/modify.py +190 -164
- pdd/commands/sessions.py +284 -0
- pdd/construct_paths.py +334 -32
- pdd/context_generator_main.py +167 -170
- pdd/continue_generation.py +6 -3
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +27 -3
- pdd/core/cloud.py +237 -0
- pdd/core/errors.py +4 -0
- pdd/core/remote_session.py +61 -0
- pdd/crash_main.py +219 -23
- pdd/data/llm_model.csv +4 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +208 -34
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +291 -38
- pdd/fix_main.py +204 -4
- pdd/fix_verification_errors_loop.py +235 -26
- pdd/fix_verification_main.py +269 -83
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +46 -5
- pdd/generate_test.py +212 -151
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +309 -20
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +7 -5
- pdd/insert_includes.py +2 -1
- pdd/llm_invoke.py +459 -95
- pdd/load_prompt_template.py +15 -34
- pdd/path_resolution.py +140 -0
- pdd/postprocess.py +4 -1
- pdd/preprocess.py +68 -12
- pdd/preprocess_main.py +33 -1
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
- pdd/prompts/agentic_update_LLM.prompt +192 -338
- pdd/prompts/auto_include_LLM.prompt +22 -0
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +571 -14
- pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
- pdd/prompts/generate_test_LLM.prompt +20 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/insert_includes_LLM.prompt +262 -252
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/remote_session.py +876 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/summarize_directory.py +236 -237
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +329 -47
- pdd/sync_main.py +272 -28
- pdd/sync_orchestration.py +136 -75
- pdd/template_expander.py +161 -0
- pdd/templates/architecture/architecture_json.prompt +41 -46
- pdd/trace.py +1 -1
- pdd/track_cost.py +0 -13
- pdd/unfinished_prompt.py +2 -1
- pdd/update_main.py +23 -5
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +15 -10
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- pdd_cli-0.0.90.dist-info/RECORD +0 -153
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Tuple, Dict, Any, Optional, Set
|
|
10
|
+
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from .agentic_common import (
|
|
14
|
+
run_agentic_task,
|
|
15
|
+
load_workflow_state,
|
|
16
|
+
save_workflow_state,
|
|
17
|
+
clear_workflow_state,
|
|
18
|
+
)
|
|
19
|
+
from .load_prompt_template import load_prompt_template
|
|
20
|
+
|
|
21
|
+
# Constants
|
|
22
|
+
# Short identifier of each workflow step, keyed by 1-based step number.
# The identifier is also part of the step's prompt template name.
STEP_NAMES = dict(
    enumerate(
        (
            "unit_tests",
            "e2e_tests",
            "root_cause",
            "fix_e2e_tests",
            "identify_devunits",
            "create_unit_tests",
            "verify_tests",
            "run_pdd_fix",
            "verify_all",
        ),
        start=1,
    )
)

# Human-readable progress message of each workflow step, keyed by step number.
STEP_DESCRIPTIONS = dict(
    enumerate(
        (
            "Running unit tests from issue",
            "Running e2e tests",
            "Analyzing root cause",
            "Fixing e2e tests",
            "Identifying dev units",
            "Creating unit tests",
            "Verifying tests detect bugs",
            "Running pdd fix",
            "Final verification",
        ),
        start=1,
    )
)

# Base timeout (seconds) of each step in the 9-step agentic e2e fix workflow.
E2E_FIX_STEP_TIMEOUTS: Dict[int, float] = dict(
    enumerate(
        (
            340.0,   # 1: run unit tests from issue, pdd fix failures
            240.0,   # 2: run e2e tests, check completion (early exit)
            340.0,   # 3: root cause analysis (code vs test vs both)
            340.0,   # 4: fix e2e tests if needed
            340.0,   # 5: identify dev units involved in failures
            600.0,   # 6: create/append unit tests for dev units (complex)
            600.0,   # 7: verify unit tests detect bugs (complex)
            1000.0,  # 8: run pdd fix on failing dev units (most complex, multiple LLM calls)
            240.0,   # 9: final verification, loop control
        ),
        start=1,
    )
)
|
|
58
|
+
|
|
59
|
+
console = Console()
|
|
60
|
+
|
|
61
|
+
def _get_state_dir(cwd: Path) -> Path:
    """Return the ``.pdd/e2e-fix-state`` directory, creating it if needed.

    The directory is anchored at the nearest ancestor of ``cwd`` that contains
    a ``.git`` entry. ``cwd`` itself and the filesystem root are both
    candidates. When no candidate contains ``.git``, the resolved ``cwd`` is
    used as the anchor instead.

    Args:
        cwd: Directory from which the upward search starts.

    Returns:
        Path of the state directory, which exists when this function returns.
    """
    start = cwd.resolve()
    # ``Path.parents`` ends at the filesystem root, so a repository rooted
    # there is detected as well.
    root = next(
        (
            candidate
            for candidate in (start, *start.parents)
            if (candidate / ".git").exists()
        ),
        start,
    )

    state_dir = root / ".pdd" / "e2e-fix-state"
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir
|
|
75
|
+
|
|
76
|
+
def _parse_changed_files(output: str) -> List[str]:
    """Collect file paths announced by the agent in its output.

    A line contributes paths when, ignoring surrounding whitespace, it starts
    with ``FILES_CREATED:`` or ``FILES_MODIFIED:``. The text after the first
    colon is read as a comma-separated list; empty items are dropped.

    Args:
        output: Raw text emitted by the agent for one step.

    Returns:
        Paths in order of appearance. Duplicates are kept.
    """
    markers = ("FILES_CREATED:", "FILES_MODIFIED:")
    files: List[str] = []
    for raw_line in output.splitlines():
        # Agents often indent their summary block; tolerate that.
        line = raw_line.strip()
        if not line.startswith(markers):
            continue
        _, _, content = line.partition(":")
        files.extend(item.strip() for item in content.split(",") if item.strip())
    return files
|
|
88
|
+
|
|
89
|
+
def _parse_dev_units(output: str) -> str:
    """Extract the value of the first ``DEV_UNITS_IDENTIFIED:`` line.

    Surrounding whitespace on the line is ignored so that an indented marker
    is still recognised.

    Args:
        output: Raw text emitted by the agent.

    Returns:
        The stripped text following the marker on the first matching line, or
        an empty string when no line carries the marker.
    """
    marker = "DEV_UNITS_IDENTIFIED:"
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if line.startswith(marker):
            return line[len(marker):].strip()
    return ""
|
|
95
|
+
|
|
96
|
+
def _update_dev_unit_states(output: str, current_states: Dict[str, Any], identified_units_str: str) -> Dict[str, Any]:
    """Update per-dev-unit fix bookkeeping from the Step 8 output.

    Every identified unit gets an entry (created on first sight) and its
    ``fix_attempts`` counter is incremented. Then each output line that
    mentions a unit is inspected: a line containing ``FIXED`` marks the unit
    fixed; otherwise a line containing ``Failed`` or ``FAIL`` marks it not
    fixed. Later lines override earlier ones.

    A unit is only considered mentioned when its name appears as a whole
    token, i.e. not directly adjacent to a word character or hyphen, so that
    ``foo`` does not pick up the result reported for ``foo_bar``.

    Args:
        output: Raw Step 8 agent output.
        current_states: Mapping of unit name to its state dict. It is updated
            in place and also returned.
        identified_units_str: Comma-separated unit names.

    Returns:
        The same ``current_states`` mapping, updated.
    """
    import re  # local import: not guaranteed to be imported at module level

    identified_units = [u.strip() for u in identified_units_str.split(",") if u.strip()]

    for unit in identified_units:
        entry = current_states.setdefault(unit, {"fixed": False, "fix_attempts": 0})
        # Entries restored from an older state file may lack some keys.
        entry.setdefault("fixed", False)
        entry["fix_attempts"] = entry.get("fix_attempts", 0) + 1

    unit_patterns = {
        unit: re.compile(r"(?<![\w-])" + re.escape(unit) + r"(?![\w-])")
        for unit in identified_units
    }

    # Heuristic: look for "unit_name: FIXED" or "unit_name: Failed".
    # This depends on the LLM following the Step 8 prompt instructions.
    for line in output.splitlines():
        is_fixed = "FIXED" in line
        is_failed = "Failed" in line or "FAIL" in line
        if not (is_fixed or is_failed):
            continue
        for unit, pattern in unit_patterns.items():
            if pattern.search(line):
                # "FIXED" takes precedence when both tokens are on the line.
                current_states[unit]["fixed"] = is_fixed

    return current_states
|
|
118
|
+
|
|
119
|
+
def _check_staleness(state: Dict[str, Any], cwd: Path) -> None:
    """Warn when files recorded in a saved state changed after it was saved.

    The state's ``last_saved_at`` ISO timestamp is compared with the
    modification time of every path in ``changed_files`` (resolved against
    ``cwd``). Files that cannot be found produce a warning and are skipped.
    As soon as one file is newer than the saved state, a single staleness
    warning is printed. Nothing is raised and nothing is returned.

    The comparison is done on POSIX timestamps, so both naive (local time)
    and timezone-aware ``last_saved_at`` values are handled.

    Args:
        state: Previously saved workflow state.
        cwd: Directory against which relative file paths are resolved.
    """
    last_saved_str = state.get("last_saved_at")
    if not last_saved_str:
        return

    try:
        last_saved_ts = datetime.fromisoformat(last_saved_str).timestamp()
    except (TypeError, ValueError, OverflowError, OSError):
        # Unparseable or out-of-range timestamp: staleness cannot be judged.
        return

    stale = False
    for file_path in state.get("changed_files") or []:
        p = cwd / file_path
        try:
            mtime_ts = p.stat().st_mtime
        except (FileNotFoundError, NotADirectoryError):
            console.print(f"[yellow]Warning: File '{file_path}' from previous state is missing.[/yellow]")
            continue
        except OSError as err:
            console.print(f"[yellow]Warning: File '{file_path}' from previous state could not be inspected: {err}[/yellow]")
            continue

        if mtime_ts > last_saved_ts:
            stale = True
            break

    if stale:
        console.print("[yellow]Warning: Codebase may have changed since last run. Consider --no-resume for fresh start.[/yellow]")
|
|
147
|
+
|
|
148
|
+
def run_agentic_e2e_fix_orchestrator(
    issue_url: str,
    issue_content: str,
    repo_owner: str,
    repo_name: str,
    issue_number: int,
    issue_author: str,
    issue_title: str,
    *,
    cwd: Path,
    timeout_adder: float = 0.0,
    max_cycles: int = 5,
    resume: bool = True,
    verbose: bool = False,
    quiet: bool = False,
    use_github_state: bool = True
) -> Tuple[bool, str, float, str, List[str]]:
    """
    Orchestrator for the 9-step agentic e2e fix workflow.

    Runs steps 1-9 in order, repeating the whole sequence for up to
    ``max_cycles`` cycles. State is saved after every step so an interrupted
    run can be resumed. The run ends successfully as soon as Step 2 or Step 9
    output contains ``ALL_TESTS_PASS``.

    Args:
        issue_url: URL of the issue being fixed.
        issue_content: Issue text made available to every step prompt.
        repo_owner: Repository owner, passed to prompts and state storage.
        repo_name: Repository name, passed to prompts and state storage.
        issue_number: Issue number, used in prompts and as the state key.
        issue_author: Accepted for interface compatibility; not read here.
        issue_title: Issue title, only used in the start-up message.
        cwd: Working directory the agent tasks run in.
        timeout_adder: Seconds added to every step's base timeout.
        max_cycles: Maximum number of full 9-step cycles.
        resume: Load previously saved state when True; otherwise clear it.
        verbose: Forwarded to ``run_agentic_task``.
        quiet: Forwarded to ``run_agentic_task``.
        use_github_state: Forwarded to the workflow state load/save/clear helpers.

    Returns:
        Tuple[bool, str, float, str, List[str]]:
            (success, final_message, total_cost, model_used, changed_files)

    Raises:
        KeyboardInterrupt: Re-raised after an attempt to save the state.
            Any other exception is caught and reported through the returned
            tuple with ``success`` set to False.
    """
    state_dir = _get_state_dir(cwd)
    workflow_name = "e2e_fix"

    # Run variables. The nested helpers below read their *current* values.
    current_cycle = 0
    last_completed_step = 0
    step_outputs: Dict[str, str] = {}
    total_cost = 0.0
    model_used = "unknown"
    changed_files: List[str] = []
    dev_unit_states: Dict[str, Any] = {}
    github_comment_id: Optional[int] = None

    def _snapshot_state() -> Dict[str, Any]:
        """Build the persisted state payload from the current run variables."""
        return {
            "workflow": workflow_name,
            "issue_url": issue_url,
            "issue_number": issue_number,
            "current_cycle": current_cycle,
            "last_completed_step": last_completed_step,
            "step_outputs": step_outputs,
            "dev_unit_states": dev_unit_states,
            "total_cost": total_cost,
            "model_used": model_used,
            "changed_files": changed_files,
            "last_saved_at": datetime.now().isoformat(),
            "github_comment_id": github_comment_id,
        }

    def _persist_state() -> None:
        """Save the current state and remember the comment id returned, if any."""
        nonlocal github_comment_id
        new_gh_id = save_workflow_state(
            cwd, issue_number, workflow_name, _snapshot_state(), state_dir,
            repo_owner, repo_name, use_github_state, github_comment_id
        )
        if new_gh_id:
            github_comment_id = new_gh_id

    # Resume Logic
    if resume:
        loaded_state, gh_id = load_workflow_state(
            cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state
        )
        if loaded_state:
            console.print(f"[blue]Resuming from cycle {loaded_state.get('current_cycle', 1)} step {loaded_state.get('last_completed_step', 0)}...[/blue]")
            current_cycle = loaded_state.get("current_cycle", 0)
            last_completed_step = loaded_state.get("last_completed_step", 0)
            step_outputs = loaded_state.get("step_outputs", {})
            total_cost = loaded_state.get("total_cost", 0.0)
            model_used = loaded_state.get("model_used", "unknown")
            changed_files = loaded_state.get("changed_files", [])
            dev_unit_states = loaded_state.get("dev_unit_states", {})
            github_comment_id = gh_id

            _check_staleness(loaded_state, cwd)

            # If we finished a cycle but didn't exit, prepare for next cycle
            if last_completed_step >= 9:
                current_cycle += 1
                last_completed_step = 0
                step_outputs = {}  # Clear outputs for new cycle
        else:
            # No state found, start fresh
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
    else:
        clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)

    console.print(f"Fixing e2e tests for issue #{issue_number}: \"{issue_title}\"")

    success = False
    final_message = ""

    try:
        # Outer Loop
        if current_cycle == 0:
            current_cycle = 1

        while current_cycle <= max_cycles:
            console.print(f"\n[bold cyan][Cycle {current_cycle}/{max_cycles}] Starting fix cycle...[/bold cyan]")

            # Inner Loop (Steps 1-9); steps already completed in this cycle
            # (resume case) are skipped by starting after them.
            for step_num in range(max(1, last_completed_step + 1), 10):
                step_name = STEP_NAMES[step_num]
                description = STEP_DESCRIPTIONS[step_num]

                console.print(f"[bold][Step {step_num}/9] {description}...[/bold]")

                # 1. Load Prompt
                template_name = f"agentic_e2e_fix_step{step_num}_{step_name}_LLM"
                prompt_template = load_prompt_template(template_name)
                if not prompt_template:
                    raise ValueError(f"Could not load prompt template: {template_name}")

                # 2. Prepare Context
                context = {
                    "issue_url": issue_url,
                    "repo_owner": repo_owner,
                    "repo_name": repo_name,
                    "issue_number": issue_number,
                    "cycle_number": current_cycle,
                    "max_cycles": max_cycles,
                    "issue_content": issue_content,
                }

                # Add previous step outputs
                for prev_step in range(1, step_num):
                    context[f"step{prev_step}_output"] = step_outputs.get(str(prev_step), "")

                # Derived variables for specific steps (all come from Step 5)
                if step_num >= 6:
                    identified_units = _parse_dev_units(step_outputs.get("5", ""))
                    context["dev_units_identified"] = identified_units
                    if step_num == 8:
                        context["failing_dev_units"] = identified_units

                if step_num == 9:
                    context["next_cycle"] = current_cycle + 1

                formatted_prompt = prompt_template.format(**context)

                # 3. Run Task
                timeout = E2E_FIX_STEP_TIMEOUTS.get(step_num, 340.0) + timeout_adder

                # NOTE(review): the task's success flag is not acted upon here;
                # loop control relies on tokens in the output. Confirm intended.
                _step_success, step_output, step_cost, step_model = run_agentic_task(
                    instruction=formatted_prompt,
                    cwd=cwd,
                    verbose=verbose,
                    quiet=quiet,
                    timeout=timeout,
                    label=f"cycle{current_cycle}_step{step_num}"
                )

                # 4. Store Output & Accumulate
                step_outputs[str(step_num)] = step_output
                total_cost += step_cost
                model_used = step_model if step_model else model_used

                # Parse changed files
                for f in _parse_changed_files(step_output):
                    if f not in changed_files:
                        changed_files.append(f)

                # Parse dev unit states (Step 8)
                if step_num == 8:
                    dev_units_str = _parse_dev_units(step_outputs.get("5", ""))
                    dev_unit_states = _update_dev_unit_states(step_output, dev_unit_states, dev_units_str)

                # Print brief result
                console.print(f" -> Step {step_num} complete. Cost: ${step_cost:.4f}")

                # 5. Save State
                last_completed_step = step_num
                _persist_state()

                # Check Early Exit (Step 2)
                if step_num == 2 and "ALL_TESTS_PASS" in step_output:
                    console.print("[green]ALL_TESTS_PASS detected in Step 2. Exiting loop.[/green]")
                    success = True
                    final_message = "All tests passed during e2e check."
                    break

                # Check Loop Control (Step 9)
                if step_num == 9:
                    if "ALL_TESTS_PASS" in step_output:
                        console.print("[green]ALL_TESTS_PASS detected in Step 9.[/green]")
                        success = True
                        final_message = "All tests passed after fixes."
                        break
                    elif "MAX_CYCLES_REACHED" in step_output:
                        console.print("[yellow]MAX_CYCLES_REACHED detected in Step 9.[/yellow]")
                    elif "CONTINUE_CYCLE" not in step_output:
                        console.print("[yellow]Warning: No loop control token found in Step 9. Defaulting to CONTINUE_CYCLE.[/yellow]")

            if success:
                break

            # Prepare for next cycle
            current_cycle += 1
            last_completed_step = 0
            step_outputs = {}  # Clear outputs for next cycle

            # Only record the rollover when another cycle will actually run.
            if current_cycle <= max_cycles:
                _persist_state()

        if success:
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
            console.print("\n[bold green]E2E fix complete[/bold green]")
            console.print(f" Total cost: ${total_cost:.4f}")
            console.print(f" Cycles used: {min(current_cycle, max_cycles)}/{max_cycles}")
            console.print(f" Files changed: {', '.join(changed_files)}")
            fixed_units = [u for u, s in dev_unit_states.items() if s.get("fixed")]
            console.print(f" Dev units fixed: {', '.join(fixed_units)}")
            return True, final_message, total_cost, model_used, changed_files
        else:
            final_message = f"Max cycles ({max_cycles}) reached without all tests passing"
            console.print("\n[bold red]E2E fix incomplete (max cycles reached)[/bold red]")
            console.print(f" Total cost: ${total_cost:.4f}")
            remaining = [u for u, s in dev_unit_states.items() if not s.get("fixed")]
            console.print(f" Remaining failures: {', '.join(remaining)}")
            return False, final_message, total_cost, model_used, changed_files

    except KeyboardInterrupt:
        console.print("\n[bold red]Interrupted by user. Saving state...[/bold red]")
        try:
            _persist_state()
        except Exception as save_err:
            # Do not let a failed save mask the interrupt itself.
            console.print(f"[yellow]Warning: could not save state: {save_err}[/yellow]")
        raise

    except Exception as e:
        console.print(f"\n[bold red]Fatal error: {e}[/bold red]")
        try:
            _persist_state()
        except Exception as save_err:
            # Best effort: report the failed save, then return the original error.
            console.print(f"[yellow]Warning: could not save state: {save_err}[/yellow]")
        return False, f"Stopped at cycle {current_cycle} step {last_completed_step}: {str(e)}", total_cost, model_used, changed_files
|
pdd/agentic_fix.py
CHANGED
|
@@ -4,6 +4,7 @@ import os
|
|
|
4
4
|
import re
|
|
5
5
|
import shutil
|
|
6
6
|
import subprocess
|
|
7
|
+
import sys
|
|
7
8
|
import difflib
|
|
8
9
|
import tempfile
|
|
9
10
|
from pathlib import Path
|
|
@@ -56,6 +57,68 @@ def _verbose(msg: str) -> None:
|
|
|
56
57
|
if _IS_VERBOSE:
|
|
57
58
|
console.print(msg)
|
|
58
59
|
|
|
60
|
+
|
|
61
|
+
def _detect_suspicious_files(cwd: Path, context: str = "") -> List[Path]:
    """
    Detect suspicious one- or two-character files (like C, E, T) in a directory.

    This is a diagnostic function to help identify when/where these files are created.
    Issue #186: Empty files named C, E, T (first letters of Code, Example, Test)
    have been appearing during agentic operations.

    Only the direct children of ``cwd`` are inspected; names starting with a
    dot are ignored. When anything is found, a report is printed and also
    appended to ``~/.pdd/suspicious_files.log`` together with the last ten
    frames of the current call stack. Any error while scanning or logging is
    reported through the verbose channel and never raised.

    Args:
        cwd: Directory to scan
        context: Description of what operation just ran (for logging)

    Returns:
        List of suspicious file paths found
    """
    import datetime
    import traceback

    suspicious: List[Path] = []
    try:
        for f in cwd.iterdir():
            if f.is_file() and len(f.name) <= 2 and not f.name.startswith('.'):
                suspicious.append(f)

        if suspicious:
            timestamp = datetime.datetime.now().isoformat()

            # Stat each file once and reuse the result for console and log.
            details = []
            for sf in suspicious:
                try:
                    details.append((sf.name, sf.stat().st_size, None))
                except OSError as stat_err:
                    details.append((sf.name, None, stat_err))

            _always("[bold red]⚠️ SUSPICIOUS FILES DETECTED (Issue #186)[/bold red]")
            _always(f"[red]Timestamp: {timestamp}[/red]")
            _always(f"[red]Context: {context}[/red]")
            _always(f"[red]Directory: {cwd}[/red]")
            for name, size, stat_err in details:
                if stat_err is None:
                    _always(f"[red] - {name} (size: {size} bytes)[/red]")
                else:
                    _always(f"[red] - {name} (could not stat)[/red]")

            # Also log to a file for persistence
            log_file = Path.home() / ".pdd" / "suspicious_files.log"
            log_file.parent.mkdir(parents=True, exist_ok=True)
            with open(log_file, "a", encoding="utf-8") as lf:
                lf.write(f"\n{'='*60}\n")
                lf.write(f"Timestamp: {timestamp}\n")
                lf.write(f"Context: {context}\n")
                lf.write(f"Directory: {cwd}\n")
                lf.write(f"CWD at detection: {Path.cwd()}\n")
                for name, size, stat_err in details:
                    if stat_err is None:
                        lf.write(f" - {name} (size: {size} bytes)\n")
                    else:
                        lf.write(f" - {name} (error: {stat_err})\n")
                # Log the innermost frames of the stack to help identify the caller
                lf.write("Stack trace:\n")
                recent_frames = traceback.format_stack()[-10:]
                lf.write("".join(recent_frames) if recent_frames else "N/A")
                lf.write("\n")
    except Exception as e:
        _verbose(f"[yellow]Could not scan for suspicious files: {e}[/yellow]")

    return suspicious
|
|
120
|
+
|
|
121
|
+
|
|
59
122
|
def _begin_marker(path: Path) -> str:
    """Return the opening sentinel that precedes a corrected file block for ``path``."""
    return "<<<BEGIN_FILE:{}>>>".format(path)
|
|
@@ -130,10 +193,41 @@ _MULTI_FILE_BLOCK_RE = re.compile(
|
|
|
130
193
|
re.DOTALL,
|
|
131
194
|
)
|
|
132
195
|
|
|
196
|
+
|
|
197
|
+
def _is_suspicious_path(path: str) -> bool:
    """
    Reject paths that look like LLM artifacts or template variables.

    This defends against:
    - Single/double character filenames (e.g., 'C', 'E', 'T' from agent misbehavior)
    - Template variables like {path}, {code_abs} captured by regex, whether
      they appear in the filename or in any directory component
    - Filenames made only of dots, such as "..."

    Args:
        path: Path string as emitted by the agent.

    Returns:
        True if the path should be rejected.
    """
    if not path:
        return True
    # An unexpanded placeholder anywhere in the path is an artifact, e.g.
    # "{project_root}/src/main.py" would create a literal "{project_root}" dir.
    if '{' in path or '}' in path:
        return True
    base_name = Path(path).name
    # Reject single or double character filenames (too short to be legitimate)
    if len(base_name) <= 2:
        return True
    # Reject filenames that are just dots like "..."
    if base_name.strip('.') == '':
        return True
    return False
|
|
222
|
+
|
|
223
|
+
|
|
133
224
|
def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
|
|
134
225
|
"""
|
|
135
226
|
Parse stdout/stderr blobs and collect all emitted file blocks into {path: content}.
|
|
136
227
|
Returns an empty dict if none found.
|
|
228
|
+
|
|
229
|
+
Note: Suspicious paths (single-char, template variables) are rejected to prevent
|
|
230
|
+
LLM artifacts from being written to disk.
|
|
137
231
|
"""
|
|
138
232
|
out: Dict[str, str] = {}
|
|
139
233
|
for blob in blobs:
|
|
@@ -143,6 +237,9 @@ def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
|
|
|
143
237
|
path = (m.group(1) or "").strip()
|
|
144
238
|
body = m.group(2) or ""
|
|
145
239
|
if path and body != "":
|
|
240
|
+
if _is_suspicious_path(path):
|
|
241
|
+
_info(f"[yellow]Skipping suspicious path from LLM output: {path!r}[/yellow]")
|
|
242
|
+
continue
|
|
146
243
|
out[path] = body
|
|
147
244
|
return out
|
|
148
245
|
|
|
@@ -401,6 +498,12 @@ def _run_anthropic_variants(prompt_text: str, cwd: Path, total_timeout: int, lab
|
|
|
401
498
|
return last
|
|
402
499
|
finally:
|
|
403
500
|
prompt_file.unlink(missing_ok=True)
|
|
501
|
+
# Issue #186: Scan for suspicious files after Anthropic agent runs
|
|
502
|
+
_detect_suspicious_files(cwd, f"After _run_anthropic_variants ({label})")
|
|
503
|
+
# Also scan project root in case agent created files there
|
|
504
|
+
project_root = Path.cwd()
|
|
505
|
+
if project_root != cwd:
|
|
506
|
+
_detect_suspicious_files(project_root, f"After _run_anthropic_variants ({label}) - project root")
|
|
404
507
|
|
|
405
508
|
def _run_cli_args_google(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
|
|
406
509
|
"""Subprocess runner for Google commands with common sanitized env."""
|
|
@@ -460,6 +563,12 @@ def _run_google_variants(prompt_text: str, cwd: Path, total_timeout: int, label:
|
|
|
460
563
|
return last
|
|
461
564
|
finally:
|
|
462
565
|
prompt_file.unlink(missing_ok=True)
|
|
566
|
+
# Issue #186: Scan for suspicious files after Google agent runs
|
|
567
|
+
_detect_suspicious_files(cwd, f"After _run_google_variants ({label})")
|
|
568
|
+
# Also scan project root in case agent created files there
|
|
569
|
+
project_root = Path.cwd()
|
|
570
|
+
if project_root != cwd:
|
|
571
|
+
_detect_suspicious_files(project_root, f"After _run_google_variants ({label}) - project root")
|
|
463
572
|
|
|
464
573
|
def _run_testcmd(cmd: str, cwd: Path) -> bool:
|
|
465
574
|
"""
|
|
@@ -498,7 +607,7 @@ def _verify_and_log(unit_test_file: str, cwd: Path, *, verify_cmd: Optional[str]
|
|
|
498
607
|
return _run_testcmd(run_cmd, cwd)
|
|
499
608
|
# Fallback: try running with Python if no run command found
|
|
500
609
|
verify = subprocess.run(
|
|
501
|
-
[
|
|
610
|
+
[sys.executable, str(Path(unit_test_file).resolve())],
|
|
502
611
|
capture_output=True,
|
|
503
612
|
text=True,
|
|
504
613
|
check=False,
|
|
@@ -549,10 +658,16 @@ def _normalize_target_path(
|
|
|
549
658
|
) -> Optional[Path]:
|
|
550
659
|
"""
|
|
551
660
|
Resolve an emitted path to a safe file path we should write:
|
|
661
|
+
- reject suspicious paths (single-char, template variables)
|
|
552
662
|
- make path absolute under project root
|
|
553
663
|
- allow direct match, primary-file match (with/without _fixed), or basename search
|
|
554
664
|
- create new files only if allow_new is True
|
|
555
665
|
"""
|
|
666
|
+
# Early rejection of suspicious paths (defense against LLM artifacts)
|
|
667
|
+
if _is_suspicious_path(emitted_path):
|
|
668
|
+
_info(f"[yellow]Skipping suspicious path: {emitted_path!r}[/yellow]")
|
|
669
|
+
return None
|
|
670
|
+
|
|
556
671
|
p = Path(emitted_path)
|
|
557
672
|
if not p.is_absolute():
|
|
558
673
|
p = (project_root / emitted_path).resolve()
|
|
@@ -760,7 +875,7 @@ def _try_harvest_then_verify(
|
|
|
760
875
|
newest = code_path.read_text(encoding="utf-8")
|
|
761
876
|
_print_diff(code_snapshot, newest, code_path)
|
|
762
877
|
ok = _post_apply_verify_or_testcmd(
|
|
763
|
-
provider, unit_test_file,
|
|
878
|
+
provider, unit_test_file, cwd,
|
|
764
879
|
verify_cmd=verify_cmd, verify_enabled=verify_enabled,
|
|
765
880
|
stdout=res.stdout or "", stderr=res.stderr or ""
|
|
766
881
|
)
|
|
@@ -952,7 +1067,7 @@ def run_agentic_fix(
|
|
|
952
1067
|
else:
|
|
953
1068
|
# Fallback: run directly with Python interpreter
|
|
954
1069
|
pre = subprocess.run(
|
|
955
|
-
[
|
|
1070
|
+
[sys.executable, str(Path(unit_test_file).resolve())],
|
|
956
1071
|
capture_output=True,
|
|
957
1072
|
text=True,
|
|
958
1073
|
check=False,
|