pdd-cli 0.0.90__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. pdd/__init__.py +38 -6
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +521 -786
  7. pdd/agentic_e2e_fix.py +319 -0
  8. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  9. pdd/agentic_fix.py +118 -3
  10. pdd/agentic_update.py +25 -8
  11. pdd/architecture_sync.py +565 -0
  12. pdd/auth_service.py +210 -0
  13. pdd/auto_deps_main.py +63 -53
  14. pdd/auto_include.py +185 -3
  15. pdd/auto_update.py +125 -47
  16. pdd/bug_main.py +195 -23
  17. pdd/cmd_test_main.py +345 -197
  18. pdd/code_generator.py +4 -2
  19. pdd/code_generator_main.py +118 -32
  20. pdd/commands/__init__.py +6 -0
  21. pdd/commands/analysis.py +87 -29
  22. pdd/commands/auth.py +309 -0
  23. pdd/commands/connect.py +290 -0
  24. pdd/commands/fix.py +136 -113
  25. pdd/commands/maintenance.py +3 -2
  26. pdd/commands/misc.py +8 -0
  27. pdd/commands/modify.py +190 -164
  28. pdd/commands/sessions.py +284 -0
  29. pdd/construct_paths.py +334 -32
  30. pdd/context_generator_main.py +167 -170
  31. pdd/continue_generation.py +6 -3
  32. pdd/core/__init__.py +33 -0
  33. pdd/core/cli.py +27 -3
  34. pdd/core/cloud.py +237 -0
  35. pdd/core/errors.py +4 -0
  36. pdd/core/remote_session.py +61 -0
  37. pdd/crash_main.py +219 -23
  38. pdd/data/llm_model.csv +4 -4
  39. pdd/docs/prompting_guide.md +864 -0
  40. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  41. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  42. pdd/fix_code_loop.py +208 -34
  43. pdd/fix_code_module_errors.py +6 -2
  44. pdd/fix_error_loop.py +291 -38
  45. pdd/fix_main.py +204 -4
  46. pdd/fix_verification_errors_loop.py +235 -26
  47. pdd/fix_verification_main.py +269 -83
  48. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  49. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  50. pdd/frontend/dist/index.html +376 -0
  51. pdd/frontend/dist/logo.svg +33 -0
  52. pdd/generate_output_paths.py +46 -5
  53. pdd/generate_test.py +212 -151
  54. pdd/get_comment.py +19 -44
  55. pdd/get_extension.py +8 -9
  56. pdd/get_jwt_token.py +309 -20
  57. pdd/get_language.py +8 -7
  58. pdd/get_run_command.py +7 -5
  59. pdd/insert_includes.py +2 -1
  60. pdd/llm_invoke.py +459 -95
  61. pdd/load_prompt_template.py +15 -34
  62. pdd/path_resolution.py +140 -0
  63. pdd/postprocess.py +4 -1
  64. pdd/preprocess.py +68 -12
  65. pdd/preprocess_main.py +33 -1
  66. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  67. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  68. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  69. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  70. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  71. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  72. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  73. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  74. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  75. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  76. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  77. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  78. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  79. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  80. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  81. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  82. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  83. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  84. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  85. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  86. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  87. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  88. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  89. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  90. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  91. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  92. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  93. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  94. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  95. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  96. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  97. pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
  98. pdd/prompts/agentic_update_LLM.prompt +192 -338
  99. pdd/prompts/auto_include_LLM.prompt +22 -0
  100. pdd/prompts/change_LLM.prompt +3093 -1
  101. pdd/prompts/detect_change_LLM.prompt +571 -14
  102. pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
  103. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
  104. pdd/prompts/generate_test_LLM.prompt +20 -1
  105. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  106. pdd/prompts/insert_includes_LLM.prompt +262 -252
  107. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  108. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  109. pdd/remote_session.py +876 -0
  110. pdd/server/__init__.py +52 -0
  111. pdd/server/app.py +335 -0
  112. pdd/server/click_executor.py +587 -0
  113. pdd/server/executor.py +338 -0
  114. pdd/server/jobs.py +661 -0
  115. pdd/server/models.py +241 -0
  116. pdd/server/routes/__init__.py +31 -0
  117. pdd/server/routes/architecture.py +451 -0
  118. pdd/server/routes/auth.py +364 -0
  119. pdd/server/routes/commands.py +929 -0
  120. pdd/server/routes/config.py +42 -0
  121. pdd/server/routes/files.py +603 -0
  122. pdd/server/routes/prompts.py +1322 -0
  123. pdd/server/routes/websocket.py +473 -0
  124. pdd/server/security.py +243 -0
  125. pdd/server/terminal_spawner.py +209 -0
  126. pdd/server/token_counter.py +222 -0
  127. pdd/summarize_directory.py +236 -237
  128. pdd/sync_animation.py +8 -4
  129. pdd/sync_determine_operation.py +329 -47
  130. pdd/sync_main.py +272 -28
  131. pdd/sync_orchestration.py +136 -75
  132. pdd/template_expander.py +161 -0
  133. pdd/templates/architecture/architecture_json.prompt +41 -46
  134. pdd/trace.py +1 -1
  135. pdd/track_cost.py +0 -13
  136. pdd/unfinished_prompt.py +2 -1
  137. pdd/update_main.py +23 -5
  138. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +15 -10
  139. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  140. pdd_cli-0.0.90.dist-info/RECORD +0 -153
  141. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  142. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  143. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +0 -0
  144. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,426 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ import time
6
+ import json
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import List, Tuple, Dict, Any, Optional, Set
10
+
11
+ from rich.console import Console
12
+
13
+ from .agentic_common import (
14
+ run_agentic_task,
15
+ load_workflow_state,
16
+ save_workflow_state,
17
+ clear_workflow_state,
18
+ )
19
+ from .load_prompt_template import load_prompt_template
20
+
21
# Constants
#
# STEP_NAMES maps the 1-based step number to the suffix used to build the
# prompt template filename: "agentic_e2e_fix_step{N}_{name}_LLM".
STEP_NAMES: Dict[int, str] = {
    1: "unit_tests",
    2: "e2e_tests",
    3: "root_cause",
    4: "fix_e2e_tests",
    5: "identify_devunits",
    6: "create_unit_tests",
    7: "verify_tests",
    8: "run_pdd_fix",
    9: "verify_all",
}

# Human-readable progress labels printed to the console before each step runs.
STEP_DESCRIPTIONS: Dict[int, str] = {
    1: "Running unit tests from issue",
    2: "Running e2e tests",
    3: "Analyzing root cause",
    4: "Fixing e2e tests",
    5: "Identifying dev units",
    6: "Creating unit tests",
    7: "Verifying tests detect bugs",
    8: "Running pdd fix",
    9: "Final verification",
}

# Per-step timeouts for the 9-step agentic e2e fix workflow
# (seconds; a caller-supplied timeout_adder is added on top of these).
E2E_FIX_STEP_TIMEOUTS: Dict[int, float] = {
    1: 340.0,   # Run unit tests from issue, pdd fix failures
    2: 240.0,   # Run e2e tests, check completion (early exit)
    3: 340.0,   # Root cause analysis (code vs test vs both)
    4: 340.0,   # Fix e2e tests if needed
    5: 340.0,   # Identify dev units involved in failures
    6: 600.0,   # Create/append unit tests for dev units (Complex)
    7: 600.0,   # Verify unit tests detect bugs (Complex)
    8: 1000.0,  # Run pdd fix on failing dev units (Most Complex - multiple LLM calls)
    9: 240.0,   # Final verification, loop control
}

# Shared Rich console used for all orchestrator output.
console = Console()
60
+
61
+ def _get_state_dir(cwd: Path) -> Path:
62
+ """Returns the state directory .pdd/e2e-fix-state/ relative to git root."""
63
+ # Simple heuristic: look for .git, otherwise use cwd
64
+ d = cwd.resolve()
65
+ root = d
66
+ while d != d.parent:
67
+ if (d / ".git").exists():
68
+ root = d
69
+ break
70
+ d = d.parent
71
+
72
+ state_dir = root / ".pdd" / "e2e-fix-state"
73
+ state_dir.mkdir(parents=True, exist_ok=True)
74
+ return state_dir
75
+
76
+ def _parse_changed_files(output: str) -> List[str]:
77
+ """Parses FILES_CREATED and FILES_MODIFIED from agent output."""
78
+ files = []
79
+ for line in output.splitlines():
80
+ if line.startswith("FILES_CREATED:") or line.startswith("FILES_MODIFIED:"):
81
+ # Extract content after colon
82
+ content = line.split(":", 1)[1].strip()
83
+ if content:
84
+ # Split by comma and strip
85
+ paths = [p.strip() for p in content.split(",") if p.strip()]
86
+ files.extend(paths)
87
+ return files
88
+
89
+ def _parse_dev_units(output: str) -> str:
90
+ """Parses DEV_UNITS_IDENTIFIED from output."""
91
+ for line in output.splitlines():
92
+ if line.startswith("DEV_UNITS_IDENTIFIED:"):
93
+ return line.split(":", 1)[1].strip()
94
+ return ""
95
+
96
+ def _update_dev_unit_states(output: str, current_states: Dict[str, Any], identified_units_str: str) -> Dict[str, Any]:
97
+ """Updates dev unit states based on Step 8 output."""
98
+ identified_units = [u.strip() for u in identified_units_str.split(",") if u.strip()]
99
+
100
+ # Initialize if not present
101
+ for unit in identified_units:
102
+ if unit not in current_states:
103
+ current_states[unit] = {"fixed": False, "fix_attempts": 0}
104
+ current_states[unit]["fix_attempts"] += 1
105
+
106
+ # Parse results from output
107
+ # Heuristic: look for "unit_name: FIXED" or "unit_name: Failed"
108
+ # This depends on the LLM following instructions in Step 8 prompt.
109
+ for line in output.splitlines():
110
+ for unit in identified_units:
111
+ if unit in line:
112
+ if "FIXED" in line:
113
+ current_states[unit]["fixed"] = True
114
+ elif "Failed" in line or "FAIL" in line:
115
+ current_states[unit]["fixed"] = False
116
+
117
+ return current_states
118
+
119
+ def _check_staleness(state: Dict[str, Any], cwd: Path) -> None:
120
+ """Checks if files have changed since state was saved."""
121
+ last_saved_str = state.get("last_saved_at")
122
+ if not last_saved_str:
123
+ return
124
+
125
+ try:
126
+ last_saved = datetime.fromisoformat(last_saved_str)
127
+ except ValueError:
128
+ return
129
+
130
+ changed_files = state.get("changed_files", [])
131
+ stale = False
132
+
133
+ for file_path in changed_files:
134
+ p = cwd / file_path
135
+ if not p.exists():
136
+ console.print(f"[yellow]Warning: File '{file_path}' from previous state is missing.[/yellow]")
137
+ continue
138
+
139
+ # Check mtime
140
+ mtime = datetime.fromtimestamp(p.stat().st_mtime)
141
+ if mtime > last_saved:
142
+ stale = True
143
+ break
144
+
145
+ if stale:
146
+ console.print("[yellow]Warning: Codebase may have changed since last run. Consider --no-resume for fresh start.[/yellow]")
147
+
148
def run_agentic_e2e_fix_orchestrator(
    issue_url: str,
    issue_content: str,
    repo_owner: str,
    repo_name: str,
    issue_number: int,
    issue_author: str,
    issue_title: str,
    *,
    cwd: Path,
    timeout_adder: float = 0.0,
    max_cycles: int = 5,
    resume: bool = True,
    verbose: bool = False,
    quiet: bool = False,
    use_github_state: bool = True
) -> Tuple[bool, str, float, str, List[str]]:
    """
    Orchestrator for the 9-step agentic e2e fix workflow.

    Runs up to ``max_cycles`` cycles of the nine steps defined in STEP_NAMES,
    persisting progress (via save_workflow_state) after every step so an
    interrupted run can be resumed. Two exit tokens in agent output control
    the loop: "ALL_TESTS_PASS" (success, at Step 2 or Step 9) and loop-control
    tokens at Step 9 ("MAX_CYCLES_REACHED" / "CONTINUE_CYCLE").

    Args:
        issue_url: URL of the GitHub issue being fixed.
        issue_content: Full issue body, injected into each step's prompt.
        repo_owner: Repository owner (used for state storage keys).
        repo_name: Repository name (used for state storage keys).
        issue_number: Issue number (used for state storage keys and display).
        issue_author: Issue author. NOTE(review): accepted but not used in
            this function body — presumably consumed by the step prompts in
            a future revision; confirm before removing.
        issue_title: Issue title, printed at startup.
        cwd: Working directory the agent tasks run in.
        timeout_adder: Seconds added to each step's base timeout.
        max_cycles: Maximum number of full 9-step cycles to attempt.
        resume: When True, reload any previously saved workflow state.
        verbose: Passed through to run_agentic_task.
        quiet: Passed through to run_agentic_task.
        use_github_state: When True, state is also mirrored to a GitHub
            comment (id tracked in ``github_comment_id``).

    Returns:
        Tuple[bool, str, float, str, List[str]]:
            (success, final_message, total_cost, model_used, changed_files)
    """
    state_dir = _get_state_dir(cwd)
    workflow_name = "e2e_fix"

    # Initialize state variables
    current_cycle = 0
    last_completed_step = 0
    step_outputs: Dict[str, str] = {}
    total_cost = 0.0
    model_used = "unknown"
    changed_files: List[str] = []
    dev_unit_states: Dict[str, Any] = {}
    github_comment_id: Optional[int] = None

    # Resume Logic: reload saved progress, otherwise clear any stale state.
    if resume:
        loaded_state, gh_id = load_workflow_state(
            cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state
        )
        if loaded_state:
            console.print(f"[blue]Resuming from cycle {loaded_state.get('current_cycle', 1)} step {loaded_state.get('last_completed_step', 0)}...[/blue]")
            current_cycle = loaded_state.get("current_cycle", 0)
            last_completed_step = loaded_state.get("last_completed_step", 0)
            step_outputs = loaded_state.get("step_outputs", {})
            total_cost = loaded_state.get("total_cost", 0.0)
            model_used = loaded_state.get("model_used", "unknown")
            changed_files = loaded_state.get("changed_files", [])
            dev_unit_states = loaded_state.get("dev_unit_states", {})
            github_comment_id = gh_id

            # Advisory warning if tracked files changed since the state was saved.
            _check_staleness(loaded_state, cwd)

            # If we finished a cycle but didn't exit, prepare for next cycle
            if last_completed_step >= 9:
                current_cycle += 1
                last_completed_step = 0
                step_outputs = {}  # Clear outputs for new cycle
        else:
            # No state found, start fresh
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
    else:
        clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)

    console.print(f"Fixing e2e tests for issue #{issue_number}: \"{issue_title}\"")

    success = False
    final_message = ""

    try:
        # Outer Loop: one iteration per fix cycle (fresh start begins at 1).
        if current_cycle == 0:
            current_cycle = 1

        while current_cycle <= max_cycles:
            console.print(f"\n[bold cyan][Cycle {current_cycle}/{max_cycles}] Starting fix cycle...[/bold cyan]")

            # Inner Loop (Steps 1-9)
            for step_num in range(1, 10):
                if step_num <= last_completed_step:
                    continue  # Skip already completed steps in this cycle

                step_name = STEP_NAMES[step_num]
                description = STEP_DESCRIPTIONS[step_num]

                console.print(f"[bold][Step {step_num}/9] {description}...[/bold]")

                # 1. Load Prompt
                template_name = f"agentic_e2e_fix_step{step_num}_{step_name}_LLM"
                prompt_template = load_prompt_template(template_name)
                if not prompt_template:
                    raise ValueError(f"Could not load prompt template: {template_name}")

                # 2. Prepare Context (substituted into the prompt via str.format)
                context = {
                    "issue_url": issue_url,
                    "repo_owner": repo_owner,
                    "repo_name": repo_name,
                    "issue_number": issue_number,
                    "cycle_number": current_cycle,
                    "max_cycles": max_cycles,
                    "issue_content": issue_content,
                }

                # Add previous step outputs (keys "step1_output" .. "step{N-1}_output").
                for prev_step in range(1, step_num):
                    key = f"step{prev_step}_output"
                    context[key] = step_outputs.get(str(prev_step), "")

                # Derived variables for specific steps: Step 5's identified dev
                # units feed Steps 6-9; Step 9 also needs the upcoming cycle number.
                if step_num >= 6:
                    s5_out = step_outputs.get("5", "")
                    context["dev_units_identified"] = _parse_dev_units(s5_out)

                if step_num == 8:
                    s5_out = step_outputs.get("5", "")
                    context["failing_dev_units"] = _parse_dev_units(s5_out)

                if step_num == 9:
                    context["next_cycle"] = current_cycle + 1

                formatted_prompt = prompt_template.format(**context)

                # 3. Run Task (per-step base timeout plus the caller's adder)
                base_timeout = E2E_FIX_STEP_TIMEOUTS.get(step_num, 340.0)
                timeout = base_timeout + timeout_adder

                step_success, step_output, step_cost, step_model = run_agentic_task(
                    instruction=formatted_prompt,
                    cwd=cwd,
                    verbose=verbose,
                    quiet=quiet,
                    timeout=timeout,
                    label=f"cycle{current_cycle}_step{step_num}"
                )

                # 4. Store Output & Accumulate
                step_outputs[str(step_num)] = step_output
                total_cost += step_cost
                # Keep the last non-empty model name reported by any step.
                model_used = step_model if step_model else model_used

                # Parse changed files (deduplicated, order-preserving)
                new_files = _parse_changed_files(step_output)
                for f in new_files:
                    if f not in changed_files:
                        changed_files.append(f)

                # Parse dev unit states (Step 8)
                if step_num == 8:
                    s5_out = step_outputs.get("5", "")
                    dev_units_str = _parse_dev_units(s5_out)
                    dev_unit_states = _update_dev_unit_states(step_output, dev_unit_states, dev_units_str)

                # Print brief result
                console.print(f" -> Step {step_num} complete. Cost: ${step_cost:.4f}")

                # 5. Save State (checkpoint after every step so resume is exact)
                last_completed_step = step_num
                state_data = {
                    "workflow": workflow_name,
                    "issue_url": issue_url,
                    "issue_number": issue_number,
                    "current_cycle": current_cycle,
                    "last_completed_step": last_completed_step,
                    "step_outputs": step_outputs,
                    "dev_unit_states": dev_unit_states,
                    "total_cost": total_cost,
                    "model_used": model_used,
                    "changed_files": changed_files,
                    "last_saved_at": datetime.now().isoformat(),
                    "github_comment_id": github_comment_id
                }

                new_gh_id = save_workflow_state(
                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
                )
                if new_gh_id:
                    github_comment_id = new_gh_id

                # Check Early Exit (Step 2): everything already passes, stop here.
                if step_num == 2 and "ALL_TESTS_PASS" in step_output:
                    console.print("[green]ALL_TESTS_PASS detected in Step 2. Exiting loop.[/green]")
                    success = True
                    final_message = "All tests passed during e2e check."
                    break

                # Check Loop Control (Step 9)
                if step_num == 9:
                    if "ALL_TESTS_PASS" in step_output:
                        console.print("[green]ALL_TESTS_PASS detected in Step 9.[/green]")
                        success = True
                        final_message = "All tests passed after fixes."
                        break
                    elif "MAX_CYCLES_REACHED" in step_output:
                        console.print("[yellow]MAX_CYCLES_REACHED detected in Step 9.[/yellow]")
                    elif "CONTINUE_CYCLE" not in step_output:
                        console.print("[yellow]Warning: No loop control token found in Step 9. Defaulting to CONTINUE_CYCLE.[/yellow]")

            if success:
                break

            # Prepare for next cycle
            current_cycle += 1
            last_completed_step = 0
            step_outputs = {}  # Clear outputs for next cycle

            # NOTE(review): state_data here is the checkpoint from the last
            # completed step of this cycle, patched in place for the new cycle.
            state_data["current_cycle"] = current_cycle
            state_data["last_completed_step"] = 0
            state_data["step_outputs"] = {}
            state_data["last_saved_at"] = datetime.now().isoformat()

            if current_cycle <= max_cycles:
                save_workflow_state(
                    cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
                )

        if success:
            # Success: drop persisted state and print a summary.
            clear_workflow_state(cwd, issue_number, workflow_name, state_dir, repo_owner, repo_name, use_github_state)
            console.print("\n[bold green]E2E fix complete[/bold green]")
            console.print(f" Total cost: ${total_cost:.4f}")
            console.print(f" Cycles used: {current_cycle if current_cycle <= max_cycles else max_cycles}/{max_cycles}")
            console.print(f" Files changed: {', '.join(changed_files)}")
            fixed_units = [u for u, s in dev_unit_states.items() if s.get("fixed")]
            console.print(f" Dev units fixed: {', '.join(fixed_units)}")
            return True, final_message, total_cost, model_used, changed_files
        else:
            # Exhausted max_cycles without an ALL_TESTS_PASS token.
            final_message = f"Max cycles ({max_cycles}) reached without all tests passing"
            console.print("\n[bold red]E2E fix incomplete (max cycles reached)[/bold red]")
            console.print(f" Total cost: ${total_cost:.4f}")
            remaining = [u for u, s in dev_unit_states.items() if not s.get("fixed")]
            console.print(f" Remaining failures: {', '.join(remaining)}")
            return False, final_message, total_cost, model_used, changed_files

    except KeyboardInterrupt:
        # Persist a checkpoint before propagating Ctrl-C so the run can resume.
        console.print("\n[bold red]Interrupted by user. Saving state...[/bold red]")
        state_data = {
            "workflow": workflow_name,
            "issue_url": issue_url,
            "issue_number": issue_number,
            "current_cycle": current_cycle,
            "last_completed_step": last_completed_step,
            "step_outputs": step_outputs,
            "dev_unit_states": dev_unit_states,
            "total_cost": total_cost,
            "model_used": model_used,
            "changed_files": changed_files,
            "last_saved_at": datetime.now().isoformat(),
            "github_comment_id": github_comment_id
        }
        save_workflow_state(
            cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
        )
        raise

    except Exception as e:
        # Any other failure: best-effort checkpoint, then report (never re-raise).
        console.print(f"\n[bold red]Fatal error: {e}[/bold red]")
        try:
            state_data = {
                "workflow": workflow_name,
                "issue_url": issue_url,
                "issue_number": issue_number,
                "current_cycle": current_cycle,
                "last_completed_step": last_completed_step,
                "step_outputs": step_outputs,
                "dev_unit_states": dev_unit_states,
                "total_cost": total_cost,
                "model_used": model_used,
                "changed_files": changed_files,
                "last_saved_at": datetime.now().isoformat(),
                "github_comment_id": github_comment_id
            }
            save_workflow_state(
                cwd, issue_number, workflow_name, state_data, state_dir, repo_owner, repo_name, use_github_state, github_comment_id
            )
        except Exception:
            # Saving state is best-effort during a fatal error; swallow failures.
            pass
        return False, f"Stopped at cycle {current_cycle} step {last_completed_step}: {str(e)}", total_cost, model_used, changed_files
pdd/agentic_fix.py CHANGED
@@ -4,6 +4,7 @@ import os
4
4
  import re
5
5
  import shutil
6
6
  import subprocess
7
+ import sys
7
8
  import difflib
8
9
  import tempfile
9
10
  from pathlib import Path
@@ -56,6 +57,68 @@ def _verbose(msg: str) -> None:
56
57
  if _IS_VERBOSE:
57
58
  console.print(msg)
58
59
 
60
+
61
+ def _detect_suspicious_files(cwd: Path, context: str = "") -> List[Path]:
62
+ """
63
+ Detect suspicious single-character files (like C, E, T) in a directory.
64
+
65
+ This is a diagnostic function to help identify when/where these files are created.
66
+ Issue #186: Empty files named C, E, T (first letters of Code, Example, Test)
67
+ have been appearing during agentic operations.
68
+
69
+ Args:
70
+ cwd: Directory to scan
71
+ context: Description of what operation just ran (for logging)
72
+
73
+ Returns:
74
+ List of suspicious file paths found
75
+ """
76
+ suspicious: List[Path] = []
77
+ try:
78
+ for f in cwd.iterdir():
79
+ if f.is_file() and len(f.name) <= 2 and not f.name.startswith('.'):
80
+ suspicious.append(f)
81
+
82
+ if suspicious:
83
+ import datetime
84
+ timestamp = datetime.datetime.now().isoformat()
85
+ _always(f"[bold red]⚠️ SUSPICIOUS FILES DETECTED (Issue #186)[/bold red]")
86
+ _always(f"[red]Timestamp: {timestamp}[/red]")
87
+ _always(f"[red]Context: {context}[/red]")
88
+ _always(f"[red]Directory: {cwd}[/red]")
89
+ for sf in suspicious:
90
+ try:
91
+ size = sf.stat().st_size
92
+ _always(f"[red] - {sf.name} (size: {size} bytes)[/red]")
93
+ except Exception:
94
+ _always(f"[red] - {sf.name} (could not stat)[/red]")
95
+
96
+ # Also log to a file for persistence
97
+ log_file = Path.home() / ".pdd" / "suspicious_files.log"
98
+ log_file.parent.mkdir(parents=True, exist_ok=True)
99
+ with open(log_file, "a") as lf:
100
+ lf.write(f"\n{'='*60}\n")
101
+ lf.write(f"Timestamp: {timestamp}\n")
102
+ lf.write(f"Context: {context}\n")
103
+ lf.write(f"Directory: {cwd}\n")
104
+ lf.write(f"CWD at detection: {Path.cwd()}\n")
105
+ for sf in suspicious:
106
+ try:
107
+ size = sf.stat().st_size
108
+ lf.write(f" - {sf.name} (size: {size} bytes)\n")
109
+ except Exception as e:
110
+ lf.write(f" - {sf.name} (error: {e})\n")
111
+ # Log stack trace to help identify caller
112
+ import traceback
113
+ lf.write("Stack trace:\n")
114
+ lf.write(traceback.format_stack()[-10:][0] if traceback.format_stack() else "N/A")
115
+ lf.write("\n")
116
+ except Exception as e:
117
+ _verbose(f"[yellow]Could not scan for suspicious files: {e}[/yellow]")
118
+
119
+ return suspicious
120
+
121
+
59
122
  def _begin_marker(path: Path) -> str:
60
123
  """Marker that must wrap the BEGIN of a corrected file block emitted by the agent."""
61
124
  return f"<<<BEGIN_FILE:{path}>>>"
@@ -130,10 +193,41 @@ _MULTI_FILE_BLOCK_RE = re.compile(
130
193
  re.DOTALL,
131
194
  )
132
195
 
196
+
197
+ def _is_suspicious_path(path: str) -> bool:
198
+ """
199
+ Reject paths that look like LLM artifacts or template variables.
200
+
201
+ This defends against:
202
+ - Single/double character filenames (e.g., 'C', 'E', 'T' from agent misbehavior)
203
+ - Template variables like {path}, {code_abs} captured by regex
204
+ - Other LLM-generated garbage patterns
205
+
206
+ Returns True if the path should be rejected.
207
+ """
208
+ if not path:
209
+ return True
210
+ # Get the basename for validation
211
+ base_name = Path(path).name
212
+ # Reject single or double character filenames (too short to be legitimate)
213
+ if len(base_name) <= 2:
214
+ return True
215
+ # Reject template variable patterns like {path}, {code_abs}
216
+ if '{' in base_name or '}' in base_name:
217
+ return True
218
+ # Reject paths that are just dots like "..", "..."
219
+ if base_name.strip('.') == '':
220
+ return True
221
+ return False
222
+
223
+
133
224
  def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
134
225
  """
135
226
  Parse stdout/stderr blobs and collect all emitted file blocks into {path: content}.
136
227
  Returns an empty dict if none found.
228
+
229
+ Note: Suspicious paths (single-char, template variables) are rejected to prevent
230
+ LLM artifacts from being written to disk.
137
231
  """
138
232
  out: Dict[str, str] = {}
139
233
  for blob in blobs:
@@ -143,6 +237,9 @@ def _extract_files_from_output(*blobs: str) -> Dict[str, str]:
143
237
  path = (m.group(1) or "").strip()
144
238
  body = m.group(2) or ""
145
239
  if path and body != "":
240
+ if _is_suspicious_path(path):
241
+ _info(f"[yellow]Skipping suspicious path from LLM output: {path!r}[/yellow]")
242
+ continue
146
243
  out[path] = body
147
244
  return out
148
245
 
@@ -401,6 +498,12 @@ def _run_anthropic_variants(prompt_text: str, cwd: Path, total_timeout: int, lab
401
498
  return last
402
499
  finally:
403
500
  prompt_file.unlink(missing_ok=True)
501
+ # Issue #186: Scan for suspicious files after Anthropic agent runs
502
+ _detect_suspicious_files(cwd, f"After _run_anthropic_variants ({label})")
503
+ # Also scan project root in case agent created files there
504
+ project_root = Path.cwd()
505
+ if project_root != cwd:
506
+ _detect_suspicious_files(project_root, f"After _run_anthropic_variants ({label}) - project root")
404
507
 
405
508
  def _run_cli_args_google(args: List[str], cwd: Path, timeout: int) -> subprocess.CompletedProcess:
406
509
  """Subprocess runner for Google commands with common sanitized env."""
@@ -460,6 +563,12 @@ def _run_google_variants(prompt_text: str, cwd: Path, total_timeout: int, label:
460
563
  return last
461
564
  finally:
462
565
  prompt_file.unlink(missing_ok=True)
566
+ # Issue #186: Scan for suspicious files after Google agent runs
567
+ _detect_suspicious_files(cwd, f"After _run_google_variants ({label})")
568
+ # Also scan project root in case agent created files there
569
+ project_root = Path.cwd()
570
+ if project_root != cwd:
571
+ _detect_suspicious_files(project_root, f"After _run_google_variants ({label}) - project root")
463
572
 
464
573
  def _run_testcmd(cmd: str, cwd: Path) -> bool:
465
574
  """
@@ -498,7 +607,7 @@ def _verify_and_log(unit_test_file: str, cwd: Path, *, verify_cmd: Optional[str]
498
607
  return _run_testcmd(run_cmd, cwd)
499
608
  # Fallback: try running with Python if no run command found
500
609
  verify = subprocess.run(
501
- [os.sys.executable, str(Path(unit_test_file).resolve())],
610
+ [sys.executable, str(Path(unit_test_file).resolve())],
502
611
  capture_output=True,
503
612
  text=True,
504
613
  check=False,
@@ -549,10 +658,16 @@ def _normalize_target_path(
549
658
  ) -> Optional[Path]:
550
659
  """
551
660
  Resolve an emitted path to a safe file path we should write:
661
+ - reject suspicious paths (single-char, template variables)
552
662
  - make path absolute under project root
553
663
  - allow direct match, primary-file match (with/without _fixed), or basename search
554
664
  - create new files only if allow_new is True
555
665
  """
666
+ # Early rejection of suspicious paths (defense against LLM artifacts)
667
+ if _is_suspicious_path(emitted_path):
668
+ _info(f"[yellow]Skipping suspicious path: {emitted_path!r}[/yellow]")
669
+ return None
670
+
556
671
  p = Path(emitted_path)
557
672
  if not p.is_absolute():
558
673
  p = (project_root / emitted_path).resolve()
@@ -760,7 +875,7 @@ def _try_harvest_then_verify(
760
875
  newest = code_path.read_text(encoding="utf-8")
761
876
  _print_diff(code_snapshot, newest, code_path)
762
877
  ok = _post_apply_verify_or_testcmd(
763
- provider, unit_test_file, working_dir,
878
+ provider, unit_test_file, cwd,
764
879
  verify_cmd=verify_cmd, verify_enabled=verify_enabled,
765
880
  stdout=res.stdout or "", stderr=res.stderr or ""
766
881
  )
@@ -952,7 +1067,7 @@ def run_agentic_fix(
952
1067
  else:
953
1068
  # Fallback: run directly with Python interpreter
954
1069
  pre = subprocess.run(
955
- [os.sys.executable, str(Path(unit_test_file).resolve())],
1070
+ [sys.executable, str(Path(unit_test_file).resolve())],
956
1071
  capture_output=True,
957
1072
  text=True,
958
1073
  check=False,