pdd-cli 0.0.90__py3-none-any.whl → 0.0.121__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. pdd/__init__.py +38 -6
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +506 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +537 -0
  6. pdd/agentic_common.py +533 -770
  7. pdd/agentic_crash.py +2 -1
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +582 -0
  10. pdd/agentic_fix.py +118 -3
  11. pdd/agentic_update.py +27 -9
  12. pdd/agentic_verify.py +3 -2
  13. pdd/architecture_sync.py +565 -0
  14. pdd/auth_service.py +210 -0
  15. pdd/auto_deps_main.py +63 -53
  16. pdd/auto_include.py +236 -3
  17. pdd/auto_update.py +125 -47
  18. pdd/bug_main.py +195 -23
  19. pdd/cmd_test_main.py +345 -197
  20. pdd/code_generator.py +4 -2
  21. pdd/code_generator_main.py +118 -32
  22. pdd/commands/__init__.py +6 -0
  23. pdd/commands/analysis.py +113 -48
  24. pdd/commands/auth.py +309 -0
  25. pdd/commands/connect.py +358 -0
  26. pdd/commands/fix.py +155 -114
  27. pdd/commands/generate.py +5 -0
  28. pdd/commands/maintenance.py +3 -2
  29. pdd/commands/misc.py +8 -0
  30. pdd/commands/modify.py +225 -163
  31. pdd/commands/sessions.py +284 -0
  32. pdd/commands/utility.py +12 -7
  33. pdd/construct_paths.py +334 -32
  34. pdd/context_generator_main.py +167 -170
  35. pdd/continue_generation.py +6 -3
  36. pdd/core/__init__.py +33 -0
  37. pdd/core/cli.py +44 -7
  38. pdd/core/cloud.py +237 -0
  39. pdd/core/dump.py +68 -20
  40. pdd/core/errors.py +4 -0
  41. pdd/core/remote_session.py +61 -0
  42. pdd/crash_main.py +219 -23
  43. pdd/data/llm_model.csv +4 -4
  44. pdd/docs/prompting_guide.md +864 -0
  45. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  46. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  47. pdd/fix_code_loop.py +208 -34
  48. pdd/fix_code_module_errors.py +6 -2
  49. pdd/fix_error_loop.py +291 -38
  50. pdd/fix_main.py +208 -6
  51. pdd/fix_verification_errors_loop.py +235 -26
  52. pdd/fix_verification_main.py +269 -83
  53. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  54. pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
  55. pdd/frontend/dist/index.html +376 -0
  56. pdd/frontend/dist/logo.svg +33 -0
  57. pdd/generate_output_paths.py +46 -5
  58. pdd/generate_test.py +212 -151
  59. pdd/get_comment.py +19 -44
  60. pdd/get_extension.py +8 -9
  61. pdd/get_jwt_token.py +309 -20
  62. pdd/get_language.py +8 -7
  63. pdd/get_run_command.py +7 -5
  64. pdd/insert_includes.py +2 -1
  65. pdd/llm_invoke.py +531 -97
  66. pdd/load_prompt_template.py +15 -34
  67. pdd/operation_log.py +342 -0
  68. pdd/path_resolution.py +140 -0
  69. pdd/postprocess.py +122 -97
  70. pdd/preprocess.py +68 -12
  71. pdd/preprocess_main.py +33 -1
  72. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  73. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  74. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  75. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  76. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  77. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  78. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  79. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  80. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  81. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  82. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  83. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  84. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +140 -0
  85. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  86. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  87. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  88. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  89. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  90. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  91. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  92. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  93. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  94. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  95. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  96. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  97. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  98. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  99. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  100. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  101. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  102. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  103. pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
  104. pdd/prompts/agentic_update_LLM.prompt +192 -338
  105. pdd/prompts/auto_include_LLM.prompt +22 -0
  106. pdd/prompts/change_LLM.prompt +3093 -1
  107. pdd/prompts/detect_change_LLM.prompt +571 -14
  108. pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
  109. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
  110. pdd/prompts/generate_test_LLM.prompt +19 -1
  111. pdd/prompts/generate_test_from_example_LLM.prompt +366 -0
  112. pdd/prompts/insert_includes_LLM.prompt +262 -252
  113. pdd/prompts/prompt_code_diff_LLM.prompt +123 -0
  114. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  115. pdd/remote_session.py +876 -0
  116. pdd/server/__init__.py +52 -0
  117. pdd/server/app.py +335 -0
  118. pdd/server/click_executor.py +587 -0
  119. pdd/server/executor.py +338 -0
  120. pdd/server/jobs.py +661 -0
  121. pdd/server/models.py +241 -0
  122. pdd/server/routes/__init__.py +31 -0
  123. pdd/server/routes/architecture.py +451 -0
  124. pdd/server/routes/auth.py +364 -0
  125. pdd/server/routes/commands.py +929 -0
  126. pdd/server/routes/config.py +42 -0
  127. pdd/server/routes/files.py +603 -0
  128. pdd/server/routes/prompts.py +1347 -0
  129. pdd/server/routes/websocket.py +473 -0
  130. pdd/server/security.py +243 -0
  131. pdd/server/terminal_spawner.py +217 -0
  132. pdd/server/token_counter.py +222 -0
  133. pdd/summarize_directory.py +236 -237
  134. pdd/sync_animation.py +8 -4
  135. pdd/sync_determine_operation.py +329 -47
  136. pdd/sync_main.py +272 -28
  137. pdd/sync_orchestration.py +289 -211
  138. pdd/sync_order.py +304 -0
  139. pdd/template_expander.py +161 -0
  140. pdd/templates/architecture/architecture_json.prompt +41 -46
  141. pdd/trace.py +1 -1
  142. pdd/track_cost.py +0 -13
  143. pdd/unfinished_prompt.py +2 -1
  144. pdd/update_main.py +68 -26
  145. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +15 -10
  146. pdd_cli-0.0.121.dist-info/RECORD +229 -0
  147. pdd_cli-0.0.90.dist-info/RECORD +0 -153
  148. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
  149. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
  150. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
  151. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/postprocess.py CHANGED
@@ -1,133 +1,158 @@
- from typing import Tuple
- from rich import print
- from pydantic import BaseModel, Field
+ from __future__ import annotations
+
+ import re
+ from typing import Tuple, Optional
+
+ from rich.console import Console
+ from pydantic import BaseModel, Field, ValidationError
+
+ from . import DEFAULT_STRENGTH, DEFAULT_TIME
  from .load_prompt_template import load_prompt_template
  from .llm_invoke import llm_invoke
- from . import DEFAULT_TIME, DEFAULT_STRENGTH
+
+
+ console = Console()
+

  class ExtractedCode(BaseModel):
- """Pydantic model for the extracted code."""
- extracted_code: str = Field(description="The extracted code from the LLM output")
+ focus: str = Field("", description="Focus of the code")
+ explanation: str = Field("", description="Explanation of the code")
+ extracted_code: str = Field(..., description="Extracted code")
+
+
+ def postprocess_0(llm_output: str, language: str) -> str:
+ """Simple extraction of code blocks."""
+ if language == "prompt":
+ # Strip <prompt> tags
+ llm_output = re.sub(r"<prompt>\s*(.*?)\s*</prompt>", r"\1", llm_output, flags=re.DOTALL)
+ llm_output = llm_output.strip()
+
+ # Also strip triple backticks if present
+ lines = llm_output.splitlines()
+ if lines and lines[0].startswith("```"):
+ # Remove first line with opening backticks
+ lines = lines[1:]
+ # If there's a last line with closing backticks, remove it
+ if lines and lines[-1].startswith("```"):
+ lines = lines[:-1]
+ llm_output = "\n".join(lines)
+
+ return llm_output.strip()
+
+ # First try to find complete code blocks with closing backticks
+ code_blocks = re.findall(r"```(?:[a-zA-Z]+)?\n(.*?)\n```", llm_output, re.DOTALL)
+ if code_blocks:
+ return "\n".join(block.strip() for block in code_blocks)
+
+ # If no complete blocks found, try to find incomplete blocks (opening backticks without closing)
+ # But ensure there's actual content after the opening backticks
+ incomplete_match = re.search(r"```(?:[a-zA-Z]+)?\n(.+?)(?:\n```)?$", llm_output, re.DOTALL)
+ if incomplete_match:
+ content = incomplete_match.group(1).strip()
+ # Don't return if content is just closing backticks
+ if content and content != "```":
+ return content
+
+ return ""

- def postprocess_0(text: str) -> str:
- """
- Simple code extraction for strength = 0.
- Extracts code between triple backticks.
- """
- lines = text.split('\n')
- code_lines = []
- in_code_block = False
-
- for line in lines:
- if '```' in line: # MODIFIED: Was line.startswith('```')
- if not in_code_block:
- # Skip the language identifier line / content on opening delimiter line
- in_code_block = True
- continue
- else:
- # Content on closing delimiter line is skipped
- in_code_block = False
- continue
- if in_code_block:
- code_lines.append(line)
-
- return '\n'.join(code_lines)

  def postprocess(
  llm_output: str,
  language: str,
  strength: float = DEFAULT_STRENGTH,
- temperature: float = 0,
+ temperature: float = 0.0,
  time: float = DEFAULT_TIME,
- verbose: bool = False
+ verbose: bool = False,
  ) -> Tuple[str, float, str]:
  """
- Extract code from LLM output string.
-
+ Extracts code from a string output of an LLM.
+
  Args:
- llm_output (str): The string output from the LLM containing code sections
- language (str): The programming language of the code to extract
- strength (float): The strength of the LLM model to use (0-1)
- temperature (float): The temperature parameter for the LLM (0-1)
- time (float): The thinking effort for the LLM model (0-1)
- verbose (bool): Whether to print detailed processing information
-
+ llm_output: A string containing a mix of text and code sections.
+ language: A string specifying the programming language of the code to be extracted.
+ strength: A float between 0 and 1 that represents the strength of the LLM model to use.
+ temperature: A float between 0 and 1 that represents the temperature parameter for the LLM model.
+ time: A float between 0 and 1 that controls the thinking effort for the LLM model.
+ verbose: A boolean that indicates whether to print detailed processing information.
+
  Returns:
- Tuple[str, float, str]: (extracted_code, total_cost, model_name)
+ A tuple containing the extracted code string, total cost float and model name string.
  """
- try:
- # Input validation
- if not llm_output or not isinstance(llm_output, str):
- raise ValueError("llm_output must be a non-empty string")
- if not language or not isinstance(language, str):
- raise ValueError("language must be a non-empty string")
- if not 0 <= strength <= 1:
- raise ValueError("strength must be between 0 and 1")
- if not 0 <= temperature <= 1:
- raise ValueError("temperature must be between 0 and 1")
-
- # Step 1: If strength is 0, use simple extraction
- if strength == 0:
- if verbose:
- print("[blue]Using simple code extraction (strength = 0)[/blue]")
- return (postprocess_0(llm_output), 0.0, "simple_extraction")
-
- # Step 2: Load the prompt template
- prompt_template = load_prompt_template("extract_code_LLM")
- if not prompt_template:
- raise ValueError("Failed to load prompt template")
+ if not isinstance(llm_output, str) or not llm_output:
+ raise ValueError("llm_output must be a non-empty string")
+ if not isinstance(language, str) or not language:
+ raise ValueError("language must be a non-empty string")
+ if not isinstance(strength, (int, float)):
+ raise TypeError("strength must be a number")
+ if not 0 <= strength <= 1:
+ raise ValueError("strength must be between 0 and 1")
+ if not isinstance(temperature, (int, float)):
+ raise TypeError("temperature must be a number")
+ if not 0 <= temperature <= 1:
+ raise ValueError("temperature must be between 0 and 1")

+ if language == "prompt":
+ extracted_code = postprocess_0(llm_output, language)
+ return extracted_code, 0.0, "simple_extraction"
+
+ if strength == 0:
+ extracted_code = postprocess_0(llm_output, language)
  if verbose:
- print("[blue]Loaded prompt template for code extraction[/blue]")
+ console.print("[blue]Using simple code extraction (strength = 0)[/blue]")
+ return extracted_code, 0.0, "simple_extraction"

- # Step 3: Process using llm_invoke
- input_json = {
- "llm_output": llm_output,
- "language": language
- }
+ prompt_name = "extract_code_LLM"
+ prompt = load_prompt_template(prompt_name)

- response = llm_invoke(
- prompt=prompt_template,
+ if not prompt:
+ error_msg = "Failed to load prompt template"
+ console.print(f"[red]Error:[/red] {error_msg}")
+ raise ValueError(error_msg)
+
+ input_json = {"llm_output": llm_output, "language": language}
+
+ if verbose:
+ console.print("[blue]Loaded prompt template for code extraction[/blue]")
+
+ try:
+ result = llm_invoke(
+ prompt=prompt,
  input_json=input_json,
  strength=strength,
  temperature=temperature,
  time=time,
+ output_pydantic=ExtractedCode,
  verbose=verbose,
- output_pydantic=ExtractedCode
  )

- if not response or 'result' not in response:
- raise ValueError("Failed to get valid response from LLM")
+ if not result or "result" not in result:
+ error_msg = "Failed to get valid response from LLM"
+ console.print(f"[red]Error during LLM invocation:[/red] {error_msg}")
+ raise ValueError(error_msg)

- result_obj = response['result']
- if not isinstance(result_obj, ExtractedCode):
- # If we got a string (likely an error message from llm_invoke), fallback to simple extraction
- if verbose:
- print(f"[yellow]Structured extraction failed ({result_obj}). Falling back to simple extraction.[/yellow]")
- return (postprocess_0(llm_output), response.get('cost', 0.0), response.get('model_name', 'fallback'))
+ extracted_code = result["result"].extracted_code

- extracted_code_obj: ExtractedCode = result_obj
- code_text = extracted_code_obj.extracted_code
-
- # Step 3c: Remove triple backticks and language identifier if present
- lines = code_text.split('\n')
- if lines and lines[0].startswith('```'):
+ # Clean up triple backticks
+ lines = extracted_code.splitlines()
+ if lines and lines[0].startswith("```"):
+ # Remove first line with opening backticks
  lines = lines[1:]
- if lines and lines[-1].startswith('```'): # Check if lines is not empty again after potentially removing first line
- lines = lines[:-1]
-
- final_code = '\n'.join(lines)
+ # If there's a last line with closing backticks, remove it
+ if lines and lines[-1].startswith("```"):
+ lines = lines[:-1]
+ extracted_code = "\n".join(lines)
+
+ total_cost = result["cost"]
+ model_name = result["model_name"]

  if verbose:
- print("[green]Successfully extracted code[/green]")
+ console.print("[green]Successfully extracted code[/green]")

- # Step 4: Return the results
- return (
- final_code,
- response['cost'],
- response['model_name']
- )
+ return extracted_code, total_cost, model_name

+ except KeyError as e:
+ console.print(f"[red]Error in postprocess: {e}[/red]")
+ raise ValueError(f"Failed to get valid response from LLM: missing key {e}")
  except Exception as e:
- print(f"[red]Error in postprocess: {str(e)}[/red]")
+ console.print(f"[red]Error in postprocess: {e}[/red]")
  raise
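For orientation, here is a minimal usage sketch of the revised `postprocess()` signature shown above; it is not taken from the package's tests, the sample LLM output is made up, and `strength=0` keeps everything on the local `postprocess_0()` path so no model call is made.

```python
# Minimal sketch (not part of the package) of the revised call signature.
# The llm_output string is a hypothetical example; strength=0 exercises the
# simple-extraction path, so cost is 0.0 and model_name is "simple_extraction".
from pdd.postprocess import postprocess

llm_output = "Here is the function:\n```python\ndef add(a, b):\n    return a + b\n```\n"

code, cost, model_name = postprocess(llm_output, language="python", strength=0)
print(code)        # the code between the backticks
print(cost)        # 0.0
print(model_name)  # "simple_extraction"
```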
pdd/preprocess.py CHANGED
@@ -4,10 +4,12 @@ import base64
  import subprocess
  from typing import List, Optional, Tuple
  import traceback
+ from pathlib import Path
  from rich.console import Console
  from rich.panel import Panel
  from rich.markup import escape
  from rich.traceback import install
+ from pdd.path_resolution import get_default_resolver

  install()
  console = Console()
@@ -37,24 +39,51 @@ def _write_debug_report() -> None:
  console.print("[dim]Debug mode enabled but PDD_PREPROCESS_DEBUG_FILE not set (output shown in console only)[/dim]")

  def _extract_fence_spans(text: str) -> List[Tuple[int, int]]:
- """Return list of (start, end) spans for fenced code blocks ```...```.
+ """Return list of (start, end) spans for fenced code blocks (``` or ~~~).

  The spans are [start, end) indices in the original text.
  """
  spans: List[Tuple[int, int]] = []
  try:
- for m in re.finditer(r"```[\w\s]*\n[\s\S]*?```", text):
+ fence_re = re.compile(
+ r"(?m)^[ \t]*([`~]{3,})[^\n]*\n[\s\S]*?\n[ \t]*\1[ \t]*(?:\n|$)"
+ )
+ for m in fence_re.finditer(text):
  spans.append((m.start(), m.end()))
  except Exception:
  pass
  return spans

+
+ def _extract_inline_code_spans(text: str) -> List[Tuple[int, int]]:
+ """Return list of (start, end) spans for inline code (backticks)."""
+ spans: List[Tuple[int, int]] = []
+ try:
+ for m in re.finditer(r"(?<!`)(`+)([^\n]*?)\1", text):
+ spans.append((m.start(), m.end()))
+ except Exception:
+ pass
+ return spans
+
+
+ def _extract_code_spans(text: str) -> List[Tuple[int, int]]:
+ spans = _extract_fence_spans(text)
+ spans.extend(_extract_inline_code_spans(text))
+ return sorted(spans, key=lambda s: s[0])
+
  def _is_inside_any_span(idx: int, spans: List[Tuple[int, int]]) -> bool:
  for s, e in spans:
  if s <= idx < e:
  return True
  return False

+
+ def _intersects_any_span(start: int, end: int, spans: List[Tuple[int, int]]) -> bool:
+ for s, e in spans:
+ if start < e and end > s:
+ return True
+ return False
+
  def _scan_risky_placeholders(text: str) -> Tuple[List[Tuple[int, str]], List[Tuple[int, str]]]:
  """Scan for risky placeholders outside code fences.

@@ -119,8 +148,11 @@ def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool
  return prompt

  def get_file_path(file_name: str) -> str:
- base_path = './'
- return os.path.join(base_path, file_name)
+ resolver = get_default_resolver()
+ resolved = resolver.resolve_include(file_name)
+ if not Path(file_name).is_absolute() and resolved == resolver.cwd / file_name:
+ return os.path.join("./", file_name)
+ return str(resolved)

  def process_backtick_includes(text: str, recursive: bool) -> str:
  # More specific pattern that doesn't match nested > characters
@@ -229,7 +261,12 @@ def process_include_tags(text: str, recursive: bool) -> str:
  current_text = text
  while prev_text != current_text:
  prev_text = current_text
- current_text = re.sub(pattern, replace_include, current_text, flags=re.DOTALL)
+ code_spans = _extract_code_spans(current_text)
+ def replace_include_with_spans(match):
+ if _intersects_any_span(match.start(), match.end(), code_spans):
+ return match.group(0)
+ return replace_include(match)
+ current_text = re.sub(pattern, replace_include_with_spans, current_text, flags=re.DOTALL)
  return current_text

  def process_pdd_tags(text: str) -> str:
@@ -262,7 +299,12 @@ def process_shell_tags(text: str, recursive: bool) -> str:
  console.print(f"[bold red]Error executing shell command:[/bold red] {str(e)}")
  _dbg(f"Shell execution exception: {e}")
  return f"[Shell execution error: {str(e)}]"
- return re.sub(pattern, replace_shell, text, flags=re.DOTALL)
+ code_spans = _extract_code_spans(text)
+ def replace_shell_with_spans(match):
+ if _intersects_any_span(match.start(), match.end(), code_spans):
+ return match.group(0)
+ return replace_shell(match)
+ return re.sub(pattern, replace_shell_with_spans, text, flags=re.DOTALL)

  def process_web_tags(text: str, recursive: bool) -> str:
  pattern = r'<web>(.*?)</web>'
@@ -275,7 +317,7 @@ def process_web_tags(text: str, recursive: bool) -> str:
  _dbg(f"Web tag URL: {url}")
  try:
  try:
- from firecrawl import FirecrawlApp
+ from firecrawl import Firecrawl
  except ImportError:
  _dbg("firecrawl import failed; package not installed")
  return f"[Error: firecrawl-py package not installed. Cannot scrape {url}]"
@@ -284,9 +326,13 @@
  console.print("[bold yellow]Warning:[/bold yellow] FIRECRAWL_API_KEY not found in environment")
  _dbg("FIRECRAWL_API_KEY not set")
  return f"[Error: FIRECRAWL_API_KEY not set. Cannot scrape {url}]"
- app = FirecrawlApp(api_key=api_key)
- response = app.scrape_url(url, formats=['markdown'])
- if hasattr(response, 'markdown'):
+ app = Firecrawl(api_key=api_key)
+ response = app.scrape(url, formats=['markdown'])
+ # Handle both dict response (new API) and object response (legacy)
+ if isinstance(response, dict) and 'markdown' in response:
+ _dbg(f"Web scrape returned markdown (len={len(response['markdown'])})")
+ return response['markdown']
+ elif hasattr(response, 'markdown'):
  _dbg(f"Web scrape returned markdown (len={len(response.markdown)})")
  return response.markdown
  else:
@@ -297,7 +343,12 @@
  console.print(f"[bold red]Error scraping web content:[/bold red] {str(e)}")
  _dbg(f"Web scraping exception: {e}")
  return f"[Web scraping error: {str(e)}]"
- return re.sub(pattern, replace_web, text, flags=re.DOTALL)
+ code_spans = _extract_code_spans(text)
+ def replace_web_with_spans(match):
+ if _intersects_any_span(match.start(), match.end(), code_spans):
+ return match.group(0)
+ return replace_web(match)
+ return re.sub(pattern, replace_web_with_spans, text, flags=re.DOTALL)

  def process_include_many_tags(text: str, recursive: bool) -> str:
  """Process <include-many> blocks whose inner content is a comma- or newline-separated
@@ -328,7 +379,12 @@ def process_include_many_tags(text: str, recursive: bool) -> str:
  _dbg(f"Error processing include-many {p}: {e}")
  contents.append(f"[Error processing include: {p}]")
  return "\n".join(contents)
- return re.sub(pattern, replace_many, text, flags=re.DOTALL)
+ code_spans = _extract_code_spans(text)
+ def replace_many_with_spans(match):
+ if _intersects_any_span(match.start(), match.end(), code_spans):
+ return match.group(0)
+ return replace_many(match)
+ return re.sub(pattern, replace_many_with_spans, text, flags=re.DOTALL)

  def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
  if exclude_keys is None:
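The new `_extract_code_spans` / `_intersects_any_span` helpers make tag expansion skip matches that fall inside fenced or inline code. A behavioral sketch of that intent follows; it is not a test from the diff, and the include target path is hypothetical (it may or may not exist on disk).

```python
# Sketch of the intended behavior after this change: an <include> tag in prose
# is expanded, while the same tag inside a fenced code block is left verbatim.
# The include target path below is hypothetical.
from pdd.preprocess import preprocess

prompt = (
    "Use this helper: <include>context/example_helper.py</include>\n"
    "\n"
    "```\n"
    "Literal documentation: <include>context/example_helper.py</include>\n"
    "```\n"
)

result = preprocess(prompt, recursive=False, double_curly_brackets=False)
# The first tag is replaced (with file contents, or an error placeholder if the
# file is missing); the tag inside the fence should pass through unchanged.
print(result)
```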
pdd/preprocess_main.py CHANGED
@@ -1,5 +1,6 @@
  import csv
  import sys
+ from pathlib import Path
  from typing import Tuple, Optional
  import click
  from rich import print as rprint
@@ -8,8 +9,15 @@ from .config_resolution import resolve_effective_config
  from .construct_paths import construct_paths
  from .preprocess import preprocess
  from .xml_tagger import xml_tagger
+ from .architecture_sync import (
+ get_architecture_entry_for_prompt,
+ generate_tags_from_architecture,
+ has_pdd_tags,
+ )
+
+
  def preprocess_main(
- ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list
+ ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list, pdd_tags: bool = False
  ) -> Tuple[str, float, str]:
  """
  CLI wrapper for preprocessing prompts.
@@ -22,6 +30,7 @@ def preprocess_main(
  :param double: If True, curly brackets will be doubled.
  :param exclude: List of keys to exclude from curly bracket doubling.
  :return: Tuple containing the preprocessed prompt, total cost, and model name used.
+ :param pdd_tags: If True, inject PDD metadata tags from architecture.json.
  """
  try:
  # Construct file paths
@@ -39,6 +48,27 @@
  # Load prompt file
  prompt = input_strings["prompt_file"]

+ # Inject PDD metadata tags from architecture.json if requested
+ pdd_tags_injected = False
+ if pdd_tags:
+ prompt_filename = Path(prompt_file).name
+ arch_entry = get_architecture_entry_for_prompt(prompt_filename)
+
+ if arch_entry:
+ if has_pdd_tags(prompt):
+ if not ctx.obj.get("quiet", False):
+ rprint(f"[yellow]Prompt already has PDD tags, skipping injection.[/yellow]")
+ else:
+ generated_tags = generate_tags_from_architecture(arch_entry)
+ if generated_tags:
+ prompt = generated_tags + '\n\n' + prompt
+ pdd_tags_injected = True
+ if not ctx.obj.get("quiet", False):
+ rprint(f"[green]Injected PDD tags from architecture.json[/green]")
+ else:
+ if not ctx.obj.get("quiet", False):
+ rprint(f"[yellow]No architecture entry found for '{prompt_filename}', skipping PDD tags.[/yellow]")
+
  if xml:
  # Use xml_tagger to add XML delimiters
  # Use centralized config resolution with proper priority: CLI > pddrc > defaults
@@ -67,6 +97,8 @@
  # Provide user feedback
  if not ctx.obj.get("quiet", False):
  rprint("[bold green]Prompt preprocessing completed successfully.[/bold green]")
+ if pdd_tags_injected:
+ rprint("[bold]PDD metadata tags: injected from architecture.json[/bold]")
  if xml:
  rprint(f"[bold]XML Tagging used: {model_name}[/bold]")
  else:
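The new `pdd_tags` flag drives the injection flow shown in the hunks above. A condensed sketch of that flow using the same `architecture_sync` helpers imported in the diff; the prompt file path and any architecture.json contents are hypothetical.

```python
# Condensed sketch of the pdd_tags=True flow added to preprocess_main; the
# prompt path is hypothetical, and the helpers come from pdd.architecture_sync
# as imported in the diff above.
from pathlib import Path
from pdd.architecture_sync import (
    get_architecture_entry_for_prompt,
    generate_tags_from_architecture,
    has_pdd_tags,
)

prompt_file = "prompts/summarize_directory_python.prompt"  # hypothetical
prompt = Path(prompt_file).read_text()

arch_entry = get_architecture_entry_for_prompt(Path(prompt_file).name)
if arch_entry and not has_pdd_tags(prompt):
    tags = generate_tags_from_architecture(arch_entry)
    if tags:
        # Same concatenation as preprocess_main: tags block, blank line, prompt.
        prompt = tags + "\n\n" + prompt
```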
pdd/prompts/agentic_bug_step10_pr_LLM.prompt ADDED
@@ -0,0 +1,182 @@
+ % You are an expert software engineer investigating a bug report. Your task is to create a draft pull request with the failing tests and link it to the issue.
+
+ % Context
+
+ You are working on step 10 of 10 (final step) in an agentic bug investigation workflow. Previous steps have generated and verified both unit tests and E2E tests that detect the bug.
+
+ % Inputs
+
+ - GitHub Issue URL: {issue_url}
+ - Repository: {repo_owner}/{repo_name}
+ - Issue Number: {issue_number}
+
+ % Issue Content
+ <issue_content>
+ {issue_content}
+ </issue_content>
+
+ % Previous Steps Output
+ <step1_output>
+ {step1_output}
+ </step1_output>
+
+ <step2_output>
+ {step2_output}
+ </step2_output>
+
+ <step3_output>
+ {step3_output}
+ </step3_output>
+
+ <step4_output>
+ {step4_output}
+ </step4_output>
+
+ <step5_output>
+ {step5_output}
+ </step5_output>
+
+ <step6_output>
+ {step6_output}
+ </step6_output>
+
+ <step7_output>
+ {step7_output}
+ </step7_output>
+
+ <step8_output>
+ {step8_output}
+ </step8_output>
+
+ <step9_output>
+ {step9_output}
+ </step9_output>
+
+ % Worktree Information
+
+ You are operating in an isolated git worktree at: {worktree_path}
+ This worktree is already checked out to branch `fix/issue-{issue_number}`.
+ Do NOT create a new branch - just stage, commit, and push.
+
+ % Files to Stage
+
+ **IMPORTANT: Only stage these specific files:**
+ {files_to_stage}
+
+ % Your Task
+
+ 1. **Prepare the commit**
+ - You are already on branch `fix/issue-{issue_number}` in an isolated worktree
+ - **CRITICAL: Stage ONLY the test file(s) created in Steps 7 and 9**
+ - Get the exact file paths from:
+ - Step 7's `FILES_CREATED:` or `FILES_MODIFIED:` output (unit tests)
+ - Step 9's `E2E_FILES_CREATED:` or `E2E_FILES_MODIFIED:` output (E2E tests)
+ - Stage each file individually: `git add <exact_file_path>`
+ - **DO NOT use `git add .` or `git add -A`** - these will stage unrelated files and pollute the PR
+ - Verify only the intended files are staged: `git status --short` (should show only the test file(s))
+ - Commit with a descriptive message referencing the issue
+
+ 2. **Create the draft PR**
+ - Push the branch to origin
+ - Create a draft pull request using `gh pr create --draft`
+ - Link to the issue using "Fixes #{issue_number}" in the PR body
+
+ 3. **Post final summary**
+ - Comment on the issue with PR link and next steps for the fix
+
+ 4. **Include PDD fix command**
+ - Extract code file path from Step 5's `**Location:**` field (strip the `:line_number` suffix)
+ - Use test file path from Step 7's `FILES_CREATED:` or test file section
+ - Search repo for matching prompt file: `find . -name "*.prompt" -type f`
+ - Derive module name from code file (e.g., `pdd/foo.py` -> `foo`)
+ - Use verification program: `context/{{module_name}}_example.py`
+ - Use error log path: `fix-issue-{issue_number}.log` for the fix command output
+ - Include a ready-to-run `pdd fix` command in your GitHub comment
+ - If no prompt file or verification program exists, include a note that they must be created first
+
+ % PR Creation Command
+
+ ```bash
+ gh pr create --draft --title "Add failing tests for #{issue_number}" --body "$(cat <<'EOF'
+ ## Summary
+ Adds failing tests that detect the bug reported in #{issue_number}.
+
+ ## Test Files
+ - Unit test: `{{unit_test_file_path}}`
+ - E2E test: `{{e2e_test_file_path}}` (if applicable)
+
+ ## What This PR Contains
+ - Failing unit test that reproduces the reported bug
+ - Failing E2E test that verifies the bug at integration level (if applicable)
+ - Tests are verified to fail on current code and will pass once the bug is fixed
+
+ ## Root Cause
+ {{root_cause_summary}}
+
+ ## Next Steps
+ 1. [ ] Implement the fix at the identified location
+ 2. [ ] Verify the unit test passes
+ 3. [ ] Verify the E2E test passes
+ 4. [ ] Run full test suite
+ 5. [ ] Mark PR as ready for review
+
+ Fixes #{issue_number}
+
+ ---
+ *Generated by PDD agentic bug workflow*
+ EOF
+ )"
+ ```
+
+ % Output
+
+ After creating the PR, use `gh issue comment` to post your final report to issue #{issue_number}:
+
+ ```
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
+ ```
+
+ Your comment should follow this format:
+
+ ```markdown
+ ## Step 10: Draft PR Created
+
+ ### Pull Request
+ **PR #{{pr_number}}:** [{{pr_title}}]({{pr_url}})
+
+ ### Branch
+ `fix/issue-{issue_number}`
+
+ ### What's Included
+ - Failing unit test at `{{unit_test_file_path}}`
+ - Failing E2E test at `{{e2e_test_file_path}}` (if applicable)
+ - Commits: {{commit_count}}
+
+ ### Next Steps for Maintainers
+ 1. Review the failing tests to understand the expected behavior
+ 2. Implement the fix at the identified location
+ 3. Verify both unit and E2E tests pass with your fix
+ 4. Run full test suite to check for regressions
+ 5. Mark the PR as ready for review
+
+ ### PDD Fix Command
+
+ To auto-fix this bug using PDD:
+
+ ```bash
+ cd {{worktree_path}}
+ pdd --force fix --loop --max-attempts 5 --verification-program context/{{module_name}}_example.py {{prompt_file}} {{code_file_path}} {{test_file_path}} fix-issue-{{issue_number}}.log
+ ```
+
+ ---
+ *Investigation complete. A draft PR with failing tests has been created and linked to this issue.*
+ ```
+
+ % Important
+
+ - Create a DRAFT PR (not ready for review) since it only contains the failing tests
+ - The PR should clearly state that a fix is still needed
+ - Use "Fixes #{issue_number}" to auto-link the PR to the issue
+ - Do NOT create a new branch - you are already on the correct branch in the worktree
+ - Include both unit test files (Step 7) and E2E test files (Step 9) if both exist
+ - Always post your findings as a GitHub comment before completing
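The path derivation described in step 4 of this prompt can be illustrated with a small sketch; every concrete value below (issue number, file paths) is hypothetical and simply follows the naming rules the prompt states.

```python
# Hypothetical illustration of the step 4 rules: strip the :line suffix from the
# root-cause location, derive the module name, and assemble the pdd fix command.
location = "pdd/foo.py:123"                       # value a Step 5 report might contain
code_file = location.rsplit(":", 1)[0]            # -> "pdd/foo.py"
module_name = code_file.rsplit("/", 1)[-1].removesuffix(".py")  # -> "foo"

issue_number = 42                                 # hypothetical issue
prompt_file = "prompts/foo_python.prompt"         # hypothetical prompt file found via `find`
test_file = "tests/test_foo_issue_42.py"          # hypothetical test file from Step 7

fix_cmd = (
    f"pdd --force fix --loop --max-attempts 5 "
    f"--verification-program context/{module_name}_example.py "
    f"{prompt_file} {code_file} {test_file} fix-issue-{issue_number}.log"
)
print(fix_cmd)
```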