pdd-cli 0.0.90__py3-none-any.whl → 0.0.121__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +38 -6
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +506 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +537 -0
- pdd/agentic_common.py +533 -770
- pdd/agentic_crash.py +2 -1
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +582 -0
- pdd/agentic_fix.py +118 -3
- pdd/agentic_update.py +27 -9
- pdd/agentic_verify.py +3 -2
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +63 -53
- pdd/auto_include.py +236 -3
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +195 -23
- pdd/cmd_test_main.py +345 -197
- pdd/code_generator.py +4 -2
- pdd/code_generator_main.py +118 -32
- pdd/commands/__init__.py +6 -0
- pdd/commands/analysis.py +113 -48
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +358 -0
- pdd/commands/fix.py +155 -114
- pdd/commands/generate.py +5 -0
- pdd/commands/maintenance.py +3 -2
- pdd/commands/misc.py +8 -0
- pdd/commands/modify.py +225 -163
- pdd/commands/sessions.py +284 -0
- pdd/commands/utility.py +12 -7
- pdd/construct_paths.py +334 -32
- pdd/context_generator_main.py +167 -170
- pdd/continue_generation.py +6 -3
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +44 -7
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +68 -20
- pdd/core/errors.py +4 -0
- pdd/core/remote_session.py +61 -0
- pdd/crash_main.py +219 -23
- pdd/data/llm_model.csv +4 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +208 -34
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +291 -38
- pdd/fix_main.py +208 -6
- pdd/fix_verification_errors_loop.py +235 -26
- pdd/fix_verification_main.py +269 -83
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +46 -5
- pdd/generate_test.py +212 -151
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +309 -20
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +7 -5
- pdd/insert_includes.py +2 -1
- pdd/llm_invoke.py +531 -97
- pdd/load_prompt_template.py +15 -34
- pdd/operation_log.py +342 -0
- pdd/path_resolution.py +140 -0
- pdd/postprocess.py +122 -97
- pdd/preprocess.py +68 -12
- pdd/preprocess_main.py +33 -1
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +140 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
- pdd/prompts/agentic_update_LLM.prompt +192 -338
- pdd/prompts/auto_include_LLM.prompt +22 -0
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +571 -14
- pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
- pdd/prompts/generate_test_LLM.prompt +19 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +366 -0
- pdd/prompts/insert_includes_LLM.prompt +262 -252
- pdd/prompts/prompt_code_diff_LLM.prompt +123 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/remote_session.py +876 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1347 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +217 -0
- pdd/server/token_counter.py +222 -0
- pdd/summarize_directory.py +236 -237
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +329 -47
- pdd/sync_main.py +272 -28
- pdd/sync_orchestration.py +289 -211
- pdd/sync_order.py +304 -0
- pdd/template_expander.py +161 -0
- pdd/templates/architecture/architecture_json.prompt +41 -46
- pdd/trace.py +1 -1
- pdd/track_cost.py +0 -13
- pdd/unfinished_prompt.py +2 -1
- pdd/update_main.py +68 -26
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +15 -10
- pdd_cli-0.0.121.dist-info/RECORD +229 -0
- pdd_cli-0.0.90.dist-info/RECORD +0 -153
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/postprocess.py
CHANGED
@@ -1,133 +1,158 @@
-from
-
-
+from __future__ import annotations
+
+import re
+from typing import Tuple, Optional
+
+from rich.console import Console
+from pydantic import BaseModel, Field, ValidationError
+
+from . import DEFAULT_STRENGTH, DEFAULT_TIME
 from .load_prompt_template import load_prompt_template
 from .llm_invoke import llm_invoke
-
+
+
+console = Console()
+

 class ExtractedCode(BaseModel):
-    """
-
+    focus: str = Field("", description="Focus of the code")
+    explanation: str = Field("", description="Explanation of the code")
+    extracted_code: str = Field(..., description="Extracted code")
+
+
+def postprocess_0(llm_output: str, language: str) -> str:
+    """Simple extraction of code blocks."""
+    if language == "prompt":
+        # Strip <prompt> tags
+        llm_output = re.sub(r"<prompt>\s*(.*?)\s*</prompt>", r"\1", llm_output, flags=re.DOTALL)
+        llm_output = llm_output.strip()
+
+        # Also strip triple backticks if present
+        lines = llm_output.splitlines()
+        if lines and lines[0].startswith("```"):
+            # Remove first line with opening backticks
+            lines = lines[1:]
+            # If there's a last line with closing backticks, remove it
+            if lines and lines[-1].startswith("```"):
+                lines = lines[:-1]
+            llm_output = "\n".join(lines)
+
+        return llm_output.strip()
+
+    # First try to find complete code blocks with closing backticks
+    code_blocks = re.findall(r"```(?:[a-zA-Z]+)?\n(.*?)\n```", llm_output, re.DOTALL)
+    if code_blocks:
+        return "\n".join(block.strip() for block in code_blocks)
+
+    # If no complete blocks found, try to find incomplete blocks (opening backticks without closing)
+    # But ensure there's actual content after the opening backticks
+    incomplete_match = re.search(r"```(?:[a-zA-Z]+)?\n(.+?)(?:\n```)?$", llm_output, re.DOTALL)
+    if incomplete_match:
+        content = incomplete_match.group(1).strip()
+        # Don't return if content is just closing backticks
+        if content and content != "```":
+            return content
+
+    return ""

-def postprocess_0(text: str) -> str:
-    """
-    Simple code extraction for strength = 0.
-    Extracts code between triple backticks.
-    """
-    lines = text.split('\n')
-    code_lines = []
-    in_code_block = False
-
-    for line in lines:
-        if '```' in line: # MODIFIED: Was line.startswith('```')
-            if not in_code_block:
-                # Skip the language identifier line / content on opening delimiter line
-                in_code_block = True
-                continue
-            else:
-                # Content on closing delimiter line is skipped
-                in_code_block = False
-                continue
-        if in_code_block:
-            code_lines.append(line)
-
-    return '\n'.join(code_lines)

 def postprocess(
     llm_output: str,
     language: str,
     strength: float = DEFAULT_STRENGTH,
-    temperature: float = 0,
+    temperature: float = 0.0,
     time: float = DEFAULT_TIME,
-    verbose: bool = False
+    verbose: bool = False,
 ) -> Tuple[str, float, str]:
     """
-
-
+    Extracts code from a string output of an LLM.
+
     Args:
-        llm_output
-        language
-        strength
-        temperature
-        time
-        verbose
-
+        llm_output: A string containing a mix of text and code sections.
+        language: A string specifying the programming language of the code to be extracted.
+        strength: A float between 0 and 1 that represents the strength of the LLM model to use.
+        temperature: A float between 0 and 1 that represents the temperature parameter for the LLM model.
+        time: A float between 0 and 1 that controls the thinking effort for the LLM model.
+        verbose: A boolean that indicates whether to print detailed processing information.
+
     Returns:
-
+        A tuple containing the extracted code string, total cost float and model name string.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-    if strength == 0:
-        if verbose:
-            print("[blue]Using simple code extraction (strength = 0)[/blue]")
-        return (postprocess_0(llm_output), 0.0, "simple_extraction")
-
-    # Step 2: Load the prompt template
-    prompt_template = load_prompt_template("extract_code_LLM")
-    if not prompt_template:
-        raise ValueError("Failed to load prompt template")
+    if not isinstance(llm_output, str) or not llm_output:
+        raise ValueError("llm_output must be a non-empty string")
+    if not isinstance(language, str) or not language:
+        raise ValueError("language must be a non-empty string")
+    if not isinstance(strength, (int, float)):
+        raise TypeError("strength must be a number")
+    if not 0 <= strength <= 1:
+        raise ValueError("strength must be between 0 and 1")
+    if not isinstance(temperature, (int, float)):
+        raise TypeError("temperature must be a number")
+    if not 0 <= temperature <= 1:
+        raise ValueError("temperature must be between 0 and 1")

+    if language == "prompt":
+        extracted_code = postprocess_0(llm_output, language)
+        return extracted_code, 0.0, "simple_extraction"
+
+    if strength == 0:
+        extracted_code = postprocess_0(llm_output, language)
         if verbose:
-            print("[blue]
+            console.print("[blue]Using simple code extraction (strength = 0)[/blue]")
+        return extracted_code, 0.0, "simple_extraction"

-
-
-        "llm_output": llm_output,
-        "language": language
-    }
+    prompt_name = "extract_code_LLM"
+    prompt = load_prompt_template(prompt_name)

-
-
+    if not prompt:
+        error_msg = "Failed to load prompt template"
+        console.print(f"[red]Error:[/red] {error_msg}")
+        raise ValueError(error_msg)
+
+    input_json = {"llm_output": llm_output, "language": language}
+
+    if verbose:
+        console.print("[blue]Loaded prompt template for code extraction[/blue]")
+
+    try:
+        result = llm_invoke(
+            prompt=prompt,
             input_json=input_json,
             strength=strength,
             temperature=temperature,
             time=time,
+            output_pydantic=ExtractedCode,
             verbose=verbose,
-            output_pydantic=ExtractedCode
         )

-        if not
-
+        if not result or "result" not in result:
+            error_msg = "Failed to get valid response from LLM"
+            console.print(f"[red]Error during LLM invocation:[/red] {error_msg}")
+            raise ValueError(error_msg)

-
-        if not isinstance(result_obj, ExtractedCode):
-            # If we got a string (likely an error message from llm_invoke), fallback to simple extraction
-            if verbose:
-                print(f"[yellow]Structured extraction failed ({result_obj}). Falling back to simple extraction.[/yellow]")
-            return (postprocess_0(llm_output), response.get('cost', 0.0), response.get('model_name', 'fallback'))
+        extracted_code = result["result"].extracted_code

-
-
-
-
-        lines = code_text.split('\n')
-        if lines and lines[0].startswith('```'):
+        # Clean up triple backticks
+        lines = extracted_code.splitlines()
+        if lines and lines[0].startswith("```"):
+            # Remove first line with opening backticks
             lines = lines[1:]
-
-            lines
-
-
+            # If there's a last line with closing backticks, remove it
+            if lines and lines[-1].startswith("```"):
+                lines = lines[:-1]
+            extracted_code = "\n".join(lines)
+
+        total_cost = result["cost"]
+        model_name = result["model_name"]

         if verbose:
-            print("[green]Successfully extracted code[/green]")
+            console.print("[green]Successfully extracted code[/green]")

-
-        return (
-            final_code,
-            response['cost'],
-            response['model_name']
-        )
+        return extracted_code, total_cost, model_name

+    except KeyError as e:
+        console.print(f"[red]Error in postprocess: {e}[/red]")
+        raise ValueError(f"Failed to get valid response from LLM: missing key {e}")
     except Exception as e:
-        print(f"[red]Error in postprocess: {
+        console.print(f"[red]Error in postprocess: {e}[/red]")
         raise
pdd/preprocess.py
CHANGED
@@ -4,10 +4,12 @@ import base64
 import subprocess
 from typing import List, Optional, Tuple
 import traceback
+from pathlib import Path
 from rich.console import Console
 from rich.panel import Panel
 from rich.markup import escape
 from rich.traceback import install
+from pdd.path_resolution import get_default_resolver

 install()
 console = Console()
@@ -37,24 +39,51 @@ def _write_debug_report() -> None:
         console.print("[dim]Debug mode enabled but PDD_PREPROCESS_DEBUG_FILE not set (output shown in console only)[/dim]")

 def _extract_fence_spans(text: str) -> List[Tuple[int, int]]:
-    """Return list of (start, end) spans for fenced code blocks
+    """Return list of (start, end) spans for fenced code blocks (``` or ~~~).

     The spans are [start, end) indices in the original text.
     """
     spans: List[Tuple[int, int]] = []
     try:
-
+        fence_re = re.compile(
+            r"(?m)^[ \t]*([`~]{3,})[^\n]*\n[\s\S]*?\n[ \t]*\1[ \t]*(?:\n|$)"
+        )
+        for m in fence_re.finditer(text):
             spans.append((m.start(), m.end()))
     except Exception:
         pass
     return spans

+
+def _extract_inline_code_spans(text: str) -> List[Tuple[int, int]]:
+    """Return list of (start, end) spans for inline code (backticks)."""
+    spans: List[Tuple[int, int]] = []
+    try:
+        for m in re.finditer(r"(?<!`)(`+)([^\n]*?)\1", text):
+            spans.append((m.start(), m.end()))
+    except Exception:
+        pass
+    return spans
+
+
+def _extract_code_spans(text: str) -> List[Tuple[int, int]]:
+    spans = _extract_fence_spans(text)
+    spans.extend(_extract_inline_code_spans(text))
+    return sorted(spans, key=lambda s: s[0])
+
 def _is_inside_any_span(idx: int, spans: List[Tuple[int, int]]) -> bool:
     for s, e in spans:
         if s <= idx < e:
             return True
     return False

+
+def _intersects_any_span(start: int, end: int, spans: List[Tuple[int, int]]) -> bool:
+    for s, e in spans:
+        if start < e and end > s:
+            return True
+    return False
+
 def _scan_risky_placeholders(text: str) -> Tuple[List[Tuple[int, str]], List[Tuple[int, str]]]:
     """Scan for risky placeholders outside code fences.

@@ -119,8 +148,11 @@ def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool
     return prompt

 def get_file_path(file_name: str) -> str:
-
-
+    resolver = get_default_resolver()
+    resolved = resolver.resolve_include(file_name)
+    if not Path(file_name).is_absolute() and resolved == resolver.cwd / file_name:
+        return os.path.join("./", file_name)
+    return str(resolved)

 def process_backtick_includes(text: str, recursive: bool) -> str:
     # More specific pattern that doesn't match nested > characters
@@ -229,7 +261,12 @@ def process_include_tags(text: str, recursive: bool) -> str:
     current_text = text
     while prev_text != current_text:
         prev_text = current_text
-
+        code_spans = _extract_code_spans(current_text)
+        def replace_include_with_spans(match):
+            if _intersects_any_span(match.start(), match.end(), code_spans):
+                return match.group(0)
+            return replace_include(match)
+        current_text = re.sub(pattern, replace_include_with_spans, current_text, flags=re.DOTALL)
     return current_text

 def process_pdd_tags(text: str) -> str:
@@ -262,7 +299,12 @@ def process_shell_tags(text: str, recursive: bool) -> str:
             console.print(f"[bold red]Error executing shell command:[/bold red] {str(e)}")
             _dbg(f"Shell execution exception: {e}")
             return f"[Shell execution error: {str(e)}]"
-
+    code_spans = _extract_code_spans(text)
+    def replace_shell_with_spans(match):
+        if _intersects_any_span(match.start(), match.end(), code_spans):
+            return match.group(0)
+        return replace_shell(match)
+    return re.sub(pattern, replace_shell_with_spans, text, flags=re.DOTALL)

 def process_web_tags(text: str, recursive: bool) -> str:
     pattern = r'<web>(.*?)</web>'
@@ -275,7 +317,7 @@ def process_web_tags(text: str, recursive: bool) -> str:
         _dbg(f"Web tag URL: {url}")
         try:
             try:
-                from firecrawl import
+                from firecrawl import Firecrawl
             except ImportError:
                 _dbg("firecrawl import failed; package not installed")
                 return f"[Error: firecrawl-py package not installed. Cannot scrape {url}]"
@@ -284,9 +326,13 @@ def process_web_tags(text: str, recursive: bool) -> str:
                 console.print("[bold yellow]Warning:[/bold yellow] FIRECRAWL_API_KEY not found in environment")
                 _dbg("FIRECRAWL_API_KEY not set")
                 return f"[Error: FIRECRAWL_API_KEY not set. Cannot scrape {url}]"
-            app =
-            response = app.
-
+            app = Firecrawl(api_key=api_key)
+            response = app.scrape(url, formats=['markdown'])
+            # Handle both dict response (new API) and object response (legacy)
+            if isinstance(response, dict) and 'markdown' in response:
+                _dbg(f"Web scrape returned markdown (len={len(response['markdown'])})")
+                return response['markdown']
+            elif hasattr(response, 'markdown'):
                 _dbg(f"Web scrape returned markdown (len={len(response.markdown)})")
                 return response.markdown
             else:
@@ -297,7 +343,12 @@ def process_web_tags(text: str, recursive: bool) -> str:
             console.print(f"[bold red]Error scraping web content:[/bold red] {str(e)}")
             _dbg(f"Web scraping exception: {e}")
             return f"[Web scraping error: {str(e)}]"
-
+    code_spans = _extract_code_spans(text)
+    def replace_web_with_spans(match):
+        if _intersects_any_span(match.start(), match.end(), code_spans):
+            return match.group(0)
+        return replace_web(match)
+    return re.sub(pattern, replace_web_with_spans, text, flags=re.DOTALL)

 def process_include_many_tags(text: str, recursive: bool) -> str:
     """Process <include-many> blocks whose inner content is a comma- or newline-separated
@@ -328,7 +379,12 @@ def process_include_many_tags(text: str, recursive: bool) -> str:
             _dbg(f"Error processing include-many {p}: {e}")
             contents.append(f"[Error processing include: {p}]")
         return "\n".join(contents)
-
+    code_spans = _extract_code_spans(text)
+    def replace_many_with_spans(match):
+        if _intersects_any_span(match.start(), match.end(), code_spans):
+            return match.group(0)
+        return replace_many(match)
+    return re.sub(pattern, replace_many_with_spans, text, flags=re.DOTALL)

 def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
     if exclude_keys is None:
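
The new `_extract_inline_code_spans`, `_extract_code_spans`, and `_intersects_any_span` helpers let the tag processors leave `<include>`, `<shell>`, `<web>`, and `<include-many>` markers untouched when they fall inside fenced or inline code. A standalone sketch of that guard, re-implemented here for illustration (the tag pattern and replacement callback are simplified stand-ins, not the package's actual ones):

```python
# Illustration of the span-exclusion guard added in pdd/preprocess.py (same regexes, public names).
import re
from typing import List, Tuple

FENCE_RE = re.compile(r"(?m)^[ \t]*([`~]{3,})[^\n]*\n[\s\S]*?\n[ \t]*\1[ \t]*(?:\n|$)")
INLINE_RE = re.compile(r"(?<!`)(`+)([^\n]*?)\1")

def extract_code_spans(text: str) -> List[Tuple[int, int]]:
    """Collect [start, end) spans covering fenced blocks and inline code."""
    spans = [(m.start(), m.end()) for m in FENCE_RE.finditer(text)]
    spans += [(m.start(), m.end()) for m in INLINE_RE.finditer(text)]
    return sorted(spans)

def intersects(start: int, end: int, spans: List[Tuple[int, int]]) -> bool:
    return any(start < e and end > s for s, e in spans)

text = (
    "<include>real.md</include>\n"
    "~~~\n"
    "<include>stays-literal.md</include>\n"
    "~~~\n"
)
spans = extract_code_spans(text)

def replace_include(match: re.Match) -> str:
    # Tags overlapping any code span are returned unchanged.
    if intersects(match.start(), match.end(), spans):
        return match.group(0)
    return f"[contents of {match.group(1)}]"  # stand-in for the real include expansion

print(re.sub(r"<include>(.*?)</include>", replace_include, text, flags=re.DOTALL))
```

Each `process_*_tags` function in the diff applies the same pattern: compute the spans once per pass, then have the `re.sub` callback return the match unchanged whenever it overlaps a code span.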
pdd/preprocess_main.py
CHANGED
@@ -1,5 +1,6 @@
 import csv
 import sys
+from pathlib import Path
 from typing import Tuple, Optional
 import click
 from rich import print as rprint
@@ -8,8 +9,15 @@ from .config_resolution import resolve_effective_config
 from .construct_paths import construct_paths
 from .preprocess import preprocess
 from .xml_tagger import xml_tagger
+from .architecture_sync import (
+    get_architecture_entry_for_prompt,
+    generate_tags_from_architecture,
+    has_pdd_tags,
+)
+
+
 def preprocess_main(
-    ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list
+    ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list, pdd_tags: bool = False
 ) -> Tuple[str, float, str]:
     """
     CLI wrapper for preprocessing prompts.
@@ -22,6 +30,7 @@ def preprocess_main(
     :param double: If True, curly brackets will be doubled.
     :param exclude: List of keys to exclude from curly bracket doubling.
     :return: Tuple containing the preprocessed prompt, total cost, and model name used.
+    :param pdd_tags: If True, inject PDD metadata tags from architecture.json.
     """
     try:
         # Construct file paths
@@ -39,6 +48,27 @@ def preprocess_main(
         # Load prompt file
         prompt = input_strings["prompt_file"]

+        # Inject PDD metadata tags from architecture.json if requested
+        pdd_tags_injected = False
+        if pdd_tags:
+            prompt_filename = Path(prompt_file).name
+            arch_entry = get_architecture_entry_for_prompt(prompt_filename)
+
+            if arch_entry:
+                if has_pdd_tags(prompt):
+                    if not ctx.obj.get("quiet", False):
+                        rprint(f"[yellow]Prompt already has PDD tags, skipping injection.[/yellow]")
+                else:
+                    generated_tags = generate_tags_from_architecture(arch_entry)
+                    if generated_tags:
+                        prompt = generated_tags + '\n\n' + prompt
+                        pdd_tags_injected = True
+                        if not ctx.obj.get("quiet", False):
+                            rprint(f"[green]Injected PDD tags from architecture.json[/green]")
+            else:
+                if not ctx.obj.get("quiet", False):
+                    rprint(f"[yellow]No architecture entry found for '{prompt_filename}', skipping PDD tags.[/yellow]")
+
         if xml:
             # Use xml_tagger to add XML delimiters
             # Use centralized config resolution with proper priority: CLI > pddrc > defaults
@@ -67,6 +97,8 @@ def preprocess_main(
         # Provide user feedback
         if not ctx.obj.get("quiet", False):
             rprint("[bold green]Prompt preprocessing completed successfully.[/bold green]")
+            if pdd_tags_injected:
+                rprint("[bold]PDD metadata tags: injected from architecture.json[/bold]")
             if xml:
                 rprint(f"[bold]XML Tagging used: {model_name}[/bold]")
             else:
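
The new `pdd_tags` flag gates a small injection step that runs before optional XML tagging. A condensed sketch of that branch, using the helpers imported above (the wrapper function itself is illustrative, not part of the package):

```python
# Sketch of the pdd_tags injection logic, assuming pdd-cli 0.0.121 is installed.
from pathlib import Path

from pdd.architecture_sync import (
    generate_tags_from_architecture,
    get_architecture_entry_for_prompt,
    has_pdd_tags,
)

def inject_pdd_tags(prompt_file: str, prompt: str) -> str:
    """Prepend PDD metadata tags from architecture.json when an entry exists."""
    entry = get_architecture_entry_for_prompt(Path(prompt_file).name)
    if not entry or has_pdd_tags(prompt):
        return prompt  # no matching entry, or tags already present
    tags = generate_tags_from_architecture(entry)
    return f"{tags}\n\n{prompt}" if tags else prompt
```

As in the diff, nothing is injected when architecture.json has no entry for the prompt file or when the prompt already carries PDD tags.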
pdd/prompts/agentic_bug_step10_pr_LLM.prompt
ADDED
@@ -0,0 +1,182 @@
+% You are an expert software engineer investigating a bug report. Your task is to create a draft pull request with the failing tests and link it to the issue.
+
+% Context
+
+You are working on step 10 of 10 (final step) in an agentic bug investigation workflow. Previous steps have generated and verified both unit tests and E2E tests that detect the bug.
+
+% Inputs
+
+- GitHub Issue URL: {issue_url}
+- Repository: {repo_owner}/{repo_name}
+- Issue Number: {issue_number}
+
+% Issue Content
+<issue_content>
+{issue_content}
+</issue_content>
+
+% Previous Steps Output
+<step1_output>
+{step1_output}
+</step1_output>
+
+<step2_output>
+{step2_output}
+</step2_output>
+
+<step3_output>
+{step3_output}
+</step3_output>
+
+<step4_output>
+{step4_output}
+</step4_output>
+
+<step5_output>
+{step5_output}
+</step5_output>
+
+<step6_output>
+{step6_output}
+</step6_output>
+
+<step7_output>
+{step7_output}
+</step7_output>
+
+<step8_output>
+{step8_output}
+</step8_output>
+
+<step9_output>
+{step9_output}
+</step9_output>
+
+% Worktree Information
+
+You are operating in an isolated git worktree at: {worktree_path}
+This worktree is already checked out to branch `fix/issue-{issue_number}`.
+Do NOT create a new branch - just stage, commit, and push.
+
+% Files to Stage
+
+**IMPORTANT: Only stage these specific files:**
+{files_to_stage}
+
+% Your Task
+
+1. **Prepare the commit**
+   - You are already on branch `fix/issue-{issue_number}` in an isolated worktree
+   - **CRITICAL: Stage ONLY the test file(s) created in Steps 7 and 9**
+   - Get the exact file paths from:
+     - Step 7's `FILES_CREATED:` or `FILES_MODIFIED:` output (unit tests)
+     - Step 9's `E2E_FILES_CREATED:` or `E2E_FILES_MODIFIED:` output (E2E tests)
+   - Stage each file individually: `git add <exact_file_path>`
+   - **DO NOT use `git add .` or `git add -A`** - these will stage unrelated files and pollute the PR
+   - Verify only the intended files are staged: `git status --short` (should show only the test file(s))
+   - Commit with a descriptive message referencing the issue
+
+2. **Create the draft PR**
+   - Push the branch to origin
+   - Create a draft pull request using `gh pr create --draft`
+   - Link to the issue using "Fixes #{issue_number}" in the PR body
+
+3. **Post final summary**
+   - Comment on the issue with PR link and next steps for the fix
+
+4. **Include PDD fix command**
+   - Extract code file path from Step 5's `**Location:**` field (strip the `:line_number` suffix)
+   - Use test file path from Step 7's `FILES_CREATED:` or test file section
+   - Search repo for matching prompt file: `find . -name "*.prompt" -type f`
+   - Derive module name from code file (e.g., `pdd/foo.py` -> `foo`)
+   - Use verification program: `context/{{module_name}}_example.py`
+   - Use error log path: `fix-issue-{issue_number}.log` for the fix command output
+   - Include a ready-to-run `pdd fix` command in your GitHub comment
+   - If no prompt file or verification program exists, include a note that they must be created first
+
+% PR Creation Command
+
+```bash
+gh pr create --draft --title "Add failing tests for #{issue_number}" --body "$(cat <<'EOF'
+## Summary
+Adds failing tests that detect the bug reported in #{issue_number}.
+
+## Test Files
+- Unit test: `{{unit_test_file_path}}`
+- E2E test: `{{e2e_test_file_path}}` (if applicable)
+
+## What This PR Contains
+- Failing unit test that reproduces the reported bug
+- Failing E2E test that verifies the bug at integration level (if applicable)
+- Tests are verified to fail on current code and will pass once the bug is fixed
+
+## Root Cause
+{{root_cause_summary}}
+
+## Next Steps
+1. [ ] Implement the fix at the identified location
+2. [ ] Verify the unit test passes
+3. [ ] Verify the E2E test passes
+4. [ ] Run full test suite
+5. [ ] Mark PR as ready for review
+
+Fixes #{issue_number}
+
+---
+*Generated by PDD agentic bug workflow*
+EOF
+)"
+```
+
+% Output
+
+After creating the PR, use `gh issue comment` to post your final report to issue #{issue_number}:
+
+```
+gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
+```
+
+Your comment should follow this format:
+
+```markdown
+## Step 10: Draft PR Created
+
+### Pull Request
+**PR #{{pr_number}}:** [{{pr_title}}]({{pr_url}})
+
+### Branch
+`fix/issue-{issue_number}`
+
+### What's Included
+- Failing unit test at `{{unit_test_file_path}}`
+- Failing E2E test at `{{e2e_test_file_path}}` (if applicable)
+- Commits: {{commit_count}}
+
+### Next Steps for Maintainers
+1. Review the failing tests to understand the expected behavior
+2. Implement the fix at the identified location
+3. Verify both unit and E2E tests pass with your fix
+4. Run full test suite to check for regressions
+5. Mark the PR as ready for review
+
+### PDD Fix Command
+
+To auto-fix this bug using PDD:
+
+```bash
+cd {{worktree_path}}
+pdd --force fix --loop --max-attempts 5 --verification-program context/{{module_name}}_example.py {{prompt_file}} {{code_file_path}} {{test_file_path}} fix-issue-{{issue_number}}.log
+```
+
+---
+*Investigation complete. A draft PR with failing tests has been created and linked to this issue.*
+```
+
+% Important
+
+- Create a DRAFT PR (not ready for review) since it only contains the failing tests
+- The PR should clearly state that a fix is still needed
+- Use "Fixes #{issue_number}" to auto-link the PR to the issue
+- Do NOT create a new branch - you are already on the correct branch in the worktree
+- Include both unit test files (Step 7) and E2E test files (Step 9) if both exist
+- Always post your findings as a GitHub comment before completing