pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/preprocess.py
CHANGED
|
@@ -1,37 +1,158 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
|
+
import base64
|
|
3
4
|
import subprocess
|
|
4
|
-
from typing import List, Optional
|
|
5
|
+
from typing import List, Optional, Tuple
|
|
5
6
|
import traceback
|
|
7
|
+
from pathlib import Path
|
|
6
8
|
from rich.console import Console
|
|
7
9
|
from rich.panel import Panel
|
|
8
10
|
from rich.markup import escape
|
|
9
11
|
from rich.traceback import install
|
|
12
|
+
from pdd.path_resolution import get_default_resolver
|
|
10
13
|
|
|
11
14
|
install()
|
|
12
15
|
console = Console()
|
|
13
16
|
|
|
17
|
+
# Debug/Instrumentation controls
|
|
18
|
+
_DEBUG_PREPROCESS = str(os.getenv("PDD_PREPROCESS_DEBUG", "")).lower() in ("1", "true", "yes", "on")
|
|
19
|
+
_DEBUG_OUTPUT_FILE = os.getenv("PDD_PREPROCESS_DEBUG_FILE") # Optional path to write a debug report
|
|
20
|
+
_DEBUG_EVENTS: List[str] = []
|
|
21
|
+
|
|
22
|
+
def _dbg(msg: str) -> None:
|
|
23
|
+
if _DEBUG_PREPROCESS:
|
|
24
|
+
console.print(f"[dim][PPD][preprocess][/dim] {escape(msg)}")
|
|
25
|
+
_DEBUG_EVENTS.append(msg)
|
|
26
|
+
|
|
27
|
+
def _write_debug_report() -> None:
|
|
28
|
+
if _DEBUG_PREPROCESS and _DEBUG_OUTPUT_FILE:
|
|
29
|
+
try:
|
|
30
|
+
with open(_DEBUG_OUTPUT_FILE, "w", encoding="utf-8") as fh:
|
|
31
|
+
fh.write("Preprocess Debug Report\n\n")
|
|
32
|
+
for line in _DEBUG_EVENTS:
|
|
33
|
+
fh.write(line + "\n")
|
|
34
|
+
console.print(f"[green]Debug report written to:[/green] {_DEBUG_OUTPUT_FILE}")
|
|
35
|
+
except Exception as e:
|
|
36
|
+
# Report the error so users know why the log file wasn't written
|
|
37
|
+
console.print(f"[yellow]Warning: Could not write debug report to {_DEBUG_OUTPUT_FILE}: {e}[/yellow]")
|
|
38
|
+
elif _DEBUG_PREPROCESS and not _DEBUG_OUTPUT_FILE:
|
|
39
|
+
console.print("[dim]Debug mode enabled but PDD_PREPROCESS_DEBUG_FILE not set (output shown in console only)[/dim]")
|
|
40
|
+
|
|
41
|
+
def _extract_fence_spans(text: str) -> List[Tuple[int, int]]:
|
|
42
|
+
"""Return list of (start, end) spans for fenced code blocks (``` or ~~~).
|
|
43
|
+
|
|
44
|
+
The spans are [start, end) indices in the original text.
|
|
45
|
+
"""
|
|
46
|
+
spans: List[Tuple[int, int]] = []
|
|
47
|
+
try:
|
|
48
|
+
fence_re = re.compile(
|
|
49
|
+
r"(?m)^[ \t]*([`~]{3,})[^\n]*\n[\s\S]*?\n[ \t]*\1[ \t]*(?:\n|$)"
|
|
50
|
+
)
|
|
51
|
+
for m in fence_re.finditer(text):
|
|
52
|
+
spans.append((m.start(), m.end()))
|
|
53
|
+
except Exception:
|
|
54
|
+
pass
|
|
55
|
+
return spans
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _extract_inline_code_spans(text: str) -> List[Tuple[int, int]]:
|
|
59
|
+
"""Return list of (start, end) spans for inline code (backticks)."""
|
|
60
|
+
spans: List[Tuple[int, int]] = []
|
|
61
|
+
try:
|
|
62
|
+
for m in re.finditer(r"(?<!`)(`+)([^\n]*?)\1", text):
|
|
63
|
+
spans.append((m.start(), m.end()))
|
|
64
|
+
except Exception:
|
|
65
|
+
pass
|
|
66
|
+
return spans
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _extract_code_spans(text: str) -> List[Tuple[int, int]]:
|
|
70
|
+
spans = _extract_fence_spans(text)
|
|
71
|
+
spans.extend(_extract_inline_code_spans(text))
|
|
72
|
+
return sorted(spans, key=lambda s: s[0])
|
|
73
|
+
|
|
74
|
+
def _is_inside_any_span(idx: int, spans: List[Tuple[int, int]]) -> bool:
|
|
75
|
+
for s, e in spans:
|
|
76
|
+
if s <= idx < e:
|
|
77
|
+
return True
|
|
78
|
+
return False
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _intersects_any_span(start: int, end: int, spans: List[Tuple[int, int]]) -> bool:
|
|
82
|
+
for s, e in spans:
|
|
83
|
+
if start < e and end > s:
|
|
84
|
+
return True
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
def _scan_risky_placeholders(text: str) -> Tuple[List[Tuple[int, str]], List[Tuple[int, str]]]:
|
|
88
|
+
"""Scan for risky placeholders outside code fences.
|
|
89
|
+
|
|
90
|
+
Returns two lists of (line_no, snippet):
|
|
91
|
+
- single_brace: matches like {name} not doubled and not part of {{...}}
|
|
92
|
+
- template_brace: `${...}` occurrences (which include single { ... })
|
|
93
|
+
"""
|
|
94
|
+
single_brace: List[Tuple[int, str]] = []
|
|
95
|
+
template_brace: List[Tuple[int, str]] = []
|
|
96
|
+
try:
|
|
97
|
+
fence_spans = _extract_fence_spans(text)
|
|
98
|
+
# Single-brace variable placeholders (avoid matching {{ or }})
|
|
99
|
+
for m in re.finditer(r"(?<!\{)\{([A-Za-z_][A-Za-z0-9_]*)\}(?!\})", text):
|
|
100
|
+
if not _is_inside_any_span(m.start(), fence_spans):
|
|
101
|
+
line_no = text.count("\n", 0, m.start()) + 1
|
|
102
|
+
single_brace.append((line_no, m.group(0)))
|
|
103
|
+
# JavaScript template placeholders like ${...}
|
|
104
|
+
for m in re.finditer(r"\$\{[^\}]+\}", text):
|
|
105
|
+
if not _is_inside_any_span(m.start(), fence_spans):
|
|
106
|
+
line_no = text.count("\n", 0, m.start()) + 1
|
|
107
|
+
template_brace.append((line_no, m.group(0)))
|
|
108
|
+
except Exception:
|
|
109
|
+
pass
|
|
110
|
+
return single_brace, template_brace
|
|
111
|
+
|
|
14
112
|
def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool = True, exclude_keys: Optional[List[str]] = None) -> str:
|
|
15
113
|
try:
|
|
16
114
|
if not prompt:
|
|
17
115
|
console.print("[bold red]Error:[/bold red] Empty prompt provided")
|
|
18
116
|
return ""
|
|
117
|
+
_DEBUG_EVENTS.clear()
|
|
118
|
+
_dbg(f"Start preprocess(recursive={recursive}, double_curly={double_curly_brackets}, exclude_keys={exclude_keys})")
|
|
119
|
+
_dbg(f"Initial length: {len(prompt)} characters")
|
|
19
120
|
console.print(Panel("Starting prompt preprocessing", style="bold blue"))
|
|
20
121
|
prompt = process_backtick_includes(prompt, recursive)
|
|
122
|
+
_dbg("After backtick includes processed")
|
|
21
123
|
prompt = process_xml_tags(prompt, recursive)
|
|
124
|
+
_dbg("After XML-like tags processed")
|
|
22
125
|
if double_curly_brackets:
|
|
23
126
|
prompt = double_curly(prompt, exclude_keys)
|
|
127
|
+
_dbg("After double_curly execution")
|
|
128
|
+
# Scan for risky placeholders remaining outside code fences
|
|
129
|
+
singles, templates = _scan_risky_placeholders(prompt)
|
|
130
|
+
if singles:
|
|
131
|
+
_dbg(f"WARNING: Found {len(singles)} single-brace placeholders outside code fences (examples):")
|
|
132
|
+
for ln, frag in singles[:5]:
|
|
133
|
+
_dbg(f" line {ln}: {frag}")
|
|
134
|
+
if templates:
|
|
135
|
+
_dbg(f"INFO: Found {len(templates)} template literals ${'{...'} outside code fences (examples):")
|
|
136
|
+
for ln, frag in templates[:5]:
|
|
137
|
+
_dbg(f" line {ln}: {frag}")
|
|
24
138
|
# Don't trim whitespace that might be significant for the tests
|
|
25
139
|
console.print(Panel("Preprocessing complete", style="bold green"))
|
|
140
|
+
_dbg(f"Final length: {len(prompt)} characters")
|
|
141
|
+
_write_debug_report()
|
|
26
142
|
return prompt
|
|
27
143
|
except Exception as e:
|
|
28
144
|
console.print(f"[bold red]Error during preprocessing:[/bold red] {str(e)}")
|
|
29
145
|
console.print(Panel(traceback.format_exc(), title="Error Details", style="red"))
|
|
146
|
+
_dbg(f"Exception: {str(e)}")
|
|
147
|
+
_write_debug_report()
|
|
30
148
|
return prompt
|
|
31
149
|
|
|
32
150
|
def get_file_path(file_name: str) -> str:
|
|
33
|
-
|
|
34
|
-
|
|
151
|
+
resolver = get_default_resolver()
|
|
152
|
+
resolved = resolver.resolve_include(file_name)
|
|
153
|
+
if not Path(file_name).is_absolute() and resolved == resolver.cwd / file_name:
|
|
154
|
+
return os.path.join("./", file_name)
|
|
155
|
+
return str(resolved)
|
|
35
156
|
|
|
36
157
|
def process_backtick_includes(text: str, recursive: bool) -> str:
|
|
37
158
|
# More specific pattern that doesn't match nested > characters
|
|
@@ -45,12 +166,17 @@ def process_backtick_includes(text: str, recursive: bool) -> str:
|
|
|
45
166
|
content = file.read()
|
|
46
167
|
if recursive:
|
|
47
168
|
content = preprocess(content, recursive=True, double_curly_brackets=False)
|
|
169
|
+
_dbg(f"Included via backticks: {file_path} (len={len(content)})")
|
|
48
170
|
return f"```{content}```"
|
|
49
171
|
except FileNotFoundError:
|
|
50
172
|
console.print(f"[bold red]Warning:[/bold red] File not found: {file_path}")
|
|
51
|
-
|
|
173
|
+
_dbg(f"Missing backtick include: {file_path}")
|
|
174
|
+
# First pass (recursive=True): leave the tag so a later env expansion can resolve it
|
|
175
|
+
# Second pass (recursive=False): replace with a visible placeholder
|
|
176
|
+
return match.group(0) if recursive else f"```[File not found: {file_path}]```"
|
|
52
177
|
except Exception as e:
|
|
53
178
|
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
|
|
179
|
+
_dbg(f"Error processing backtick include {file_path}: {e}")
|
|
54
180
|
return f"```[Error processing include: {file_path}]```"
|
|
55
181
|
prev_text = ""
|
|
56
182
|
current_text = text
|
|
@@ -62,9 +188,9 @@ def process_backtick_includes(text: str, recursive: bool) -> str:
|
|
|
62
188
|
def process_xml_tags(text: str, recursive: bool) -> str:
|
|
63
189
|
text = process_pdd_tags(text)
|
|
64
190
|
text = process_include_tags(text, recursive)
|
|
65
|
-
|
|
66
|
-
text = process_shell_tags(text)
|
|
67
|
-
text = process_web_tags(text)
|
|
191
|
+
text = process_include_many_tags(text, recursive)
|
|
192
|
+
text = process_shell_tags(text, recursive)
|
|
193
|
+
text = process_web_tags(text, recursive)
|
|
68
194
|
return text
|
|
69
195
|
|
|
70
196
|
def process_include_tags(text: str, recursive: bool) -> str:
|
|
@@ -73,23 +199,74 @@ def process_include_tags(text: str, recursive: bool) -> str:
|
|
|
73
199
|
file_path = match.group(1).strip()
|
|
74
200
|
try:
|
|
75
201
|
full_path = get_file_path(file_path)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
202
|
+
ext = os.path.splitext(file_path)[1].lower()
|
|
203
|
+
image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.heic']
|
|
204
|
+
|
|
205
|
+
if ext in image_extensions:
|
|
206
|
+
console.print(f"Processing image include: [cyan]{full_path}[/cyan]")
|
|
207
|
+
from PIL import Image
|
|
208
|
+
import io
|
|
209
|
+
import pillow_heif
|
|
210
|
+
|
|
211
|
+
pillow_heif.register_heif_opener()
|
|
212
|
+
|
|
213
|
+
MAX_DIMENSION = 1024
|
|
214
|
+
with open(full_path, 'rb') as file:
|
|
215
|
+
img = Image.open(file)
|
|
216
|
+
img.load() # Force loading the image data before the file closes
|
|
217
|
+
|
|
218
|
+
if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
|
|
219
|
+
img.thumbnail((MAX_DIMENSION, MAX_DIMENSION))
|
|
220
|
+
console.print(f"Image resized to {img.size}")
|
|
221
|
+
|
|
222
|
+
# Handle GIFs: convert to a static PNG of the first frame
|
|
223
|
+
if ext == '.gif':
|
|
224
|
+
img.seek(0)
|
|
225
|
+
img = img.convert("RGB")
|
|
226
|
+
img_format = 'PNG'
|
|
227
|
+
mime_type = 'image/png'
|
|
228
|
+
elif ext == '.heic':
|
|
229
|
+
img_format = 'JPEG'
|
|
230
|
+
mime_type = 'image/jpeg'
|
|
231
|
+
else:
|
|
232
|
+
img_format = 'JPEG' if ext in ['.jpg', '.jpeg'] else 'PNG'
|
|
233
|
+
mime_type = f'image/{img_format.lower()}'
|
|
234
|
+
|
|
235
|
+
# Save the (potentially resized and converted) image to an in-memory buffer
|
|
236
|
+
buffer = io.BytesIO()
|
|
237
|
+
img.save(buffer, format=img_format)
|
|
238
|
+
content = buffer.getvalue()
|
|
239
|
+
|
|
240
|
+
encoded_string = base64.b64encode(content).decode('utf-8')
|
|
241
|
+
return f"data:{mime_type};base64,{encoded_string}"
|
|
242
|
+
else:
|
|
243
|
+
console.print(f"Processing XML include: [cyan]{full_path}[/cyan]")
|
|
244
|
+
with open(full_path, 'r', encoding='utf-8') as file:
|
|
245
|
+
content = file.read()
|
|
246
|
+
if recursive:
|
|
247
|
+
content = preprocess(content, recursive=True, double_curly_brackets=False)
|
|
248
|
+
_dbg(f"Included via XML tag: {file_path} (len={len(content)})")
|
|
249
|
+
return content
|
|
82
250
|
except FileNotFoundError:
|
|
83
251
|
console.print(f"[bold red]Warning:[/bold red] File not found: {file_path}")
|
|
84
|
-
|
|
252
|
+
_dbg(f"Missing XML include: {file_path}")
|
|
253
|
+
# First pass (recursive=True): leave the tag so a later env expansion can resolve it
|
|
254
|
+
# Second pass (recursive=False): replace with a visible placeholder
|
|
255
|
+
return match.group(0) if recursive else f"[File not found: {file_path}]"
|
|
85
256
|
except Exception as e:
|
|
86
257
|
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
|
|
258
|
+
_dbg(f"Error processing XML include {file_path}: {e}")
|
|
87
259
|
return f"[Error processing include: {file_path}]"
|
|
88
260
|
prev_text = ""
|
|
89
261
|
current_text = text
|
|
90
262
|
while prev_text != current_text:
|
|
91
263
|
prev_text = current_text
|
|
92
|
-
|
|
264
|
+
code_spans = _extract_code_spans(current_text)
|
|
265
|
+
def replace_include_with_spans(match):
|
|
266
|
+
if _intersects_any_span(match.start(), match.end(), code_spans):
|
|
267
|
+
return match.group(0)
|
|
268
|
+
return replace_include(match)
|
|
269
|
+
current_text = re.sub(pattern, replace_include_with_spans, current_text, flags=re.DOTALL)
|
|
93
270
|
return current_text
|
|
94
271
|
|
|
95
272
|
def process_pdd_tags(text: str) -> str:
|
|
@@ -101,54 +278,120 @@ def process_pdd_tags(text: str) -> str:
|
|
|
101
278
|
return "This is a test "
|
|
102
279
|
return processed
|
|
103
280
|
|
|
104
|
-
def process_shell_tags(text: str) -> str:
|
|
281
|
+
def process_shell_tags(text: str, recursive: bool) -> str:
|
|
105
282
|
pattern = r'<shell>(.*?)</shell>'
|
|
106
283
|
def replace_shell(match):
|
|
107
284
|
command = match.group(1).strip()
|
|
285
|
+
if recursive:
|
|
286
|
+
# Defer execution until after env var expansion
|
|
287
|
+
return match.group(0)
|
|
108
288
|
console.print(f"Executing shell command: [cyan]{escape(command)}[/cyan]")
|
|
289
|
+
_dbg(f"Shell tag command: {command}")
|
|
109
290
|
try:
|
|
110
291
|
result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
|
|
111
292
|
return result.stdout
|
|
112
293
|
except subprocess.CalledProcessError as e:
|
|
113
294
|
error_msg = f"Command '{command}' returned non-zero exit status {e.returncode}."
|
|
114
295
|
console.print(f"[bold red]Error:[/bold red] {error_msg}")
|
|
296
|
+
_dbg(f"Shell command error: {error_msg}")
|
|
115
297
|
return f"Error: {error_msg}"
|
|
116
298
|
except Exception as e:
|
|
117
299
|
console.print(f"[bold red]Error executing shell command:[/bold red] {str(e)}")
|
|
300
|
+
_dbg(f"Shell execution exception: {e}")
|
|
118
301
|
return f"[Shell execution error: {str(e)}]"
|
|
119
|
-
|
|
302
|
+
code_spans = _extract_code_spans(text)
|
|
303
|
+
def replace_shell_with_spans(match):
|
|
304
|
+
if _intersects_any_span(match.start(), match.end(), code_spans):
|
|
305
|
+
return match.group(0)
|
|
306
|
+
return replace_shell(match)
|
|
307
|
+
return re.sub(pattern, replace_shell_with_spans, text, flags=re.DOTALL)
|
|
120
308
|
|
|
121
|
-
def process_web_tags(text: str) -> str:
|
|
309
|
+
def process_web_tags(text: str, recursive: bool) -> str:
|
|
122
310
|
pattern = r'<web>(.*?)</web>'
|
|
123
311
|
def replace_web(match):
|
|
124
312
|
url = match.group(1).strip()
|
|
313
|
+
if recursive:
|
|
314
|
+
# Defer network operations until after env var expansion
|
|
315
|
+
return match.group(0)
|
|
125
316
|
console.print(f"Scraping web content from: [cyan]{url}[/cyan]")
|
|
317
|
+
_dbg(f"Web tag URL: {url}")
|
|
126
318
|
try:
|
|
127
319
|
try:
|
|
128
|
-
from firecrawl import
|
|
320
|
+
from firecrawl import Firecrawl
|
|
129
321
|
except ImportError:
|
|
322
|
+
_dbg("firecrawl import failed; package not installed")
|
|
130
323
|
return f"[Error: firecrawl-py package not installed. Cannot scrape {url}]"
|
|
131
324
|
api_key = os.environ.get('FIRECRAWL_API_KEY')
|
|
132
325
|
if not api_key:
|
|
133
326
|
console.print("[bold yellow]Warning:[/bold yellow] FIRECRAWL_API_KEY not found in environment")
|
|
327
|
+
_dbg("FIRECRAWL_API_KEY not set")
|
|
134
328
|
return f"[Error: FIRECRAWL_API_KEY not set. Cannot scrape {url}]"
|
|
135
|
-
app =
|
|
136
|
-
response = app.
|
|
137
|
-
|
|
329
|
+
app = Firecrawl(api_key=api_key)
|
|
330
|
+
response = app.scrape(url, formats=['markdown'])
|
|
331
|
+
# Handle both dict response (new API) and object response (legacy)
|
|
332
|
+
if isinstance(response, dict) and 'markdown' in response:
|
|
333
|
+
_dbg(f"Web scrape returned markdown (len={len(response['markdown'])})")
|
|
334
|
+
return response['markdown']
|
|
335
|
+
elif hasattr(response, 'markdown'):
|
|
336
|
+
_dbg(f"Web scrape returned markdown (len={len(response.markdown)})")
|
|
138
337
|
return response.markdown
|
|
139
338
|
else:
|
|
140
339
|
console.print(f"[bold yellow]Warning:[/bold yellow] No markdown content returned for {url}")
|
|
340
|
+
_dbg("Web scrape returned no markdown content")
|
|
141
341
|
return f"[No content available for {url}]"
|
|
142
342
|
except Exception as e:
|
|
143
343
|
console.print(f"[bold red]Error scraping web content:[/bold red] {str(e)}")
|
|
344
|
+
_dbg(f"Web scraping exception: {e}")
|
|
144
345
|
return f"[Web scraping error: {str(e)}]"
|
|
145
|
-
|
|
346
|
+
code_spans = _extract_code_spans(text)
|
|
347
|
+
def replace_web_with_spans(match):
|
|
348
|
+
if _intersects_any_span(match.start(), match.end(), code_spans):
|
|
349
|
+
return match.group(0)
|
|
350
|
+
return replace_web(match)
|
|
351
|
+
return re.sub(pattern, replace_web_with_spans, text, flags=re.DOTALL)
|
|
352
|
+
|
|
353
|
+
def process_include_many_tags(text: str, recursive: bool) -> str:
|
|
354
|
+
"""Process <include-many> blocks whose inner content is a comma- or newline-separated
|
|
355
|
+
list of file paths (typically provided via variables after env expansion)."""
|
|
356
|
+
pattern = r'<include-many>(.*?)</include-many>'
|
|
357
|
+
def replace_many(match):
|
|
358
|
+
inner = match.group(1)
|
|
359
|
+
if recursive:
|
|
360
|
+
# Wait for env expansion to materialize the list
|
|
361
|
+
return match.group(0)
|
|
362
|
+
# Split by newlines or commas
|
|
363
|
+
raw_items = [s.strip() for part in inner.split('\n') for s in part.split(',')]
|
|
364
|
+
paths = [p for p in raw_items if p]
|
|
365
|
+
contents: list[str] = []
|
|
366
|
+
for p in paths:
|
|
367
|
+
try:
|
|
368
|
+
full_path = get_file_path(p)
|
|
369
|
+
console.print(f"Including (many): [cyan]{full_path}[/cyan]")
|
|
370
|
+
with open(full_path, 'r', encoding='utf-8') as fh:
|
|
371
|
+
contents.append(fh.read())
|
|
372
|
+
_dbg(f"Included (many): {p}")
|
|
373
|
+
except FileNotFoundError:
|
|
374
|
+
console.print(f"[bold red]Warning:[/bold red] File not found: {p}")
|
|
375
|
+
_dbg(f"Missing include-many: {p}")
|
|
376
|
+
contents.append(f"[File not found: {p}]")
|
|
377
|
+
except Exception as e:
|
|
378
|
+
console.print(f"[bold red]Error processing include-many:[/bold red] {str(e)}")
|
|
379
|
+
_dbg(f"Error processing include-many {p}: {e}")
|
|
380
|
+
contents.append(f"[Error processing include: {p}]")
|
|
381
|
+
return "\n".join(contents)
|
|
382
|
+
code_spans = _extract_code_spans(text)
|
|
383
|
+
def replace_many_with_spans(match):
|
|
384
|
+
if _intersects_any_span(match.start(), match.end(), code_spans):
|
|
385
|
+
return match.group(0)
|
|
386
|
+
return replace_many(match)
|
|
387
|
+
return re.sub(pattern, replace_many_with_spans, text, flags=re.DOTALL)
|
|
146
388
|
|
|
147
389
|
def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
|
|
148
390
|
if exclude_keys is None:
|
|
149
391
|
exclude_keys = []
|
|
150
392
|
|
|
151
393
|
console.print("Doubling curly brackets...")
|
|
394
|
+
_dbg("double_curly invoked")
|
|
152
395
|
|
|
153
396
|
# Special case handling for specific test patterns
|
|
154
397
|
if "Mix of {excluded{inner}} nesting" in text and "excluded" in exclude_keys:
|
|
@@ -172,6 +415,14 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
|
|
|
172
415
|
"2": {{"id": "2", "name": "Resource Two"}}
|
|
173
416
|
}}"""
|
|
174
417
|
|
|
418
|
+
# Protect ${IDENT} placeholders so we can safely double braces, then restore
|
|
419
|
+
# them as ${{IDENT}} to avoid PromptTemplate interpreting {IDENT}.
|
|
420
|
+
protected_vars: List[str] = []
|
|
421
|
+
def _protect_var(m):
|
|
422
|
+
protected_vars.append(m.group(0))
|
|
423
|
+
return f"__PDD_VAR_{len(protected_vars)-1}__"
|
|
424
|
+
text = re.sub(r"\$\{[A-Za-z_][A-Za-z0-9_]*\}", _protect_var, text)
|
|
425
|
+
|
|
175
426
|
# First, protect any existing double curly braces
|
|
176
427
|
text = re.sub(r'\{\{([^{}]*)\}\}', r'__ALREADY_DOUBLED__\1__END_ALREADY__', text)
|
|
177
428
|
|
|
@@ -188,6 +439,24 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
|
|
|
188
439
|
|
|
189
440
|
# Restore already doubled brackets
|
|
190
441
|
text = re.sub(r'__ALREADY_DOUBLED__(.*?)__END_ALREADY__', r'{{\1}}', text)
|
|
442
|
+
|
|
443
|
+
# Restore protected ${IDENT} placeholders as ${{IDENT}} so single braces
|
|
444
|
+
# don't leak into PromptTemplate formatting. This is safe for JS template
|
|
445
|
+
# literals and prevents missing-key errors in later formatting steps.
|
|
446
|
+
def _restore_var(m):
|
|
447
|
+
idx = int(m.group(1))
|
|
448
|
+
if 0 <= idx < len(protected_vars):
|
|
449
|
+
original = protected_vars[idx] # e.g., ${FOO}
|
|
450
|
+
try:
|
|
451
|
+
inner = re.match(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", original)
|
|
452
|
+
if inner:
|
|
453
|
+
# Build as concatenation to avoid f-string brace escaping confusion
|
|
454
|
+
return "${{" + inner.group(1) + "}}" # -> ${{FOO}}
|
|
455
|
+
except Exception:
|
|
456
|
+
pass
|
|
457
|
+
return original
|
|
458
|
+
return m.group(0)
|
|
459
|
+
text = re.sub(r"__PDD_VAR_(\d+)__", _restore_var, text)
|
|
191
460
|
|
|
192
461
|
# Special handling for code blocks
|
|
193
462
|
code_block_pattern = r'```([\w\s]*)\n([\s\S]*?)```'
|
|
@@ -213,4 +482,4 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
|
|
|
213
482
|
# Process code blocks
|
|
214
483
|
text = re.sub(code_block_pattern, process_code_block, text, flags=re.DOTALL)
|
|
215
484
|
|
|
216
|
-
return text
|
|
485
|
+
return text
|
pdd/preprocess_main.py
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import sys
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
from typing import Tuple, Optional
|
|
4
5
|
import click
|
|
5
6
|
from rich import print as rprint
|
|
6
7
|
|
|
8
|
+
from .config_resolution import resolve_effective_config
|
|
7
9
|
from .construct_paths import construct_paths
|
|
8
10
|
from .preprocess import preprocess
|
|
9
11
|
from .xml_tagger import xml_tagger
|
|
10
|
-
from . import
|
|
12
|
+
from .architecture_sync import (
|
|
13
|
+
get_architecture_entry_for_prompt,
|
|
14
|
+
generate_tags_from_architecture,
|
|
15
|
+
has_pdd_tags,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
11
19
|
def preprocess_main(
|
|
12
|
-
ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list
|
|
20
|
+
ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list, pdd_tags: bool = False
|
|
13
21
|
) -> Tuple[str, float, str]:
|
|
14
22
|
"""
|
|
15
23
|
CLI wrapper for preprocessing prompts.
|
|
@@ -22,6 +30,7 @@ def preprocess_main(
|
|
|
22
30
|
:param double: If True, curly brackets will be doubled.
|
|
23
31
|
:param exclude: List of keys to exclude from curly bracket doubling.
|
|
24
32
|
:return: Tuple containing the preprocessed prompt, total cost, and model name used.
|
|
33
|
+
:param pdd_tags: If True, inject PDD metadata tags from architecture.json.
|
|
25
34
|
"""
|
|
26
35
|
try:
|
|
27
36
|
# Construct file paths
|
|
@@ -33,17 +42,41 @@ def preprocess_main(
|
|
|
33
42
|
quiet=ctx.obj.get("quiet", False),
|
|
34
43
|
command="preprocess",
|
|
35
44
|
command_options=command_options,
|
|
45
|
+
context_override=ctx.obj.get('context')
|
|
36
46
|
)
|
|
37
47
|
|
|
38
48
|
# Load prompt file
|
|
39
49
|
prompt = input_strings["prompt_file"]
|
|
40
50
|
|
|
51
|
+
# Inject PDD metadata tags from architecture.json if requested
|
|
52
|
+
pdd_tags_injected = False
|
|
53
|
+
if pdd_tags:
|
|
54
|
+
prompt_filename = Path(prompt_file).name
|
|
55
|
+
arch_entry = get_architecture_entry_for_prompt(prompt_filename)
|
|
56
|
+
|
|
57
|
+
if arch_entry:
|
|
58
|
+
if has_pdd_tags(prompt):
|
|
59
|
+
if not ctx.obj.get("quiet", False):
|
|
60
|
+
rprint(f"[yellow]Prompt already has PDD tags, skipping injection.[/yellow]")
|
|
61
|
+
else:
|
|
62
|
+
generated_tags = generate_tags_from_architecture(arch_entry)
|
|
63
|
+
if generated_tags:
|
|
64
|
+
prompt = generated_tags + '\n\n' + prompt
|
|
65
|
+
pdd_tags_injected = True
|
|
66
|
+
if not ctx.obj.get("quiet", False):
|
|
67
|
+
rprint(f"[green]Injected PDD tags from architecture.json[/green]")
|
|
68
|
+
else:
|
|
69
|
+
if not ctx.obj.get("quiet", False):
|
|
70
|
+
rprint(f"[yellow]No architecture entry found for '{prompt_filename}', skipping PDD tags.[/yellow]")
|
|
71
|
+
|
|
41
72
|
if xml:
|
|
42
73
|
# Use xml_tagger to add XML delimiters
|
|
43
|
-
|
|
44
|
-
|
|
74
|
+
# Use centralized config resolution with proper priority: CLI > pddrc > defaults
|
|
75
|
+
effective_config = resolve_effective_config(ctx, resolved_config)
|
|
76
|
+
strength = effective_config["strength"]
|
|
77
|
+
temperature = effective_config["temperature"]
|
|
78
|
+
time = effective_config["time"]
|
|
45
79
|
verbose = ctx.obj.get("verbose", False)
|
|
46
|
-
time = ctx.obj.get("time", DEFAULT_TIME)
|
|
47
80
|
xml_tagged, total_cost, model_name = xml_tagger(
|
|
48
81
|
prompt,
|
|
49
82
|
strength,
|
|
@@ -64,6 +97,8 @@ def preprocess_main(
|
|
|
64
97
|
# Provide user feedback
|
|
65
98
|
if not ctx.obj.get("quiet", False):
|
|
66
99
|
rprint("[bold green]Prompt preprocessing completed successfully.[/bold green]")
|
|
100
|
+
if pdd_tags_injected:
|
|
101
|
+
rprint("[bold]PDD metadata tags: injected from architecture.json[/bold]")
|
|
67
102
|
if xml:
|
|
68
103
|
rprint(f"[bold]XML Tagging used: {model_name}[/bold]")
|
|
69
104
|
else:
|
|
@@ -76,4 +111,4 @@ def preprocess_main(
|
|
|
76
111
|
except Exception as e:
|
|
77
112
|
if not ctx.obj.get("quiet", False):
|
|
78
113
|
rprint(f"[bold red]Error during preprocessing:[/bold red] {e}")
|
|
79
|
-
sys.exit(1)
|
|
114
|
+
sys.exit(1)
|