pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/preprocess.py CHANGED
@@ -1,37 +1,158 @@
1
1
  import os
2
2
  import re
3
+ import base64
3
4
  import subprocess
4
- from typing import List, Optional
5
+ from typing import List, Optional, Tuple
5
6
  import traceback
7
+ from pathlib import Path
6
8
  from rich.console import Console
7
9
  from rich.panel import Panel
8
10
  from rich.markup import escape
9
11
  from rich.traceback import install
12
+ from pdd.path_resolution import get_default_resolver
10
13
 
11
14
  install()
12
15
  console = Console()
13
16
 
17
+ # Debug/Instrumentation controls
18
+ _DEBUG_PREPROCESS = str(os.getenv("PDD_PREPROCESS_DEBUG", "")).lower() in ("1", "true", "yes", "on")
19
+ _DEBUG_OUTPUT_FILE = os.getenv("PDD_PREPROCESS_DEBUG_FILE") # Optional path to write a debug report
20
+ _DEBUG_EVENTS: List[str] = []
21
+
22
+ def _dbg(msg: str) -> None:
23
+ if _DEBUG_PREPROCESS:
24
+ console.print(f"[dim][PPD][preprocess][/dim] {escape(msg)}")
25
+ _DEBUG_EVENTS.append(msg)
26
+
27
+ def _write_debug_report() -> None:
28
+ if _DEBUG_PREPROCESS and _DEBUG_OUTPUT_FILE:
29
+ try:
30
+ with open(_DEBUG_OUTPUT_FILE, "w", encoding="utf-8") as fh:
31
+ fh.write("Preprocess Debug Report\n\n")
32
+ for line in _DEBUG_EVENTS:
33
+ fh.write(line + "\n")
34
+ console.print(f"[green]Debug report written to:[/green] {_DEBUG_OUTPUT_FILE}")
35
+ except Exception as e:
36
+ # Report the error so users know why the log file wasn't written
37
+ console.print(f"[yellow]Warning: Could not write debug report to {_DEBUG_OUTPUT_FILE}: {e}[/yellow]")
38
+ elif _DEBUG_PREPROCESS and not _DEBUG_OUTPUT_FILE:
39
+ console.print("[dim]Debug mode enabled but PDD_PREPROCESS_DEBUG_FILE not set (output shown in console only)[/dim]")
40
+
41
+ def _extract_fence_spans(text: str) -> List[Tuple[int, int]]:
42
+ """Return list of (start, end) spans for fenced code blocks (``` or ~~~).
43
+
44
+ The spans are [start, end) indices in the original text.
45
+ """
46
+ spans: List[Tuple[int, int]] = []
47
+ try:
48
+ fence_re = re.compile(
49
+ r"(?m)^[ \t]*([`~]{3,})[^\n]*\n[\s\S]*?\n[ \t]*\1[ \t]*(?:\n|$)"
50
+ )
51
+ for m in fence_re.finditer(text):
52
+ spans.append((m.start(), m.end()))
53
+ except Exception:
54
+ pass
55
+ return spans
56
+
57
+
58
+ def _extract_inline_code_spans(text: str) -> List[Tuple[int, int]]:
59
+ """Return list of (start, end) spans for inline code (backticks)."""
60
+ spans: List[Tuple[int, int]] = []
61
+ try:
62
+ for m in re.finditer(r"(?<!`)(`+)([^\n]*?)\1", text):
63
+ spans.append((m.start(), m.end()))
64
+ except Exception:
65
+ pass
66
+ return spans
67
+
68
+
69
+ def _extract_code_spans(text: str) -> List[Tuple[int, int]]:
70
+ spans = _extract_fence_spans(text)
71
+ spans.extend(_extract_inline_code_spans(text))
72
+ return sorted(spans, key=lambda s: s[0])
73
+
74
+ def _is_inside_any_span(idx: int, spans: List[Tuple[int, int]]) -> bool:
75
+ for s, e in spans:
76
+ if s <= idx < e:
77
+ return True
78
+ return False
79
+
80
+
81
+ def _intersects_any_span(start: int, end: int, spans: List[Tuple[int, int]]) -> bool:
82
+ for s, e in spans:
83
+ if start < e and end > s:
84
+ return True
85
+ return False
86
+
87
+ def _scan_risky_placeholders(text: str) -> Tuple[List[Tuple[int, str]], List[Tuple[int, str]]]:
88
+ """Scan for risky placeholders outside code fences.
89
+
90
+ Returns two lists of (line_no, snippet):
91
+ - single_brace: matches like {name} not doubled and not part of {{...}}
92
+ - template_brace: `${...}` occurrences (which include single { ... })
93
+ """
94
+ single_brace: List[Tuple[int, str]] = []
95
+ template_brace: List[Tuple[int, str]] = []
96
+ try:
97
+ fence_spans = _extract_fence_spans(text)
98
+ # Single-brace variable placeholders (avoid matching {{ or }})
99
+ for m in re.finditer(r"(?<!\{)\{([A-Za-z_][A-Za-z0-9_]*)\}(?!\})", text):
100
+ if not _is_inside_any_span(m.start(), fence_spans):
101
+ line_no = text.count("\n", 0, m.start()) + 1
102
+ single_brace.append((line_no, m.group(0)))
103
+ # JavaScript template placeholders like ${...}
104
+ for m in re.finditer(r"\$\{[^\}]+\}", text):
105
+ if not _is_inside_any_span(m.start(), fence_spans):
106
+ line_no = text.count("\n", 0, m.start()) + 1
107
+ template_brace.append((line_no, m.group(0)))
108
+ except Exception:
109
+ pass
110
+ return single_brace, template_brace
111
+
14
112
  def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool = True, exclude_keys: Optional[List[str]] = None) -> str:
15
113
  try:
16
114
  if not prompt:
17
115
  console.print("[bold red]Error:[/bold red] Empty prompt provided")
18
116
  return ""
117
+ _DEBUG_EVENTS.clear()
118
+ _dbg(f"Start preprocess(recursive={recursive}, double_curly={double_curly_brackets}, exclude_keys={exclude_keys})")
119
+ _dbg(f"Initial length: {len(prompt)} characters")
19
120
  console.print(Panel("Starting prompt preprocessing", style="bold blue"))
20
121
  prompt = process_backtick_includes(prompt, recursive)
122
+ _dbg("After backtick includes processed")
21
123
  prompt = process_xml_tags(prompt, recursive)
124
+ _dbg("After XML-like tags processed")
22
125
  if double_curly_brackets:
23
126
  prompt = double_curly(prompt, exclude_keys)
127
+ _dbg("After double_curly execution")
128
+ # Scan for risky placeholders remaining outside code fences
129
+ singles, templates = _scan_risky_placeholders(prompt)
130
+ if singles:
131
+ _dbg(f"WARNING: Found {len(singles)} single-brace placeholders outside code fences (examples):")
132
+ for ln, frag in singles[:5]:
133
+ _dbg(f" line {ln}: {frag}")
134
+ if templates:
135
+ _dbg(f"INFO: Found {len(templates)} template literals ${'{...'} outside code fences (examples):")
136
+ for ln, frag in templates[:5]:
137
+ _dbg(f" line {ln}: {frag}")
24
138
  # Don't trim whitespace that might be significant for the tests
25
139
  console.print(Panel("Preprocessing complete", style="bold green"))
140
+ _dbg(f"Final length: {len(prompt)} characters")
141
+ _write_debug_report()
26
142
  return prompt
27
143
  except Exception as e:
28
144
  console.print(f"[bold red]Error during preprocessing:[/bold red] {str(e)}")
29
145
  console.print(Panel(traceback.format_exc(), title="Error Details", style="red"))
146
+ _dbg(f"Exception: {str(e)}")
147
+ _write_debug_report()
30
148
  return prompt
31
149
 
32
150
  def get_file_path(file_name: str) -> str:
33
- base_path = './'
34
- return os.path.join(base_path, file_name)
151
+ resolver = get_default_resolver()
152
+ resolved = resolver.resolve_include(file_name)
153
+ if not Path(file_name).is_absolute() and resolved == resolver.cwd / file_name:
154
+ return os.path.join("./", file_name)
155
+ return str(resolved)
35
156
 
36
157
  def process_backtick_includes(text: str, recursive: bool) -> str:
37
158
  # More specific pattern that doesn't match nested > characters
@@ -45,12 +166,17 @@ def process_backtick_includes(text: str, recursive: bool) -> str:
45
166
  content = file.read()
46
167
  if recursive:
47
168
  content = preprocess(content, recursive=True, double_curly_brackets=False)
169
+ _dbg(f"Included via backticks: {file_path} (len={len(content)})")
48
170
  return f"```{content}```"
49
171
  except FileNotFoundError:
50
172
  console.print(f"[bold red]Warning:[/bold red] File not found: {file_path}")
51
- return match.group(0)
173
+ _dbg(f"Missing backtick include: {file_path}")
174
+ # First pass (recursive=True): leave the tag so a later env expansion can resolve it
175
+ # Second pass (recursive=False): replace with a visible placeholder
176
+ return match.group(0) if recursive else f"```[File not found: {file_path}]```"
52
177
  except Exception as e:
53
178
  console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
179
+ _dbg(f"Error processing backtick include {file_path}: {e}")
54
180
  return f"```[Error processing include: {file_path}]```"
55
181
  prev_text = ""
56
182
  current_text = text
@@ -62,9 +188,9 @@ def process_backtick_includes(text: str, recursive: bool) -> str:
62
188
  def process_xml_tags(text: str, recursive: bool) -> str:
63
189
  text = process_pdd_tags(text)
64
190
  text = process_include_tags(text, recursive)
65
-
66
- text = process_shell_tags(text)
67
- text = process_web_tags(text)
191
+ text = process_include_many_tags(text, recursive)
192
+ text = process_shell_tags(text, recursive)
193
+ text = process_web_tags(text, recursive)
68
194
  return text
69
195
 
70
196
  def process_include_tags(text: str, recursive: bool) -> str:
@@ -73,23 +199,74 @@ def process_include_tags(text: str, recursive: bool) -> str:
73
199
  file_path = match.group(1).strip()
74
200
  try:
75
201
  full_path = get_file_path(file_path)
76
- console.print(f"Processing XML include: [cyan]{full_path}[/cyan]")
77
- with open(full_path, 'r', encoding='utf-8') as file:
78
- content = file.read()
79
- if recursive:
80
- content = preprocess(content, recursive=True, double_curly_brackets=False)
81
- return content
202
+ ext = os.path.splitext(file_path)[1].lower()
203
+ image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.heic']
204
+
205
+ if ext in image_extensions:
206
+ console.print(f"Processing image include: [cyan]{full_path}[/cyan]")
207
+ from PIL import Image
208
+ import io
209
+ import pillow_heif
210
+
211
+ pillow_heif.register_heif_opener()
212
+
213
+ MAX_DIMENSION = 1024
214
+ with open(full_path, 'rb') as file:
215
+ img = Image.open(file)
216
+ img.load() # Force loading the image data before the file closes
217
+
218
+ if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
219
+ img.thumbnail((MAX_DIMENSION, MAX_DIMENSION))
220
+ console.print(f"Image resized to {img.size}")
221
+
222
+ # Handle GIFs: convert to a static PNG of the first frame
223
+ if ext == '.gif':
224
+ img.seek(0)
225
+ img = img.convert("RGB")
226
+ img_format = 'PNG'
227
+ mime_type = 'image/png'
228
+ elif ext == '.heic':
229
+ img_format = 'JPEG'
230
+ mime_type = 'image/jpeg'
231
+ else:
232
+ img_format = 'JPEG' if ext in ['.jpg', '.jpeg'] else 'PNG'
233
+ mime_type = f'image/{img_format.lower()}'
234
+
235
+ # Save the (potentially resized and converted) image to an in-memory buffer
236
+ buffer = io.BytesIO()
237
+ img.save(buffer, format=img_format)
238
+ content = buffer.getvalue()
239
+
240
+ encoded_string = base64.b64encode(content).decode('utf-8')
241
+ return f"data:{mime_type};base64,{encoded_string}"
242
+ else:
243
+ console.print(f"Processing XML include: [cyan]{full_path}[/cyan]")
244
+ with open(full_path, 'r', encoding='utf-8') as file:
245
+ content = file.read()
246
+ if recursive:
247
+ content = preprocess(content, recursive=True, double_curly_brackets=False)
248
+ _dbg(f"Included via XML tag: {file_path} (len={len(content)})")
249
+ return content
82
250
  except FileNotFoundError:
83
251
  console.print(f"[bold red]Warning:[/bold red] File not found: {file_path}")
84
- return f"[File not found: {file_path}]"
252
+ _dbg(f"Missing XML include: {file_path}")
253
+ # First pass (recursive=True): leave the tag so a later env expansion can resolve it
254
+ # Second pass (recursive=False): replace with a visible placeholder
255
+ return match.group(0) if recursive else f"[File not found: {file_path}]"
85
256
  except Exception as e:
86
257
  console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
258
+ _dbg(f"Error processing XML include {file_path}: {e}")
87
259
  return f"[Error processing include: {file_path}]"
88
260
  prev_text = ""
89
261
  current_text = text
90
262
  while prev_text != current_text:
91
263
  prev_text = current_text
92
- current_text = re.sub(pattern, replace_include, current_text, flags=re.DOTALL)
264
+ code_spans = _extract_code_spans(current_text)
265
+ def replace_include_with_spans(match):
266
+ if _intersects_any_span(match.start(), match.end(), code_spans):
267
+ return match.group(0)
268
+ return replace_include(match)
269
+ current_text = re.sub(pattern, replace_include_with_spans, current_text, flags=re.DOTALL)
93
270
  return current_text
94
271
 
95
272
  def process_pdd_tags(text: str) -> str:
@@ -101,54 +278,120 @@ def process_pdd_tags(text: str) -> str:
101
278
  return "This is a test "
102
279
  return processed
103
280
 
104
- def process_shell_tags(text: str) -> str:
281
+ def process_shell_tags(text: str, recursive: bool) -> str:
105
282
  pattern = r'<shell>(.*?)</shell>'
106
283
  def replace_shell(match):
107
284
  command = match.group(1).strip()
285
+ if recursive:
286
+ # Defer execution until after env var expansion
287
+ return match.group(0)
108
288
  console.print(f"Executing shell command: [cyan]{escape(command)}[/cyan]")
289
+ _dbg(f"Shell tag command: {command}")
109
290
  try:
110
291
  result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
111
292
  return result.stdout
112
293
  except subprocess.CalledProcessError as e:
113
294
  error_msg = f"Command '{command}' returned non-zero exit status {e.returncode}."
114
295
  console.print(f"[bold red]Error:[/bold red] {error_msg}")
296
+ _dbg(f"Shell command error: {error_msg}")
115
297
  return f"Error: {error_msg}"
116
298
  except Exception as e:
117
299
  console.print(f"[bold red]Error executing shell command:[/bold red] {str(e)}")
300
+ _dbg(f"Shell execution exception: {e}")
118
301
  return f"[Shell execution error: {str(e)}]"
119
- return re.sub(pattern, replace_shell, text, flags=re.DOTALL)
302
+ code_spans = _extract_code_spans(text)
303
+ def replace_shell_with_spans(match):
304
+ if _intersects_any_span(match.start(), match.end(), code_spans):
305
+ return match.group(0)
306
+ return replace_shell(match)
307
+ return re.sub(pattern, replace_shell_with_spans, text, flags=re.DOTALL)
120
308
 
121
- def process_web_tags(text: str) -> str:
309
+ def process_web_tags(text: str, recursive: bool) -> str:
122
310
  pattern = r'<web>(.*?)</web>'
123
311
  def replace_web(match):
124
312
  url = match.group(1).strip()
313
+ if recursive:
314
+ # Defer network operations until after env var expansion
315
+ return match.group(0)
125
316
  console.print(f"Scraping web content from: [cyan]{url}[/cyan]")
317
+ _dbg(f"Web tag URL: {url}")
126
318
  try:
127
319
  try:
128
- from firecrawl import FirecrawlApp
320
+ from firecrawl import Firecrawl
129
321
  except ImportError:
322
+ _dbg("firecrawl import failed; package not installed")
130
323
  return f"[Error: firecrawl-py package not installed. Cannot scrape {url}]"
131
324
  api_key = os.environ.get('FIRECRAWL_API_KEY')
132
325
  if not api_key:
133
326
  console.print("[bold yellow]Warning:[/bold yellow] FIRECRAWL_API_KEY not found in environment")
327
+ _dbg("FIRECRAWL_API_KEY not set")
134
328
  return f"[Error: FIRECRAWL_API_KEY not set. Cannot scrape {url}]"
135
- app = FirecrawlApp(api_key=api_key)
136
- response = app.scrape_url(url, formats=['markdown'])
137
- if hasattr(response, 'markdown'):
329
+ app = Firecrawl(api_key=api_key)
330
+ response = app.scrape(url, formats=['markdown'])
331
+ # Handle both dict response (new API) and object response (legacy)
332
+ if isinstance(response, dict) and 'markdown' in response:
333
+ _dbg(f"Web scrape returned markdown (len={len(response['markdown'])})")
334
+ return response['markdown']
335
+ elif hasattr(response, 'markdown'):
336
+ _dbg(f"Web scrape returned markdown (len={len(response.markdown)})")
138
337
  return response.markdown
139
338
  else:
140
339
  console.print(f"[bold yellow]Warning:[/bold yellow] No markdown content returned for {url}")
340
+ _dbg("Web scrape returned no markdown content")
141
341
  return f"[No content available for {url}]"
142
342
  except Exception as e:
143
343
  console.print(f"[bold red]Error scraping web content:[/bold red] {str(e)}")
344
+ _dbg(f"Web scraping exception: {e}")
144
345
  return f"[Web scraping error: {str(e)}]"
145
- return re.sub(pattern, replace_web, text, flags=re.DOTALL)
346
+ code_spans = _extract_code_spans(text)
347
+ def replace_web_with_spans(match):
348
+ if _intersects_any_span(match.start(), match.end(), code_spans):
349
+ return match.group(0)
350
+ return replace_web(match)
351
+ return re.sub(pattern, replace_web_with_spans, text, flags=re.DOTALL)
352
+
353
+ def process_include_many_tags(text: str, recursive: bool) -> str:
354
+ """Process <include-many> blocks whose inner content is a comma- or newline-separated
355
+ list of file paths (typically provided via variables after env expansion)."""
356
+ pattern = r'<include-many>(.*?)</include-many>'
357
+ def replace_many(match):
358
+ inner = match.group(1)
359
+ if recursive:
360
+ # Wait for env expansion to materialize the list
361
+ return match.group(0)
362
+ # Split by newlines or commas
363
+ raw_items = [s.strip() for part in inner.split('\n') for s in part.split(',')]
364
+ paths = [p for p in raw_items if p]
365
+ contents: list[str] = []
366
+ for p in paths:
367
+ try:
368
+ full_path = get_file_path(p)
369
+ console.print(f"Including (many): [cyan]{full_path}[/cyan]")
370
+ with open(full_path, 'r', encoding='utf-8') as fh:
371
+ contents.append(fh.read())
372
+ _dbg(f"Included (many): {p}")
373
+ except FileNotFoundError:
374
+ console.print(f"[bold red]Warning:[/bold red] File not found: {p}")
375
+ _dbg(f"Missing include-many: {p}")
376
+ contents.append(f"[File not found: {p}]")
377
+ except Exception as e:
378
+ console.print(f"[bold red]Error processing include-many:[/bold red] {str(e)}")
379
+ _dbg(f"Error processing include-many {p}: {e}")
380
+ contents.append(f"[Error processing include: {p}]")
381
+ return "\n".join(contents)
382
+ code_spans = _extract_code_spans(text)
383
+ def replace_many_with_spans(match):
384
+ if _intersects_any_span(match.start(), match.end(), code_spans):
385
+ return match.group(0)
386
+ return replace_many(match)
387
+ return re.sub(pattern, replace_many_with_spans, text, flags=re.DOTALL)
146
388
 
147
389
  def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
148
390
  if exclude_keys is None:
149
391
  exclude_keys = []
150
392
 
151
393
  console.print("Doubling curly brackets...")
394
+ _dbg("double_curly invoked")
152
395
 
153
396
  # Special case handling for specific test patterns
154
397
  if "Mix of {excluded{inner}} nesting" in text and "excluded" in exclude_keys:
@@ -172,6 +415,14 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
172
415
  "2": {{"id": "2", "name": "Resource Two"}}
173
416
  }}"""
174
417
 
418
+ # Protect ${IDENT} placeholders so we can safely double braces, then restore
419
+ # them as ${{IDENT}} to avoid PromptTemplate interpreting {IDENT}.
420
+ protected_vars: List[str] = []
421
+ def _protect_var(m):
422
+ protected_vars.append(m.group(0))
423
+ return f"__PDD_VAR_{len(protected_vars)-1}__"
424
+ text = re.sub(r"\$\{[A-Za-z_][A-Za-z0-9_]*\}", _protect_var, text)
425
+
175
426
  # First, protect any existing double curly braces
176
427
  text = re.sub(r'\{\{([^{}]*)\}\}', r'__ALREADY_DOUBLED__\1__END_ALREADY__', text)
177
428
 
@@ -188,6 +439,24 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
188
439
 
189
440
  # Restore already doubled brackets
190
441
  text = re.sub(r'__ALREADY_DOUBLED__(.*?)__END_ALREADY__', r'{{\1}}', text)
442
+
443
+ # Restore protected ${IDENT} placeholders as ${{IDENT}} so single braces
444
+ # don't leak into PromptTemplate formatting. This is safe for JS template
445
+ # literals and prevents missing-key errors in later formatting steps.
446
+ def _restore_var(m):
447
+ idx = int(m.group(1))
448
+ if 0 <= idx < len(protected_vars):
449
+ original = protected_vars[idx] # e.g., ${FOO}
450
+ try:
451
+ inner = re.match(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", original)
452
+ if inner:
453
+ # Build as concatenation to avoid f-string brace escaping confusion
454
+ return "${{" + inner.group(1) + "}}" # -> ${{FOO}}
455
+ except Exception:
456
+ pass
457
+ return original
458
+ return m.group(0)
459
+ text = re.sub(r"__PDD_VAR_(\d+)__", _restore_var, text)
191
460
 
192
461
  # Special handling for code blocks
193
462
  code_block_pattern = r'```([\w\s]*)\n([\s\S]*?)```'
@@ -213,4 +482,4 @@ def double_curly(text: str, exclude_keys: Optional[List[str]] = None) -> str:
213
482
  # Process code blocks
214
483
  text = re.sub(code_block_pattern, process_code_block, text, flags=re.DOTALL)
215
484
 
216
- return text
485
+ return text
pdd/preprocess_main.py CHANGED
@@ -1,15 +1,23 @@
1
1
  import csv
2
2
  import sys
3
+ from pathlib import Path
3
4
  from typing import Tuple, Optional
4
5
  import click
5
6
  from rich import print as rprint
6
7
 
8
+ from .config_resolution import resolve_effective_config
7
9
  from .construct_paths import construct_paths
8
10
  from .preprocess import preprocess
9
11
  from .xml_tagger import xml_tagger
10
- from . import DEFAULT_TIME, DEFAULT_STRENGTH
12
+ from .architecture_sync import (
13
+ get_architecture_entry_for_prompt,
14
+ generate_tags_from_architecture,
15
+ has_pdd_tags,
16
+ )
17
+
18
+
11
19
  def preprocess_main(
12
- ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list
20
+ ctx: click.Context, prompt_file: str, output: Optional[str], xml: bool, recursive: bool, double: bool, exclude: list, pdd_tags: bool = False
13
21
  ) -> Tuple[str, float, str]:
14
22
  """
15
23
  CLI wrapper for preprocessing prompts.
@@ -22,6 +30,7 @@ def preprocess_main(
22
30
  :param double: If True, curly brackets will be doubled.
23
31
  :param exclude: List of keys to exclude from curly bracket doubling.
24
32
  :return: Tuple containing the preprocessed prompt, total cost, and model name used.
33
+ :param pdd_tags: If True, inject PDD metadata tags from architecture.json.
25
34
  """
26
35
  try:
27
36
  # Construct file paths
@@ -33,17 +42,41 @@ def preprocess_main(
33
42
  quiet=ctx.obj.get("quiet", False),
34
43
  command="preprocess",
35
44
  command_options=command_options,
45
+ context_override=ctx.obj.get('context')
36
46
  )
37
47
 
38
48
  # Load prompt file
39
49
  prompt = input_strings["prompt_file"]
40
50
 
51
+ # Inject PDD metadata tags from architecture.json if requested
52
+ pdd_tags_injected = False
53
+ if pdd_tags:
54
+ prompt_filename = Path(prompt_file).name
55
+ arch_entry = get_architecture_entry_for_prompt(prompt_filename)
56
+
57
+ if arch_entry:
58
+ if has_pdd_tags(prompt):
59
+ if not ctx.obj.get("quiet", False):
60
+ rprint(f"[yellow]Prompt already has PDD tags, skipping injection.[/yellow]")
61
+ else:
62
+ generated_tags = generate_tags_from_architecture(arch_entry)
63
+ if generated_tags:
64
+ prompt = generated_tags + '\n\n' + prompt
65
+ pdd_tags_injected = True
66
+ if not ctx.obj.get("quiet", False):
67
+ rprint(f"[green]Injected PDD tags from architecture.json[/green]")
68
+ else:
69
+ if not ctx.obj.get("quiet", False):
70
+ rprint(f"[yellow]No architecture entry found for '{prompt_filename}', skipping PDD tags.[/yellow]")
71
+
41
72
  if xml:
42
73
  # Use xml_tagger to add XML delimiters
43
- strength = ctx.obj.get("strength", DEFAULT_STRENGTH)
44
- temperature = ctx.obj.get("temperature", 0.0)
74
+ # Use centralized config resolution with proper priority: CLI > pddrc > defaults
75
+ effective_config = resolve_effective_config(ctx, resolved_config)
76
+ strength = effective_config["strength"]
77
+ temperature = effective_config["temperature"]
78
+ time = effective_config["time"]
45
79
  verbose = ctx.obj.get("verbose", False)
46
- time = ctx.obj.get("time", DEFAULT_TIME)
47
80
  xml_tagged, total_cost, model_name = xml_tagger(
48
81
  prompt,
49
82
  strength,
@@ -64,6 +97,8 @@ def preprocess_main(
64
97
  # Provide user feedback
65
98
  if not ctx.obj.get("quiet", False):
66
99
  rprint("[bold green]Prompt preprocessing completed successfully.[/bold green]")
100
+ if pdd_tags_injected:
101
+ rprint("[bold]PDD metadata tags: injected from architecture.json[/bold]")
67
102
  if xml:
68
103
  rprint(f"[bold]XML Tagging used: {model_name}[/bold]")
69
104
  else:
@@ -76,4 +111,4 @@ def preprocess_main(
76
111
  except Exception as e:
77
112
  if not ctx.obj.get("quiet", False):
78
113
  rprint(f"[bold red]Error during preprocessing:[/bold red] {e}")
79
- sys.exit(1)
114
+ sys.exit(1)