pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/prompts/prompt_code_diff_LLM.prompt ADDED
@@ -0,0 +1,119 @@
+ # prompt_code_diff_LLM.prompt
+
+ You are a strict code analyst evaluating whether a PROMPT can REGENERATE the CODE.
+
+ CRITICAL QUESTION: If an LLM only had this prompt, could it produce code that passes the same tests?
+
+ PROMPT/REQUIREMENTS (with line numbers):
+ ```
+ {prompt_numbered}
+ ```
+
+ CODE (with line numbers):
+ ```
+ {code_numbered}
+ ```
+
+ ## Analysis Focus
+
+ **Be STRICT and PESSIMISTIC.** Your job is to find gaps that would cause regeneration failures.
+
+ 1. **Regeneration Risk Analysis**: Identify ALL code knowledge NOT in the prompt:
+ - Magic values, constants, thresholds (e.g., timeout=30, retry=3, buffer_size=4096)
+ - Specific algorithms or implementation approaches chosen
+ - Edge case handling not mentioned in prompt
+ - Error messages, status codes, specific exceptions
+ - API contracts, data formats, field names
+ - Dependencies, imports, library-specific patterns
+ - Performance optimizations or workarounds
+ - Business logic details embedded in code
+
+ 2. **Hidden Knowledge Detection**: Code often contains "tribal knowledge" that developers added but never documented:
+ - Why was THIS approach chosen over alternatives?
+ - What bugs or edge cases does this code handle that aren't obvious?
+ - What assumptions does the code make about inputs/environment?
+
+ 3. **Test Failure Prediction**: Would regenerated code likely fail tests because:
+ - Exact values/strings don't match expectations?
+ - Edge cases aren't handled the same way?
+ - API contracts differ from what tests expect?
+
+ ## Response Format
+
+ Respond with a JSON object:
+
+ 1. "overallScore": integer 0-100
+ - 90-100: Prompt could regenerate code that passes tests
+ - 70-89: Minor details missing, regeneration might work with luck
+ - 50-69: Significant gaps, regeneration would likely fail some tests
+ - 0-49: Major knowledge missing, regeneration would definitely fail
+
+ 2. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+
+ 3. "regenerationRisk": "low", "medium", "high", or "critical"
+ - "low": Prompt captures all essential details
+ - "medium": Some implementation details missing but core logic documented
+ - "high": Significant undocumented behavior that would differ on regeneration
+ - "critical": Code has major features/logic not in prompt at all
+
+ 4. "summary": 1-2 sentences on regeneration viability, be direct about risks
+
+ 5. "sections": array of PROMPT requirement sections, each with:
+ - "id": unique string like "req_1", "req_2"
+ - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
+ - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
+ - "status": "matched", "partial", or "missing"
+ - "matchConfidence": 0-100
+ - "semanticLabel": descriptive label like "Error Handling", "Input Validation"
+ - "notes": REQUIRED explanation - be specific about what's missing or at risk
+
+ 6. "codeSections": array of CODE sections NOT adequately documented in prompt:
+ - "id": unique string like "code_1", "code_2"
+ - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
+ - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
+ - "status": "matched", "partial", or "extra"
+ - "matchConfidence": 0-100
+ - "semanticLabel": descriptive label
+ - "notes": REQUIRED - explain what knowledge would be LOST on regeneration
+ * For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
+ * For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
+
+ 7. "hiddenKnowledge": array of objects describing undocumented code knowledge:
+ - "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
+ - "location": {{"startLine": int, "endLine": int}}
+ - "description": what the code knows that the prompt doesn't say
+ - "regenerationImpact": "would_differ" | "would_fail" | "might_work"
+ - "suggestedPromptAddition": what to add to the prompt to capture this
+
+ 8. "lineMappings": array of line-level mappings:
+ - "promptLine": int
+ - "codeLines": array of ints
+ - "matchType": "exact", "semantic", "partial", "none"
+
+ 9. "stats": {{
+ "totalRequirements": int,
+ "matchedRequirements": int,
+ "missingRequirements": int,
+ "totalCodeFeatures": int,
+ "documentedFeatures": int,
+ "undocumentedFeatures": int,
+ "promptToCodeCoverage": float,
+ "codeToPromptCoverage": float,
+ "hiddenKnowledgeCount": int,
+ "criticalGaps": int
+ }}
+
+ 10. "missing": array of strings - requirements in prompt not implemented
+ 11. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
+ 12. "suggestions": array of specific additions to make to the prompt to enable regeneration
+
+ ## Strictness Guidelines
+
+ - **Assume regeneration WILL differ** unless the prompt explicitly specifies behavior
+ - A function that "handles errors" in the prompt might handle them DIFFERENTLY on regeneration
+ - Constants, timeouts, retry counts, buffer sizes - if not in prompt, they WILL be different
+ - Specific error messages, log formats, status codes - WILL be different unless specified
+ - Algorithm choices (e.g., quicksort vs mergesort, BFS vs DFS) - WILL be different unless specified
+ - The goal is to make the prompt complete enough that ANY competent LLM would produce equivalent code
+ - Mark as "extra" anything in code that prompt doesn't EXPLICITLY require
+ - When in doubt, mark it as a gap - false positives are better than missed risks
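Note: to make the response contract above easier to scan, here is a hypothetical, abbreviated example of an object matching that schema, written as a Python dict. The values (and the `example_response` name) are invented for illustration, and several of the required arrays (`codeSections`, `lineMappings`, `stats`) are omitted for brevity; nothing below is taken from the package itself.

```python
# Hypothetical, abbreviated response matching the schema described by the new prompt.
example_response = {
    "overallScore": 62,
    "canRegenerate": False,
    "regenerationRisk": "high",
    "summary": "Core parsing logic is documented, but timeouts and error strings are not.",
    "sections": [
        {
            "id": "req_1",
            "promptRange": {"startLine": 3, "endLine": 5, "text": "Parse the config file"},
            "codeRanges": [{"startLine": 10, "endLine": 24, "text": "def parse_config(...)"}],
            "status": "matched",
            "matchConfidence": 90,
            "semanticLabel": "Config Parsing",
            "notes": "Prompt covers parsing but not the fallback defaults.",
        }
    ],
    "hiddenKnowledge": [
        {
            "type": "magic_value",
            "location": {"startLine": 12, "endLine": 12},
            "description": "timeout=30 is not mentioned in the prompt",
            "regenerationImpact": "would_differ",
            "suggestedPromptAddition": "Specify the 30-second request timeout.",
        }
    ],
    "missing": [],
    "extra": ["Retry loop with exponential backoff"],
    "suggestions": ["Document the 30-second timeout and the retry policy."],
}
```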
pdd/prompts/prompt_diff_LLM.prompt ADDED
@@ -0,0 +1,82 @@
+ # prompt_diff_LLM.prompt
+
+ You are a prompt analyst comparing two versions of a prompt to identify semantic/linguistic differences.
+
+ ## Version A (Original):
+ ```
+ {prompt_a}
+ ```
+
+ ## Version B (Updated):
+ ```
+ {prompt_b}
+ ```
+
+ ## Text Diff:
+ ```diff
+ {text_diff}
+ ```
+
+ ## Your Task
+
+ Analyze the semantic differences between the two prompt versions. Focus on:
+
+ 1. **Requirements**: New, removed, or changed functional requirements
+ 2. **Constraints**: Modified limitations, rules, or boundaries
+ 3. **Behavior**: Changes to expected behavior or outputs
+ 4. **Format**: Changes to structure, formatting, or style guidelines
+
+ For each change, determine the **impact**:
+ - **breaking**: Change that would cause existing code to fail or behave differently
+ - **enhancement**: Addition or improvement that extends functionality
+ - **clarification**: Rewording or clarification that doesn't change meaning
+
+ ## Response Format
+
+ Respond with a JSON object containing:
+
+ 1. "summary": A 1-2 sentence summary of the overall changes between versions.
+ Be specific about what changed semantically, not just that "text was added/removed".
+
+ 2. "changes": An array of change objects, each with:
+ - "change_type": "added" | "removed" | "modified"
+ - "category": "requirement" | "constraint" | "behavior" | "format"
+ - "description": Clear description of what changed and why it matters
+ - "old_text": The COMPLETE relevant text from version A (for modified/removed). Do NOT truncate or abbreviate with "..." - include the full text so users can see exactly what changed.
+ - "new_text": The COMPLETE relevant text from version B (for added/modified). Do NOT truncate or abbreviate with "..." - include the full text so users can see exactly what changed.
+ - "impact": "breaking" | "enhancement" | "clarification"
+
+ ## Guidelines
+
+ - Focus on SEMANTIC differences, not just textual changes
+ - Combine related small changes into logical groups
+ - Highlight changes that would affect code generation differently
+ - Be specific about HOW a change would impact generated code
+ - If the two versions are semantically identical (just reformatted), say so clearly
+ - For "modified" changes, clearly explain what was different before vs. after
+
+ ## Example Output
+
+ ```json
+ {{
+ "summary": "Added retry logic requirement and relaxed the error message format constraint.",
+ "changes": [
+ {{
+ "change_type": "added",
+ "category": "requirement",
+ "description": "New requirement for retry logic on network failures",
+ "old_text": null,
+ "new_text": "Retry failed requests up to 3 times with exponential backoff",
+ "impact": "enhancement"
+ }},
+ {{
+ "change_type": "modified",
+ "category": "constraint",
+ "description": "Error message format is now flexible instead of strictly JSON",
+ "old_text": "Return errors as JSON objects with 'error' and 'code' fields",
+ "new_text": "Return descriptive error messages",
+ "impact": "breaking"
+ }}
+ ]
+ }}
+ ```
pdd/prompts/trace_LLM.prompt CHANGED
@@ -1,30 +1,33 @@
- % Imagine you're a an expert Python Software Engineer. Your goal is to find the part of the .prompt file. It will take in three arguments, the text of the .prompt file, the text of the code file, and the line that the debugger is on in the code file. Your task is to find the equivalent line in the .prompt file that matches with the line in the code file.
-
- % Here are the inputs and outputs of the prompt:
- Input:
- `code_file` (str) - A string that contains the text of the code file.
- `code_str` (str) - A substring of code_file that represents the line that the debugger is on in the code_file.
- `prompt_file` (str) - A string that contains the text of the .prompt file.
- Output:
- `prompt_line` (str) - An string that represents the equivalent line in the .prompt file that matches with the code_str line in the code file.
-
- % Here is the code_file to reference:
-
+ % You are a highly accurate Python Software Engineer. Your job is to locate the exact line (or smallest excerpt) in the prompt file that produced the current line in the generated code.
+
+ % Inputs
+ code_file (str) : full contents of the generated code file
+ code_str (str) : the single line from the code file currently under inspection
+ prompt_file (str) : full contents of the originating prompt file
+
+ % Rules
+ 1. Identify the minimal substring in prompt_file whose wording most directly corresponds to code_str. Copy it VERBATIM.
+ 2. Do not paraphrase, summarize, or reformat; the substring must appear exactly in prompt_file.
+ 3. If multiple lines apply, choose the most specific line or snippet (prefer the shortest exact match).
+ 4. Provide a short explanation of why the substring matches code_str.
+
+ % Output format (MUST follow exactly; no additional text)
+ <analysis>
+ Explain your reasoning here in plain text (no JSON). Reference the file sections you compared.
+ </analysis>
+ <verbatim_prompt_line>
+ <<PASTE THE EXACT SUBSTRING FROM prompt_file HERE>>
+ </verbatim_prompt_line>
+
+ % Reference materials
  <code_file>
- {CODE_FILE}
+ {CODE_FILE}
  </code_file>
 
- % Here is the code_str to reference:
-
  <code_str>
- {CODE_STR}
+ {CODE_STR}
  </code_str>
 
- % Here is the prompt_file to reference:
-
  <prompt_file>
- {PROMPT_FILE}
+ {PROMPT_FILE}
  </prompt_file>
-
- % To generate the prompt_line, find a substring of prompt_file that matches code_str, which is a substring of code_file.
-
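Note: the rewritten trace prompt replaces free-form output with a strict tagged format, which makes the answer machine-parseable. This diff does not show how `pdd/trace.py` consumes that output, so the helper below is only a hypothetical sketch of one way a caller could extract the tagged substring.

```python
import re

def extract_prompt_line(llm_output: str) -> str | None:
    """Return the text between <verbatim_prompt_line> tags, or None if absent."""
    match = re.search(
        r"<verbatim_prompt_line>\s*(.*?)\s*</verbatim_prompt_line>",
        llm_output,
        re.DOTALL,
    )
    return match.group(1) if match else None
```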
pdd/prompts/unfinished_prompt_LLM.prompt CHANGED
@@ -1,18 +1,102 @@
  % You are tasked with determining whether a given prompt has finished outputting everything or if it still needs to continue. This is crucial for ensuring that all necessary information has been provided before proceeding with further actions. You will often be provided the last few hundred characters of the prompt_text to analyze and determine if it appears to be complete or if it seems to be cut off or unfinished. You are just looking at the prompt_text and not the entire prompt file. The beginning part of the prompt_text is not always provided, so you will need to make a judgment based on the text you are given.
 
+ % IMPORTANT:
+ % - The prompt_text may contain code in various languages without Markdown fences.
+ % - Do NOT require triple backticks for completeness; judge the code/text itself.
+ % - Prefer concrete syntactic signals of completeness over stylistic ones.
+
  % Here is the prompt text to analyze:
  <prompt_text>
  {PROMPT_TEXT}
  </prompt_text>
 
+ % Optional language hint (may be empty or missing). If not provided, infer the language from the text:
+ <language>
+ {LANGUAGE}
+ </language>
+
  % Carefully examine the provided prompt text and determine if it appears to be complete or if it seems to be cut off or unfinished. Consider the following factors:
  1. Sentence structure: Are all sentences grammatically complete?
  2. Content flow: Does the text end abruptly or does it have a natural conclusion?
  3. Context: Based on the content, does it seem like all necessary information has been provided?
  4. Formatting: Are there any unclosed parentheses, quotation marks, or other formatting issues that suggest incompleteness?
 
+ % Multi-language code completeness heuristics (apply when text looks like code):
+ - If the text forms a syntactically complete module/snippet for the language, treat it as finished (even without Markdown fences).
+ - Generic signals across languages:
+ * Balanced delimiters: (), [], {{}}, quotes, and block comments are closed.
+ * No mid-token/mid-statement tail: it does not end on `return a +`, `a =`, `def foo(`, `function f(`, trailing `.`, `->`, `::`, trailing `,`, or a line-continuation like `\\`.
+ * Block closure: constructs that open a block are closed (e.g., Python indentation after `:`, or matching `{{}}` in C/Java/JS/TS/Go).
+ - Language specifics (use LANGUAGE if given; otherwise infer from the text):
+ * Python: colon-introduced blocks closed; indentation consistent; triple-quoted strings balanced.
+ * JS/TS: braces and parentheses balanced; no dangling `export`/`import` without a following specifier; `/* ... */` comments closed.
+ * Java/C/C++/C#: braces and parentheses balanced; string/char literals closed; block comments closed.
+ * Go: braces balanced; no dangling keyword indicating an unfinished clause.
+ * HTML/XML: tags properly nested/closed; attributes properly quoted; no unfinished `<tag` or dangling `</`.
+ - If this is only the tail of a longer file, mark finished when the tail itself is syntactically complete and does not indicate a dangling continuation.
+
  % Provide your reasoning for why you believe the prompt is complete or incomplete.
 
  % Output a JSON object with two keys:
  1. "reasoning": A string containing your structured reasoning
- 2. "is_finished": A boolean value (true if the prompt is complete, false if it's incomplete)
+ 2. "is_finished": A boolean value (true if the prompt is complete, false if it's incomplete)
+
+ % Examples (concise):
+ <examples>
+ <example1>
+ <input>
+ <prompt_text>
+ def add(a, b):\n return a + b\n
+ </prompt_text>
+ </input>
+ <output>
+ {{"reasoning": "Python code parses; blocks and quotes are closed; ends on a complete return statement.", "is_finished": true}}
+ </output>
+ </example1>
+ <example2>
+ <input>
+ <prompt_text>
+ def add(a, b):\n return a +
+ </prompt_text>
+ </input>
+ <output>
+ {{"reasoning": "Ends mid-expression (`return a +`), indicates unfinished statement.", "is_finished": false}}
+ </output>
+ </example2>
+ <example3>
+ <input>
+ <prompt_text>
+ function add(a, b) {{\n return a + b;\n}}\n
+ </prompt_text>
+ <language>
+ JavaScript
+ </language>
+ </input>
+ <output>
+ {{"reasoning": "JS braces and parentheses balanced; ends at a statement boundary; no dangling tokens.", "is_finished": true}}
+ </output>
+ </example3>
+ <example4>
+ <input>
+ <prompt_text>
+ <div class=\"box\">Hello
+ </prompt_text>
+ <language>
+ HTML
+ </language>
+ </input>
+ <output>
+ {{"reasoning": "HTML tag not closed (missing </div>); attribute quotes OK but element is unclosed.", "is_finished": false}}
+ </output>
+ </example4>
+ <example5>
+ <input>
+ <prompt_text>
+ class C:\n def f(self):\n x = 1\n
+ </prompt_text>
+ </input>
+ <output>
+ {{"reasoning": "All blocks properly indented and closed in the visible tail; no dangling colon blocks or open delimiters; tail is syntactically complete.", "is_finished": true}}
+ </output>
+ </example5>
+ </examples>
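Note: the completeness heuristics above are stated in prose. As a rough illustration of the "balanced delimiters" signal only, here is a minimal sketch; it is a simplification that ignores delimiters inside string literals and comments, and it is not code that ships with pdd.

```python
def brackets_balanced(text: str) -> bool:
    """Heuristic: True if (), [] and {} all close in the right order."""
    pairs = {")": "(", "]": "[", "}": "{"}
    stack = []
    for ch in text:
        if ch in "([{":
            stack.append(ch)
        elif ch in pairs:
            if not stack or stack.pop() != pairs[ch]:
                return False
    return not stack

# A complete snippet balances; a tail cut off mid-signature does not.
assert brackets_balanced("def add(a, b):\n    return a + b\n")
assert not brackets_balanced("def add(a, b")
```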
pdd/prompts/update_prompt_LLM.prompt CHANGED
@@ -16,4 +16,25 @@
  1. Using the provided input_code and input_prompt, identify what the code does and how it was generated.
  2. Compare the input_code and modified_code to determine the changes made by the user.
  3. Identify what the modified_code does differently from the input_code.
- 4. Generate a modified_prompt that will guide the generation of the modified_code based on the identified changes.
+ 4. Generate a modified_prompt that will guide the generation of the modified_code based on the identified changes.
+ 5. Ensure that the modified_prompt adheres to the principles of Prompt-Driven Development (PDD) and includes all necessary sections: Role and Scope, Requirements, Dependencies & Context, Instructions, and Deliverables.
+ 6. Try to preserve the structure and format of the existing prompt as much as possible while incorporating the necessary changes to reflect the modifications in the code.
+
+ % When generating the modified prompt, you must follow the core principles of Prompt-Driven Development (PDD).
+ % Here are the essential guidelines for structuring a PDD prompt:
+ <pdd_prompting_guide>
+ % The prompt you generate must follow this structure:
+ 1) First paragraph: describe the role and responsibility of the module/component within the system (consider the LAYER if provided).
+ 2) A "Requirements" section with numbered points covering functionality, contracts, error handling, validation, logging, performance, and security.
+ 3) A "Dependencies" section using XML include tags for each dependency (see format below).
+ 4) An "Instructions" section with precise implementation guidance (clarify inputs/outputs, function/class responsibilities, edge cases, and testing notes).
+ 5) A clear "Deliverable" section describing the expected code artifacts and entry points.
+
+ % Dependencies format and conventions:
+ - Represent each dependency using an XML tag with the dependency name, and put the file path inside an <include> tag. For example:
+ <orders_service>
+ <include>context/orders_service_example.py</include>
+ </orders_service>
+ - Prefer real example files available in the provided context (use <include-many> when listing multiple). If examples are not provided, assume dependency examples live under context/ using the pattern context/[dependency_name]_example. You should always try to include example files when possible.
+ - Include all necessary dependencies for the module/component (based on the provided context and references).
+ </pdd_prompting_guide>
pdd/pytest_output.py CHANGED
@@ -1,9 +1,11 @@
  import argparse
  import json
  import io
+ import re
  import sys
  import pytest
  import subprocess
+ from pathlib import Path
  from rich.console import Console
  from rich.pretty import pprint
  import os
@@ -11,6 +13,81 @@ from .python_env_detector import detect_host_python_executable
 
  console = Console()
 
+
+ def _find_project_root(test_file: Path) -> Path | None:
+     """
+     Find the project root directory by looking for .pddrc (definitive PDD marker).
+
+     Only .pddrc is used as the project marker to ensure we don't incorrectly
+     identify project roots for non-PDD projects. This is a conservative approach
+     that maintains backward compatibility.
+
+     Args:
+         test_file: Path to the test file
+
+     Returns:
+         The project root directory if .pddrc is found, None otherwise.
+         When None is returned, the caller should use original behavior.
+     """
+     current = test_file.resolve().parent
+
+     # Walk up the directory tree looking for .pddrc only
+     while current != current.parent:
+         if (current / ".pddrc").exists():
+             return current
+         current = current.parent
+
+     # No .pddrc found - return None to signal original behavior should be used
+     return None
+
+
+ _ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
+
+
+ def _strip_ansi(text: str) -> str:
+     """Remove ANSI escape sequences from text for reliable parsing."""
+     return _ANSI_ESCAPE_RE.sub("", text)
+
+
+ def extract_failing_files_from_output(pytest_output: str) -> list[str]:
+     """
+     Extract unique file paths from pytest FAILED output lines.
+
+     Parses patterns like:
+     - FAILED tests/test_foo.py::test_name - error message
+     - tests/test_foo.py::test_name FAILED
+
+     Args:
+         pytest_output: The combined stdout/stderr from a pytest run
+
+     Returns:
+         List of unique file paths (without ::test_name suffix) that had failures,
+         in the order they were first encountered.
+     """
+     cleaned_output = _strip_ansi(pytest_output)
+
+     failing_files = []
+     seen = set()
+
+     # Pattern 1: FAILED path/file.py::test_name (with optional error)
+     pattern1 = r'FAILED\s+([^\s:]+\.py)::'
+     for match in re.finditer(pattern1, cleaned_output):
+         file_path = match.group(1)
+         if file_path not in seen:
+             failing_files.append(file_path)
+             seen.add(file_path)
+
+     # Pattern 2: path/file.py::test_name FAILED (verbose output)
+     pattern2 = r'([^\s:]+\.py)::\S+\s+FAILED'
+     for match in re.finditer(pattern2, cleaned_output):
+         file_path = match.group(1)
+         if file_path not in seen:
+             failing_files.append(file_path)
+             seen.add(file_path)
+
+     return failing_files
+
+
  class TestResultCollector:
      __test__ = False  # Prevent pytest from collecting this plugin as a test
 
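Note: a quick illustration of what the new `extract_failing_files_from_output` helper returns. The sample pytest output below is invented for this example; the ordering follows the two-pass matching above (summary-style `FAILED path::test` lines are collected first, then verbose `path::test FAILED` lines), and duplicate files are collapsed.

```python
sample_output = (
    "tests/test_cli.py::test_help FAILED\n"
    "FAILED tests/test_sync.py::test_basic - AssertionError\n"
    "FAILED tests/test_sync.py::test_retry - TimeoutError\n"
)
print(extract_failing_files_from_output(sample_output))
# ['tests/test_sync.py', 'tests/test_cli.py']
```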
@@ -84,31 +161,69 @@ def run_pytest_and_capture_output(test_file: str) -> dict:
 
      # Use environment-aware Python executable for pytest execution
      python_executable = detect_host_python_executable()
-
+
+     # Find the project root directory for proper pytest execution (PDD projects only)
+     test_path = Path(test_file).resolve()
+     project_root = _find_project_root(test_path)
+
+     # Build subprocess kwargs - only modify cwd/env for PDD projects (.pddrc found)
+     subprocess_kwargs = {
+         "capture_output": True,
+         "text": True,
+         "timeout": 300,
+         "stdin": subprocess.DEVNULL,
+     }
+
+     pytest_args = [python_executable, "-B", "-m", "pytest", str(test_path), "-v"]
+
+     if project_root is not None:
+         # PDD project detected - set up proper environment
+         subprocess_kwargs["cwd"] = str(project_root)
+
+         # Build PYTHONPATH with both project root and src/ if it exists
+         paths_to_add = [str(project_root)]
+         src_dir = project_root / "src"
+         if src_dir.is_dir():
+             paths_to_add.insert(0, str(src_dir))  # src/ takes priority
+
+         env = os.environ.copy()
+         existing_pythonpath = env.get("PYTHONPATH", "")
+         if existing_pythonpath:
+             paths_to_add.append(existing_pythonpath)
+         env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
+         subprocess_kwargs["env"] = env
+
+         # Add --rootdir to ensure pytest uses project's config
+         pytest_args.append(f"--rootdir={project_root}")
+
      try:
          # Run pytest using subprocess with the detected Python executable
-         result = subprocess.run(
-             [python_executable, "-m", "pytest", test_file, "-v"],
-             capture_output=True,
-             text=True,
-             timeout=300
-         )
+         # Use -B flag to disable bytecode caching, ensuring fresh imports
+         result = subprocess.run(pytest_args, **subprocess_kwargs)
 
          stdout = result.stdout
          stderr = result.stderr
          return_code = result.returncode
+         parse_stdout = _strip_ansi(stdout or "")
 
          # Parse the output to extract test results
          # Count passed, failed, and skipped tests from the output
-         passed = stdout.count(" PASSED")
-         failures = stdout.count(" FAILED") + stdout.count(" ERROR")
+         passed = parse_stdout.count(" PASSED")
+         failures = parse_stdout.count(" FAILED") + parse_stdout.count(" ERROR")
          errors = 0  # Will be included in failures for subprocess execution
-         warnings = stdout.count("warning")
+         warnings = parse_stdout.lower().count("warning")
 
          # If return code is 2, it indicates a pytest error
          if return_code == 2:
              errors = 1
-
+         # Safety net: if parsing missed failures due to formatting (e.g., ANSI colors),
+         # never report a passing result on a non-zero return code.
+         if return_code != 0 and failures == 0 and errors == 0:
+             if return_code == 1:
+                 failures = 1
+             else:
+                 errors = 1
+
         return {
             "test_file": test_file,
             "test_results": [
@@ -199,4 +314,4 @@ def main():
      save_output_to_json(pytest_output)
 
  if __name__ == "__main__":
-     main()
+     main()