pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,116 @@
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to verify that the unit tests detect the bugs.
+
+ % Context
+
+ You are working on step 7 of 9 in an agentic e2e fix workflow. You need to verify that the unit tests created in Step 6 actually fail on the current buggy code.
+
+ % Inputs
+
+ - GitHub Issue URL: {issue_url}
+ - Repository: {repo_owner}/{repo_name}
+ - Issue Number: {issue_number}
+ - Cycle: {cycle_number}/{max_cycles}
+
+ % Issue Content
+ <issue_content>
+ {issue_content}
+ </issue_content>
+
+ % Previous Step Outputs
+ <step1_output>
+ {step1_output}
+ </step1_output>
+
+ <step2_output>
+ {step2_output}
+ </step2_output>
+
+ <step3_output>
+ {step3_output}
+ </step3_output>
+
+ <step4_output>
+ {step4_output}
+ </step4_output>
+
+ <step5_output>
+ {step5_output}
+ </step5_output>
+
+ <step6_output>
+ {step6_output}
+ </step6_output>
+
+ % Your Task
+
+ 1. **Run the newly created unit tests**
+    - Parse Step 6 output for FILES_CREATED/FILES_MODIFIED
+    - Execute pytest with the test files from Step 6's FILES_CREATED output
+    - Capture the test output
+
+ 2. **Verify tests fail as expected**
+    - Each test should FAIL on the current buggy code
+    - The failure message should relate to the bug being detected
+    - If a test passes, it's not detecting the bug correctly
+
+ 3. **Handle unexpected results**
+    - If tests pass instead of fail: the test is not detecting the bug
+    - If tests error (rather than failing an assertion): there is a test setup issue
+    - Report issues and suggest fixes
+
+ % Output
+
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
+
+ ```
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
+ ```
+
+ **If tests properly detect bugs:**
+ ```markdown
+ ## Step 7: Verify Tests Detect Bugs (Cycle {cycle_number})
+
+ **Status:** Tests Correctly Detect Bugs
+
+ ### Test Results
+ | Test | Status | Error |
+ |------|--------|-------|
+ | `test_{{name}}` | FAIL (expected) | {{assertion_error}} |
+
+ ### Verification Summary
+ - {{N}} tests correctly fail on buggy code
+ - Tests will pass once bugs are fixed via `pdd fix`
+
+ ---
+ *Proceeding to Step 8: Run pdd fix*
+ ```
+
+ **If tests don't detect bugs:**
+ ```markdown
+ ## Step 7: Verify Tests Detect Bugs (Cycle {cycle_number})
+
+ **Status:** TESTS_NOT_DETECTING_BUGS
+
+ ### Problem
+ | Test | Issue |
+ |------|-------|
+ | `test_{{name}}` | Passes when it should fail |
+
+ ### Analysis
+ {{explanation of why tests aren't detecting the bug}}
+
+ ### Fix Applied
+ {{description of how tests were corrected}}
+
+ FILES_MODIFIED: {{comma_separated_list}}
+
+ ---
+ *Proceeding to Step 8: Run pdd fix*
+ ```
+
+ % Important
+
+ - Tests MUST fail on current code to be valid bug-detecting tests
+ - If tests pass, either the bug is already fixed or the test is wrong
+ - Always fix invalid tests before proceeding
+ - Always post your findings as a GitHub comment before completing
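To make the pass/fail/error distinction this step draws concrete, here is a minimal sketch of the check it describes, assuming pytest as the runner; the helper name and the classification heuristics are illustrative, not taken from the package.

```python
# Sketch: run each newly created test file and classify the outcome.
# The file list would come from Step 6's FILES_CREATED line (hypothetical input).
import subprocess

def verify_tests_fail(test_files):
    verdicts = {}
    for path in test_files:
        proc = subprocess.run(
            ["python", "-m", "pytest", path, "-v"],
            capture_output=True, text=True,
        )
        if proc.returncode == 0:
            verdicts[path] = "PASSES - not detecting the bug"
        elif "ERROR" in proc.stdout:
            # pytest reports setup/collection problems as ERROR, not FAILED
            verdicts[path] = "ERRORS - test setup issue"
        else:
            verdicts[path] = "FAILS as expected on the buggy code"
    return verdicts
```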
@@ -0,0 +1,120 @@
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to run pdd fix sequentially on dev units identified during e2e failure analysis.
+
+ % Context
+
+ You are working on step 8 of 9 in an agentic e2e fix workflow. You need to run `pdd fix` on each dev unit identified in Step 5 to fix the bugs causing e2e test failures.
+
+ **Important distinction from Step 1:**
+ - Step 1: Fixes dev units associated with unit tests **explicitly listed in the GitHub issue** (from `pdd bug`)
+ - Step 8 (this step): Fixes dev units **identified during e2e failure analysis** in Steps 3-5 (may be different or additional dev units discovered through stack trace analysis)
+
+ % Inputs
+
+ - GitHub Issue URL: {issue_url}
+ - Repository: {repo_owner}/{repo_name}
+ - Issue Number: {issue_number}
+ - Cycle: {cycle_number}/{max_cycles}
+
+ % Issue Content
+ <issue_content>
+ {issue_content}
+ </issue_content>
+
+ % Previous Step Outputs
+ <step1_output>
+ {step1_output}
+ </step1_output>
+
+ <step2_output>
+ {step2_output}
+ </step2_output>
+
+ <step3_output>
+ {step3_output}
+ </step3_output>
+
+ <step4_output>
+ {step4_output}
+ </step4_output>
+
+ <step5_output>
+ {step5_output}
+ </step5_output>
+
+ <step6_output>
+ {step6_output}
+ </step6_output>
+
+ <step7_output>
+ {step7_output}
+ </step7_output>
+
+ % Failing Dev Units
+ {failing_dev_units}
+
+ % Your Task
+
+ 1. **Run pdd fix for each failing dev unit**
+    - For each dev unit from Step 5 (DEV_UNITS_IDENTIFIED):
+      - Execute pdd fix in manual mode:
+        ```bash
+        # Create a temp error file for pdd fix
+        touch /tmp/pdd_fix_errors_{dev_unit}.log
+        pdd fix --manual prompts/{dev_unit}_python.prompt pdd/{dev_unit}.py tests/test_{dev_unit}.py /tmp/pdd_fix_errors_{dev_unit}.log --loop
+        ```
+      - Wait for completion before starting the next one
+      - Capture output from each run
+
+ 2. **Track results**
+    - Note which dev units were successfully fixed
+    - Note which dev units still have failing tests
+    - Capture any error messages
+
+ 3. **Run unit tests to verify fixes**
+    - After all pdd fix runs complete
+    - Run: `pytest tests/test_{dev_unit}.py -v` for each
+    - Report pass/fail status
+
+ % Output
+
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
+
+ ```
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
+ ```
+
+ Your comment should follow this format:
+
+ ```markdown
+ ## Step 8: Run pdd fix (Cycle {cycle_number})
+
+ **Status:** {N}/{M} Dev Units Fixed
+
+ ### pdd fix Results
+ | Dev Unit | pdd fix Status | Tests After |
+ |----------|----------------|-------------|
+ | `{name}` | Success | {N}/{M} pass |
+ | `{name}` | Failed | {error} |
+
+ ### Files Changed
+ {list_of_modified_files}
+
+ ### Summary
+ - Dev units fixed: {list}
+ - Dev units still failing: {list}
+ - Total pdd fix runs: {N}
+ - Total cost: ${cost:.4f}
+
+ FILES_MODIFIED: {comma_separated_list_of_code_files}
+
+ ---
+ *Proceeding to Step 9: Final Verification*
+ ```
+
+ % Important
+
+ - Run pdd fix SEQUENTIALLY, not in parallel
+ - Each dev unit may take significant time - be patient
+ - Track costs from each pdd fix invocation
+ - Always output the FILES_MODIFIED line
+ - Always post your findings as a GitHub comment before completing
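As a rough illustration of the sequential loop this step mandates (one `pdd fix` at a time, output captured, then the unit tests re-run), a sketch follows. The `pdd fix` invocation mirrors the bash example in the prompt above; the function name and path layout are assumptions, not the orchestrator's actual code.

```python
# Sketch: run `pdd fix` once per dev unit, strictly sequentially, then
# re-check that unit's tests. Paths follow the bash example above.
import subprocess

def fix_dev_units(dev_units):
    outcomes = {}
    for unit in dev_units:
        err_log = f"/tmp/pdd_fix_errors_{unit}.log"
        open(err_log, "a").close()  # equivalent of `touch`
        fix = subprocess.run(
            ["pdd", "fix", "--manual",
             f"prompts/{unit}_python.prompt", f"pdd/{unit}.py",
             f"tests/test_{unit}.py", err_log, "--loop"],
            capture_output=True, text=True,
        )
        tests = subprocess.run(
            ["python", "-m", "pytest", f"tests/test_{unit}.py", "-v"],
            capture_output=True, text=True,
        )
        outcomes[unit] = {"fix_rc": fix.returncode, "tests_rc": tests.returncode}
    return outcomes
```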
@@ -0,0 +1,146 @@
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to perform final verification and control the outer loop.
+
+ % Context
+
+ You are working on step 9 of 9 in an agentic e2e fix workflow. This step verifies the current state and determines if another cycle is needed.
+
+ % Inputs
+
+ - GitHub Issue URL: {issue_url}
+ - Repository: {repo_owner}/{repo_name}
+ - Issue Number: {issue_number}
+ - Cycle: {cycle_number}/{max_cycles}
+
+ % Issue Content
+ <issue_content>
+ {issue_content}
+ </issue_content>
+
+ % Previous Step Outputs
+ <step1_output>
+ {step1_output}
+ </step1_output>
+
+ <step2_output>
+ {step2_output}
+ </step2_output>
+
+ <step3_output>
+ {step3_output}
+ </step3_output>
+
+ <step4_output>
+ {step4_output}
+ </step4_output>
+
+ <step5_output>
+ {step5_output}
+ </step5_output>
+
+ <step6_output>
+ {step6_output}
+ </step6_output>
+
+ <step7_output>
+ {step7_output}
+ </step7_output>
+
+ <step8_output>
+ {step8_output}
+ </step8_output>
+
+ % Your Task
+
+ 1. **Run full test suite**
+    - Run all unit tests: `pytest tests/ -v`
+    - Run all e2e tests identified in the issue
+    - Capture complete results
+
+ 2. **Assess current state**
+    - How many tests pass vs fail?
+    - Are the original bugs fixed?
+    - Are there new failures?
+
+ 3. **Determine next action**
+    - If all tests pass: Workflow complete
+    - If tests still fail and cycle < max_cycles: Another cycle needed
+    - If tests still fail and cycle >= max_cycles: Max cycles reached, report status
+
+ % Output
+
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
+
+ ```
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
+ ```
+
+ **If all tests pass:**
+ ```markdown
+ ## Step 9: Final Verification (Cycle {cycle_number})
+
+ **Status:** ALL_TESTS_PASS
+
+ ### Test Results
+ - Unit tests: {N}/{M} pass
+ - E2E tests: {N}/{M} pass
+
+ ### Summary
+ All bugs fixed successfully in {cycle_number} cycle(s).
+
+ ### Files Changed (Total)
+ {list_of_all_files_modified}
+
+ ---
+ *Workflow complete - all tests passing*
+ ```
+
+ **If more cycles needed:**
+ ```markdown
+ ## Step 9: Final Verification (Cycle {cycle_number})
+
+ **Status:** CONTINUE_CYCLE
+
+ ### Test Results
+ - Unit tests: {N}/{M} pass ({K} still failing)
+ - E2E tests: {N}/{M} pass ({K} still failing)
+
+ ### Remaining Failures
+ | Test | Failure Type |
+ |------|--------------|
+ | `test_{name}` | {type} |
+
+ ### Analysis
+ {explanation_of_remaining_failures}
+
+ ---
+ *Starting Cycle {next_cycle}*
+ ```
+
+ **If max cycles reached:**
+ ```markdown
+ ## Step 9: Final Verification (Cycle {cycle_number})
+
+ **Status:** MAX_CYCLES_REACHED
+
+ ### Test Results
+ - Unit tests: {N}/{M} pass ({K} still failing)
+ - E2E tests: {N}/{M} pass ({K} still failing)
+
+ ### Remaining Failures
+ | Test | Failure Type |
+ |------|--------------|
+ | `test_{name}` | {type} |
+
+ ### Recommendation
+ {manual_intervention_suggestions}
+
+ ---
+ *Workflow stopped - max cycles reached*
+ ```
+
+ % Important
+
+ - The exact strings "ALL_TESTS_PASS", "CONTINUE_CYCLE", or "MAX_CYCLES_REACHED" control the outer loop
+ - Be accurate in test counts - don't round or estimate
+ - Include specific failure information for debugging
+ - Always post your findings as a GitHub comment before completing
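Since the prompt states that these exact status strings drive the outer loop, the orchestrator side presumably reduces to a check like the following sketch (the function name is illustrative; only the sentinel strings are taken from the prompt):

```python
# Sketch: outer-loop control keyed off the sentinel strings in the Step 9 comment.
def should_run_another_cycle(step9_output: str, cycle: int, max_cycles: int) -> bool:
    if "ALL_TESTS_PASS" in step9_output:
        return False  # workflow complete
    if "MAX_CYCLES_REACHED" in step9_output or cycle >= max_cycles:
        return False  # stop and surface the remaining failures
    return "CONTINUE_CYCLE" in step9_output  # run another cycle
```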
@@ -0,0 +1,45 @@
+ You are fixing a test failure in a PDD (Prompt-Driven Development) project.
+ You are running as FALLBACK after PDD's normal fix loop failed multiple times. That loop was only allowed to change the code and/or test file.
+
+ ## PDD Principle
+ The PROMPT FILE is the source of truth. Code and tests are generated artifacts.
+ If tests expect behavior not defined in the prompt, the TESTS may be wrong.
+
+ ## Files (you have full read/write access)
+ - Prompt file (THE SPEC): {prompt_path}
+ - Code file: {code_path}
+ - Test file: {test_path}
+ - Example program file: {example_program_path}
+ - Project root: {project_root}
+
+ ## Previous Fix Attempts
+ The following shows what PDD's normal fix loop already tried.
+ DO NOT repeat these approaches - try something different.
+
+ {error_content}
+
+ ## Your Task
+ 1. Read the prompt file to understand the intended behavior
+ 2. Read the code and test files
+ 3. Run the test file to get the error(s)
+ 4. Explore related files (helpers, fixtures, etc.) if needed
+ 5. Determine what needs fixing:
+    - Code doesn't match the prompt spec -> fix the code
+    - Tests don't match the prompt spec -> fix the tests
+    - Tests have implementation issues (mocking, isolation) -> fix test implementation
+    - Issue requires changes to other files -> make those changes
+ 6. Make ALL necessary changes to fix the tests
+ 7. Run the example program file to verify the fix didn't break the program
+ 8. Repeat steps 4-7 until the program output aligns with the prompt's intent
+ 9. Output a JSON string with the following fields:
+    - success: bool
+    - message: str
+    - cost: float
+    - model: str
+    - changed_files: list[str]
+
+ ## Critical Rules
+ - The prompt file defines what's correct - code and tests should conform to it
+ - DO NOT repeat approaches from the fix history above
+ - You may modify existing files or create new ones
+ - If the error involves mocking/test isolation, focus on the TEST implementation
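A hedged example of the JSON result this fallback agent is asked to emit, built from the field list in the task above; all values are placeholders:

```python
# Sketch: the result object the fallback agent must print, per the field
# list above. Every value here is a placeholder, not real output.
import json

result = {
    "success": True,
    "message": "Fixed test isolation by patching the module-level cache in tests",
    "cost": 0.0123,                # accumulated LLM cost for this run
    "model": "example-model",      # placeholder for the model actually used
    "changed_files": ["tests/test_example.py"],
}
print(json.dumps(result))
```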
@@ -0,0 +1,48 @@
+ % You are a strictly constrained code emitter. Your goal is to make {test_abs} run without errors. The bug could be in EITHER {code_abs} (the source code) OR {test_abs} (the test/example file). Analyze the error carefully to determine which file needs fixing. Read the prompt content, which describes the intended functionality, then fix the appropriate file(s). Your ONLY task is to output the fully corrected contents of one or more changed files, and optionally one shell command to run the tests. Wrap outputs between the provided BEGIN/END markers. No commentary or extra text.
+
+ % IMPORTANT: Analyze the error traceback carefully:
+ - If the error is in how the test/example USES the code (wrong exception caught, wrong API usage), fix {test_abs}
+ - If the error is in the code's IMPLEMENTATION (wrong behavior, missing functionality), fix {code_abs}
+ - You may need to fix BOTH files in some cases
+
+ % IMPORTANT: If you see ModuleNotFoundError or ImportError:
+ - For external packages: include "pip install <package> &&" before the test command in TESTCMD
+ - For local imports: fix the sys.path or import statement to correctly locate {code_abs}
+ - The code file is at: {code_abs} - ensure imports can find this path
+
+ <inputs>
+   <paths>
+     <begin_marker>{begin}</begin_marker>
+     <end_marker>{end}</end_marker>
+     <code_file>{code_abs}</code_file>
+   </paths>
+
+   <context>
+     <prompt_content>
+ {prompt_content}
+     </prompt_content>
+     <relevant_error>
+ {error_content}
+     </relevant_error>
+   </context>
+ </inputs>
+
+ % Follow these instructions:
+
+ 1) Output ALL files you changed that are needed to make the tests pass (source files, tests, or small support files).
+    Use one block per file, with this exact wrapping:
+    <<<BEGIN_FILE:{code_abs}>>>
+    <FULL CORRECTED FILE CONTENT>
+    <<<END_FILE:{code_abs}>>>
+
+    If you also modify the test file:
+    <<<BEGIN_FILE:{test_abs}>>>
+    <FULL CORRECTED FILE CONTENT>
+    <<<END_FILE:{test_abs}>>>
+
+ 2) If you cannot run the tests, ALSO print a single block containing the exact shell command to run them, such that it returns 0 on success:
+    <<<BEGIN_TESTCMD>>>
+    python {test_abs}
+    <<<END_TESTCMD>>>
+
+ 3) Print nothing else. No code fences, no comments, no prose.
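The marker protocol above lends itself to a simple parse on the harvesting side. A sketch follows, assuming the literal `<<<BEGIN_FILE:...>>>` forms shown in the instructions (the shipped code receives the markers via {begin}/{end} and may differ):

```python
# Sketch: harvest file blocks and the optional test command from model output.
import re

FILE_RE = re.compile(r"<<<BEGIN_FILE:(.+?)>>>\n(.*?)<<<END_FILE:\1>>>", re.S)
CMD_RE = re.compile(r"<<<BEGIN_TESTCMD>>>\n(.*?)<<<END_TESTCMD>>>", re.S)

def harvest(output: str):
    # One entry per changed file: path -> full corrected contents
    files = {path: body for path, body in FILE_RE.findall(output)}
    m = CMD_RE.search(output)
    testcmd = m.group(1).strip() if m else None
    return files, testcmd
```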
@@ -0,0 +1,85 @@
+ % YOU ARE A DEBUGGING AGENT with full file system access.
+
+ % TASK: Fix the failing test at {test_abs}
+
+ % APPROACH:
+ 1. Read the error traceback carefully to understand what's failing
+ 2. Explore the relevant files to understand the codebase structure
+ 3. Identify the root cause - is the bug in the code module, the test file, or both?
+ 4. Use your file editing tools to make minimal, targeted fixes
+ 5. After fixing, output the test command to verify your changes
+
+ % FILES YOU CAN READ AND EDIT:
+ <code_module>
+ {code_abs}
+ </code_module>
+ <test_file>
+ {test_abs}
+ </test_file>
+
+ % ORIGINAL SPECIFICATION:
+ <prompt_content>
+ {prompt_content}
+ </prompt_content>
+
+ % ERROR LOG:
+ <error_content>
+ {error_content}
+ </error_content>
+
+ % DEBUGGING GUIDELINES:
+ - Analyze the traceback to find WHERE the error occurs and WHY
+ - The bug could be in EITHER file - don't assume it's always in the code
+ - If the error is in how the test USES the code → fix the test
+ - If the error is in the code's IMPLEMENTATION → fix the code
+ - You may need to fix BOTH files in some cases
+
+ % COMMON ERROR TYPES AND FIXES:
+ - ImportError/ModuleNotFoundError for LOCAL modules: The import statement may be wrong.
+   FIX: Change the import to use the correct module name (look at what modules exist).
+   DO NOT create new modules to match a wrong import - fix the import instead!
+ - ImportError/ModuleNotFoundError for EXTERNAL packages (pip packages like toml, requests, humanize, etc.):
+   PREFERRED: Install the missing package using TESTCMD:
+   <<<BEGIN_TESTCMD>>>
+   pip install <package_name> && python -m pytest "{test_abs}" -q
+   <<<END_TESTCMD>>>
+
+   DO NOT rewrite the code to remove or replace the dependency unless the specification
+   explicitly says the dependency is optional. If the code uses a library, INSTALL IT.
+
+   ONLY use a try/except fallback if the specification says the feature is optional:
+   ```python
+   try:
+       import toml
+   except ImportError:
+       toml = None  # Only if spec says toml is optional
+   ```
+ - TypeError/AttributeError: Check function signatures and method names
+ - AssertionError: Check if the test expectation or the code logic is wrong
+ - ZeroDivisionError/ValueError: Add proper error handling
+ - SyntaxError (unterminated string literal / unexpected character):
+   This often means the file has garbage appended at the end (a common LLM extraction bug).
+   FIX: Read the end of the file and look for JSON-like metadata patterns such as:
+   - Lines starting with `"explanation":`, `"focus":`, `"description":`
+   - Lines with only `}}` or `]`
+   - Code lines ending with `",` followed by JSON keys
+   SOLUTION: Delete all the garbage lines at the end of the file to restore valid Python.
+
+ % EDIT POLICY:
+ - Keep changes minimal and directly related to the failure
+ - Prefer fixing import statements over creating new files
+ - Prefer fixing implementation bugs over weakening tests
+ - You MAY create new files if truly needed (e.g., __init__.py for packages)
+
+ % AFTER FIXING, OUTPUT VERIFICATION COMMAND:
+ <<<BEGIN_TESTCMD>>>
+ python -m pytest "{test_abs}" -q
+ <<<END_TESTCMD>>>
+
+ % IMPORTANT:
+ - Use your file tools to directly read and modify the files
+ - Do NOT output the full file contents - just make targeted edits
+ - The test command will be run automatically to verify your fix worked
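Since this prompt says the emitted test command is run automatically, the verification side presumably looks something like the sketch below; the function name is an assumption, and `shell=True` is used only because the harvested command may chain `pip install ... && pytest`.

```python
# Sketch: execute the harvested TESTCMD; exit status 0 means the fix is verified.
import subprocess

def run_testcmd(testcmd: str) -> bool:
    proc = subprocess.run(testcmd, shell=True, capture_output=True, text=True)
    return proc.returncode == 0
```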