pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,49 @@
1
+ You are fixing a crash in a PDD (Prompt-Driven Development) project.
2
+ You are running as FALLBACK after PDD's normal crash loop failed multiple times. This loop was only allowed to change the code and/or program file.
3
+ The error(s) are likely outside of these files.
4
+
5
+ ## PDD Principle
6
+ The PROMPT FILE is the source of truth. Code is a generated artifact.
7
+ The PROGRAM FILE calls the code and crashed. Both files may need fixes.
8
+
9
+ ## Files (you have full read/write access)
10
+ - Prompt file (THE SPEC): {prompt_path}
11
+ - Code file: {code_path}
12
+ - Program file: {program_path}
13
+ - Project root: {project_root}
14
+
15
+ ## Previous Fix Attempts
16
+ The following shows what PDD's normal crash loop already tried.
17
+ DO NOT repeat these approaches - try something different.
18
+
19
+ <previous_attempts>
20
+ {previous_attempts}
21
+ </previous_attempts>
22
+
23
+ ## Your Task
24
+ 1. Read the prompt file to understand the intended behavior
25
+ 2. Read the code and program files
26
+ 3. Analyze the crash traceback to identify the root cause
27
+ 4. Explore related files (imports, dependencies, conftest.py) if needed
28
+ 5. Determine what needs fixing:
29
+ - Code has a bug -> fix the code
30
+ - Program calls code incorrectly -> fix the program
31
+ - Both have issues -> fix both
32
+ - Issue requires changes to other files -> make those changes
33
+ 6. Make ALL necessary changes to stop the crash including other files if needed
34
+ 7. Run the program file to verify the fix
35
+ 8. Repeat steps 4-7 until the program output aligns with the prompt's intent
36
+ 9. Output a JSON string with the following fields:
37
+ - success: bool
38
+ - message: str
39
+ - cost: float
40
+ - model: str
41
+ - changed_files: list[str]
42
+
43
+ ## Critical Rules
44
+ - The prompt file defines what's correct - code should conform to it
45
+ - DO NOT repeat approaches from the fix history above
46
+ - You MAY modify BOTH the code file AND the program file
47
+ - IMPORTANT: Read actual source files before assuming what functions/classes exist
48
+ - Do NOT guess at imports or API names
49
+ - Explore the codebase to understand actual exports
@@ -0,0 +1,90 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to run the **issue-specific unit tests** (created by `pdd bug`) and fix any failures using pdd fix.
2
+
3
+ % Context
4
+
5
+ You are working on step 1 of 9 in an agentic e2e fix workflow. This issue was typically created by `pdd bug` and contains unit tests that need to pass.
6
+
7
+ **Important distinction from Step 8:**
8
+ - Step 1 (this step): Runs unit tests **explicitly listed in the GitHub issue** (created by `pdd bug`) and fixes their associated dev units
9
+ - Step 8: Runs pdd fix on dev units **identified during e2e failure analysis** in Steps 3-5 (may be different dev units)
10
+
11
+ % Inputs
12
+
13
+ - GitHub Issue URL: {issue_url}
14
+ - Repository: {repo_owner}/{repo_name}
15
+ - Issue Number: {issue_number}
16
+ - Cycle: {cycle_number}/{max_cycles}
17
+
18
+ % Issue Content
19
+ <issue_content>
20
+ {issue_content}
21
+ </issue_content>
22
+
23
+ % Your Task
24
+
25
+ 1. **Identify unit tests from the issue**
26
+ - Parse the issue content to find unit test file paths
27
+ - Look for patterns like `tests/test_*.py`, `FILES_CREATED:`, or test code blocks
28
+ - Extract the test file paths and test function names
29
+
30
+ 2. **Run the unit tests**
31
+ - Execute pytest with the test files you identified above (e.g., `pytest tests/test_foo.py -v`)
32
+ - Capture the output including any failures
33
+ - Note which tests pass and which fail
34
+
35
+ 3. **For each failing test, run pdd fix**
36
+ - Identify the dev unit (module) associated with each failing test
37
+ - For each failing dev unit, run pdd fix in manual mode:
38
+ ```bash
39
+ # Create a temp error file for pdd fix
40
+ touch /tmp/pdd_fix_errors_{{dev_unit}}.log
41
+ pdd fix --manual prompts/{{dev_unit}}_python.prompt pdd/{{dev_unit}}.py tests/test_{{dev_unit}}.py /tmp/pdd_fix_errors_{{dev_unit}}.log --loop
42
+ ```
43
+ - Run pdd fix sequentially, one dev unit at a time
44
+ - Capture the output of each pdd fix run
45
+
46
+ 4. **Re-run unit tests to verify fixes**
47
+ - After all pdd fix runs complete, re-run the unit tests
48
+ - Report which tests now pass vs still fail
49
+
50
+ % Output
51
+
52
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
53
+
54
+ ```
55
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
56
+ ```
57
+
58
+ Your comment should follow this format:
59
+
60
+ ```markdown
61
+ ## Step 1: Unit Test Execution (Cycle {cycle_number})
62
+
63
+ **Status:** [All Tests Pass | N Tests Still Failing]
64
+
65
+ ### Unit Tests Identified
66
+ - `{{test_file}}::{{test_function}}`
67
+ - ...
68
+
69
+ ### Initial Test Results
70
+ - Passed: {{N}}
71
+ - Failed: {{N}}
72
+
73
+ ### pdd fix Runs
74
+ | Dev Unit | Result |
75
+ |----------|--------|
76
+ | {{name}} | Fixed / Still Failing |
77
+
78
+ ### Final Test Results
79
+ - Passed: {{N}}
80
+ - Failed: {{N}}
81
+
82
+ ---
83
+ *Proceeding to Step 2: E2E Test Check*
84
+ ```
85
+
86
+ % Important
87
+
88
+ - Run pdd fix sequentially, not in parallel
89
+ - If a dev unit has already been fixed in a previous cycle, skip it unless tests still fail
90
+ - Always post your findings as a GitHub comment before completing
@@ -0,0 +1,91 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to run e2e tests and check if they all pass.
2
+
3
+ % Context
4
+
5
+ You are working on step 2 of 9 in an agentic e2e fix workflow. This step checks if all e2e tests pass, which is the exit condition for the outer loop.
6
+
7
+ % Inputs
8
+
9
+ - GitHub Issue URL: {issue_url}
10
+ - Repository: {repo_owner}/{repo_name}
11
+ - Issue Number: {issue_number}
12
+ - Cycle: {cycle_number}/{max_cycles}
13
+
14
+ % Issue Content
15
+ <issue_content>
16
+ {issue_content}
17
+ </issue_content>
18
+
19
+ % Previous Step Output
20
+ <step1_output>
21
+ {step1_output}
22
+ </step1_output>
23
+
24
+ % Your Task
25
+
26
+ 1. **Identify e2e tests from the issue**
27
+ - Parse the issue content to find e2e test file paths
28
+ - Look for `E2E_FILES_CREATED:` markers from `pdd bug` output
29
+ - Also check for patterns like `tests/e2e/`, `tests/integration/`, or `test_e2e_*.py`
30
+
31
+ 2. **Run the e2e tests**
32
+ - Execute pytest with the e2e test files you identified above (e.g., `pytest tests/e2e/test_foo.py -v`)
33
+ - Capture the full output including any failures
34
+ - Note specific error messages and stack traces
35
+
36
+ 3. **Evaluate results**
37
+ - If ALL tests pass: Output includes "ALL_TESTS_PASS" (triggers outer loop exit)
38
+ - If ANY tests fail: Document failures for root cause analysis
39
+
40
+ % Output
41
+
42
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
43
+
44
+ ```
45
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
46
+ ```
47
+
48
+ Your comment should follow this format:
49
+
50
+ **If all tests pass:**
51
+ ```markdown
52
+ ## Step 2: E2E Test Check (Cycle {cycle_number})
53
+
54
+ **Status:** ALL_TESTS_PASS
55
+
56
+ ### E2E Tests Run
57
+ - `{{test_file}}::{{test_function}}` ✓
58
+ - ...
59
+
60
+ ### Summary
61
+ All {{N}} e2e tests pass. Bug fix complete!
62
+
63
+ ---
64
+ *Workflow complete - all tests passing*
65
+ ```
66
+
67
+ **If tests fail:**
68
+ ```markdown
69
+ ## Step 2: E2E Test Check (Cycle {cycle_number})
70
+
71
+ **Status:** {{N}} Tests Failing
72
+
73
+ ### E2E Tests Run
74
+ - `{{test_file}}::{{test_function}}` ✓
75
+ - `{{test_file}}::{{test_function}}` ✗
76
+
77
+ ### Failures
78
+ #### `{{test_name}}`
79
+ ```
80
+ {{error_message_and_stack_trace}}
81
+ ```
82
+
83
+ ---
84
+ *Proceeding to Step 3: Root Cause Analysis*
85
+ ```
86
+
87
+ % Important
88
+
89
+ - The exact string "ALL_TESTS_PASS" triggers the outer loop exit - only output it if ALL tests truly pass
90
+ - Capture complete error messages and stack traces for failing tests
91
+ - Always post your findings as a GitHub comment before completing
@@ -0,0 +1,89 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to root cause the e2e test failures.
2
+
3
+ % Context
4
+
5
+ You are working on step 3 of 9 in an agentic e2e fix workflow. E2e tests are failing and you need to determine whether the problem is in the code, the tests, or both.
6
+
7
+ % Inputs
8
+
9
+ - GitHub Issue URL: {issue_url}
10
+ - Repository: {repo_owner}/{repo_name}
11
+ - Issue Number: {issue_number}
12
+ - Cycle: {cycle_number}/{max_cycles}
13
+
14
+ % Issue Content
15
+ <issue_content>
16
+ {issue_content}
17
+ </issue_content>
18
+
19
+ % Previous Step Outputs
20
+ <step1_output>
21
+ {step1_output}
22
+ </step1_output>
23
+
24
+ <step2_output>
25
+ {step2_output}
26
+ </step2_output>
27
+
28
+ % Your Task
29
+
30
+ 1. **Analyze the test failures**
31
+ - Review the error messages and stack traces from Step 2
32
+ - Identify the assertion that failed or exception that was raised
33
+ - Trace back to understand what behavior was unexpected
34
+
35
+ 2. **Review the documentation**
36
+ - Check project README, CLAUDE.md, or other docs
37
+ - Look for the expected behavior of the functionality being tested
38
+ - Understand what the correct behavior should be
39
+
40
+ 3. **Compare expected vs actual**
41
+ - Based on documentation, what should the code do?
42
+ - What is the test expecting?
43
+ - What is the code actually doing?
44
+
45
+ 4. **Determine root cause category**
46
+ - **CODE_BUG**: Code behavior doesn't match documented/expected behavior
47
+ - **TEST_BUG**: Test expectations don't match documented/expected behavior
48
+ - **BOTH**: Both code and tests have issues
49
+ - **UNCLEAR**: Need more investigation
50
+
51
+ % Output
52
+
53
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
54
+
55
+ ```
56
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
57
+ ```
58
+
59
+ Your comment should follow this format:
60
+
61
+ ```markdown
62
+ ## Step 3: Root Cause Analysis (Cycle {cycle_number})
63
+
64
+ **Status:** [CODE_BUG | TEST_BUG | BOTH | UNCLEAR]
65
+
66
+ ### Failure Analysis
67
+
68
+ #### Test: `{test_name}`
69
+ - **Expected behavior (per docs):** {description}
70
+ - **Test expects:** {description}
71
+ - **Code does:** {description}
72
+
73
+ ### Root Cause
74
+ {detailed_explanation}
75
+
76
+ ### Recommendation
77
+ [If CODE_BUG: Which dev units need fixing]
78
+ [If TEST_BUG: Which test assertions need updating]
79
+ [If BOTH: Both fixes needed]
80
+
81
+ ---
82
+ *Proceeding to Step 4: Fix E2E Tests (if needed)*
83
+ ```
84
+
85
+ % Important
86
+
87
+ - Be thorough in understanding the documented expected behavior
88
+ - If documentation is unclear, note that and make a reasonable judgment
89
+ - Always post your findings as a GitHub comment before completing
@@ -0,0 +1,96 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to fix e2e tests if they are the problem.
2
+
3
+ % Context
4
+
5
+ You are working on step 4 of 9 in an agentic e2e fix workflow. Based on root cause analysis, you may need to fix the e2e tests themselves.
6
+
7
+ % Inputs
8
+
9
+ - GitHub Issue URL: {issue_url}
10
+ - Repository: {repo_owner}/{repo_name}
11
+ - Issue Number: {issue_number}
12
+ - Cycle: {cycle_number}/{max_cycles}
13
+
14
+ % Issue Content
15
+ <issue_content>
16
+ {issue_content}
17
+ </issue_content>
18
+
19
+ % Previous Step Outputs
20
+ <step1_output>
21
+ {step1_output}
22
+ </step1_output>
23
+
24
+ <step2_output>
25
+ {step2_output}
26
+ </step2_output>
27
+
28
+ <step3_output>
29
+ {step3_output}
30
+ </step3_output>
31
+
32
+ % Your Task
33
+
34
+ 1. **Check if test fixes are needed**
35
+ - Review Step 3's root cause analysis
36
+ - If status was "CODE_BUG" only, skip to Step 5
37
+ - If status was "TEST_BUG" or "BOTH", proceed with test fixes
38
+
39
+ 2. **Fix the e2e tests**
40
+ - Identify which test assertions are incorrect
41
+ - Update the tests to match the documented expected behavior
42
+ - Ensure tests still exercise the intended functionality
43
+
44
+ 3. **Re-run e2e tests to verify fix**
45
+ - Execute the modified e2e tests
46
+ - Confirm the test changes work as expected
47
+ - If tests still fail due to code bugs, that's expected (will be fixed in later steps)
48
+
49
+ % Output
50
+
51
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
52
+
53
+ ```
54
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
55
+ ```
56
+
57
+ **If no test fixes needed:**
58
+ ```markdown
59
+ ## Step 4: Fix E2E Tests (Cycle {cycle_number})
60
+
61
+ **Status:** No Test Fixes Needed
62
+
63
+ Root cause analysis indicates CODE_BUG only. Skipping to dev unit identification.
64
+
65
+ ---
66
+ *Proceeding to Step 5: Identify Dev Units*
67
+ ```
68
+
69
+ **If test fixes applied:**
70
+ ```markdown
71
+ ## Step 4: Fix E2E Tests (Cycle {cycle_number})
72
+
73
+ **Status:** Test Fixes Applied
74
+
75
+ ### Changes Made
76
+ | File | Change |
77
+ |------|--------|
78
+ | `{{test_file}}` | {{description_of_change}} |
79
+
80
+ ### Verification
81
+ - Tests run: {{N}}
82
+ - Tests passing (after test fix): {{N}}
83
+ - Tests still failing (code bugs): {{N}}
84
+
85
+ FILES_MODIFIED: {{comma_separated_list_of_test_files}}
86
+
87
+ ---
88
+ *Proceeding to Step 5: Identify Dev Units*
89
+ ```
90
+
91
+ % Important
92
+
93
+ - Only fix tests if Step 3 indicated TEST_BUG or BOTH
94
+ - Test fixes should align tests with documented expected behavior, not make tests pass bad code
95
+ - Always output FILES_MODIFIED line if any test files were changed
96
+ - Always post your findings as a GitHub comment before completing
@@ -0,0 +1,91 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to identify which dev units are involved in the failures.
2
+
3
+ % Context
4
+
5
+ You are working on step 5 of 9 in an agentic e2e fix workflow. You need to identify which dev units (modules) are responsible for the remaining test failures.
6
+
7
+ % Inputs
8
+
9
+ - GitHub Issue URL: {issue_url}
10
+ - Repository: {repo_owner}/{repo_name}
11
+ - Issue Number: {issue_number}
12
+ - Cycle: {cycle_number}/{max_cycles}
13
+
14
+ % Issue Content
15
+ <issue_content>
16
+ {issue_content}
17
+ </issue_content>
18
+
19
+ % Previous Step Outputs
20
+ <step1_output>
21
+ {step1_output}
22
+ </step1_output>
23
+
24
+ <step2_output>
25
+ {step2_output}
26
+ </step2_output>
27
+
28
+ <step3_output>
29
+ {step3_output}
30
+ </step3_output>
31
+
32
+ <step4_output>
33
+ {step4_output}
34
+ </step4_output>
35
+
36
+ % Your Task
37
+
38
+ 1. **Analyze the stack traces**
39
+ - Review error stack traces from Step 2
40
+ - Identify which source files are involved in the failures
41
+ - Map source files to their corresponding dev units (modules)
42
+
43
+ 2. **Map files to dev units**
44
+ - List all prompts in the `prompts/` directory: `ls prompts/*_python.prompt`
45
+ - For each involved source file, find its corresponding prompt
46
+ - A dev unit name is derived from the prompt file: `{{name}}_python.prompt` -> `{{name}}`
47
+
48
+ 3. **List dev units to fix**
49
+ - Create a prioritized list of dev units that need fixing
50
+ - Consider dependencies between dev units
51
+ - Note which dev units were already fixed in previous cycles
52
+
53
+ % Output
54
+
55
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
56
+
57
+ ```
58
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
59
+ ```
60
+
61
+ Your comment should follow this format:
62
+
63
+ ```markdown
64
+ ## Step 5: Identify Dev Units (Cycle {cycle_number})
65
+
66
+ **Status:** {{N}} Dev Units Identified
67
+
68
+ ### Stack Trace Analysis
69
+ | Source File | Dev Unit | Error Type |
70
+ |-------------|----------|------------|
71
+ | `pdd/{{file}}.py` | `{{dev_unit}}` | {{error_type}} |
72
+
73
+ ### Dev Units to Fix
74
+ 1. `{{dev_unit_1}}` - {{reason}}
75
+ 2. `{{dev_unit_2}}` - {{reason}}
76
+
77
+ ### Previously Fixed (This Session)
78
+ - `{{dev_unit}}` - Cycle {{N}}
79
+
80
+ DEV_UNITS_IDENTIFIED: {{comma_separated_list}}
81
+
82
+ ---
83
+ *Proceeding to Step 6: Create Unit Tests*
84
+ ```
85
+
86
+ % Important
87
+
88
+ - The DEV_UNITS_IDENTIFIED line is machine-readable and used by subsequent steps
89
+ - Only include dev units that have corresponding prompts (PDD-managed modules)
90
+ - Skip dev units that don't have prompts (external dependencies)
91
+ - Always post your findings as a GitHub comment before completing
@@ -0,0 +1,106 @@
1
+ % You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to create or append unit tests for the involved dev units.
2
+
3
+ % Context
4
+
5
+ You are working on step 6 of 9 in an agentic e2e fix workflow. You need to create unit tests that will detect the bugs in the identified dev units.
6
+
7
+ % Inputs
8
+
9
+ - GitHub Issue URL: {issue_url}
10
+ - Repository: {repo_owner}/{repo_name}
11
+ - Issue Number: {issue_number}
12
+ - Cycle: {cycle_number}/{max_cycles}
13
+
14
+ % Issue Content
15
+ <issue_content>
16
+ {issue_content}
17
+ </issue_content>
18
+
19
+ % Previous Step Outputs
20
+ <step1_output>
21
+ {step1_output}
22
+ </step1_output>
23
+
24
+ <step2_output>
25
+ {step2_output}
26
+ </step2_output>
27
+
28
+ <step3_output>
29
+ {step3_output}
30
+ </step3_output>
31
+
32
+ <step4_output>
33
+ {step4_output}
34
+ </step4_output>
35
+
36
+ <step5_output>
37
+ {step5_output}
38
+ </step5_output>
39
+
40
+ % Dev Units to Fix
41
+ {dev_units_identified}
42
+
43
+ % Your Task
44
+
45
+ 1. **For each identified dev unit:**
46
+ - Read the module's source code: `pdd/{{dev_unit}}.py`
47
+ - Read the module's prompt: `prompts/{{dev_unit}}_python.prompt`
48
+ - Understand the expected behavior from the prompt
49
+
50
+ 2. **Design unit tests that detect the bug**
51
+ - Create tests that specifically exercise the buggy code path
52
+ - Tests should FAIL on current buggy code
53
+ - Tests should PASS once the bug is fixed
54
+
55
+ 3. **Write or append to test files**
56
+ - Check if `tests/test_{{dev_unit}}.py` exists
57
+ - If exists: append new test functions
58
+ - If not: create new test file
59
+
60
+ 4. **Follow testing conventions**
61
+ - Use pytest framework
62
+ - Include clear docstrings explaining what each test verifies
63
+ - Use appropriate assertions and mocking
64
+
65
+ % Output
66
+
67
+ After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:
68
+
69
+ ```
70
+ gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
71
+ ```
72
+
73
+ Your comment should follow this format:
74
+
75
+ ```markdown
76
+ ## Step 6: Create Unit Tests (Cycle {cycle_number})
77
+
78
+ **Status:** {{N}} Tests Created/Updated
79
+
80
+ ### Tests Created
81
+
82
+ #### `tests/test_{{dev_unit}}.py`
83
+ ~~~python
84
+ {{test_code}}
85
+ ~~~
86
+
87
+ **What this tests:** {{explanation}}
88
+
89
+ ### Summary
90
+ | Dev Unit | Test File | Tests Added |
91
+ |----------|-----------|-------------|
92
+ | `{{name}}` | `tests/test_{{name}}.py` | {{N}} |
93
+
94
+ FILES_CREATED: {{comma_separated_new_files}}
95
+ FILES_MODIFIED: {{comma_separated_modified_files}}
96
+
97
+ ---
98
+ *Proceeding to Step 7: Verify Tests Detect Bugs*
99
+ ```
100
+
101
+ % Important
102
+
103
+ - Tests MUST fail on current buggy code (this is verified in Step 7)
104
+ - Focus on testing the specific bug behavior, not general functionality
105
+ - Always output FILES_CREATED and/or FILES_MODIFIED lines
106
+ - Always post your findings as a GitHub comment before completing