pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt (new file)
@@ -0,0 +1,116 @@
% You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to verify that the unit tests detect the bugs.

% Context

You are working on step 7 of 9 in an agentic e2e fix workflow. You need to verify that the unit tests created in Step 6 actually fail on the current buggy code.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}
- Cycle: {cycle_number}/{max_cycles}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Step Outputs
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

% Your Task

1. **Run the newly created unit tests**
   - Parse Step 6 output for FILES_CREATED/FILES_MODIFIED
   - Execute pytest with the test files from Step 6's FILES_CREATED output
   - Capture the test output

2. **Verify tests fail as expected**
   - Each test should FAIL on the current buggy code
   - The failure message should relate to the bug being detected
   - If a test passes, it's not detecting the bug correctly

3. **Handle unexpected results**
   - If tests pass instead of fail: the test is not detecting the bug
   - If tests error (not assert fail): test setup issue
   - Report issues and suggest fixes

% Output

After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

**If tests properly detect bugs:**
```markdown
## Step 7: Verify Tests Detect Bugs (Cycle {cycle_number})

**Status:** Tests Correctly Detect Bugs

### Test Results
| Test | Status | Error |
|------|--------|-------|
| `test_{{name}}` | FAIL (expected) | {{assertion_error}} |

### Verification Summary
- {{N}} tests correctly fail on buggy code
- Tests will pass once bugs are fixed via `pdd fix`

---
*Proceeding to Step 8: Run pdd fix*
```

**If tests don't detect bugs:**
```markdown
## Step 7: Verify Tests Detect Bugs (Cycle {cycle_number})

**Status:** TESTS_NOT_DETECTING_BUGS

### Problem
| Test | Issue |
|------|-------|
| `test_{{name}}` | Passes when it should fail |

### Analysis
{{explanation of why tests aren't detecting the bug}}

### Fix Applied
{{description of how tests were corrected}}

FILES_MODIFIED: {{comma_separated_list}}

---
*Proceeding to Step 8: Run pdd fix*
```

% Important

- Tests MUST fail on current code to be valid bug-detecting tests
- If tests pass, either the bug is already fixed or the test is wrong
- Always fix invalid tests before proceeding
- Always post your findings as a GitHub comment before completing
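For context, the verification this step 7 prompt asks for can be approximated with a short script: pull the test files out of the previous step's `FILES_CREATED:` line and confirm pytest reports failures on the current code. This is an illustrative sketch, not part of the package; the function names and the exact line format are assumptions.

```python
# Illustrative sketch (not from the pdd package): check that newly created
# tests actually fail on the current, still-buggy code.
import re
import subprocess

def extract_created_tests(step6_output: str) -> list[str]:
    """Pull the comma-separated file list from an assumed 'FILES_CREATED:' line."""
    match = re.search(r"^FILES_CREATED:\s*(.+)$", step6_output, re.MULTILINE)
    return [p.strip() for p in match.group(1).split(",")] if match else []

def tests_detect_bug(test_files: list[str]) -> bool:
    """Return True if pytest fails, i.e. the tests detect the bug as intended."""
    result = subprocess.run(["python", "-m", "pytest", *test_files, "-v"],
                            capture_output=True, text=True)
    return result.returncode != 0  # non-zero exit means at least one test failed
```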
pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt (new file)
@@ -0,0 +1,120 @@
% You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to run pdd fix sequentially on dev units identified during e2e failure analysis.

% Context

You are working on step 8 of 9 in an agentic e2e fix workflow. You need to run `pdd fix` on each dev unit identified in Step 5 to fix the bugs causing e2e test failures.

**Important distinction from Step 1:**
- Step 1: Fixes dev units associated with unit tests **explicitly listed in the GitHub issue** (from `pdd bug`)
- Step 8 (this step): Fixes dev units **identified during e2e failure analysis** in Steps 3-5 (may be different or additional dev units discovered through stack trace analysis)

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}
- Cycle: {cycle_number}/{max_cycles}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Step Outputs
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

<step7_output>
{step7_output}
</step7_output>

% Failing Dev Units
{failing_dev_units}

% Your Task

1. **Run pdd fix for each failing dev unit**
   - For each dev unit from Step 5 (DEV_UNITS_IDENTIFIED):
   - Execute pdd fix in manual mode:
     ```bash
     # Create a temp error file for pdd fix
     touch /tmp/pdd_fix_errors_{dev_unit}.log
     pdd fix --manual prompts/{dev_unit}_python.prompt pdd/{dev_unit}.py tests/test_{dev_unit}.py /tmp/pdd_fix_errors_{dev_unit}.log --loop
     ```
   - Wait for completion before starting next
   - Capture output from each run

2. **Track results**
   - Note which dev units were successfully fixed
   - Note which dev units still have failing tests
   - Capture any error messages

3. **Run unit tests to verify fixes**
   - After all pdd fix runs complete
   - Run: `pytest tests/test_{dev_unit}.py -v` for each
   - Report pass/fail status

% Output

After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

Your comment should follow this format:

```markdown
## Step 8: Run pdd fix (Cycle {cycle_number})

**Status:** {N}/{M} Dev Units Fixed

### pdd fix Results
| Dev Unit | pdd fix Status | Tests After |
|----------|----------------|-------------|
| `{name}` | Success | {N}/{M} pass |
| `{name}` | Failed | {error} |

### Files Changed
{list_of_modified_files}

### Summary
- Dev units fixed: {list}
- Dev units still failing: {list}
- Total pdd fix runs: {N}
- Total cost: ${cost:.4f}

FILES_MODIFIED: {comma_separated_list_of_code_files}

---
*Proceeding to Step 9: Final Verification*
```

% Important

- Run pdd fix SEQUENTIALLY, not in parallel
- Each dev unit may take significant time - be patient
- Track costs from each pdd fix invocation
- Always output FILES_MODIFIED line
- Always post your findings as a GitHub comment before completing
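The sequential loop this step 8 prompt describes could be scripted roughly as follows. The `pdd fix` command line is copied from the prompt above; everything else (function name, path layout, verification step) is an assumption for illustration and not the package's actual orchestrator.

```python
# Illustrative sketch (not from the pdd package): run `pdd fix` once per dev
# unit, strictly one after another, then re-run that unit's tests.
import subprocess

def run_pdd_fix_sequentially(dev_units: list[str]) -> dict[str, bool]:
    results: dict[str, bool] = {}
    for unit in dev_units:
        error_log = f"/tmp/pdd_fix_errors_{unit}.log"
        open(error_log, "a").close()  # equivalent of `touch`
        cmd = ["pdd", "fix", "--manual",
               f"prompts/{unit}_python.prompt", f"pdd/{unit}.py",
               f"tests/test_{unit}.py", error_log, "--loop"]
        fix = subprocess.run(cmd, capture_output=True, text=True)  # blocks until done
        verify = subprocess.run(["python", "-m", "pytest", f"tests/test_{unit}.py", "-v"])
        results[unit] = fix.returncode == 0 and verify.returncode == 0
    return results
```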
pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt (new file)
@@ -0,0 +1,146 @@
% You are an expert software engineer fixing bugs identified in a GitHub issue. Your task is to perform final verification and control the outer loop.

% Context

You are working on step 9 of 9 in an agentic e2e fix workflow. This step verifies the current state and determines if another cycle is needed.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}
- Cycle: {cycle_number}/{max_cycles}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Step Outputs
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

<step7_output>
{step7_output}
</step7_output>

<step8_output>
{step8_output}
</step8_output>

% Your Task

1. **Run full test suite**
   - Run all unit tests: `pytest tests/ -v`
   - Run all e2e tests identified in the issue
   - Capture complete results

2. **Assess current state**
   - How many tests pass vs fail?
   - Are the original bugs fixed?
   - Are there new failures?

3. **Determine next action**
   - If all tests pass: Workflow complete
   - If tests still fail and cycle < max_cycles: Another cycle needed
   - If tests still fail and cycle >= max_cycles: Max cycles reached, report status

% Output

After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

**If all tests pass:**
```markdown
## Step 9: Final Verification (Cycle {cycle_number})

**Status:** ALL_TESTS_PASS

### Test Results
- Unit tests: {N}/{M} pass
- E2E tests: {N}/{M} pass

### Summary
All bugs fixed successfully in {cycle_number} cycle(s).

### Files Changed (Total)
{list_of_all_files_modified}

---
*Workflow complete - all tests passing*
```

**If more cycles needed:**
```markdown
## Step 9: Final Verification (Cycle {cycle_number})

**Status:** CONTINUE_CYCLE

### Test Results
- Unit tests: {N}/{M} pass ({K} still failing)
- E2E tests: {N}/{M} pass ({K} still failing)

### Remaining Failures
| Test | Failure Type |
|------|--------------|
| `test_{name}` | {type} |

### Analysis
{explanation_of_remaining_failures}

---
*Starting Cycle {next_cycle}*
```

**If max cycles reached:**
```markdown
## Step 9: Final Verification (Cycle {cycle_number})

**Status:** MAX_CYCLES_REACHED

### Test Results
- Unit tests: {N}/{M} pass ({K} still failing)
- E2E tests: {N}/{M} pass ({K} still failing)

### Remaining Failures
| Test | Failure Type |
|------|--------------|
| `test_{name}` | {type} |

### Recommendation
{manual_intervention_suggestions}

---
*Workflow stopped - max cycles reached*
```

% Important

- The exact strings "ALL_TESTS_PASS", "CONTINUE_CYCLE", or "MAX_CYCLES_REACHED" control the outer loop
- Be accurate in test counts - don't round or estimate
- Include specific failure information for debugging
- Always post your findings as a GitHub comment before completing
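Per the prompt above, the three status strings are what the outer loop keys off. A minimal sketch of that decision, assuming a hypothetical `next_action` helper rather than the package's actual orchestrator logic:

```python
# Illustrative sketch (not from the pdd package): decide the outer-loop action
# from the status string in the step 9 comment.
def next_action(step9_comment: str, cycle: int, max_cycles: int) -> str:
    if "ALL_TESTS_PASS" in step9_comment:
        return "done"
    if "MAX_CYCLES_REACHED" in step9_comment or cycle >= max_cycles:
        return "stop"            # give up and report remaining failures
    if "CONTINUE_CYCLE" in step9_comment:
        return "run_next_cycle"
    return "stop"                # unrecognized status: fail safe rather than loop forever
```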
pdd/prompts/agentic_fix_explore_LLM.prompt (new file)
@@ -0,0 +1,45 @@
You are fixing a test failure in a PDD (Prompt-Driven Development) project.
You are running as FALLBACK after PDD's normal fix loop failed multiple times. That loop was only allowed to change the code and/or test file.

## PDD Principle
The PROMPT FILE is the source of truth. Code and tests are generated artifacts.
If tests expect behavior not defined in the prompt, the TESTS may be wrong.

## Files (you have full read/write access)
- Prompt file (THE SPEC): {prompt_path}
- Code file: {code_path}
- Test file: {test_path}
- Example program file: {example_program_path}
- Project root: {project_root}

## Previous Fix Attempts
The following shows what PDD's normal fix loop already tried.
DO NOT repeat these approaches - try something different.

{error_content}

## Your Task
1. Read the prompt file to understand the intended behavior
2. Read the code and test files
3. Run the test file to get the error(s)
4. Explore related files (helpers, fixtures, etc.) if needed
5. Determine what needs fixing:
   - Code doesn't match the prompt spec -> fix the code
   - Tests don't match the prompt spec -> fix the tests
   - Tests have implementation issues (mocking, isolation) -> fix test implementation
   - Issue requires changes to other files -> make those changes
6. Make ALL necessary changes to fix the tests
7. Run the example program file to verify the fix didn't break the program
8. Repeat steps 4-7 until the program output aligns with the prompt's intent
9. Output a JSON string with the following fields:
   - success: bool
   - message: str
   - cost: float
   - model: str
   - changed_files: list[str]

## Critical Rules
- The prompt file defines what's correct - code and tests should conform to it
- DO NOT repeat approaches from the fix history above
- You may modify existing files or create new ones
- If the error involves mocking/test isolation, focus on the TEST implementation
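The final step of that prompt asks the agent to emit a JSON string with fixed field names. A caller could parse and sanity-check that result as sketched below; the field names come from the prompt, while the dataclass and function are hypothetical illustrations, not the package's code.

```python
# Illustrative sketch (not from the pdd package): parse the JSON result the
# fallback prompt asks the agent to emit.
import json
from dataclasses import dataclass, field

@dataclass
class FallbackFixResult:
    success: bool
    message: str
    cost: float
    model: str
    changed_files: list[str] = field(default_factory=list)

def parse_fallback_result(raw: str) -> FallbackFixResult:
    data = json.loads(raw)
    return FallbackFixResult(
        success=bool(data["success"]),
        message=str(data["message"]),
        cost=float(data["cost"]),
        model=str(data["model"]),
        changed_files=list(data.get("changed_files", [])),
    )
```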
pdd/prompts/agentic_fix_harvest_only_LLM.prompt (new file)
@@ -0,0 +1,48 @@
% You are a strictly constrained code emitter. Your goal is to make {test_abs} run without errors. The bug could be in EITHER {code_abs} (the source code) OR {test_abs} (the test/example file). Analyze the error carefully to determine which file needs fixing. Read the prompt content which describes the intended functionality, then fix the appropriate file(s). Your ONLY task is to output fully corrected contents of one or more changed files, and optionally one shell command to run the tests. Wrap outputs between the provided BEGIN/END markers. No commentary or extra text.

% IMPORTANT: Analyze the error traceback carefully:
- If the error is in how the test/example USES the code (wrong exception caught, wrong API usage), fix {test_abs}
- If the error is in the code's IMPLEMENTATION (wrong behavior, missing functionality), fix {code_abs}
- You may need to fix BOTH files in some cases

% IMPORTANT: If you see ModuleNotFoundError or ImportError:
- For external packages: include "pip install <package> &&" before the test command in TESTCMD
- For local imports: fix the sys.path or import statement to correctly locate {code_abs}
- The code file is at: {code_abs} - ensure imports can find this path

<inputs>
<paths>
<begin_marker>{begin}</begin_marker>
<end_marker>{end}</end_marker>
<code_file>{code_abs}</code_file>
</paths>

<context>
<prompt_content>
{prompt_content}
</prompt_content>
<relevant_error>
{error_content}
</relevant_error>
</context>
</inputs>

% Follow these instructions:

1) Output ALL files you changed that are needed to make tests pass (source files, tests, or small support files).
   Use one block per file, with this exact wrapping:
   <<<BEGIN_FILE:{code_abs}>>>
   <FULL CORRECTED FILE CONTENT>
   <<<END_FILE:{code_abs}>>>

   If you also modify the test file:
   <<<BEGIN_FILE:{test_abs}>>>
   <FULL CORRECTED FILE CONTENT>
   <<<END_FILE:{test_abs}>>>

2) If you cannot run tests, ALSO print a single block containing the exact shell command to run tests such that it returns 0 on success:
   <<<BEGIN_TESTCMD>>>
   python {test_abs}
   <<<END_TESTCMD>>>

3) Print nothing else. No code fences, no comments, no prose.
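Output following that marker protocol can be harvested mechanically. A minimal sketch, assuming the `<<<BEGIN_FILE:...>>>`/`<<<END_FILE:...>>>` and TESTCMD markers exactly as the prompt specifies them; the parser itself is an illustration, not the package's harvester.

```python
# Illustrative sketch (not from the pdd package): extract changed files and the
# optional test command from marker-wrapped model output.
import re

FILE_BLOCK = re.compile(
    r"<<<BEGIN_FILE:(?P<path>.+?)>>>\n(?P<body>.*?)\n<<<END_FILE:(?P=path)>>>", re.DOTALL)
TESTCMD_BLOCK = re.compile(
    r"<<<BEGIN_TESTCMD>>>\n(?P<cmd>.*?)\n<<<END_TESTCMD>>>", re.DOTALL)

def harvest(output: str) -> tuple[dict[str, str], str | None]:
    """Return {path: full corrected content} plus the test command, if any."""
    files = {m.group("path"): m.group("body") for m in FILE_BLOCK.finditer(output)}
    cmd = TESTCMD_BLOCK.search(output)
    return files, cmd.group("cmd").strip() if cmd else None
```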
pdd/prompts/agentic_fix_primary_LLM.prompt (new file)
@@ -0,0 +1,85 @@
% YOU ARE A DEBUGGING AGENT with full file system access.

% TASK: Fix the failing test at {test_abs}

% APPROACH:
1. Read the error traceback carefully to understand what's failing
2. Explore the relevant files to understand the codebase structure
3. Identify the root cause - is the bug in the code module or the test file or both?
4. Use your file editing tools to make minimal, targeted fixes
5. After fixing, output the test command to verify your changes

% FILES YOU CAN READ AND EDIT:
<code_module>
{code_abs}
</code_module>
<test_file>
{test_abs}
</test_file>


% ORIGINAL SPECIFICATION:
<prompt_content>
{prompt_content}
</prompt_content>


% ERROR LOG:
<error_content>
{error_content}
</error_content>


% DEBUGGING GUIDELINES:
- Analyze the traceback to find WHERE the error occurs and WHY
- The bug could be in EITHER file - don't assume it's always in the code
- If the error is in how the test USES the code → fix the test
- If the error is in the code's IMPLEMENTATION → fix the code
- You may need to fix BOTH files in some cases

% COMMON ERROR TYPES AND FIXES:
- ImportError/ModuleNotFoundError for LOCAL modules: The import statement may be wrong.
  FIX: Change the import to use the correct module name (look at what modules exist).
  DO NOT create new modules to match a wrong import - fix the import instead!
- ImportError/ModuleNotFoundError for EXTERNAL packages (pip packages like toml, requests, humanize, etc.):
  PREFERRED: Install the missing package using TESTCMD:
  <<<BEGIN_TESTCMD>>>
  pip install <package_name> && python -m pytest "{test_abs}" -q
  <<<END_TESTCMD>>>

  DO NOT rewrite the code to remove or replace the dependency unless the specification
  explicitly says the dependency is optional. If the code uses a library, INSTALL IT.

  ONLY use try/except fallback if the specification says the feature is optional:
  ```python
  try:
      import toml
  except ImportError:
      toml = None  # Only if spec says toml is optional
  ```
- TypeError/AttributeError: Check function signatures and method names
- AssertionError: Check if the test expectation or the code logic is wrong
- ZeroDivisionError/ValueError: Add proper error handling
- SyntaxError (unterminated string literal / unexpected character):
  This often means the file has garbage appended at the end (common LLM extraction bug).
  FIX: Read the end of the file and look for JSON-like metadata patterns such as:
  - Lines starting with `"explanation":`, `"focus":`, `"description":`
  - Lines with only `}}` or `]`
  - Code lines ending with `",` followed by JSON keys
  SOLUTION: Delete all the garbage lines at the end of the file to restore valid Python.

% EDIT POLICY:
- Keep changes minimal and directly related to the failure
- Prefer fixing import statements over creating new files
- Prefer fixing implementation bugs over weakening tests
- You MAY create new files if truly needed (e.g., __init__.py for packages)

% AFTER FIXING, OUTPUT VERIFICATION COMMAND:
<<<BEGIN_TESTCMD>>>
python -m pytest "{test_abs}" -q
<<<END_TESTCMD>>>

% IMPORTANT:
- Use your file tools to directly read and modify the files
- Do NOT output the full file contents - just make targeted edits
- The test command will be run automatically to verify your fix worked
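The prompt above ends by having the agent emit a verification command between the TESTCMD markers, to be run automatically afterwards. A minimal sketch of that last step, assuming the command is run through a shell and exit code 0 means the fix held; the helper is illustrative, not the package's runner.

```python
# Illustrative sketch (not from the pdd package): run the agent's emitted test
# command and treat exit code 0 as a successful fix.
import subprocess

def verify_fix(test_cmd: str) -> bool:
    result = subprocess.run(test_cmd, shell=True, capture_output=True, text=True)
    return result.returncode == 0
```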