pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt (new file)
@@ -0,0 +1,107 @@
% You are an expert software engineer investigating a bug report. Your task is to design a test plan that will detect this bug.

% Context

You are working on step 6 of 10 in an agentic bug investigation workflow. Previous steps have identified the root cause of the issue.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Steps Output
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

% Your Task

1. **Review existing tests**
   - Find existing test files for the affected module
   - Understand the testing patterns used in this project
   - Identify gaps in current test coverage

2. **Design the test strategy**
   - What specific behavior needs to be tested?
   - What are the inputs that trigger the bug?
   - What is the expected vs actual output?
   - Are there related edge cases to cover?

3. **Plan test location**
   - Which test file should contain the new test?
   - Should it be added to an existing file or create a new one?
   - Follow the project's testing conventions

4. **Define test cases**
   - Primary test: reproduces the exact bug
   - Edge cases: related scenarios that should also work
   - Regression prevention: ensure the fix doesn't break other behavior
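As an aside (this example is not part of the packaged prompt), a minimal pytest sketch of what such a plan typically translates into: one primary test that fails on a deliberately buggy stand-in function, plus one edge case. The `parse_tags` helper and its expected behavior are invented for illustration only.

```python
# Illustration only: a "primary test plus edge case" pair in the shape the
# plan above asks for. parse_tags is a made-up, deliberately buggy stand-in.
def parse_tags(raw: str) -> list[str]:
    # Bug: splits on every comma, even commas inside quotes.
    return [part.strip() for part in raw.split(",")]


def test_parse_tags_keeps_quoted_commas():
    """Primary test: reproduces the exact bug (fails until the bug is fixed)."""
    assert parse_tags('a, "b, c"') == ["a", '"b, c"']


def test_parse_tags_single_value_edge_case():
    """Edge case: a single tag with no commas must pass through unchanged."""
    assert parse_tags("a") == ["a"]
```

The primary test fails on the buggy implementation and passes once a fix lands, which is the acceptance criterion the later steps check.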
% Output

After completing your analysis, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

Your comment should follow this format:

```markdown
## Step 6: Test Plan

### Existing Test Coverage
- **Test file:** `tests/test_module.py`
- **Current coverage:** [Brief description of what's tested]
- **Gap:** [What's not covered that should be]

### Proposed Tests

#### Test 1: [Name describing the bug scenario]
- **Input:** [The triggering condition]
- **Expected:** [Correct behavior]
- **Actual (before fix):** [Bug behavior]

#### Test 2: [Related edge case]
- **Input:** [Edge case condition]
- **Expected:** [Correct behavior]

### Test Location
- **File:** `tests/test_module.py` (append) or `tests/test_module_bug_123.py` (new)
- **Framework:** [pytest/unittest/etc.]

### Notes
[Any special setup, fixtures, or considerations]

---
*Proceeding to Step 7: Generate Test*
```

% Important

- Tests should be specific enough to catch this exact bug
- Tests should not be so brittle they break with valid changes
- Consider both the bug case and related edge cases
- Always post your findings as a GitHub comment before completing
pdd/prompts/agentic_bug_step7_generate_LLM.prompt (new file)
@@ -0,0 +1,172 @@
% You are an expert software engineer investigating a bug report. Your task is to generate a failing unit test that detects the bug.

% Context

You are working on step 7 of 10 in an agentic bug investigation workflow. Previous steps have identified the root cause and planned the test strategy.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Steps Output
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

% Critical: Testing Caller Behavior Bugs

When the bug involves incorrect function call arguments (e.g., caller uses `limit=` but callee expects `k=`):

**DO: Mock the callee and verify caller behavior**
- Use `@patch` or `with patch()` to mock the called function
- Invoke the caller's code path
- Use `mock.call_args.kwargs` to verify the caller passed correct parameter names

Example:
```python
from unittest.mock import patch

def test_caller_uses_correct_parameter():
    with patch('module.callee_function') as mock_callee:
        caller_function()  # This triggers the call
        # Verify caller used 'k=' not 'limit='
        assert 'k' in mock_callee.call_args.kwargs
```

**DO NOT: Test that the callee rejects wrong parameters**
- This anti-pattern tests the callee's signature, not the caller's behavior
- Such tests always pass because the callee was never broken
- Example of WRONG approach: `pytest.raises(TypeError, lambda: callee(limit=5))`

% Worktree Setup

You are now working in an isolated git worktree at: {worktree_path}

**Before writing any tests, set up the worktree environment:**

1. Check what environment/config files exist in the main repo but are missing in the worktree:
   - Look for `.env`, `.env.local`, `.env.test`, or similar files in the repo root (parent of worktree)
   - Check for any config files that are gitignored but needed for tests
   - Look at the project's test configuration (pytest.ini, pyproject.toml, etc.) for hints

2. Symlink or copy necessary files:
   - **Symlink `.env` files** from the main repo if they exist (so environment stays in sync)
   - Example: `ln -s ../../.env .env` (adjust path based on worktree depth)
   - Do NOT copy/symlink `venv`, `node_modules`, or other large dependency directories

3. Verify the test environment works:
   - Try running an existing test to confirm the setup is correct
   - If tests fail due to missing config, investigate and fix before proceeding
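As a side note (not part of the packaged prompt), the symlinking step could equally be scripted in Python; this sketch assumes the worktree sits below the main checkout and uses illustrative paths only.

```python
# Sketch: mirror the main checkout's env files into the worktree via symlinks
# so the two stay in sync. Paths are illustrative placeholders.
from pathlib import Path

def link_env_files(worktree: Path, main_repo: Path) -> None:
    for name in (".env", ".env.local", ".env.test"):
        source = main_repo / name
        link = worktree / name
        if source.exists() and not link.exists():
            link.symlink_to(source)  # same effect as `ln -s <source> <link>`

# e.g. link_env_files(Path("/repo/.worktrees/issue-123"), Path("/repo"))
```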
% Your Task

1. **Review the test plan and implement Step 6's test strategy**
   - Extract the test file path from Step 6's `### Test Location` → `**File:**` field
   - Note if Step 6 marked it `(append)` or `(new)`
   - **CRITICAL: Follow Step 6's test plan exactly** — if Step 6 provided example test code, use that as the template
   - Implement the testing approach Step 6 specified (mocking, assertions, fixtures, etc.) unless there is a mistake in the test plan.
   - Understand the project's testing conventions from existing tests

2. **Generate the test code**
   - Write a clear, focused test that fails on the current buggy code
   - Follow the project's testing framework and style
   - Include descriptive test name and docstring
   - Add comments explaining what the test verifies

3. **Write the test file**
   - Use the EXACT file path from Step 6's Test Location
   - If Step 6 said `(append)`: append to the existing file
   - If Step 6 said `(new)`: create that new file
   - Include necessary imports and fixtures
   - Ensure the test is self-contained and runnable

% Output

After generating the test, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

Your comment should follow this format:

```markdown
## Step 7: Generated Test

### Test File
`{{test_file_path}}`

### Test Code
```{{language}}
{{generated_test_code}}
```

### What This Test Verifies
[Brief explanation of what the test checks and why it fails on buggy code]

### Running the Test
```bash
{{test_run_command}}
```

---
*Proceeding to Step 8: Verification*
```

% Important

- The test MUST fail on the current (buggy) code
- The test should pass once the bug is fixed
- Focus on testing behavior, not implementation details
- Write the test file to disk before posting the comment
- Always post your findings as a GitHub comment before completing

% CRITICAL: Machine-Readable Output (REQUIRED)

**You MUST output exactly one of these lines at the very end of your response.**
This is required for the automation to continue. Without this line, the workflow will fail.

If you created a new test file:
```
FILES_CREATED: path/to/test_file
```

If you modified an existing test file:
```
FILES_MODIFIED: path/to/test_file
```

Examples:
- Python: `FILES_CREATED: backend/tests/test_timeout_fix.py`
- TypeScript: `FILES_CREATED: frontend/src/__tests__/api.test.ts`
- Go: `FILES_CREATED: internal/handler/handler_test.go`
- Multiple files: `FILES_CREATED: tests/test_a.py, tests/test_b.py`

⚠️ IMPORTANT: This line must be the last thing you output. Do not add any text after it.
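How the calling automation consumes this trailer is not shown in this hunk; as a rough sketch, under the assumption that the last matching line of the agent's output wins, the parsing side could look like the following (illustrative only, not pdd's actual orchestrator code).

```python
import re

# Illustrative parser for the FILES_CREATED / FILES_MODIFIED trailer.
MARKER = re.compile(r"^FILES_(CREATED|MODIFIED):\s*(.+)$", re.MULTILINE)

def parse_file_marker(agent_output: str) -> tuple[str, list[str]] | None:
    matches = MARKER.findall(agent_output)
    if not matches:
        return None  # no trailer: the workflow would treat this as a failure
    kind, paths = matches[-1]  # the trailer is expected to be the last line
    return kind, [p.strip() for p in paths.split(",")]

print(parse_file_marker("done\nFILES_CREATED: tests/test_a.py, tests/test_b.py"))
# -> ('CREATED', ['tests/test_a.py', 'tests/test_b.py'])
```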
pdd/prompts/agentic_bug_step8_verify_LLM.prompt (new file)
@@ -0,0 +1,119 @@
% You are an expert software engineer investigating a bug report. Your task is to verify that the generated test successfully detects the bug.

% Context

You are working on step 8 of 10 in an agentic bug investigation workflow. Previous steps have generated a test to detect the bug.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Steps Output
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

<step7_output>
{step7_output}
</step7_output>

% Your Task

1. **Run the generated test**
   - Execute the test against the current codebase
   - Capture the full test output including any error messages
   - Verify it fails as expected (not for unrelated reasons)

2. **Validate the failure**
   - Confirm the test fails for the right reason (the bug)
   - Check that the assertion message is clear and helpful
   - Ensure the failure points to the actual bug location

3. **Summarize the investigation**
   - Recap the bug, root cause, and test created
   - Provide next steps for fixing the bug
   - Note any additional observations
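For illustration (not part of the packaged prompt), one minimal way to script the "fails for the right reason" check with pytest; the test path and the strings being checked are placeholders.

```python
# Sketch: run the generated test and confirm it fails on its own assertion,
# not because of a setup problem. Path and markers are placeholders.
import subprocess

result = subprocess.run(
    ["python", "-m", "pytest", "tests/test_module_bug_123.py", "-x", "-q"],
    capture_output=True,
    text=True,
)
output = result.stdout + result.stderr

assert result.returncode != 0, "test unexpectedly passed on the buggy code"
assert "AssertionError" in output, "test failed, but not on its own assertion"
assert "ImportError" not in output, "failure looks like a setup problem, not the bug"
print(output)
```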
% Output

After completing verification, use `gh issue comment` to post your final report to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

Your comment should follow this format:

```markdown
## Step 8: Verification Complete

### Test Execution
```
{{test_output}}
```

### Verification Status
**[PASS: Test correctly detects the bug | FAIL: Test does not work as expected]**

### Summary

| Step | Result |
|------|--------|
| Duplicate Check | {{step1_status}} |
| Documentation | {{step2_status}} |
| Triage | {{step3_status}} |
| Reproduction | {{step4_status}} |
| Root Cause | {{step5_status}} |
| Test Plan | {{step6_status}} |
| Test Generation | {{step7_status}} |
| Verification | {{step8_status}} |

### Bug Details
- **Location:** `{{file_path}}:{{line_number}}`
- **Root Cause:** {{one_line_summary}}
- **Test File:** `{{test_file_path}}`

### Next Steps
1. Fix the bug at the identified location
2. Run the test to confirm the fix
3. Run full test suite to check for regressions
4. Submit PR with fix and test

---
*Proceeding to Step 9: E2E Tests*
```

% Important

- Provide a complete summary of the investigation so far
- The test must actually fail when run (verify this)
- If the test doesn't fail, go back and fix the test
- The comment should serve as documentation for anyone fixing the bug
- Always post your findings as a GitHub comment before completing
pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt (new file)
@@ -0,0 +1,289 @@
% You are an expert software engineer investigating a bug report. Your task is to generate and run end-to-end (E2E) tests that verify the bug at a system level.

% Context

You are working on step 9 of 10 in an agentic bug investigation workflow. Previous steps have:
- Identified the root cause (Step 5)
- Generated a failing unit test (Step 7)
- Verified the unit test correctly catches the bug (Step 8)

Now you need to create E2E tests that verify the bug at a higher level of integration.

% Inputs

- GitHub Issue URL: {issue_url}
- Repository: {repo_owner}/{repo_name}
- Issue Number: {issue_number}

% Issue Content
<issue_content>
{issue_content}
</issue_content>

% Previous Steps Output
<step1_output>
{step1_output}
</step1_output>

<step2_output>
{step2_output}
</step2_output>

<step3_output>
{step3_output}
</step3_output>

<step4_output>
{step4_output}
</step4_output>

<step5_output>
{step5_output}
</step5_output>

<step6_output>
{step6_output}
</step6_output>

<step7_output>
{step7_output}
</step7_output>

<step8_output>
{step8_output}
</step8_output>

% Worktree Information

You are operating in an isolated git worktree at: {worktree_path}
The unit test file(s) created in Step 7 are: {files_to_stage}

% E2E Test Strategy

E2E tests differ from unit tests:
- **Unit tests** mock dependencies and test individual functions in isolation
- **E2E tests** exercise the full system path that a user or API consumer would take

% CRITICAL: Discover E2E Test Environment First

Before writing any E2E test, you MUST understand the existing test infrastructure. First, identify the primary language of the repository, then search for language-appropriate configuration files.

1. **Identify Repository Language**
   - Check file extensions: `.py`, `.js`, `.ts`, `.go`, `.rs`, `.java`, `.rb`, `.cs`, etc.
   - Read `package.json`, `pyproject.toml`, `go.mod`, `Cargo.toml`, `pom.xml`, `Gemfile` for dependencies

2. **Test Configuration Files (by language)**

   **Python:**
   - `pytest.ini`, `pyproject.toml`, `setup.cfg` - test runner settings
   - `conftest.py` files (especially in `tests/`, `tests/e2e/`, `tests/integration/`) - shared fixtures
   - `tox.ini`, `noxfile.py` - test environment definitions

   **JavaScript/TypeScript:**
   - `jest.config.js`, `jest.config.ts` - Jest configuration
   - `vitest.config.js`, `vitest.config.ts` - Vitest configuration
   - `cypress.config.js`, `playwright.config.ts` - E2E framework configs
   - `__tests__/`, `*.test.js`, `*.spec.ts` patterns

   **Go:**
   - `go.mod` - module definition
   - `*_test.go` files - test files with TestXxx functions
   - `testdata/` directories - test fixtures

   **Rust:**
   - `Cargo.toml` - dependencies and test settings
   - `tests/` directory - integration tests
   - `#[cfg(test)]` modules for unit tests

   **Java:**
   - `pom.xml`, `build.gradle` - Maven/Gradle configs
   - `src/test/java/` - test sources
   - JUnit, TestNG configurations

   **Ruby:**
   - `Gemfile` - dependencies (rspec, minitest, capybara)
   - `spec/`, `test/` directories
   - `spec_helper.rb`, `rails_helper.rb` - RSpec setup

3. **Environment Variables**
   - `.env`, `.env.example`, `.env.test`, `.env.local` - required environment variables
   - `README.md`, `CONTRIBUTING.md` - setup documentation
   - `.github/workflows/*.yml`, `.gitlab-ci.yml`, `Makefile` - CI configs show how tests are run
   - Search for env var access patterns in existing tests:
     - Python: `os.environ`, `os.getenv`, `dotenv`
     - JavaScript: `process.env`, `dotenv`
     - Go: `os.Getenv`
     - Rust: `std::env::var`
     - Java: `System.getenv`

4. **Authentication & Credentials**
   - `*credentials*.json`, `*auth*.json`, `*service_account*.json` patterns
   - Test fixtures that set up auth (look for setup/beforeAll/beforeEach functions)
   - Mock auth utilities or test user credentials
   - Environment variables for API keys, tokens, secrets

5. **Database & External Services**
   - Docker Compose files (`docker-compose*.yml`) for test services
   - Database fixtures, migration scripts for test DBs
   - Mock servers or test doubles for external APIs
   - Emulator configurations (Firebase, GCP, AWS LocalStack, etc.)

6. **Existing E2E Test Patterns**
   - Find files with `e2e`, `integration`, `functional`, `acceptance` in name
   - Study their imports, fixtures, setup/teardown patterns
   - Note how they handle auth, env vars, and external services
   - Copy their test structure and patterns exactly

**If you cannot find sufficient E2E infrastructure:**
- Check if E2E tests require manual setup steps documented elsewhere
- Output `E2E_SKIP: E2E infrastructure not found - manual setup required` with details
- Suggest what infrastructure would be needed in your GitHub comment

% Your Task

1. **Discover the E2E test environment** (REQUIRED FIRST STEP)
   - Search for and read the configuration files listed above
   - Identify required environment variables and how to set them
   - Find existing E2E test fixtures and patterns to reuse
   - Understand authentication requirements and test credentials
   - If critical infrastructure is missing, output `E2E_SKIP` with explanation

2. **Analyze the bug's user-facing impact**
   - How does this bug manifest to end users or API consumers?
   - What user action or API call triggers the bug?
   - What is the expected vs actual behavior from the user's perspective?

3. **Identify the E2E test approach**
   - Determine what type of E2E test is appropriate:
     - **CLI tests**: If the bug affects command-line behavior
       - Python: subprocess, Click's CliRunner
       - JavaScript: execa, child_process
       - Go: os/exec, testing with command args
       - Rust: std::process::Command, assert_cmd crate
     - **API tests**: If the bug affects an API endpoint
       - Python: requests, httpx, FastAPI TestClient
       - JavaScript: supertest, axios, fetch
       - Go: net/http/httptest
       - Rust: reqwest, actix-web test utilities
     - **Browser/UI tests**: If the bug affects web UI
       - Playwright, Cypress, Selenium (cross-language)
       - Puppeteer (JavaScript)
     - **Integration tests**: If the bug affects data flow between components
   - Reuse existing E2E test patterns, fixtures, and utilities from the repository
   - Use the same test runner and framework as existing E2E tests

4. **Generate the E2E test**
   - Create a test that exercises the full code path from user input to output
   - Import and use existing fixtures for auth, database, environment setup
   - Do NOT mock the buggy component - the E2E test should hit the real code
   - The test should fail on the current buggy code
   - Include clear assertions about expected behavior
   - Follow the repository's existing E2E test patterns exactly

5. **Write and run the E2E test file**
   - Place the E2E test in the same location as existing E2E tests
   - Use the same naming conventions as existing E2E tests
   - Ensure required environment variables are set before running
   - Run the test to verify it fails for the right reason (the bug, not setup issues)
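As an illustration of the CLI flavour listed above (not part of the packaged prompt), a sketch using Click's CliRunner; `myapp.cli` and the `report` command are hypothetical, and nothing in the command path is mocked.

```python
# Sketch of a CLI-style E2E test with Click's CliRunner. `myapp.cli` and the
# `report` command are hypothetical; the real command path runs unmocked.
import json

from click.testing import CliRunner

from myapp.cli import cli  # hypothetical Click entry point


def test_report_command_emits_valid_json():
    runner = CliRunner()
    result = runner.invoke(cli, ["report", "--format", "json"])

    assert result.exit_code == 0, result.output
    # On the buggy code this raises, because the command prints malformed JSON.
    json.loads(result.output)
```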
% Critical: E2E Test Requirements

The E2E test MUST:
- Exercise the real code path (minimal mocking)
- Fail on the current buggy code
- Pass once the bug is fixed
- Be deterministic and not flaky
- Complete within a reasonable timeout (< 60 seconds)

The E2E test should NOT:
- Mock the component that contains the bug
- Test edge cases already covered by unit tests
- Require external services that aren't available in CI

% Test Execution

After writing the E2E test, run it to verify:
1. The test executes without setup errors (imports work, fixtures load)
2. The test fails because of the bug (not because of test bugs)
3. The failure message clearly indicates the bug

If the test doesn't fail or fails for the wrong reason:
- Debug and fix the test
- Re-run until it correctly catches the bug
- Output `E2E_FAIL: Test does not catch bug correctly` if you cannot make the test work

% Output

After generating and running the test, use `gh issue comment` to post your findings to issue #{issue_number}:

```
gh issue comment {issue_number} --repo {repo_owner}/{repo_name} --body "..."
```

Your comment should follow this format:

```markdown
## Step 9: E2E Test

### Environment Discovery
- **Repository language:** [Python / JavaScript / TypeScript / Go / Rust / Java / Ruby / etc.]
- **Test framework:** [pytest / Jest / Vitest / Playwright / go test / cargo test / JUnit / RSpec / etc.]
- **Existing E2E tests found:** [Yes/No - list files if found]
- **Test configuration:** [config file name and location]
- **Fixtures/helpers used:** [List shared test utilities discovered]
- **Environment variables required:** [List any env vars needed]
- **Auth/credentials:** [How auth is handled - fixtures, env vars, mock]

### E2E Test File
`{{e2e_test_file_path}}`

### Test Type
[CLI / API / Browser / Integration]

### What This Test Verifies
[Brief explanation of the user-facing behavior being tested]

### Test Code
```{{language}}
{{generated_e2e_test_code}}
```

### Test Execution Result
```
{{test_output}}
```

### E2E Test Status
**[PASS: E2E test correctly detects the bug | FAIL: E2E test does not work as expected]**

---
*Proceeding to Step 10: Create Draft PR*
```

% Machine-Readable Output

After writing files, output this line exactly (for automated tracking):
```
E2E_FILES_CREATED: <comma-separated list of file paths you created>
```
Examples by language:
- Python: `E2E_FILES_CREATED: tests/e2e/test_e2e_issue_123.py`
- JavaScript: `E2E_FILES_CREATED: tests/e2e/issue-123.e2e.test.js`
- TypeScript: `E2E_FILES_CREATED: tests/e2e/issue-123.e2e.spec.ts`
- Go: `E2E_FILES_CREATED: tests/e2e/issue_123_test.go`
- Rust: `E2E_FILES_CREATED: tests/e2e_issue_123.rs`

If you modified existing files instead of creating new ones:
```
E2E_FILES_MODIFIED: <comma-separated list of file paths you modified>
```

% Important

- The E2E test MUST fail on the current (buggy) code
- The test should pass once the bug is fixed
- Focus on the user-facing behavior, not internal implementation
- Run the test and include the actual output in your comment
- Always post your findings as a GitHub comment before completing
- If the test cannot be made to work, output `E2E_FAIL: Test does not catch bug correctly`
- If no E2E test is applicable (e.g., the bug is purely internal with no user-facing impact), output `E2E_SKIP: No E2E test applicable for this bug type` and explain why
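This step can end in three different markers (`E2E_FILES_CREATED`/`E2E_FILES_MODIFIED`, `E2E_SKIP`, or `E2E_FAIL`); the consuming side is not shown in this hunk, but a rough sketch of how automation might branch on them, illustrative only, is:

```python
# Illustrative classifier for step 9's closing markers; not pdd's actual code.
def classify_e2e_result(agent_output: str) -> tuple[str, str]:
    for line in reversed(agent_output.splitlines()):
        if line.startswith(("E2E_FILES_CREATED:", "E2E_FILES_MODIFIED:")):
            return "ok", line.split(":", 1)[1].strip()    # file list
        if line.startswith("E2E_SKIP:"):
            return "skip", line.split(":", 1)[1].strip()  # reason
        if line.startswith("E2E_FAIL:"):
            return "fail", line.split(":", 1)[1].strip()  # reason
    return "missing", ""  # no marker found; treat as a workflow error

print(classify_e2e_result("...\nE2E_SKIP: No E2E test applicable for this bug type"))
# -> ('skip', 'No E2E test applicable for this bug type')
```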