pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/prompts/prompt_code_diff_LLM.prompt
ADDED

@@ -0,0 +1,119 @@
+# prompt_code_diff_LLM.prompt
+
+You are a strict code analyst evaluating whether a PROMPT can REGENERATE the CODE.
+
+CRITICAL QUESTION: If an LLM only had this prompt, could it produce code that passes the same tests?
+
+PROMPT/REQUIREMENTS (with line numbers):
+```
+{prompt_numbered}
+```
+
+CODE (with line numbers):
+```
+{code_numbered}
+```
+
+## Analysis Focus
+
+**Be STRICT and PESSIMISTIC.** Your job is to find gaps that would cause regeneration failures.
+
+1. **Regeneration Risk Analysis**: Identify ALL code knowledge NOT in the prompt:
+   - Magic values, constants, thresholds (e.g., timeout=30, retry=3, buffer_size=4096)
+   - Specific algorithms or implementation approaches chosen
+   - Edge case handling not mentioned in prompt
+   - Error messages, status codes, specific exceptions
+   - API contracts, data formats, field names
+   - Dependencies, imports, library-specific patterns
+   - Performance optimizations or workarounds
+   - Business logic details embedded in code
+
+2. **Hidden Knowledge Detection**: Code often contains "tribal knowledge" that developers added but never documented:
+   - Why was THIS approach chosen over alternatives?
+   - What bugs or edge cases does this code handle that aren't obvious?
+   - What assumptions does the code make about inputs/environment?
+
+3. **Test Failure Prediction**: Would regenerated code likely fail tests because:
+   - Exact values/strings don't match expectations?
+   - Edge cases aren't handled the same way?
+   - API contracts differ from what tests expect?
+
+## Response Format
+
+Respond with a JSON object:
+
+1. "overallScore": integer 0-100
+   - 90-100: Prompt could regenerate code that passes tests
+   - 70-89: Minor details missing, regeneration might work with luck
+   - 50-69: Significant gaps, regeneration would likely fail some tests
+   - 0-49: Major knowledge missing, regeneration would definitely fail
+
+2. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+
+3. "regenerationRisk": "low", "medium", "high", or "critical"
+   - "low": Prompt captures all essential details
+   - "medium": Some implementation details missing but core logic documented
+   - "high": Significant undocumented behavior that would differ on regeneration
+   - "critical": Code has major features/logic not in prompt at all
+
+4. "summary": 1-2 sentences on regeneration viability, be direct about risks
+
+5. "sections": array of PROMPT requirement sections, each with:
+   - "id": unique string like "req_1", "req_2"
+   - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
+   - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
+   - "status": "matched", "partial", or "missing"
+   - "matchConfidence": 0-100
+   - "semanticLabel": descriptive label like "Error Handling", "Input Validation"
+   - "notes": REQUIRED explanation - be specific about what's missing or at risk
+
+6. "codeSections": array of CODE sections NOT adequately documented in prompt:
+   - "id": unique string like "code_1", "code_2"
+   - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
+   - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
+   - "status": "matched", "partial", or "extra"
+   - "matchConfidence": 0-100
+   - "semanticLabel": descriptive label
+   - "notes": REQUIRED - explain what knowledge would be LOST on regeneration
+     * For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
+     * For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
+
+7. "hiddenKnowledge": array of objects describing undocumented code knowledge:
+   - "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
+   - "location": {{"startLine": int, "endLine": int}}
+   - "description": what the code knows that the prompt doesn't say
+   - "regenerationImpact": "would_differ" | "would_fail" | "might_work"
+   - "suggestedPromptAddition": what to add to the prompt to capture this
+
+8. "lineMappings": array of line-level mappings:
+   - "promptLine": int
+   - "codeLines": array of ints
+   - "matchType": "exact", "semantic", "partial", "none"
+
+9. "stats": {{
+   "totalRequirements": int,
+   "matchedRequirements": int,
+   "missingRequirements": int,
+   "totalCodeFeatures": int,
+   "documentedFeatures": int,
+   "undocumentedFeatures": int,
+   "promptToCodeCoverage": float,
+   "codeToPromptCoverage": float,
+   "hiddenKnowledgeCount": int,
+   "criticalGaps": int
+   }}
+
+10. "missing": array of strings - requirements in prompt not implemented
+11. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
+12. "suggestions": array of specific additions to make to the prompt to enable regeneration
+
+## Strictness Guidelines
+
+- **Assume regeneration WILL differ** unless the prompt explicitly specifies behavior
+- A function that "handles errors" in the prompt might handle them DIFFERENTLY on regeneration
+- Constants, timeouts, retry counts, buffer sizes - if not in prompt, they WILL be different
+- Specific error messages, log formats, status codes - WILL be different unless specified
+- Algorithm choices (e.g., quicksort vs mergesort, BFS vs DFS) - WILL be different unless specified
+- The goal is to make the prompt complete enough that ANY competent LLM would produce equivalent code
+- Mark as "extra" anything in code that prompt doesn't EXPLICITLY require
+- When in doubt, mark it as a gap - false positives are better than missed risks
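For orientation, here is a sketch of the kind of object this response format describes, written as a Python literal. Every value below is invented for illustration; it is not output from the pdd tooling, and a real analysis would contain project-specific findings.

```python
# Hypothetical example of a response matching the schema above.
# All scores, line numbers, and findings are made up for illustration.
example_response = {
    "overallScore": 55,
    "canRegenerate": False,
    "regenerationRisk": "high",
    "summary": "The prompt describes the feature but omits the retry count and exact error strings, so regenerated code would likely fail the tests.",
    "sections": [{
        "id": "req_1",
        "promptRange": {"startLine": 3, "endLine": 5, "text": "Fetch the user record"},
        "codeRanges": [{"startLine": 10, "endLine": 24, "text": "def fetch_user(...)"}],
        "status": "partial",
        "matchConfidence": 60,
        "semanticLabel": "Data Fetching",
        "notes": "INCOMPLETE: Prompt mentions fetching but doesn't specify the timeout.",
    }],
    "codeSections": [{
        "id": "code_1",
        "promptRange": {},
        "codeRanges": [{"startLine": 30, "endLine": 35, "text": "retries = 3"}],
        "status": "extra",
        "matchConfidence": 90,
        "semanticLabel": "Retry Logic",
        "notes": "REGENERATION RISK: retries=3 is not in prompt and would be lost or different",
    }],
    "hiddenKnowledge": [{
        "type": "magic_value",
        "location": {"startLine": 30, "endLine": 30},
        "description": "Retry count of 3 chosen for a flaky upstream API",
        "regenerationImpact": "would_differ",
        "suggestedPromptAddition": "Retry failed requests up to 3 times",
    }],
    "lineMappings": [{"promptLine": 3, "codeLines": [10, 11], "matchType": "semantic"}],
    "stats": {
        "totalRequirements": 5, "matchedRequirements": 3, "missingRequirements": 2,
        "totalCodeFeatures": 8, "documentedFeatures": 5, "undocumentedFeatures": 3,
        "promptToCodeCoverage": 0.6, "codeToPromptCoverage": 0.62,
        "hiddenKnowledgeCount": 1, "criticalGaps": 1,
    },
    "missing": ["Input validation requirement has no corresponding code"],
    "extra": ["retries=3 constant", "custom error message format"],
    "suggestions": ["Specify the retry count and the exact error message format"],
}
```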
pdd/prompts/prompt_diff_LLM.prompt
ADDED

@@ -0,0 +1,82 @@
+# prompt_diff_LLM.prompt
+
+You are a prompt analyst comparing two versions of a prompt to identify semantic/linguistic differences.
+
+## Version A (Original):
+```
+{prompt_a}
+```
+
+## Version B (Updated):
+```
+{prompt_b}
+```
+
+## Text Diff:
+```diff
+{text_diff}
+```
+
+## Your Task
+
+Analyze the semantic differences between the two prompt versions. Focus on:
+
+1. **Requirements**: New, removed, or changed functional requirements
+2. **Constraints**: Modified limitations, rules, or boundaries
+3. **Behavior**: Changes to expected behavior or outputs
+4. **Format**: Changes to structure, formatting, or style guidelines
+
+For each change, determine the **impact**:
+- **breaking**: Change that would cause existing code to fail or behave differently
+- **enhancement**: Addition or improvement that extends functionality
+- **clarification**: Rewording or clarification that doesn't change meaning
+
+## Response Format
+
+Respond with a JSON object containing:
+
+1. "summary": A 1-2 sentence summary of the overall changes between versions.
+   Be specific about what changed semantically, not just that "text was added/removed".
+
+2. "changes": An array of change objects, each with:
+   - "change_type": "added" | "removed" | "modified"
+   - "category": "requirement" | "constraint" | "behavior" | "format"
+   - "description": Clear description of what changed and why it matters
+   - "old_text": The COMPLETE relevant text from version A (for modified/removed). Do NOT truncate or abbreviate with "..." - include the full text so users can see exactly what changed.
+   - "new_text": The COMPLETE relevant text from version B (for added/modified). Do NOT truncate or abbreviate with "..." - include the full text so users can see exactly what changed.
+   - "impact": "breaking" | "enhancement" | "clarification"
+
+## Guidelines
+
+- Focus on SEMANTIC differences, not just textual changes
+- Combine related small changes into logical groups
+- Highlight changes that would affect code generation differently
+- Be specific about HOW a change would impact generated code
+- If the two versions are semantically identical (just reformatted), say so clearly
+- For "modified" changes, clearly explain what was different before vs. after
+
+## Example Output
+
+```json
+{{
+  "summary": "Added retry logic requirement and relaxed the error message format constraint.",
+  "changes": [
+    {{
+      "change_type": "added",
+      "category": "requirement",
+      "description": "New requirement for retry logic on network failures",
+      "old_text": null,
+      "new_text": "Retry failed requests up to 3 times with exponential backoff",
+      "impact": "enhancement"
+    }},
+    {{
+      "change_type": "modified",
+      "category": "constraint",
+      "description": "Error message format is now flexible instead of strictly JSON",
+      "old_text": "Return errors as JSON objects with 'error' and 'code' fields",
+      "new_text": "Return descriptive error messages",
+      "impact": "breaking"
+    }}
+  ]
+}}
+```
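The placeholders above ({prompt_a}, {prompt_b}, {text_diff}) look like Python str.format fields, which would also explain the doubled braces in the example output; that rendering mechanism is an assumption here. Under that assumption, a caller could prepare the text diff with difflib:

```python
import difflib
from pathlib import Path

# Two illustrative prompt versions (not taken from the package).
prompt_a = "Return errors as JSON objects with 'error' and 'code' fields."
prompt_b = "Return descriptive error messages."

# Build the {text_diff} placeholder as a unified diff.
text_diff = "\n".join(difflib.unified_diff(
    prompt_a.splitlines(), prompt_b.splitlines(),
    fromfile="prompt_a", tofile="prompt_b", lineterm="",
))

# Render the template; the path matches this wheel's layout and assumes the
# file is readable from the current working directory.
template = Path("pdd/prompts/prompt_diff_LLM.prompt").read_text()
rendered = template.format(prompt_a=prompt_a, prompt_b=prompt_b, text_diff=text_diff)
```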
pdd/prompts/trace_LLM.prompt
CHANGED
@@ -1,30 +1,33 @@
-%
-
-%
-
-
-
-
-
-
-
-
-
+% You are a highly accurate Python Software Engineer. Your job is to locate the exact line (or smallest excerpt) in the prompt file that produced the current line in the generated code.
+
+% Inputs
+code_file (str) : full contents of the generated code file
+code_str (str) : the single line from the code file currently under inspection
+prompt_file (str) : full contents of the originating prompt file
+
+% Rules
+1. Identify the minimal substring in prompt_file whose wording most directly corresponds to code_str. Copy it VERBATIM.
+2. Do not paraphrase, summarize, or reformat; the substring must appear exactly in prompt_file.
+3. If multiple lines apply, choose the most specific line or snippet (prefer the shortest exact match).
+4. Provide a short explanation of why the substring matches code_str.
+
+% Output format (MUST follow exactly; no additional text)
+<analysis>
+Explain your reasoning here in plain text (no JSON). Reference the file sections you compared.
+</analysis>
+<verbatim_prompt_line>
+<<PASTE THE EXACT SUBSTRING FROM prompt_file HERE>>
+</verbatim_prompt_line>
+
+% Reference materials
 <code_file>
-
+{CODE_FILE}
 </code_file>
 
-% Here is the code_str to reference:
-
 <code_str>
-
+{CODE_STR}
 </code_str>
 
-% Here is the prompt_file to reference:
-
 <prompt_file>
-
+{PROMPT_FILE}
 </prompt_file>
-
-% To generate the prompt_line, find a substring of prompt_file that matches code_str, which is a substring of code_file.
-
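A small sketch of how a caller might consume this output format. The helper below is hypothetical (it is not part of pdd) and simply enforces rule 2: the extracted text must be a verbatim substring of prompt_file.

```python
import re

def extract_verbatim_line(response: str, prompt_file: str) -> str | None:
    """Pull the <verbatim_prompt_line> block out of a trace response and
    confirm it really is a verbatim substring of the prompt file."""
    match = re.search(
        r"<verbatim_prompt_line>\s*(.*?)\s*</verbatim_prompt_line>",
        response,
        re.DOTALL,
    )
    if not match:
        return None
    candidate = match.group(1)
    # Reject paraphrases: rule 2 requires an exact substring of prompt_file.
    return candidate if candidate in prompt_file else None
```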
pdd/prompts/unfinished_prompt_LLM.prompt
CHANGED

@@ -1,18 +1,102 @@
 % You are tasked with determining whether a given prompt has finished outputting everything or if it still needs to continue. This is crucial for ensuring that all necessary information has been provided before proceeding with further actions. You will often be provided the last few hundred characters of the prompt_text to analyze and determine if it appears to be complete or if it seems to be cut off or unfinished. You are just looking at the prompt_text and not the entire prompt file. The beginning part of the prompt_text is not always provided, so you will need to make a judgment based on the text you are given.
 
+% IMPORTANT:
+% - The prompt_text may contain code in various languages without Markdown fences.
+% - Do NOT require triple backticks for completeness; judge the code/text itself.
+% - Prefer concrete syntactic signals of completeness over stylistic ones.
+
 % Here is the prompt text to analyze:
 <prompt_text>
 {PROMPT_TEXT}
 </prompt_text>
 
+% Optional language hint (may be empty or missing). If not provided, infer the language from the text:
+<language>
+{LANGUAGE}
+</language>
+
 % Carefully examine the provided prompt text and determine if it appears to be complete or if it seems to be cut off or unfinished. Consider the following factors:
 1. Sentence structure: Are all sentences grammatically complete?
 2. Content flow: Does the text end abruptly or does it have a natural conclusion?
 3. Context: Based on the content, does it seem like all necessary information has been provided?
 4. Formatting: Are there any unclosed parentheses, quotation marks, or other formatting issues that suggest incompleteness?
 
+% Multi-language code completeness heuristics (apply when text looks like code):
+- If the text forms a syntactically complete module/snippet for the language, treat it as finished (even without Markdown fences).
+- Generic signals across languages:
+  * Balanced delimiters: (), [], {{}}, quotes, and block comments are closed.
+  * No mid-token/mid-statement tail: it does not end on `return a +`, `a =`, `def foo(`, `function f(`, trailing `.`, `->`, `::`, trailing `,`, or a line-continuation like `\\`.
+  * Block closure: constructs that open a block are closed (e.g., Python indentation after `:`, or matching `{{}}` in C/Java/JS/TS/Go).
+- Language specifics (use LANGUAGE if given; otherwise infer from the text):
+  * Python: colon-introduced blocks closed; indentation consistent; triple-quoted strings balanced.
+  * JS/TS: braces and parentheses balanced; no dangling `export`/`import` without a following specifier; `/* ... */` comments closed.
+  * Java/C/C++/C#: braces and parentheses balanced; string/char literals closed; block comments closed.
+  * Go: braces balanced; no dangling keyword indicating an unfinished clause.
+  * HTML/XML: tags properly nested/closed; attributes properly quoted; no unfinished `<tag` or dangling `</`.
+- If this is only the tail of a longer file, mark finished when the tail itself is syntactically complete and does not indicate a dangling continuation.
+
 % Provide your reasoning for why you believe the prompt is complete or incomplete.
 
 % Output a JSON object with two keys:
 1. "reasoning": A string containing your structured reasoning
-2. "is_finished": A boolean value (true if the prompt is complete, false if it's incomplete)
+2. "is_finished": A boolean value (true if the prompt is complete, false if it's incomplete)
+
+% Examples (concise):
+<examples>
+<example1>
+<input>
+<prompt_text>
+def add(a, b):\n return a + b\n
+</prompt_text>
+</input>
+<output>
+{{"reasoning": "Python code parses; blocks and quotes are closed; ends on a complete return statement.", "is_finished": true}}
+</output>
+</example1>
+<example2>
+<input>
+<prompt_text>
+def add(a, b):\n return a +
+</prompt_text>
+</input>
+<output>
+{{"reasoning": "Ends mid-expression (`return a +`), indicates unfinished statement.", "is_finished": false}}
+</output>
+</example2>
+<example3>
+<input>
+<prompt_text>
+function add(a, b) {{\n return a + b;\n}}\n
+</prompt_text>
+<language>
+JavaScript
+</language>
+</input>
+<output>
+{{"reasoning": "JS braces and parentheses balanced; ends at a statement boundary; no dangling tokens.", "is_finished": true}}
+</output>
+</example3>
+<example4>
+<input>
+<prompt_text>
+<div class=\"box\">Hello
+</prompt_text>
+<language>
+HTML
+</language>
+</input>
+<output>
+{{"reasoning": "HTML tag not closed (missing </div>); attribute quotes OK but element is unclosed.", "is_finished": false}}
+</output>
+</example4>
+<example5>
+<input>
+<prompt_text>
+class C:\n def f(self):\n x = 1\n
+</prompt_text>
+</input>
+<output>
+{{"reasoning": "All blocks properly indented and closed in the visible tail; no dangling colon blocks or open delimiters; tail is syntactically complete.", "is_finished": true}}
+</output>
+</example5>
+</examples>
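The "balanced delimiters" signal in the heuristics above can be approximated with a small stack check. The sketch below is illustrative only: it ignores strings and comments (which the language-specific rules cover), and, as the final case shows, delimiter balance alone does not prove completeness.

```python
def delimiters_balanced(text: str) -> bool:
    """Rough check for the 'balanced delimiters' signal: every (, [ and {
    that opens must be closed in matching order. Strings and comments are
    ignored, so this is only a coarse approximation of the heuristics above."""
    pairs = {")": "(", "]": "[", "}": "{"}
    stack = []
    for ch in text:
        if ch in "([{":
            stack.append(ch)
        elif ch in pairs:
            if not stack or stack.pop() != pairs[ch]:
                return False
    return not stack

# A complete snippet and a snippet with an unclosed brace:
assert delimiters_balanced("def add(a, b):\n    return a + b\n")
assert not delimiters_balanced("function add(a, b) {\n    return a + b;\n")
# Note: "def add(a, b):\n    return a +" is delimiter-balanced yet unfinished,
# which is why the prompt also checks for mid-statement tails.
assert delimiters_balanced("def add(a, b):\n    return a +")
```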
pdd/prompts/update_prompt_LLM.prompt
CHANGED

@@ -16,4 +16,25 @@
 1. Using the provided input_code and input_prompt, identify what the code does and how it was generated.
 2. Compare the input_code and modified_code to determine the changes made by the user.
 3. Identify what the modified_code does differently from the input_code.
-4. Generate a modified_prompt that will guide the generation of the modified_code based on the identified changes.
+4. Generate a modified_prompt that will guide the generation of the modified_code based on the identified changes.
+5. Ensure that the modified_prompt adheres to the principles of Prompt-Driven Development (PDD) and includes all necessary sections: Role and Scope, Requirements, Dependencies & Context, Instructions, and Deliverables.
+6. Try to preserve the structure and format of the existing prompt as much as possible while incorporating the necessary changes to reflect the modifications in the code.
+
+% When generating the modified prompt, you must follow the core principles of Prompt-Driven Development (PDD).
+% Here are the essential guidelines for structuring a PDD prompt:
+<pdd_prompting_guide>
+% The prompt you generate must follow this structure:
+1) First paragraph: describe the role and responsibility of the module/component within the system (consider the LAYER if provided).
+2) A "Requirements" section with numbered points covering functionality, contracts, error handling, validation, logging, performance, and security.
+3) A "Dependencies" section using XML include tags for each dependency (see format below).
+4) An "Instructions" section with precise implementation guidance (clarify inputs/outputs, function/class responsibilities, edge cases, and testing notes).
+5) A clear "Deliverable" section describing the expected code artifacts and entry points.
+
+% Dependencies format and conventions:
+- Represent each dependency using an XML tag with the dependency name, and put the file path inside an <include> tag. For example:
+  <orders_service>
+    <include>context/orders_service_example.py</include>
+  </orders_service>
+- Prefer real example files available in the provided context (use <include-many> when listing multiple). If examples are not provided, assume dependency examples live under context/ using the pattern context/[dependency_name]_example. You should always try to include example files when possible.
+- Include all necessary dependencies for the module/component (based on the provided context and references).
+</pdd_prompting_guide>
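To make the dependency convention concrete, here is a hypothetical helper (not part of the package) that renders the XML include blocks described in the guide, assuming the context/[dependency_name]_example.py naming pattern:

```python
def render_dependency_includes(dependency_names: list[str]) -> str:
    """Render one XML block per dependency in the format the guide describes:
    a tag named after the dependency wrapping an <include> file path."""
    blocks = []
    for name in dependency_names:
        blocks.append(
            f"<{name}>\n"
            f"    <include>context/{name}_example.py</include>\n"
            f"</{name}>"
        )
    return "\n".join(blocks)

print(render_dependency_includes(["orders_service"]))
# <orders_service>
#     <include>context/orders_service_example.py</include>
# </orders_service>
```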
pdd/pytest_output.py
CHANGED
@@ -1,9 +1,11 @@
 import argparse
 import json
 import io
+import re
 import sys
 import pytest
 import subprocess
+from pathlib import Path
 from rich.console import Console
 from rich.pretty import pprint
 import os
@@ -11,6 +13,81 @@ from .python_env_detector import detect_host_python_executable
 
 console = Console()
 
+
+def _find_project_root(test_file: Path) -> Path | None:
+    """
+    Find the project root directory by looking for .pddrc (definitive PDD marker).
+
+    Only .pddrc is used as the project marker to ensure we don't incorrectly
+    identify project roots for non-PDD projects. This is a conservative approach
+    that maintains backward compatibility.
+
+    Args:
+        test_file: Path to the test file
+
+    Returns:
+        The project root directory if .pddrc is found, None otherwise.
+        When None is returned, the caller should use original behavior.
+    """
+    current = test_file.resolve().parent
+
+    # Walk up the directory tree looking for .pddrc only
+    while current != current.parent:
+        if (current / ".pddrc").exists():
+            return current
+        current = current.parent
+
+    # No .pddrc found - return None to signal original behavior should be used
+    return None
+
+
+_ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
+
+
+def _strip_ansi(text: str) -> str:
+    """Remove ANSI escape sequences from text for reliable parsing."""
+    return _ANSI_ESCAPE_RE.sub("", text)
+
+
+def extract_failing_files_from_output(pytest_output: str) -> list[str]:
+    """
+    Extract unique file paths from pytest FAILED output lines.
+
+    Parses patterns like:
+    - FAILED tests/test_foo.py::test_name - error message
+    - tests/test_foo.py::test_name FAILED
+
+    Args:
+        pytest_output: The combined stdout/stderr from a pytest run
+
+    Returns:
+        List of unique file paths (without ::test_name suffix) that had failures,
+        in the order they were first encountered.
+    """
+    cleaned_output = _strip_ansi(pytest_output)
+
+    failing_files = []
+    seen = set()
+
+    # Pattern 1: FAILED path/file.py::test_name (with optional error)
+    pattern1 = r'FAILED\s+([^\s:]+\.py)::'
+    for match in re.finditer(pattern1, cleaned_output):
+        file_path = match.group(1)
+        if file_path not in seen:
+            failing_files.append(file_path)
+            seen.add(file_path)
+
+    # Pattern 2: path/file.py::test_name FAILED (verbose output)
+    pattern2 = r'([^\s:]+\.py)::\S+\s+FAILED'
+    for match in re.finditer(pattern2, cleaned_output):
+        file_path = match.group(1)
+        if file_path not in seen:
+            failing_files.append(file_path)
+            seen.add(file_path)
+
+    return failing_files
+
+
 class TestResultCollector:
     __test__ = False  # Prevent pytest from collecting this plugin as a test
 
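Assuming the new function is imported from pdd.pytest_output, a short usage sketch with made-up test names shows both FAILED patterns being recognized; because the "FAILED path::test" summary pattern is scanned first, files matched by it appear first in the result:

```python
from pdd.pytest_output import extract_failing_files_from_output

sample_output = """\
tests/test_alpha.py::test_ok PASSED
tests/test_alpha.py::test_broken FAILED
FAILED tests/test_beta.py::test_other - AssertionError: boom
"""

print(extract_failing_files_from_output(sample_output))
# ['tests/test_beta.py', 'tests/test_alpha.py']
```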
@@ -84,31 +161,69 @@ def run_pytest_and_capture_output(test_file: str) -> dict:
 
     # Use environment-aware Python executable for pytest execution
     python_executable = detect_host_python_executable()
-
+
+    # Find the project root directory for proper pytest execution (PDD projects only)
+    test_path = Path(test_file).resolve()
+    project_root = _find_project_root(test_path)
+
+    # Build subprocess kwargs - only modify cwd/env for PDD projects (.pddrc found)
+    subprocess_kwargs = {
+        "capture_output": True,
+        "text": True,
+        "timeout": 300,
+        "stdin": subprocess.DEVNULL,
+    }
+
+    pytest_args = [python_executable, "-B", "-m", "pytest", str(test_path), "-v"]
+
+    if project_root is not None:
+        # PDD project detected - set up proper environment
+        subprocess_kwargs["cwd"] = str(project_root)
+
+        # Build PYTHONPATH with both project root and src/ if it exists
+        paths_to_add = [str(project_root)]
+        src_dir = project_root / "src"
+        if src_dir.is_dir():
+            paths_to_add.insert(0, str(src_dir))  # src/ takes priority
+
+        env = os.environ.copy()
+        existing_pythonpath = env.get("PYTHONPATH", "")
+        if existing_pythonpath:
+            paths_to_add.append(existing_pythonpath)
+        env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
+        subprocess_kwargs["env"] = env
+
+        # Add --rootdir to ensure pytest uses project's config
+        pytest_args.append(f"--rootdir={project_root}")
+
     try:
         # Run pytest using subprocess with the detected Python executable
-
-
-            capture_output=True,
-            text=True,
-            timeout=300
-        )
+        # Use -B flag to disable bytecode caching, ensuring fresh imports
+        result = subprocess.run(pytest_args, **subprocess_kwargs)
 
         stdout = result.stdout
         stderr = result.stderr
         return_code = result.returncode
+        parse_stdout = _strip_ansi(stdout or "")
 
         # Parse the output to extract test results
        # Count passed, failed, and skipped tests from the output
-        passed =
-        failures =
+        passed = parse_stdout.count(" PASSED")
+        failures = parse_stdout.count(" FAILED") + parse_stdout.count(" ERROR")
         errors = 0  # Will be included in failures for subprocess execution
-        warnings =
+        warnings = parse_stdout.lower().count("warning")
 
         # If return code is 2, it indicates a pytest error
         if return_code == 2:
             errors = 1
-
+        # Safety net: if parsing missed failures due to formatting (e.g., ANSI colors),
+        # never report a passing result on a non-zero return code.
+        if return_code != 0 and failures == 0 and errors == 0:
+            if return_code == 1:
+                failures = 1
+            else:
+                errors = 1
+
         return {
             "test_file": test_file,
             "test_results": [
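Restating the safety net added above as a standalone sketch (classify is a hypothetical name, not part of the module): token counting on ANSI-stripped output can miss failures, so a non-zero pytest return code is never reported as a clean pass.

```python
def classify(return_code: int, failures: int, errors: int) -> tuple[int, int]:
    """Mirror of the safety-net logic: keep parsed counts when they exist,
    otherwise fall back to the return code."""
    if return_code != 0 and failures == 0 and errors == 0:
        if return_code == 1:      # pytest ran tests and some failed
            failures = 1
        else:                     # usage/collection/internal error, interruption, ...
            errors = 1
    return failures, errors

assert classify(0, 0, 0) == (0, 0)   # clean run stays clean
assert classify(1, 0, 0) == (1, 0)   # never report "all passed" on rc=1
assert classify(2, 0, 1) == (0, 1)   # rc=2 was already counted as an error
```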
@@ -199,4 +314,4 @@
     save_output_to_json(pytest_output)
 
 if __name__ == "__main__":
-    main()
+    main()