pdd-cli 0.0.118__py3-none-any.whl → 0.0.121__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +1 -1
- pdd/agentic_bug_orchestrator.py +15 -6
- pdd/agentic_change_orchestrator.py +18 -7
- pdd/agentic_common.py +68 -40
- pdd/agentic_crash.py +2 -1
- pdd/agentic_e2e_fix_orchestrator.py +165 -9
- pdd/agentic_update.py +2 -1
- pdd/agentic_verify.py +3 -2
- pdd/auto_include.py +51 -0
- pdd/commands/analysis.py +32 -25
- pdd/commands/connect.py +69 -1
- pdd/commands/fix.py +31 -13
- pdd/commands/generate.py +5 -0
- pdd/commands/modify.py +47 -11
- pdd/commands/utility.py +12 -7
- pdd/core/cli.py +17 -4
- pdd/core/dump.py +68 -20
- pdd/fix_main.py +4 -2
- pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
- pdd/frontend/dist/index.html +1 -1
- pdd/llm_invoke.py +82 -12
- pdd/operation_log.py +342 -0
- pdd/postprocess.py +122 -100
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +11 -2
- pdd/prompts/generate_test_LLM.prompt +0 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +251 -0
- pdd/prompts/prompt_code_diff_LLM.prompt +29 -25
- pdd/server/routes/prompts.py +26 -1
- pdd/server/terminal_spawner.py +15 -7
- pdd/sync_orchestration.py +164 -147
- pdd/sync_order.py +304 -0
- pdd/update_main.py +48 -24
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/RECORD +37 -35
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +0 -449
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/postprocess.py
CHANGED
|
@@ -1,136 +1,158 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Tuple, Optional
|
|
5
|
+
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from pydantic import BaseModel, Field, ValidationError
|
|
8
|
+
|
|
9
|
+
from . import DEFAULT_STRENGTH, DEFAULT_TIME
|
|
4
10
|
from .load_prompt_template import load_prompt_template
|
|
5
11
|
from .llm_invoke import llm_invoke
|
|
6
|
-
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
7
16
|
|
|
8
17
|
class ExtractedCode(BaseModel):
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
18
|
+
focus: str = Field("", description="Focus of the code")
|
|
19
|
+
explanation: str = Field("", description="Explanation of the code")
|
|
20
|
+
extracted_code: str = Field(..., description="Extracted code")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Opening fence: three backticks followed by an optional language tag and a
# newline. Real-world tags are not purely alphabetic ("python3", "c++", "c#",
# "objective-c", "f90"), so accept word characters plus the punctuation those
# tags use. Whitespace is deliberately excluded so that an inline "```" that is
# followed by ordinary prose is not mistaken for an opening fence.
_FENCE_OPEN = r"```[\w.+#-]*\n"
_COMPLETE_BLOCK_RE = re.compile(_FENCE_OPEN + r"(.*?)\n```", re.DOTALL)
_INCOMPLETE_BLOCK_RE = re.compile(_FENCE_OPEN + r"(.+?)(?:\n```)?$", re.DOTALL)
_PROMPT_TAG_RE = re.compile(r"<prompt>\s*(.*?)\s*</prompt>", re.DOTALL)


def _strip_outer_fence(text: str) -> str:
    """Remove a leading ``` line and, when one was removed, a trailing ``` line."""
    lines = text.splitlines()
    if lines and lines[0].startswith("```"):
        lines = lines[1:]
        # The closing fence is only dropped when an opening fence was found,
        # so a stray trailing ``` in otherwise unfenced text is left alone.
        if lines and lines[-1].startswith("```"):
            lines = lines[:-1]
    return "\n".join(lines)


def postprocess_0(llm_output: str, language: str) -> str:
    """Extract code from LLM output using regular expressions only (no LLM call).

    Args:
        llm_output: Raw model output, possibly mixing prose and fenced code.
        language: Target language. The special value ``"prompt"`` means the
            output is itself a prompt: ``<prompt>`` tags and one outer code
            fence are stripped and everything else is returned unchanged.

    Returns:
        For ``"prompt"``, the unwrapped text. Otherwise the stripped contents
        of every complete fenced block joined with newlines; if there is no
        complete block, the contents of a block whose closing fence is missing;
        if there is no fenced content at all, an empty string.
    """
    if language == "prompt":
        unwrapped = _PROMPT_TAG_RE.sub(r"\1", llm_output).strip()
        return _strip_outer_fence(unwrapped).strip()

    # Prefer blocks that have both an opening and a closing fence.
    complete_blocks = _COMPLETE_BLOCK_RE.findall(llm_output)
    if complete_blocks:
        return "\n".join(block.strip() for block in complete_blocks)

    # Fall back to a block that was opened but never closed (e.g. the model
    # output was cut off), provided there is real content after the fence.
    incomplete = _INCOMPLETE_BLOCK_RE.search(llm_output)
    if incomplete:
        content = incomplete.group(1).strip()
        # An empty block ("```python\n```") would otherwise yield its own
        # closing fence as the content.
        if content and content != "```":
            return content

    return ""
|
|
13
57
|
|
|
14
|
-
def postprocess_0(text: str) -> str:
|
|
15
|
-
"""
|
|
16
|
-
Simple code extraction for strength = 0.
|
|
17
|
-
Extracts code between triple backticks.
|
|
18
|
-
"""
|
|
19
|
-
lines = text.split('\n')
|
|
20
|
-
code_lines = []
|
|
21
|
-
in_code_block = False
|
|
22
|
-
|
|
23
|
-
for line in lines:
|
|
24
|
-
if '```' in line: # MODIFIED: Was line.startswith('```')
|
|
25
|
-
if not in_code_block:
|
|
26
|
-
# Skip the language identifier line / content on opening delimiter line
|
|
27
|
-
in_code_block = True
|
|
28
|
-
continue
|
|
29
|
-
else:
|
|
30
|
-
# Content on closing delimiter line is skipped
|
|
31
|
-
in_code_block = False
|
|
32
|
-
continue
|
|
33
|
-
if in_code_block:
|
|
34
|
-
code_lines.append(line)
|
|
35
|
-
|
|
36
|
-
return '\n'.join(code_lines)
|
|
37
58
|
|
|
38
59
|
def postprocess(
|
|
39
60
|
llm_output: str,
|
|
40
61
|
language: str,
|
|
41
62
|
strength: float = DEFAULT_STRENGTH,
|
|
42
|
-
temperature: float = 0,
|
|
63
|
+
temperature: float = 0.0,
|
|
43
64
|
time: float = DEFAULT_TIME,
|
|
44
|
-
verbose: bool = False
|
|
65
|
+
verbose: bool = False,
|
|
45
66
|
) -> Tuple[str, float, str]:
|
|
46
67
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
68
|
+
Extracts code from a string output of an LLM.
|
|
69
|
+
|
|
49
70
|
Args:
|
|
50
|
-
llm_output
|
|
51
|
-
language
|
|
52
|
-
strength
|
|
53
|
-
temperature
|
|
54
|
-
time
|
|
55
|
-
verbose
|
|
56
|
-
|
|
71
|
+
llm_output: A string containing a mix of text and code sections.
|
|
72
|
+
language: A string specifying the programming language of the code to be extracted.
|
|
73
|
+
strength: A float between 0 and 1 that represents the strength of the LLM model to use.
|
|
74
|
+
temperature: A float between 0 and 1 that represents the temperature parameter for the LLM model.
|
|
75
|
+
time: A float between 0 and 1 that controls the thinking effort for the LLM model.
|
|
76
|
+
verbose: A boolean that indicates whether to print detailed processing information.
|
|
77
|
+
|
|
57
78
|
Returns:
|
|
58
|
-
|
|
79
|
+
A tuple containing the extracted code string, total cost float and model name string.
|
|
59
80
|
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if strength == 0:
|
|
73
|
-
if verbose:
|
|
74
|
-
print("[blue]Using simple code extraction (strength = 0)[/blue]")
|
|
75
|
-
return (postprocess_0(llm_output), 0.0, "simple_extraction")
|
|
76
|
-
|
|
77
|
-
# Step 2: Load the prompt template
|
|
78
|
-
prompt_template = load_prompt_template("extract_code_LLM")
|
|
79
|
-
if not prompt_template:
|
|
80
|
-
raise ValueError("Failed to load prompt template")
|
|
81
|
+
if not isinstance(llm_output, str) or not llm_output:
|
|
82
|
+
raise ValueError("llm_output must be a non-empty string")
|
|
83
|
+
if not isinstance(language, str) or not language:
|
|
84
|
+
raise ValueError("language must be a non-empty string")
|
|
85
|
+
if not isinstance(strength, (int, float)):
|
|
86
|
+
raise TypeError("strength must be a number")
|
|
87
|
+
if not 0 <= strength <= 1:
|
|
88
|
+
raise ValueError("strength must be between 0 and 1")
|
|
89
|
+
if not isinstance(temperature, (int, float)):
|
|
90
|
+
raise TypeError("temperature must be a number")
|
|
91
|
+
if not 0 <= temperature <= 1:
|
|
92
|
+
raise ValueError("temperature must be between 0 and 1")
|
|
81
93
|
|
|
94
|
+
if language == "prompt":
|
|
95
|
+
extracted_code = postprocess_0(llm_output, language)
|
|
96
|
+
return extracted_code, 0.0, "simple_extraction"
|
|
97
|
+
|
|
98
|
+
if strength == 0:
|
|
99
|
+
extracted_code = postprocess_0(llm_output, language)
|
|
82
100
|
if verbose:
|
|
83
|
-
print("[blue]
|
|
101
|
+
console.print("[blue]Using simple code extraction (strength = 0)[/blue]")
|
|
102
|
+
return extracted_code, 0.0, "simple_extraction"
|
|
103
|
+
|
|
104
|
+
prompt_name = "extract_code_LLM"
|
|
105
|
+
prompt = load_prompt_template(prompt_name)
|
|
84
106
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
}
|
|
107
|
+
if not prompt:
|
|
108
|
+
error_msg = "Failed to load prompt template"
|
|
109
|
+
console.print(f"[red]Error:[/red] {error_msg}")
|
|
110
|
+
raise ValueError(error_msg)
|
|
90
111
|
|
|
91
|
-
|
|
92
|
-
|
|
112
|
+
input_json = {"llm_output": llm_output, "language": language}
|
|
113
|
+
|
|
114
|
+
if verbose:
|
|
115
|
+
console.print("[blue]Loaded prompt template for code extraction[/blue]")
|
|
116
|
+
|
|
117
|
+
try:
|
|
118
|
+
result = llm_invoke(
|
|
119
|
+
prompt=prompt,
|
|
93
120
|
input_json=input_json,
|
|
94
121
|
strength=strength,
|
|
95
122
|
temperature=temperature,
|
|
96
123
|
time=time,
|
|
97
|
-
verbose=verbose,
|
|
98
124
|
output_pydantic=ExtractedCode,
|
|
99
|
-
|
|
125
|
+
verbose=verbose,
|
|
100
126
|
)
|
|
101
127
|
|
|
102
|
-
if not
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if not isinstance(result_obj, ExtractedCode):
|
|
107
|
-
# If we got a string (likely an error message from llm_invoke), fallback to simple extraction
|
|
108
|
-
if verbose:
|
|
109
|
-
print(f"[yellow]Structured extraction failed ({result_obj}). Falling back to simple extraction.[/yellow]")
|
|
110
|
-
return (postprocess_0(llm_output), response.get('cost', 0.0), response.get('model_name', 'fallback'))
|
|
128
|
+
if not result or "result" not in result:
|
|
129
|
+
error_msg = "Failed to get valid response from LLM"
|
|
130
|
+
console.print(f"[red]Error during LLM invocation:[/red] {error_msg}")
|
|
131
|
+
raise ValueError(error_msg)
|
|
111
132
|
|
|
112
|
-
|
|
113
|
-
code_text = extracted_code_obj.extracted_code
|
|
133
|
+
extracted_code = result["result"].extracted_code
|
|
114
134
|
|
|
115
|
-
#
|
|
116
|
-
lines =
|
|
117
|
-
if lines and lines[0].startswith(
|
|
135
|
+
# Clean up triple backticks
|
|
136
|
+
lines = extracted_code.splitlines()
|
|
137
|
+
if lines and lines[0].startswith("```"):
|
|
138
|
+
# Remove first line with opening backticks
|
|
118
139
|
lines = lines[1:]
|
|
119
|
-
|
|
120
|
-
lines
|
|
121
|
-
|
|
122
|
-
|
|
140
|
+
# If there's a last line with closing backticks, remove it
|
|
141
|
+
if lines and lines[-1].startswith("```"):
|
|
142
|
+
lines = lines[:-1]
|
|
143
|
+
extracted_code = "\n".join(lines)
|
|
144
|
+
|
|
145
|
+
total_cost = result["cost"]
|
|
146
|
+
model_name = result["model_name"]
|
|
123
147
|
|
|
124
148
|
if verbose:
|
|
125
|
-
print("[green]Successfully extracted code[/green]")
|
|
149
|
+
console.print("[green]Successfully extracted code[/green]")
|
|
126
150
|
|
|
127
|
-
|
|
128
|
-
return (
|
|
129
|
-
final_code,
|
|
130
|
-
response['cost'],
|
|
131
|
-
response['model_name']
|
|
132
|
-
)
|
|
151
|
+
return extracted_code, total_cost, model_name
|
|
133
152
|
|
|
153
|
+
except KeyError as e:
|
|
154
|
+
console.print(f"[red]Error in postprocess: {e}[/red]")
|
|
155
|
+
raise ValueError(f"Failed to get valid response from LLM: missing key {e}")
|
|
134
156
|
except Exception as e:
|
|
135
|
-
print(f"[red]Error in postprocess: {
|
|
157
|
+
console.print(f"[red]Error in postprocess: {e}[/red]")
|
|
136
158
|
raise
|
|
@@ -13,6 +13,8 @@ You are working on step 12 of 12 in an agentic change workflow. This is the fina
|
|
|
13
13
|
- Worktree Path: {worktree_path}
|
|
14
14
|
- Branch Name: change/issue-{issue_number}
|
|
15
15
|
- Files Changed: {files_to_stage}
|
|
16
|
+
- Sync Order Script: {sync_order_script}
|
|
17
|
+
- Sync Order Commands: {sync_order_list}
|
|
16
18
|
|
|
17
19
|
% Issue Content
|
|
18
20
|
<issue_content>
|
|
@@ -91,7 +93,14 @@ Closes #{issue_number}
|
|
|
91
93
|
|
|
92
94
|
## Next Steps After Merge
|
|
93
95
|
|
|
94
|
-
1.
|
|
96
|
+
1. Regenerate code from modified prompts in dependency order:
|
|
97
|
+
```bash
|
|
98
|
+
./sync_order.sh
|
|
99
|
+
```
|
|
100
|
+
Or manually:
|
|
101
|
+
```
|
|
102
|
+
{sync_order_list}
|
|
103
|
+
```
|
|
95
104
|
2. Run tests to verify functionality
|
|
96
105
|
3. Deploy if applicable
|
|
97
106
|
|
|
@@ -115,7 +124,7 @@ Closes #{issue_number}
|
|
|
115
124
|
|
|
116
125
|
### What's Next
|
|
117
126
|
1. Review the PR
|
|
118
|
-
2. Run `
|
|
127
|
+
2. Run `./sync_order.sh` after merge to regenerate code in dependency order
|
|
119
128
|
3. Run tests to verify
|
|
120
129
|
|
|
121
130
|
---
|
|
@@ -41,7 +41,6 @@
|
|
|
41
41
|
- Prefer function-scoped test resources over shared/module-scoped ones to ensure isolation
|
|
42
42
|
|
|
43
43
|
<include>context/test.prompt</include>
|
|
44
|
-
<include>context/pytest_isolation_example.py</include>
|
|
45
44
|
|
|
46
45
|
<instructions>
|
|
47
46
|
1. FIRST: Carefully analyze the ACTUAL code provided in code_under_test:
|
|
@@ -98,6 +98,257 @@
|
|
|
98
98
|
- Prefer function-scoped fixtures over module or session scope
|
|
99
99
|
- Use yield in fixtures to ensure cleanup runs even on test failure
|
|
100
100
|
|
|
101
|
+
% 7. MODULE-LEVEL SYS.MODULES FOR IMPORT-TIME DEPENDENCIES:
|
|
102
|
+
- Sometimes you must mock modules BEFORE importing the code under test
|
|
103
|
+
(e.g., when decorators or top-level imports need mocking)
|
|
104
|
+
- ALWAYS save original values, apply mocks, load module, then RESTORE immediately
|
|
105
|
+
- BAD: sys.modules.update(mocks); exec_module(...) # No cleanup - pollutes all tests!
|
|
106
|
+
- GOOD: See PATTERN 7 in the <isolation_example> block below (pytest_isolation_example.py) for the full save/restore pattern
|
|
107
|
+
|
|
108
|
+
<isolation_example>
|
|
109
|
+
"""
|
|
110
|
+
Example code patterns demonstrating proper test isolation to prevent test pollution.
|
|
111
|
+
|
|
112
|
+
This file provides reference implementations of CORRECT patterns that should be used
|
|
113
|
+
in generated tests. These patterns prevent test pollution and ensure tests are independent.
|
|
114
|
+
|
|
115
|
+
IMPORTANT: This is a context file for the LLM, not a runnable test file.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
import os
|
|
119
|
+
import sys
|
|
120
|
+
from pathlib import Path
|
|
121
|
+
from unittest.mock import MagicMock, patch
|
|
122
|
+
|
|
123
|
+
import pytest
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# =============================================================================
|
|
127
|
+
# PATTERN 1: Environment Variable Handling with monkeypatch
|
|
128
|
+
# =============================================================================
|
|
129
|
+
|
|
130
|
+
def test_set_env_var_with_monkeypatch(monkeypatch):
|
|
131
|
+
"""GOOD: Use monkeypatch.setenv() for setting env vars.
|
|
132
|
+
|
|
133
|
+
monkeypatch automatically restores the original value after the test,
|
|
134
|
+
preventing pollution of subsequent tests.
|
|
135
|
+
"""
|
|
136
|
+
monkeypatch.setenv("TEST_API_KEY", "test_key_123")
|
|
137
|
+
assert os.environ["TEST_API_KEY"] == "test_key_123"
|
|
138
|
+
# Automatically cleaned up after test
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def test_delete_env_var_with_monkeypatch(monkeypatch):
|
|
142
|
+
"""GOOD: Use monkeypatch.delenv() for removing env vars."""
|
|
143
|
+
monkeypatch.setenv("TEMP_VAR_TO_DELETE", "value")
|
|
144
|
+
monkeypatch.delenv("TEMP_VAR_TO_DELETE")
|
|
145
|
+
assert "TEMP_VAR_TO_DELETE" not in os.environ
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_multiple_env_vars(monkeypatch):
|
|
149
|
+
"""GOOD: Set multiple env vars safely with monkeypatch."""
|
|
150
|
+
monkeypatch.setenv("VAR_ONE", "value1")
|
|
151
|
+
monkeypatch.setenv("VAR_TWO", "value2")
|
|
152
|
+
monkeypatch.setenv("VAR_THREE", "value3")
|
|
153
|
+
# All automatically cleaned up
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# =============================================================================
|
|
157
|
+
# PATTERN 2: Mocking with monkeypatch and context managers
|
|
158
|
+
# =============================================================================
|
|
159
|
+
|
|
160
|
+
def test_mock_function_with_monkeypatch(monkeypatch):
|
|
161
|
+
"""GOOD: Use monkeypatch.setattr() for mocking functions."""
|
|
162
|
+
def mock_getcwd():
|
|
163
|
+
return "/mock/path"
|
|
164
|
+
|
|
165
|
+
monkeypatch.setattr(os, "getcwd", mock_getcwd)
|
|
166
|
+
assert os.getcwd() == "/mock/path"
|
|
167
|
+
# Original function automatically restored after test
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_mock_with_context_manager():
|
|
171
|
+
"""GOOD: Use patch as context manager for scoped mocking."""
|
|
172
|
+
with patch("os.path.exists") as mock_exists:
|
|
173
|
+
mock_exists.return_value = True
|
|
174
|
+
assert os.path.exists("/fake/nonexistent/path") is True
|
|
175
|
+
# Mock is automatically removed when context exits
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# =============================================================================
|
|
179
|
+
# PATTERN 3: File System Operations with tmp_path
|
|
180
|
+
# =============================================================================
|
|
181
|
+
|
|
182
|
+
def test_create_temp_file(tmp_path):
|
|
183
|
+
"""GOOD: Use tmp_path fixture for temporary files."""
|
|
184
|
+
test_file = tmp_path / "test_output.txt"
|
|
185
|
+
test_file.write_text("test content")
|
|
186
|
+
assert test_file.exists()
|
|
187
|
+
assert test_file.read_text() == "test content"
|
|
188
|
+
# tmp_path is automatically cleaned up by pytest
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def test_create_temp_directory_structure(tmp_path):
|
|
192
|
+
"""GOOD: Create directory structures in tmp_path."""
|
|
193
|
+
subdir = tmp_path / "subdir" / "nested"
|
|
194
|
+
subdir.mkdir(parents=True)
|
|
195
|
+
config_file = subdir / "config.json"
|
|
196
|
+
config_file.write_text('{{"key": "value"}}')
|
|
197
|
+
assert config_file.exists()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# =============================================================================
|
|
201
|
+
# PATTERN 4: Fixtures with Proper Cleanup
|
|
202
|
+
# =============================================================================
|
|
203
|
+
|
|
204
|
+
@pytest.fixture
|
|
205
|
+
def resource_with_cleanup():
|
|
206
|
+
"""GOOD: Fixture with proper cleanup using yield.
|
|
207
|
+
|
|
208
|
+
The cleanup code after yield always runs, even if the test fails.
|
|
209
|
+
"""
|
|
210
|
+
# Setup
|
|
211
|
+
resource = {{"initialized": True, "data": []}}
|
|
212
|
+
yield resource
|
|
213
|
+
# Cleanup - always runs
|
|
214
|
+
resource["initialized"] = False
|
|
215
|
+
resource["data"].clear()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@pytest.fixture
|
|
219
|
+
def mock_module_with_cleanup():
|
|
220
|
+
"""GOOD: Fixture for sys.modules with save/restore.
|
|
221
|
+
|
|
222
|
+
This pattern ensures sys.modules is always restored to its original
|
|
223
|
+
state after the test, preventing pollution.
|
|
224
|
+
"""
|
|
225
|
+
module_name = "test_mock_module"
|
|
226
|
+
saved = sys.modules.get(module_name)
|
|
227
|
+
|
|
228
|
+
mock_module = MagicMock()
|
|
229
|
+
sys.modules[module_name] = mock_module
|
|
230
|
+
|
|
231
|
+
yield mock_module
|
|
232
|
+
|
|
233
|
+
# Cleanup - restore original state
|
|
234
|
+
if saved is not None:
|
|
235
|
+
sys.modules[module_name] = saved
|
|
236
|
+
elif module_name in sys.modules:
|
|
237
|
+
del sys.modules[module_name]
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def test_with_resource_cleanup(resource_with_cleanup):
|
|
241
|
+
"""Test using fixture with automatic cleanup."""
|
|
242
|
+
assert resource_with_cleanup["initialized"] is True
|
|
243
|
+
resource_with_cleanup["data"].append("test_item")
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_with_mock_module_cleanup(mock_module_with_cleanup):
|
|
247
|
+
"""Test using sys.modules fixture with cleanup."""
|
|
248
|
+
assert "test_mock_module" in sys.modules
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# =============================================================================
|
|
252
|
+
# PATTERN 5: Exception Testing with Context Manager
|
|
253
|
+
# =============================================================================
|
|
254
|
+
|
|
255
|
+
def test_exception_with_context_manager():
|
|
256
|
+
"""GOOD: Use pytest.raises() as context manager."""
|
|
257
|
+
with pytest.raises(ValueError) as exc_info:
|
|
258
|
+
raise ValueError("expected error message")
|
|
259
|
+
assert "expected error message" in str(exc_info.value)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def test_exception_with_match():
|
|
263
|
+
"""GOOD: Use match parameter for regex matching."""
|
|
264
|
+
with pytest.raises(ValueError, match=r"invalid.*value"):
|
|
265
|
+
raise ValueError("invalid input value provided")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# =============================================================================
|
|
269
|
+
# PATTERN 6: Combining Multiple Isolation Techniques
|
|
270
|
+
# =============================================================================
|
|
271
|
+
|
|
272
|
+
def test_combined_env_and_file(monkeypatch, tmp_path):
|
|
273
|
+
"""GOOD: Combine monkeypatch and tmp_path for full isolation."""
|
|
274
|
+
config_path = tmp_path / "config"
|
|
275
|
+
config_path.mkdir()
|
|
276
|
+
monkeypatch.setenv("CONFIG_DIR", str(config_path))
|
|
277
|
+
|
|
278
|
+
config_file = config_path / "settings.json"
|
|
279
|
+
config_file.write_text('{{"debug": true}}')
|
|
280
|
+
|
|
281
|
+
assert os.environ["CONFIG_DIR"] == str(config_path)
|
|
282
|
+
assert config_file.exists()
|
|
283
|
+
# Both automatically cleaned up
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def test_combined_mock_and_env(monkeypatch):
|
|
287
|
+
"""GOOD: Combine function mocking with environment variables."""
|
|
288
|
+
monkeypatch.setattr(os.path, "isfile", lambda x: True)
|
|
289
|
+
monkeypatch.setenv("TEST_MODE", "true")
|
|
290
|
+
|
|
291
|
+
assert os.path.isfile("/any/path") is True
|
|
292
|
+
assert os.environ["TEST_MODE"] == "true"
|
|
293
|
+
# Both automatically cleaned up
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# =============================================================================
|
|
297
|
+
# PATTERN 7: Module-Level sys.modules for Import-Time Dependencies
|
|
298
|
+
# =============================================================================
|
|
299
|
+
#
|
|
300
|
+
# When you need to mock modules BEFORE importing code under test
|
|
301
|
+
# (e.g., for decorators or top-level imports), use this pattern.
|
|
302
|
+
#
|
|
303
|
+
# This is necessary when the code under test has decorators or module-level
|
|
304
|
+
# imports that you need to mock. Unlike fixture-based mocking, this happens
|
|
305
|
+
# at test file load time, before any test functions run.
|
|
306
|
+
#
|
|
307
|
+
# CRITICAL: You MUST restore original modules after loading, or you will
|
|
308
|
+
# pollute sys.modules for all other test files during collection!
|
|
309
|
+
#
|
|
310
|
+
# Example usage (place at module level, outside any test function):
|
|
311
|
+
#
|
|
312
|
+
# import importlib.util
|
|
313
|
+
# from unittest.mock import MagicMock
|
|
314
|
+
#
|
|
315
|
+
# # Step 1: Define mocks for dependencies that need mocking at import time
|
|
316
|
+
# _mock_decorator = lambda f: f # Pass-through decorator
|
|
317
|
+
# _mock_dependency = MagicMock(some_decorator=_mock_decorator)
|
|
318
|
+
# _module_mocks = {{
|
|
319
|
+
# "some.dependency": _mock_dependency,
|
|
320
|
+
# }}
|
|
321
|
+
#
|
|
322
|
+
# # Step 2: Save originals BEFORE patching
|
|
323
|
+
# _original_modules = {{key: sys.modules.get(key) for key in _module_mocks}}
|
|
324
|
+
#
|
|
325
|
+
# # Step 3: Apply mocks to sys.modules
|
|
326
|
+
# sys.modules.update(_module_mocks)
|
|
327
|
+
#
|
|
328
|
+
# # Step 4: Load the module under test using importlib
|
|
329
|
+
# _module_path = os.path.join(os.path.dirname(__file__), "..", "src", "module.py")
|
|
330
|
+
# _module_path = os.path.abspath(_module_path)
|
|
331
|
+
# _spec = importlib.util.spec_from_file_location("my_module", _module_path)
|
|
332
|
+
# _module = importlib.util.module_from_spec(_spec)
|
|
333
|
+
# sys.modules["my_module"] = _module
|
|
334
|
+
# _spec.loader.exec_module(_module)
|
|
335
|
+
# function_to_test = _module.function_to_test
|
|
336
|
+
#
|
|
337
|
+
# # Step 5: RESTORE originals immediately after load
|
|
338
|
+
# # This is CRITICAL to avoid polluting other test files!
|
|
339
|
+
# for key, original in _original_modules.items():
|
|
340
|
+
# if original is None:
|
|
341
|
+
# sys.modules.pop(key, None)
|
|
342
|
+
# else:
|
|
343
|
+
# sys.modules[key] = original
|
|
344
|
+
#
|
|
345
|
+
# # Now you can write normal test functions using function_to_test
|
|
346
|
+
# def test_something():
|
|
347
|
+
# result = function_to_test()
|
|
348
|
+
# assert result == expected
|
|
349
|
+
|
|
350
|
+
</isolation_example>
|
|
351
|
+
|
|
101
352
|
|
|
102
353
|
<instructions>
|
|
103
354
|
1. FIRST: Carefully analyze the EXAMPLE to understand:
|
|
@@ -48,17 +48,21 @@ Respond with a JSON object:
|
|
|
48
48
|
- 50-69: Significant gaps, regeneration would likely fail some tests
|
|
49
49
|
- 0-49: Major knowledge missing, regeneration would definitely fail
|
|
50
50
|
|
|
51
|
-
2. "
|
|
51
|
+
2. "promptToCodeScore": integer 0-100 - How well the code implements the prompt requirements
|
|
52
52
|
|
|
53
|
-
3. "
|
|
53
|
+
3. "codeToPromptScore": integer 0-100 - How well the prompt documents/describes the code
|
|
54
|
+
|
|
55
|
+
4. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
|
|
56
|
+
|
|
57
|
+
5. "regenerationRisk": "low", "medium", "high", or "critical"
|
|
54
58
|
- "low": Prompt captures all essential details
|
|
55
59
|
- "medium": Some implementation details missing but core logic documented
|
|
56
60
|
- "high": Significant undocumented behavior that would differ on regeneration
|
|
57
61
|
- "critical": Code has major features/logic not in prompt at all
|
|
58
62
|
|
|
59
|
-
|
|
63
|
+
6. "summary": 1-2 sentences on regeneration viability, be direct about risks
|
|
60
64
|
|
|
61
|
-
|
|
65
|
+
7. "sections": array of PROMPT requirement sections, each with:
|
|
62
66
|
- "id": unique string like "req_1", "req_2"
|
|
63
67
|
- "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
|
|
64
68
|
- "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
|
|
@@ -67,7 +71,7 @@ Respond with a JSON object:
|
|
|
67
71
|
- "semanticLabel": descriptive label like "Error Handling", "Input Validation"
|
|
68
72
|
- "notes": REQUIRED explanation - be specific about what's missing or at risk
|
|
69
73
|
|
|
70
|
-
|
|
74
|
+
8. "codeSections": array of CODE sections NOT adequately documented in prompt:
|
|
71
75
|
- "id": unique string like "code_1", "code_2"
|
|
72
76
|
- "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
|
|
73
77
|
- "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
|
|
@@ -78,34 +82,34 @@ Respond with a JSON object:
|
|
|
78
82
|
* For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
|
|
79
83
|
* For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
|
|
80
84
|
|
|
81
|
-
|
|
85
|
+
9. "hiddenKnowledge": array of objects describing undocumented code knowledge:
|
|
82
86
|
- "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
|
|
83
87
|
- "location": {{"startLine": int, "endLine": int}}
|
|
84
88
|
- "description": what the code knows that the prompt doesn't say
|
|
85
89
|
- "regenerationImpact": "would_differ" | "would_fail" | "might_work"
|
|
86
90
|
- "suggestedPromptAddition": what to add to the prompt to capture this
|
|
87
91
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
10. "lineMappings": array of line-level mappings:
|
|
93
|
+
- "promptLine": int
|
|
94
|
+
- "codeLines": array of ints
|
|
95
|
+
- "matchType": "exact", "semantic", "partial", "none"
|
|
96
|
+
|
|
97
|
+
11. "stats": {{
|
|
98
|
+
"totalRequirements": int,
|
|
99
|
+
"matchedRequirements": int,
|
|
100
|
+
"missingRequirements": int,
|
|
101
|
+
"totalCodeFeatures": int,
|
|
102
|
+
"documentedFeatures": int,
|
|
103
|
+
"undocumentedFeatures": int,
|
|
104
|
+
"promptToCodeCoverage": float,
|
|
105
|
+
"codeToPromptCoverage": float,
|
|
106
|
+
"hiddenKnowledgeCount": int,
|
|
107
|
+
"criticalGaps": int
|
|
104
108
|
}}
|
|
105
109
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
110
|
+
12. "missing": array of strings - requirements in prompt not implemented
|
|
111
|
+
13. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
|
|
112
|
+
14. "suggestions": array of specific additions to make to the prompt to enable regeneration
|
|
109
113
|
|
|
110
114
|
## Strictness Guidelines
|
|
111
115
|
|