pdd-cli 0.0.118__py3-none-any.whl → 0.0.121__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. pdd/__init__.py +1 -1
  2. pdd/agentic_bug_orchestrator.py +15 -6
  3. pdd/agentic_change_orchestrator.py +18 -7
  4. pdd/agentic_common.py +68 -40
  5. pdd/agentic_crash.py +2 -1
  6. pdd/agentic_e2e_fix_orchestrator.py +165 -9
  7. pdd/agentic_update.py +2 -1
  8. pdd/agentic_verify.py +3 -2
  9. pdd/auto_include.py +51 -0
  10. pdd/commands/analysis.py +32 -25
  11. pdd/commands/connect.py +69 -1
  12. pdd/commands/fix.py +31 -13
  13. pdd/commands/generate.py +5 -0
  14. pdd/commands/modify.py +47 -11
  15. pdd/commands/utility.py +12 -7
  16. pdd/core/cli.py +17 -4
  17. pdd/core/dump.py +68 -20
  18. pdd/fix_main.py +4 -2
  19. pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
  20. pdd/frontend/dist/index.html +1 -1
  21. pdd/llm_invoke.py +82 -12
  22. pdd/operation_log.py +342 -0
  23. pdd/postprocess.py +122 -100
  24. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +11 -2
  25. pdd/prompts/generate_test_LLM.prompt +0 -1
  26. pdd/prompts/generate_test_from_example_LLM.prompt +251 -0
  27. pdd/prompts/prompt_code_diff_LLM.prompt +29 -25
  28. pdd/server/routes/prompts.py +26 -1
  29. pdd/server/terminal_spawner.py +15 -7
  30. pdd/sync_orchestration.py +164 -147
  31. pdd/sync_order.py +304 -0
  32. pdd/update_main.py +48 -24
  33. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +3 -3
  34. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/RECORD +37 -35
  35. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +0 -449
  36. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
  37. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
  38. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
  39. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/postprocess.py CHANGED
@@ -1,136 +1,158 @@
-from typing import Tuple
-from rich import print
-from pydantic import BaseModel, Field
+from __future__ import annotations
+
+import re
+from typing import Tuple, Optional
+
+from rich.console import Console
+from pydantic import BaseModel, Field, ValidationError
+
+from . import DEFAULT_STRENGTH, DEFAULT_TIME
 from .load_prompt_template import load_prompt_template
 from .llm_invoke import llm_invoke
-from . import DEFAULT_TIME, DEFAULT_STRENGTH
+
+
+console = Console()
+
 
 class ExtractedCode(BaseModel):
-    """Pydantic model for the extracted code."""
-    focus: str = Field(default="", description="The focus of the generation")
-    explanation: str = Field(default="", description="Explanation of the extraction")
-    extracted_code: str = Field(description="The extracted code from the LLM output")
+    focus: str = Field("", description="Focus of the code")
+    explanation: str = Field("", description="Explanation of the code")
+    extracted_code: str = Field(..., description="Extracted code")
+
+
+def postprocess_0(llm_output: str, language: str) -> str:
+    """Simple extraction of code blocks."""
+    if language == "prompt":
+        # Strip <prompt> tags
+        llm_output = re.sub(r"<prompt>\s*(.*?)\s*</prompt>", r"\1", llm_output, flags=re.DOTALL)
+        llm_output = llm_output.strip()
+
+        # Also strip triple backticks if present
+        lines = llm_output.splitlines()
+        if lines and lines[0].startswith("```"):
+            # Remove first line with opening backticks
+            lines = lines[1:]
+            # If there's a last line with closing backticks, remove it
+            if lines and lines[-1].startswith("```"):
+                lines = lines[:-1]
+            llm_output = "\n".join(lines)
+
+        return llm_output.strip()
+
+    # First try to find complete code blocks with closing backticks
+    code_blocks = re.findall(r"```(?:[a-zA-Z]+)?\n(.*?)\n```", llm_output, re.DOTALL)
+    if code_blocks:
+        return "\n".join(block.strip() for block in code_blocks)
+
+    # If no complete blocks found, try to find incomplete blocks (opening backticks without closing)
+    # But ensure there's actual content after the opening backticks
+    incomplete_match = re.search(r"```(?:[a-zA-Z]+)?\n(.+?)(?:\n```)?$", llm_output, re.DOTALL)
+    if incomplete_match:
+        content = incomplete_match.group(1).strip()
+        # Don't return if content is just closing backticks
+        if content and content != "```":
+            return content
+
+    return ""
 
-def postprocess_0(text: str) -> str:
-    """
-    Simple code extraction for strength = 0.
-    Extracts code between triple backticks.
-    """
-    lines = text.split('\n')
-    code_lines = []
-    in_code_block = False
-
-    for line in lines:
-        if '```' in line:  # MODIFIED: Was line.startswith('```')
-            if not in_code_block:
-                # Skip the language identifier line / content on opening delimiter line
-                in_code_block = True
-                continue
-            else:
-                # Content on closing delimiter line is skipped
-                in_code_block = False
-                continue
-        if in_code_block:
-            code_lines.append(line)
-
-    return '\n'.join(code_lines)
 
 def postprocess(
     llm_output: str,
     language: str,
     strength: float = DEFAULT_STRENGTH,
-    temperature: float = 0,
+    temperature: float = 0.0,
     time: float = DEFAULT_TIME,
-    verbose: bool = False
+    verbose: bool = False,
 ) -> Tuple[str, float, str]:
     """
-    Extract code from LLM output string.
-
+    Extracts code from a string output of an LLM.
+
     Args:
-        llm_output (str): The string output from the LLM containing code sections
-        language (str): The programming language of the code to extract
-        strength (float): The strength of the LLM model to use (0-1)
-        temperature (float): The temperature parameter for the LLM (0-1)
-        time (float): The thinking effort for the LLM model (0-1)
-        verbose (bool): Whether to print detailed processing information
-
+        llm_output: A string containing a mix of text and code sections.
+        language: A string specifying the programming language of the code to be extracted.
+        strength: A float between 0 and 1 that represents the strength of the LLM model to use.
+        temperature: A float between 0 and 1 that represents the temperature parameter for the LLM model.
+        time: A float between 0 and 1 that controls the thinking effort for the LLM model.
+        verbose: A boolean that indicates whether to print detailed processing information.
+
     Returns:
-        Tuple[str, float, str]: (extracted_code, total_cost, model_name)
+        A tuple containing the extracted code string, total cost float and model name string.
     """
-    try:
-        # Input validation
-        if not llm_output or not isinstance(llm_output, str):
-            raise ValueError("llm_output must be a non-empty string")
-        if not language or not isinstance(language, str):
-            raise ValueError("language must be a non-empty string")
-        if not 0 <= strength <= 1:
-            raise ValueError("strength must be between 0 and 1")
-        if not 0 <= temperature <= 1:
-            raise ValueError("temperature must be between 0 and 1")
-
-        # Step 1: If strength is 0, use simple extraction
-        if strength == 0:
-            if verbose:
-                print("[blue]Using simple code extraction (strength = 0)[/blue]")
-            return (postprocess_0(llm_output), 0.0, "simple_extraction")
-
-        # Step 2: Load the prompt template
-        prompt_template = load_prompt_template("extract_code_LLM")
-        if not prompt_template:
-            raise ValueError("Failed to load prompt template")
+    if not isinstance(llm_output, str) or not llm_output:
+        raise ValueError("llm_output must be a non-empty string")
+    if not isinstance(language, str) or not language:
+        raise ValueError("language must be a non-empty string")
+    if not isinstance(strength, (int, float)):
+        raise TypeError("strength must be a number")
+    if not 0 <= strength <= 1:
+        raise ValueError("strength must be between 0 and 1")
+    if not isinstance(temperature, (int, float)):
+        raise TypeError("temperature must be a number")
+    if not 0 <= temperature <= 1:
+        raise ValueError("temperature must be between 0 and 1")
 
+    if language == "prompt":
+        extracted_code = postprocess_0(llm_output, language)
+        return extracted_code, 0.0, "simple_extraction"
+
+    if strength == 0:
+        extracted_code = postprocess_0(llm_output, language)
         if verbose:
-            print("[blue]Loaded prompt template for code extraction[/blue]")
+            console.print("[blue]Using simple code extraction (strength = 0)[/blue]")
+        return extracted_code, 0.0, "simple_extraction"
+
+    prompt_name = "extract_code_LLM"
+    prompt = load_prompt_template(prompt_name)
 
-        # Step 3: Process using llm_invoke
-        input_json = {
-            "llm_output": llm_output,
-            "language": language
-        }
+    if not prompt:
+        error_msg = "Failed to load prompt template"
+        console.print(f"[red]Error:[/red] {error_msg}")
+        raise ValueError(error_msg)
 
-        response = llm_invoke(
-            prompt=prompt_template,
+    input_json = {"llm_output": llm_output, "language": language}
+
+    if verbose:
+        console.print("[blue]Loaded prompt template for code extraction[/blue]")
+
+    try:
+        result = llm_invoke(
+            prompt=prompt,
             input_json=input_json,
             strength=strength,
             temperature=temperature,
             time=time,
-            verbose=verbose,
             output_pydantic=ExtractedCode,
-            language=language,
+            verbose=verbose,
         )
 
-        if not response or 'result' not in response:
-            raise ValueError("Failed to get valid response from LLM")
-
-        result_obj = response['result']
-        if not isinstance(result_obj, ExtractedCode):
-            # If we got a string (likely an error message from llm_invoke), fallback to simple extraction
-            if verbose:
-                print(f"[yellow]Structured extraction failed ({result_obj}). Falling back to simple extraction.[/yellow]")
-            return (postprocess_0(llm_output), response.get('cost', 0.0), response.get('model_name', 'fallback'))
+        if not result or "result" not in result:
+            error_msg = "Failed to get valid response from LLM"
+            console.print(f"[red]Error during LLM invocation:[/red] {error_msg}")
+            raise ValueError(error_msg)
 
-        extracted_code_obj: ExtractedCode = result_obj
-        code_text = extracted_code_obj.extracted_code
+        extracted_code = result["result"].extracted_code
 
-        # Step 3c: Remove triple backticks and language identifier if present
-        lines = code_text.split('\n')
-        if lines and lines[0].startswith('```'):
+        # Clean up triple backticks
+        lines = extracted_code.splitlines()
+        if lines and lines[0].startswith("```"):
+            # Remove first line with opening backticks
             lines = lines[1:]
-        if lines and lines[-1].startswith('```'): # Check if lines is not empty again after potentially removing first line
-            lines = lines[:-1]
-
-        final_code = '\n'.join(lines)
+            # If there's a last line with closing backticks, remove it
+            if lines and lines[-1].startswith("```"):
+                lines = lines[:-1]
+            extracted_code = "\n".join(lines)
+
+        total_cost = result["cost"]
+        model_name = result["model_name"]
 
         if verbose:
-            print("[green]Successfully extracted code[/green]")
+            console.print("[green]Successfully extracted code[/green]")
 
-        # Step 4: Return the results
-        return (
-            final_code,
-            response['cost'],
-            response['model_name']
-        )
+        return extracted_code, total_cost, model_name
 
+    except KeyError as e:
+        console.print(f"[red]Error in postprocess: {e}[/red]")
+        raise ValueError(f"Failed to get valid response from LLM: missing key {e}")
     except Exception as e:
-        print(f"[red]Error in postprocess: {str(e)}[/red]")
+        console.print(f"[red]Error in postprocess: {e}[/red]")
         raise
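A note on the rewritten extraction path: `postprocess_0` now takes a `language` argument, and both the `language == "prompt"` and `strength == 0` branches return without ever calling `llm_invoke`, reporting a cost of `0.0` and the model name `"simple_extraction"`. A minimal sketch of that zero-cost path (the sample LLM output below is invented):

    from pdd.postprocess import postprocess

    fence = "```"
    llm_output = (
        "Here is the function you asked for:\n\n"
        f"{fence}python\n"
        "def add(a, b):\n"
        "    return a + b\n"
        f"{fence}\n"
    )

    # strength=0 routes through postprocess_0 (pure regex), so no LLM is invoked
    code, cost, model = postprocess(llm_output, "python", strength=0)

    assert code == "def add(a, b):\n    return a + b"
    assert (cost, model) == (0.0, "simple_extraction")

This keeps deterministic extraction free and reserves LLM spend for output the regexes cannot handle.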
pdd/prompts/agentic_change_step12_create_pr_LLM.prompt CHANGED
@@ -13,6 +13,8 @@ You are working on step 12 of 12 in an agentic change workflow. This is the fina
 - Worktree Path: {worktree_path}
 - Branch Name: change/issue-{issue_number}
 - Files Changed: {files_to_stage}
+- Sync Order Script: {sync_order_script}
+- Sync Order Commands: {sync_order_list}
 
 % Issue Content
 <issue_content>
@@ -91,7 +93,14 @@ Closes #{issue_number}
 
 ## Next Steps After Merge
 
-1. Run `pdd sync <module>` to regenerate code from modified prompts
+1. Regenerate code from modified prompts in dependency order:
+   ```bash
+   ./sync_order.sh
+   ```
+   Or manually:
+   ```
+   {sync_order_list}
+   ```
 2. Run tests to verify functionality
 3. Deploy if applicable
 
@@ -115,7 +124,7 @@ Closes #{issue_number}
 
 ### What's Next
 1. Review the PR
-2. Run `pdd sync` on affected modules after merge
+2. Run `./sync_order.sh` after merge to regenerate code in dependency order
 3. Run tests to verify
 
 ---
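The `{sync_order_script}` and `{sync_order_list}` placeholders above are presumably populated by the new `pdd/sync_order.py` module (item 31 in the file list), whose internals this diff does not show. As a rough sketch of the underlying idea only, a dependency-ordered list of `pdd sync` commands can be derived from a topological sort; the module names and dependency edges below are invented:

    from graphlib import TopologicalSorter  # stdlib, Python 3.9+

    # Maps each module to the set of modules it depends on (invented edges).
    deps = {
        "cli": {"core", "llm_invoke"},
        "core": {"llm_invoke"},
        "llm_invoke": set(),
    }

    # static_order() yields dependencies before their dependents.
    for module in TopologicalSorter(deps).static_order():
        print(f"pdd sync {module}")
    # -> pdd sync llm_invoke, pdd sync core, pdd sync cli

Running dependencies first means each regenerated module sees current versions of the modules it builds on, which is what the revised PR template now asks reviewers to do after merge.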
pdd/prompts/generate_test_LLM.prompt CHANGED
@@ -41,7 +41,6 @@
 - Prefer function-scoped test resources over shared/module-scoped ones to ensure isolation
 
 <include>context/test.prompt</include>
-<include>context/pytest_isolation_example.py</include>
 
 <instructions>
 1. FIRST: Carefully analyze the ACTUAL code provided in code_under_test:
pdd/prompts/generate_test_from_example_LLM.prompt CHANGED
@@ -98,6 +98,257 @@
 - Prefer function-scoped fixtures over module or session scope
 - Use yield in fixtures to ensure cleanup runs even on test failure
 
+% 7. MODULE-LEVEL SYS.MODULES FOR IMPORT-TIME DEPENDENCIES:
+- Sometimes you must mock modules BEFORE importing the code under test
+  (e.g., when decorators or top-level imports need mocking)
+- ALWAYS save original values, apply mocks, load module, then RESTORE immediately
+- BAD: sys.modules.update(mocks); exec_module(...) # No cleanup - pollutes all tests!
+- GOOD: See PATTERN 7 in pytest_isolation_example.py for the full save/restore pattern
+
+<isolation_example>
+"""
+Example code patterns demonstrating proper test isolation to prevent test pollution.
+
+This file provides reference implementations of CORRECT patterns that should be used
+in generated tests. These patterns prevent test pollution and ensure tests are independent.
+
+IMPORTANT: This is a context file for the LLM, not a runnable test file.
+"""
+
+import os
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# =============================================================================
+# PATTERN 1: Environment Variable Handling with monkeypatch
+# =============================================================================
+
+def test_set_env_var_with_monkeypatch(monkeypatch):
+    """GOOD: Use monkeypatch.setenv() for setting env vars.
+
+    monkeypatch automatically restores the original value after the test,
+    preventing pollution of subsequent tests.
+    """
+    monkeypatch.setenv("TEST_API_KEY", "test_key_123")
+    assert os.environ["TEST_API_KEY"] == "test_key_123"
+    # Automatically cleaned up after test
+
+
+def test_delete_env_var_with_monkeypatch(monkeypatch):
+    """GOOD: Use monkeypatch.delenv() for removing env vars."""
+    monkeypatch.setenv("TEMP_VAR_TO_DELETE", "value")
+    monkeypatch.delenv("TEMP_VAR_TO_DELETE")
+    assert "TEMP_VAR_TO_DELETE" not in os.environ
+
+
+def test_multiple_env_vars(monkeypatch):
+    """GOOD: Set multiple env vars safely with monkeypatch."""
+    monkeypatch.setenv("VAR_ONE", "value1")
+    monkeypatch.setenv("VAR_TWO", "value2")
+    monkeypatch.setenv("VAR_THREE", "value3")
+    # All automatically cleaned up
+
+
+# =============================================================================
+# PATTERN 2: Mocking with monkeypatch and context managers
+# =============================================================================
+
+def test_mock_function_with_monkeypatch(monkeypatch):
+    """GOOD: Use monkeypatch.setattr() for mocking functions."""
+    def mock_getcwd():
+        return "/mock/path"
+
+    monkeypatch.setattr(os, "getcwd", mock_getcwd)
+    assert os.getcwd() == "/mock/path"
+    # Original function automatically restored after test
+
+
+def test_mock_with_context_manager():
+    """GOOD: Use patch as context manager for scoped mocking."""
+    with patch("os.path.exists") as mock_exists:
+        mock_exists.return_value = True
+        assert os.path.exists("/fake/nonexistent/path") is True
+    # Mock is automatically removed when context exits
+
+
+# =============================================================================
+# PATTERN 3: File System Operations with tmp_path
+# =============================================================================
+
+def test_create_temp_file(tmp_path):
+    """GOOD: Use tmp_path fixture for temporary files."""
+    test_file = tmp_path / "test_output.txt"
+    test_file.write_text("test content")
+    assert test_file.exists()
+    assert test_file.read_text() == "test content"
+    # tmp_path is automatically cleaned up by pytest
+
+
+def test_create_temp_directory_structure(tmp_path):
+    """GOOD: Create directory structures in tmp_path."""
+    subdir = tmp_path / "subdir" / "nested"
+    subdir.mkdir(parents=True)
+    config_file = subdir / "config.json"
+    config_file.write_text('{{"key": "value"}}')
+    assert config_file.exists()
+
+
+# =============================================================================
+# PATTERN 4: Fixtures with Proper Cleanup
+# =============================================================================
+
+@pytest.fixture
+def resource_with_cleanup():
+    """GOOD: Fixture with proper cleanup using yield.
+
+    The cleanup code after yield always runs, even if the test fails.
+    """
+    # Setup
+    resource = {{"initialized": True, "data": []}}
+    yield resource
+    # Cleanup - always runs
+    resource["initialized"] = False
+    resource["data"].clear()
+
+
+@pytest.fixture
+def mock_module_with_cleanup():
+    """GOOD: Fixture for sys.modules with save/restore.
+
+    This pattern ensures sys.modules is always restored to its original
+    state after the test, preventing pollution.
+    """
+    module_name = "test_mock_module"
+    saved = sys.modules.get(module_name)
+
+    mock_module = MagicMock()
+    sys.modules[module_name] = mock_module
+
+    yield mock_module
+
+    # Cleanup - restore original state
+    if saved is not None:
+        sys.modules[module_name] = saved
+    elif module_name in sys.modules:
+        del sys.modules[module_name]
+
+
+def test_with_resource_cleanup(resource_with_cleanup):
+    """Test using fixture with automatic cleanup."""
+    assert resource_with_cleanup["initialized"] is True
+    resource_with_cleanup["data"].append("test_item")
+
+
+def test_with_mock_module_cleanup(mock_module_with_cleanup):
+    """Test using sys.modules fixture with cleanup."""
+    assert "test_mock_module" in sys.modules
+
+
+# =============================================================================
+# PATTERN 5: Exception Testing with Context Manager
+# =============================================================================
+
+def test_exception_with_context_manager():
+    """GOOD: Use pytest.raises() as context manager."""
+    with pytest.raises(ValueError) as exc_info:
+        raise ValueError("expected error message")
+    assert "expected error message" in str(exc_info.value)
+
+
+def test_exception_with_match():
+    """GOOD: Use match parameter for regex matching."""
+    with pytest.raises(ValueError, match=r"invalid.*value"):
+        raise ValueError("invalid input value provided")
+
+
+# =============================================================================
+# PATTERN 6: Combining Multiple Isolation Techniques
+# =============================================================================
+
+def test_combined_env_and_file(monkeypatch, tmp_path):
+    """GOOD: Combine monkeypatch and tmp_path for full isolation."""
+    config_path = tmp_path / "config"
+    config_path.mkdir()
+    monkeypatch.setenv("CONFIG_DIR", str(config_path))
+
+    config_file = config_path / "settings.json"
+    config_file.write_text('{{"debug": true}}')
+
+    assert os.environ["CONFIG_DIR"] == str(config_path)
+    assert config_file.exists()
+    # Both automatically cleaned up
+
+
+def test_combined_mock_and_env(monkeypatch):
+    """GOOD: Combine function mocking with environment variables."""
+    monkeypatch.setattr(os.path, "isfile", lambda x: True)
+    monkeypatch.setenv("TEST_MODE", "true")
+
+    assert os.path.isfile("/any/path") is True
+    assert os.environ["TEST_MODE"] == "true"
+    # Both automatically cleaned up
+
+
+# =============================================================================
+# PATTERN 7: Module-Level sys.modules for Import-Time Dependencies
+# =============================================================================
+#
+# When you need to mock modules BEFORE importing code under test
+# (e.g., for decorators or top-level imports), use this pattern.
+#
+# This is necessary when the code under test has decorators or module-level
+# imports that you need to mock. Unlike fixture-based mocking, this happens
+# at test file load time, before any test functions run.
+#
+# CRITICAL: You MUST restore original modules after loading, or you will
+# pollute sys.modules for all other test files during collection!
+#
+# Example usage (place at module level, outside any test function):
+#
+# import importlib.util
+# from unittest.mock import MagicMock
+#
+# # Step 1: Define mocks for dependencies that need mocking at import time
+# _mock_decorator = lambda f: f  # Pass-through decorator
+# _mock_dependency = MagicMock(some_decorator=_mock_decorator)
+# _module_mocks = {{
+#     "some.dependency": _mock_dependency,
+# }}
+#
+# # Step 2: Save originals BEFORE patching
+# _original_modules = {{key: sys.modules.get(key) for key in _module_mocks}}
+#
+# # Step 3: Apply mocks to sys.modules
+# sys.modules.update(_module_mocks)
+#
+# # Step 4: Load the module under test using importlib
+# _module_path = os.path.join(os.path.dirname(__file__), "..", "src", "module.py")
+# _module_path = os.path.abspath(_module_path)
+# _spec = importlib.util.spec_from_file_location("my_module", _module_path)
+# _module = importlib.util.module_from_spec(_spec)
+# sys.modules["my_module"] = _module
+# _spec.loader.exec_module(_module)
+# function_to_test = _module.function_to_test
+#
+# # Step 5: RESTORE originals immediately after load
+# # This is CRITICAL to avoid polluting other test files!
+# for key, original in _original_modules.items():
+#     if original is None:
+#         sys.modules.pop(key, None)
+#     else:
+#         sys.modules[key] = original
+#
+# # Now you can write normal test functions using function_to_test
+# def test_something():
+#     result = function_to_test()
+#     assert result == expected
+
+</isolation_example>
+
 
 <instructions>
 1. FIRST: Carefully analyze the EXAMPLE to understand:
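PATTERN 7 above ships only as commented-out guidance inside the prompt. The following is a condensed, runnable restatement of the same save/restore idea; the dependency name `fake_sdk`, the `src/module.py` path, and `function_to_test` are all hypothetical, and the load is wrapped in `try`/`finally` (a small addition to the pattern) so restoration happens even if the import raises:

    import importlib.util
    import os
    import sys
    from unittest.mock import MagicMock

    # Invented import-time dependency: a pass-through decorator on a mock.
    _module_mocks = {"fake_sdk": MagicMock(some_decorator=lambda f: f)}

    # Save originals BEFORE patching so they can be restored exactly.
    _originals = {name: sys.modules.get(name) for name in _module_mocks}
    sys.modules.update(_module_mocks)

    try:
        _path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "src", "module.py")
        )
        _spec = importlib.util.spec_from_file_location("my_module", _path)
        _module = importlib.util.module_from_spec(_spec)
        sys.modules["my_module"] = _module
        _spec.loader.exec_module(_module)
        function_to_test = _module.function_to_test
    finally:
        # Restore immediately so other test files see a clean sys.modules.
        for name, original in _originals.items():
            if original is None:
                sys.modules.pop(name, None)
            else:
                sys.modules[name] = original


    def test_function_to_test_is_importable():
        assert callable(function_to_test)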
pdd/prompts/prompt_code_diff_LLM.prompt CHANGED
@@ -48,17 +48,21 @@ Respond with a JSON object:
    - 50-69: Significant gaps, regeneration would likely fail some tests
    - 0-49: Major knowledge missing, regeneration would definitely fail
 
-2. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+2. "promptToCodeScore": integer 0-100 - How well the code implements the prompt requirements
 
-3. "regenerationRisk": "low", "medium", "high", or "critical"
+3. "codeToPromptScore": integer 0-100 - How well the prompt documents/describes the code
+
+4. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+
+5. "regenerationRisk": "low", "medium", "high", or "critical"
    - "low": Prompt captures all essential details
    - "medium": Some implementation details missing but core logic documented
    - "high": Significant undocumented behavior that would differ on regeneration
    - "critical": Code has major features/logic not in prompt at all
 
-4. "summary": 1-2 sentences on regeneration viability, be direct about risks
+6. "summary": 1-2 sentences on regeneration viability, be direct about risks
 
-5. "sections": array of PROMPT requirement sections, each with:
+7. "sections": array of PROMPT requirement sections, each with:
    - "id": unique string like "req_1", "req_2"
    - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
    - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
@@ -67,7 +71,7 @@
    - "semanticLabel": descriptive label like "Error Handling", "Input Validation"
    - "notes": REQUIRED explanation - be specific about what's missing or at risk
 
-6. "codeSections": array of CODE sections NOT adequately documented in prompt:
+8. "codeSections": array of CODE sections NOT adequately documented in prompt:
    - "id": unique string like "code_1", "code_2"
    - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
    - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
@@ -78,34 +82,34 @@ Respond with a JSON object:
      * For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
      * For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
 
-7. "hiddenKnowledge": array of objects describing undocumented code knowledge:
+9. "hiddenKnowledge": array of objects describing undocumented code knowledge:
    - "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
    - "location": {{"startLine": int, "endLine": int}}
    - "description": what the code knows that the prompt doesn't say
    - "regenerationImpact": "would_differ" | "would_fail" | "might_work"
    - "suggestedPromptAddition": what to add to the prompt to capture this
 
-8. "lineMappings": array of line-level mappings:
-   - "promptLine": int
-   - "codeLines": array of ints
-   - "matchType": "exact", "semantic", "partial", "none"
-
-9. "stats": {{
-   "totalRequirements": int,
-   "matchedRequirements": int,
-   "missingRequirements": int,
-   "totalCodeFeatures": int,
-   "documentedFeatures": int,
-   "undocumentedFeatures": int,
-   "promptToCodeCoverage": float,
-   "codeToPromptCoverage": float,
-   "hiddenKnowledgeCount": int,
-   "criticalGaps": int
+10. "lineMappings": array of line-level mappings:
+    - "promptLine": int
+    - "codeLines": array of ints
+    - "matchType": "exact", "semantic", "partial", "none"
+
+11. "stats": {{
+    "totalRequirements": int,
+    "matchedRequirements": int,
+    "missingRequirements": int,
+    "totalCodeFeatures": int,
+    "documentedFeatures": int,
+    "undocumentedFeatures": int,
+    "promptToCodeCoverage": float,
+    "codeToPromptCoverage": float,
+    "hiddenKnowledgeCount": int,
+    "criticalGaps": int
 }}
 
-10. "missing": array of strings - requirements in prompt not implemented
-11. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
-12. "suggestions": array of specific additions to make to the prompt to enable regeneration
+12. "missing": array of strings - requirements in prompt not implemented
+13. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
+14. "suggestions": array of specific additions to make to the prompt to enable regeneration
 
 ## Strictness Guidelines