pdd-cli 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic.

Files changed (49)
  1. pdd/__init__.py +14 -1
  2. pdd/bug_main.py +5 -1
  3. pdd/bug_to_unit_test.py +16 -5
  4. pdd/change.py +2 -1
  5. pdd/change_main.py +407 -189
  6. pdd/cli.py +853 -301
  7. pdd/code_generator.py +2 -1
  8. pdd/conflicts_in_prompts.py +2 -1
  9. pdd/construct_paths.py +377 -222
  10. pdd/context_generator.py +2 -1
  11. pdd/continue_generation.py +5 -2
  12. pdd/crash_main.py +55 -20
  13. pdd/data/llm_model.csv +18 -17
  14. pdd/detect_change.py +2 -1
  15. pdd/fix_code_loop.py +465 -160
  16. pdd/fix_code_module_errors.py +7 -4
  17. pdd/fix_error_loop.py +9 -9
  18. pdd/fix_errors_from_unit_tests.py +207 -365
  19. pdd/fix_main.py +32 -4
  20. pdd/fix_verification_errors.py +148 -77
  21. pdd/fix_verification_errors_loop.py +842 -768
  22. pdd/fix_verification_main.py +412 -0
  23. pdd/generate_output_paths.py +427 -189
  24. pdd/generate_test.py +3 -2
  25. pdd/increase_tests.py +2 -2
  26. pdd/llm_invoke.py +1167 -343
  27. pdd/preprocess.py +3 -3
  28. pdd/process_csv_change.py +466 -154
  29. pdd/prompts/bug_to_unit_test_LLM.prompt +11 -11
  30. pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
  31. pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
  32. pdd/prompts/find_verification_errors_LLM.prompt +11 -9
  33. pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
  34. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
  35. pdd/prompts/fix_verification_errors_LLM.prompt +8 -1
  36. pdd/prompts/generate_test_LLM.prompt +9 -3
  37. pdd/prompts/trim_results_start_LLM.prompt +1 -1
  38. pdd/prompts/update_prompt_LLM.prompt +3 -3
  39. pdd/split.py +6 -5
  40. pdd/split_main.py +13 -4
  41. pdd/trace_main.py +7 -0
  42. pdd/update_model_costs.py +446 -0
  43. pdd/xml_tagger.py +2 -1
  44. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/METADATA +8 -16
  45. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/RECORD +49 -47
  46. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/WHEEL +1 -1
  47. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/entry_points.txt +0 -0
  48. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/licenses/LICENSE +0 -0
  49. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/top_level.txt +0 -0
pdd/prompts/bug_to_unit_test_LLM.prompt CHANGED
@@ -1,17 +1,17 @@
- % You are an expert {language} Software Test Engineer. Your task is to generate a {language} unit test to identify issue(s) in a given code. The test should compare the current output with the desired output and to ensure the code behaves as expected. If Python, use Pytest.
+ % You are an expert {language} Software Test Engineer. Your task is to generate a {language} unit test to detect issue(s) in code_under_test. The test should compare the current output with the desired output and to ensure the code behaves as expected. If Python, use Pytest.

  % Inputs:
- • Current output: ```{current_output}```
- • Desired output: ```{desired_output}```
- • Code under test: ```{code_under_test}```
- • Program used to run the code under test: ```{program_used_to_run_code_under_test}```
- • Prompt that generated the code: ```{prompt_that_generated_code}```
+ • Current output: <current_output>{current_output}</current_output>
+ • Desired output: <desired_output>{desired_output}</desired_output>
+ • Code under test: <code_under_test>{code_under_test}</code_under_test>
+ • Program used to run the code under test: <program_used_to_run_code_under_test>{program_used_to_run_code_under_test}</program_used_to_run_code_under_test>
+ • Prompt that generated the code: <prompt_that_generated_code>{prompt_that_generated_code}</prompt_that_generated_code>
  % Output:
- • A unit test that covers the problem(s) and ensures the code meets the expected behavior.
+ • A unit test that detects the problem(s) and ensures the code meets the expected behavior.

  % Follow these steps to generate the unit test:
- 1. Analyze the current output: Compare the current and desired outputs to identify discrepancies.
- 2. Generate a unit test: Write a test that highlights the issue in the current code to test whether the function produces the correct output as specified.
- 3. Ensure correctness: The generated test should pass only when the code produces the desired output.
+ 1. Analyze the current output: Compare the current and desired outputs to identify discrepancies and explain the issue in several paragraphs.
+ 2. Based on the above analysis explain in several paragraphs how the issues can be reproduced without having false positives.
+ 3. Write a test that properly detects the issue in the code_under_test so that if the test passes, the issue is fixed.

- % Focus exclusively on generating a robust unit test to detect and identify the issue(s) in the code provided.
+ % Focus exclusively on writing a robust unit test to detect and identify the issue(s) in the code provided. The test should not focus on the internals of the code but rather the inputs and outputs so that the test can be reused if the code is regenerated.
pdd/prompts/extract_prompt_update_LLM.prompt CHANGED
@@ -1,8 +1,14 @@
- % You are an expert Software Engineer. Your goal is to extract a JSON from the output of a LLM. This LLM changed a input_prompt into a modified_prompt.
+ % You are an expert Software Engineer. Your goal is to extract the updated prompt from the LLM output.

- % Here is the generated llm_output: ```{llm_output}```
+ % Here is the generated llm_output: <llm_output>{llm_output}</llm_output>

- % Output a JSON object with the following keys:
- - 'modified_prompt': String containing the modified prompt that will generate the modified code.
-
+ % The LLM output contains the modified prompt that will generate the modified code, possibly with some additional commentary or explanation.
+ % Your task is to identify and extract ONLY the modified prompt itself, without adding any JSON structure or additional formatting.

+ % Ensure you:
+ % 1. Remove any "# Modified Prompt" headers or similar text that isn't part of the actual prompt
+ % 2. Preserve all markdown, code blocks, and formatting within the actual prompt
+ % 3. Don't add any explanatory text, JSON wrappers, or your own commentary
+ % 4. Return only the text that constitutes the actual prompt
+
+ % The "modified_prompt" should be the complete, standalone prompt that could be used directly to generate the modified code.
pdd/prompts/extract_unit_code_fix_LLM.prompt CHANGED
@@ -328,5 +328,5 @@
  - 'explanation': String explanation of whether the code under test needs to be fix and/or if the unit test needs to be fixed. Also, explain whether only a fragment of code was provided and the entire unit test and/or code under test needs to be reassembled from the original code and/or unit test.
  - 'update_unit_test': Boolean indicating whether the unit test needs to be updated.
  - 'update_code': Boolean indicating whether the code under test needs to be updated.
- - 'fixed_unit_test': The entire updated unit test code or empty String if no update is needed.
- - 'fixed_code': The entire updated code under test or empty String if no update is needed.
+ - 'fixed_unit_test': The entire updated unit test code or empty String if no update is needed. Don't lose prior comments in the unit test unless they are no longer valid.
+ - 'fixed_code': The entire updated code under test or empty String if no update is needed. Don't lose prior comments in the code under test unless they are no longer valid.
pdd/prompts/find_verification_errors_LLM.prompt CHANGED
@@ -1,4 +1,4 @@
- % You are an expert Software Engineer. Your goal is to verify a code_module or program for correctness and potential issues, even if it hasn't crashed.
+ % You are an expert Software Engineer. Your goal is to identify any discrepancies between a program, its code_module, and a prompt. You also need to check for any potential bugs or issues in the code.

  % Here is the program that is running the code_module: <program>{program}</program>

@@ -8,18 +8,20 @@

  % Here are the output logs from the program run: <output>{output}</output>

- % Follow these steps to verify the program:
+ % Follow these steps to identify any issues:
  Step 1. Compare the program and code_module against the prompt and explain any discrepancies.
  Step 2. Analyze the input/output behavior of the program and verify if it meets the expected behavior described in the prompt.
  Step 3. Identify any potential edge cases, error handling issues, or performance concerns that could cause problems in the future.
  Step 4. Check the code for potential bugs that haven't manifested yet.
  Step 5. If any issues are found, explain in detail the root cause of each issue and how it could impact the program's functioning.

- % Your response should include the following structured output:
+ % After your analysis, determine the number of distinct issues found. If no issues are found, the count should be 0.

- <details>
- The detailed output of steps 1-5
- </details>
- <issues_count>N</issues_count>
-
- % The issues_count field should be set to the number of issues, bugs, discrepancies, or potential problems identified (an integer >= 0). Set it to 0 if no issues are found.
+ % Return your response as a single, valid JSON object.
+ % The JSON object must conform to the following structure:
+ % {{
+ % "issues_count": <integer_count_of_issues_found>,
+ % "details": "A detailed explanation of all steps taken during your analysis, including any discrepancies, bugs, or potential issues identified. If no issues are found, this can be a brief confirmation."
+ % }}
+ % Ensure the "details" field contains your complete textual analysis from Steps 1-5.
+ % Ensure the "issues_count" is an integer representing the total number of distinct problems you've identified in your details.
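For context on the new output contract above: the prompt now asks the model for a single JSON object with an integer issues_count and a free-text details field, replacing the old <details>/<issues_count> tags. Below is a minimal parsing sketch, assuming the raw model text is plain JSON; the helper name parse_verification_response and the example payload are illustrative and not taken from the package.

import json

def parse_verification_response(raw: str) -> tuple[int, str]:
    # Expects: {"issues_count": <int>, "details": "<analysis text>"}
    data = json.loads(raw)
    return int(data["issues_count"]), data.get("details", "")

example = '{"issues_count": 0, "details": "No discrepancies between program, code_module, and prompt."}'
count, details = parse_verification_response(example)
assert count == 0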
pdd/prompts/fix_code_module_errors_LLM.prompt CHANGED
@@ -8,6 +8,35 @@

  % Here are the error log(s) from the program run and potentially from prior program run fixes: <errors>{errors}</errors>

+ % NOTE: The errors field contains a structured history of previous fixing attempts with XML tags and human-readable content:
+ <attempt number="X"> - Start of each attempt record
+ <verification>
+ Status: Success/failure status with return code
+ Output: [Standard output text]
+ Error: [Error message text]
+ </verification>
+
+ <current_error>
+ [Current error message to be fixed]
+ </current_error>
+
+ <fixing>
+ <llm_analysis>
+ [Analysis from previous attempts in human-readable format]
+ </llm_analysis>
+ <decision>
+ update_program: true/false
+ update_code: true/false
+ </decision>
+ </fixing>
+ </attempt>
+
+ % When analyzing errors, you should:
+ 1. Review the history of previous attempts to understand what has been tried
+ 2. Pay attention to which fixes worked partially or not at all
+ 3. Avoid repeating approaches that failed in previous attempts
+ 4. Focus on solving the current error found within the <current_error> tags
+
  % Follow these steps to solve these errors:
  Step 1. Compare the prompt to the code_module and explain differences, if any.
  Step 2. Compare the prompt to the program and explain differences, if any.
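The <attempt> history layout above is the structure the errors field is expected to carry. A sketch of how a fix loop might serialize one attempt record into that shape follows; the function name and its arguments are assumptions for illustration, not the package's actual fix_code_loop implementation.

def format_attempt(number: int, status: str, output: str, error: str,
                   current_error: str, analysis: str,
                   update_program: bool, update_code: bool) -> str:
    # Assemble one <attempt> block matching the structure described above.
    return (
        f'<attempt number="{number}">\n'
        f"<verification>\nStatus: {status}\nOutput: {output}\nError: {error}\n</verification>\n\n"
        f"<current_error>\n{current_error}\n</current_error>\n\n"
        f"<fixing>\n<llm_analysis>\n{analysis}\n</llm_analysis>\n"
        f"<decision>\nupdate_program: {str(update_program).lower()}\n"
        f"update_code: {str(update_code).lower()}\n</decision>\n</fixing>\n"
        f"</attempt>"
    )

record = format_attempt(1, "failure (return code 1)", "", "ImportError in code_module",
                        "ImportError in code_module",
                        "Previous fix changed the import path but missed one caller.",
                        update_program=False, update_code=True)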
pdd/prompts/fix_errors_from_unit_tests_LLM.prompt CHANGED
@@ -9,7 +9,7 @@
  % This prompt is run iteratively. Here are the current errors and past potential fix attempts, if any, from the unit test and verification program run(s): <errors>{errors}</errors>

  % If the verfication program fails to run, the code_under_test and unit_test are unchanged from the previous iteration.
-
+ <pdd>
  <examples>
  <example_1>
  % Here is an example_unit_test for the example_code_under_test: <example_unit_test><include>context/fix_errors_from_unit_tests/1/test_conflicts_in_prompts.py</include></example_unit_test>
@@ -34,7 +34,7 @@

  % Here is the prompt that generated the example_code_under_test: <example_prompt><include>context/fix_errors_from_unit_tests/3/context_generator_python.prompt</include></example_prompt>
  </example_3>
- <pdd>
+

  <example_4>
  % Here is an example_unit_test for the example_code_under_test: <example_unit_test><include>context/fix_errors_from_unit_tests/4/test_detect_change.py</include></example_unit_test>
@@ -51,8 +51,8 @@

  % Here is an example error/fix log showing how the issues were resolved: <example_error_fix_log><include>context/fix_errors_from_unit_tests/4/error.log</include></example_error_fix_log>
  </example_5>
- </pdd>
  </examples>
+ </pdd>

  <instructions>
  % Follow these steps to solve these errors:
@@ -60,7 +60,7 @@
  Step 2. Compare the prompt to the unit_test and explain differences, if any.
  Step 3. For each prior attempted fix for the code_under_test and unit_test (if any), explain in a few paragraphs for each attempt why it might not have worked.
  Step 4. Write several paragraphs explaining the root cause of each of the errors and each of the warnings in the code_under_test and unit_test.
- Step 5. Explain in detail step by step how to solve each of the errors and warnings. For each error and warning, there should be several paragraphs description of the solution steps. Sometimes logging or print statements can help debug the code in subsequent iterations.
+ Step 5. Explain in detail step by step how to solve each of the errors and warnings. For each error and warning, there should be several paragraphs description of the solution steps. Sometimes logging or print statements can help debug the code in subsequent iterations. It is important to make sure the tests are still sufficiently comprehensive to catch potential errors.
  Step 6. Review the above steps and correct for any errors and warnings in the code under test or unit test.
- Step 7. For the code that need changes, write the complete instructions to correct code_under_test (surrounded by 'corrected_code_under_test' XML tags) and/or corrected unit_test (surrounded by 'corrected_unit_test' XML tags).
+ Step 7. For the code that need changes, write the corrected code_under_test and/or corrected unit_test in its/their entirety.
  </instructions>
pdd/prompts/fix_verification_errors_LLM.prompt CHANGED
@@ -17,4 +17,11 @@
  Step 4. Provide the complete fixed code_module and program with explanations for each significant change made.
  Step 5. Verify that the fixed code meets all requirements from the original prompt and addresses all identified issues.

- % Write the detail explanation for each step above in a <explanation> XML tag, the fixed code_module in a <fixed_code> XML tag and the fixed program in a <fixed_program> XML tag
+ % Return your response as a single, valid JSON object.
+ % The JSON object must conform to the following structure:
+ % {{
+ % "explanation": "Detailed explanation of all steps taken, including analysis of issues, solutions developed, and verification that the fixes are correct and meet prompt requirements.",
+ % "fixed_code": "The complete, runnable, and fixed Python code for the code_module. This should ONLY be the code, with no additional text or commentary.",
+ % "fixed_program": "The complete, runnable, and fixed Python code for the program. This should ONLY be the code, with no additional text or commentary."
+ % }}
+ % Ensure that the "fixed_code" and "fixed_program" fields contain only the raw source code. Do not include any markdown formatting, comments (unless part of the code itself), or any other explanatory text within these fields.
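The package already uses Pydantic models for structured LLM output (see the PromptSplit model in the split.py diff later on), so a schema matching this JSON structure might look like the sketch below; the class name FixVerificationOutput is hypothetical and may not match the actual model used in fix_verification_errors.py.

import json
from pydantic import BaseModel, Field

class FixVerificationOutput(BaseModel):
    # Hypothetical schema mirroring the JSON object the prompt requests.
    explanation: str = Field(description="Detailed explanation of all fix steps")
    fixed_code: str = Field(description="Complete fixed code_module source")
    fixed_program: str = Field(description="Complete fixed program source")

raw = '{"explanation": "Renamed the mismatched argument.", "fixed_code": "def run():\\n    pass\\n", "fixed_program": "import code_module\\n"}'
parsed = FixVerificationOutput(**json.loads(raw))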
pdd/prompts/generate_test_LLM.prompt CHANGED
@@ -1,4 +1,4 @@
- % You are an expert Software Test Engineer. Your goal is to generate a unit test that ensures correct functionality of the code under test.
+ % You are an expert Software Test Engineer. Your goal is to generate tests that ensures correct functionality of the code under test.

  % Here a description of what the code is supposed to do and was the prompt that generated the code: <prompt_that_generated_code>{prompt_that_generated_code}</prompt_that_generated_code>

@@ -9,12 +9,18 @@
  - The unit test should be in {language}. If Python, use pytest.
  - Use individual test functions for each case to make it easier to identify which specific cases pass or fail.
  - Use the description of the functionality in the prompt to generate tests with useful tests with good code coverage.
- - The code might get regenerated by a LLM so focus the test on the functionality of the code, not the implementation details.
+ - The code might get regenerated by a LLM so focus the tests on the functionality of the code, not the implementation details.
+ - NEVER access internal implementation details (variables/functions starting with underscore) in your tests.
+ - Setup and teardown methods should only use public APIs and environment variables, never reset internal module state directly.
+ - Design tests to be independent of implementation details that might change when code is regenerated.
+ - For test isolation, use fixtures and mocking of external dependencies rather than manipulating internal module state. In general minimize the amount of mocking needed so that the tests are more robust to changes in the code under test and more code is tested.
  <include>./context/test.prompt</include>

  <instructions>
  1. Carefully read the prompt that generated the code under test and determine what might be possible edge cases.
  2. For each edge case explain whether it is better to do the test using Z3 formal verification or unit tests.
  3. Develop a detailed test plan that will ensure the code under test is correct. This should involve both Z3 formal verification and unit tests.
- 4. Write the unit tests and Z3 formal verification tests that are runnable as unit tests.
+ 4. Now write the test file.
+ a) The first part of the test file should be the detailed test plan from step 3 above in comments.
+ b) Then write the tests and Z3 formal verification tests that are runnable as unit tests.
  </instructions>
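As a rough illustration of the new test-writing rules above (public API only, behavior-level assertions, fixtures for isolation, minimal mocking), here is a small pytest sketch; slugify is a made-up stand-in for whatever public function the generated code exposes and is not part of pdd.

import re
import pytest

def slugify(title: str) -> str:
    # Stand-in for the code under test.
    cleaned = re.sub(r"[^a-z0-9]+", "-", title.strip().lower())
    return cleaned.strip("-")

@pytest.fixture
def sample_title() -> str:
    return "  Hello, World!  "

def test_slugify_basic(sample_title):
    # Assert on inputs and outputs only, no underscore-prefixed internals.
    assert slugify(sample_title) == "hello-world"

def test_slugify_empty_string():
    assert slugify("") == ""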
pdd/prompts/trim_results_start_LLM.prompt CHANGED
@@ -1,4 +1,4 @@
- % You are an expert JSON editor. You will be processing the output of a language model (LLM) to extract the unfinished main code block being generated and provide an explanation of how you determined what to cut out. Here is the llm_output to process:
+ % You are an expert editor and JSON creator. You will be processing the output of a language model (LLM) to extract the unfinished main code block being generated and provide an explanation of how you determined what to cut out. Here is the llm_output to process:
  <llm_output>
  {LLM_OUTPUT}
  </llm_output>
pdd/prompts/update_prompt_LLM.prompt CHANGED
@@ -8,9 +8,9 @@
  Output:
  'modified_prompt' - A string that contains the updated prompt that will generate the modified code.

- % Here is the input_prompt to change: ```{input_prompt}```
- % Here is the input_code: ```{input_code}```
- % Here is the modified_code: ```{modified_code}```
+ % Here is the input_prompt to change: <input_prompt>{input_prompt}</input_prompt>
+ % Here is the input_code: <input_code>{input_code}</input_code>
+ % Here is the modified_code: <modified_code>{modified_code}</modified_code>

  % To generate the modified prompt, perform the following sequence of steps:
  1. Using the provided input_code and input_prompt, identify what the code does and how it was generated.
pdd/split.py CHANGED
@@ -5,6 +5,7 @@ from pydantic import BaseModel, Field
  from .load_prompt_template import load_prompt_template
  from .preprocess import preprocess
  from .llm_invoke import llm_invoke
+ from . import EXTRACTION_STRENGTH

  class PromptSplit(BaseModel):
  extracted_functionality: str = Field(description="The extracted functionality as a sub-module prompt")
@@ -17,7 +18,7 @@ def split(
  strength: float,
  temperature: float,
  verbose: bool = False
- ) -> Tuple[str, str, str, float]:
+ ) -> Tuple[str, str, float, str]:
  """
  Split a prompt into extracted functionality and remaining prompt.

@@ -30,7 +31,7 @@
  verbose (bool): Whether to print detailed information.

  Returns:
- Tuple[str, str, str, float]: (extracted_functionality, remaining_prompt, model_name, total_cost)
+ Tuple[str, str, float, str]: (extracted_functionality, remaining_prompt, model_name, total_cost)
  where model_name is the name of the model used (returned as the second to last tuple element)
  and total_cost is the aggregated cost from all LLM invocations.
  """
@@ -91,7 +92,7 @@
  extract_response = llm_invoke(
  prompt=processed_extract_prompt,
  input_json={"llm_output": split_response["result"]},
- strength=0.97, # Fixed strength for extraction
+ strength=EXTRACTION_STRENGTH, # Fixed strength for extraction
  temperature=temperature,
  output_pydantic=PromptSplit,
  verbose=verbose
@@ -111,8 +112,8 @@
  rprint(f"[bold cyan]Total Cost: ${total_cost:.6f}[/bold cyan]")
  rprint(f"[bold cyan]Model used: {model_name}[/bold cyan]")

- # 6. Return results (model_name is the 2nd to last element)
- return extracted_functionality, remaining_prompt, model_name, total_cost
+ # 6. Return results with standardized order: (result_data, cost, model_name)
+ return (extracted_functionality, remaining_prompt), total_cost, model_name

  except Exception as e:
  # Print an error message, then raise an exception that includes
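Given the standardized return order introduced above, a caller of split() would unpack it as in the sketch below. The import path and the placeholder argument values are assumptions based on this diff rather than documented usage.

from pdd.split import split  # import path assumed

(extracted_functionality, remaining_prompt), total_cost, model_name = split(
    input_prompt="...",   # prompt to split (placeholder)
    input_code="...",     # code generated from that prompt (placeholder)
    example_code="...",   # example usage of the extracted sub-module (placeholder)
    strength=0.5,
    temperature=0,
    verbose=False,
)
print(f"model={model_name}, cost=${total_cost:.6f}")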
pdd/split_main.py CHANGED
@@ -13,7 +13,7 @@ def split_main(
  example_code_file: str,
  output_sub: Optional[str],
  output_modified: Optional[str]
- ) -> Tuple[str, str, str, float]:
+ ) -> Tuple[str, str, float, str]:
  """
  CLI wrapper for splitting a prompt into extracted functionality and remaining prompt.

@@ -60,8 +60,8 @@
  strength = ctx.obj.get('strength', 0.5)
  temperature = ctx.obj.get('temperature', 0)

- # Call the split function and unpack the new tuple signature
- extracted_functionality, remaining_prompt, model_name, total_cost = split(
+ # Call the split function with the standardized return pattern (result_data, cost, model_name)
+ result_tuple, total_cost, model_name = split(
  input_prompt=input_strings["input_prompt"],
  input_code=input_strings["input_code"],
  example_code=input_strings["example_code"],
@@ -69,6 +69,9 @@
  temperature=temperature,
  verbose=not ctx.obj.get('quiet', False)
  )
+
+ # Unpack the result tuple
+ extracted_functionality, remaining_prompt = result_tuple

  # Save the output files
  try:
@@ -87,7 +90,13 @@
  rprint(f"[bold]Model used:[/bold] {model_name}")
  rprint(f"[bold]Total cost:[/bold] ${total_cost:.6f}")

- return extracted_functionality, remaining_prompt, total_cost, model_name
+ # Return with standardized order (result_data, cost, model_name)
+ return {
+ "sub_prompt_content": extracted_functionality,
+ "modified_prompt_content": remaining_prompt,
+ "output_sub": output_file_paths["output_sub"],
+ "output_modified": output_file_paths["output_modified"]
+ }, total_cost, model_name

  except Exception as e:
  # Handle errors and provide appropriate feedback
pdd/trace_main.py CHANGED
@@ -56,6 +56,13 @@ def trace_main(ctx: click.Context, prompt_file: str, code_file: str, code_line:
  code_content, code_line, prompt_content, strength, temperature
  )
  logger.debug(f"Trace analysis completed: prompt_line={prompt_line}, total_cost={total_cost}, model_name={model_name}")
+
+ # Exit with error if trace returned None (indicating an error occurred)
+ if prompt_line is None:
+ if not quiet:
+ rprint(f"[bold red]Trace analysis failed[/bold red]")
+ logger.error("Trace analysis failed (prompt_line is None)")
+ ctx.exit(1)
  except ValueError as e:
  if not quiet:
  rprint(f"[bold red]Invalid input: {e}[/bold red]")