pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,19 @@
16
16
  <example>
17
17
  <input_example>
18
18
  <prompt_list_example>
19
- [{"PROMPT_NAME": "change_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python Software Engineer. Your goal is to write a Python function, \"change\", that will modify an input_prompt into a modified_prompt per the change_prompt. All output to the console will be pretty printed using the Python Rich library. Ensure that the module imports are done using relative imports.\n\n% Here are the inputs and outputs of the function:\n Inputs:\n - 'input_prompt' - A string that contains the prompt that will be modified by the change_prompt.\n - 'input_code' - A string that contains the code that was generated from the input_prompt.\n - 'change_prompt' - A string that contains the instructions of how to modify the input_prompt.\n - 'strength': A float value representing the strength parameter for the LLM model, used to influence the model's behavior.\n - 'temperature': A float value representing the temperature parameter for the LLM model, used to control the randomness of the model's output.\n Outputs:\n - 'modified_prompt' - A string that contains the modified prompt that was changed based on the change_prompt.\n - 'total_cost': A float value representing the total cost of running the function.\n - 'model_name': A string representing the name of the selected LLM model.\n\n% Here is an example how to preprocess the prompt from a file: ```<./context/preprocess_example.py>```\n\n% Example usage of the Langchain LCEL program: ```<./context/langchain_lcel_example.py>```\n\n% Example of selecting a Langchain LLM and counting tokens using llm_selector: ```<./context/llm_selector_example.py>```\n\n% Steps to be followed by the function:\n 1. Load the '$PDD_PATH/prompts/xml/change_LLM.prompt' and '$PDD_PATH/prompts/extract_prompt_change_LLM.prompt' files.\n 2. Preprocess the change_LLM prompt using the preprocess function from the preprocess module and set double_curly_brackets to false.\n 3. 
Create a Langchain LCEL template from the processed change_LLM prompt to return a string output. \n 4. Use the llm_selector function for the LLM model and token counting.\n 5. Run the input_prompt through the model using Langchain LCEL:\n - a. Pass the following string parameters to the prompt during invocation: \n * 'input_prompt'\n * 'input_code'\n * 'change_prompt' (preprocess this with double_curly_brackets set to false)\n - b. Calculate the input and output token count using token_counter from llm_selector and pretty print the output of 4a, including the token count and estimated cost. The cost from llm_selector is in dollars per million tokens.\n 6. Create a Langchain LCEL template with strength .9 from the extract_prompt_change_LLM prompt that outputs JSON:\n - a. Pass the following string parameters to the prompt during invocation: 'llm_output' (this string is from Step 4).\n - b. Calculate input and output token count using token_counter from llm_selector and pretty print the running message with the token count and cost.\n - c. Use 'get' function to extract 'modified_prompt' key values using from the dictionary output.\n 7. Pretty print the extracted modified_prompt using Rich Markdown function. Include token counts and costs.\n 8. Return the 'modified_prompt' string, the total_cost of both invokes and model_name use for the change_LLM prompt.\n\n% Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages."}, {"PROMPT_NAME": "preprocess_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a Python function, 'preprocess_prompt', that will preprocess the prompt from a prompt string for a LLM. This will use regular expressions to preprocess specific XML-like tags, if any, in the prompt. 
All output to the console will be pretty print using the Python rich library.\n\n% Here are the inputs and outputs of the function:\n Input: \n 'prompt' - A string that is the prompt to preprocess\n 'recursive' - A boolean that is True if the program needs to recursively process the includes in the prompt and False if it does not need to recursively process the prompt. Default is True.\n 'double_curly_brackets' - A boolean that is True if the curly brackets need to be doubled and False if they do not need to be doubled. Default is True.\n 'exclude_keys' - An optional list of strings that are excluded from the curly bracket doubling.\n Output: returns a string that is the preprocessed prompt, with any leading or trailing whitespace removed.\n\n% Here are the XML-like tags to preprocess, other tags will remain unmodified:\n 'include' - This tag will include the content of the file indicated in the include tag. The 'include tag' will be directly replaced with the content of the file in the prompt, without wrapping it in a new tag.\n 'pdd' - This tag indicates a comment and anything in this XML will be deleted from the string including the 'pdd' tags themselves.\n 'shell' - This tag indicates that there are shell commands to run. Capture all output of the shell commands and include it in the prompt but remove the shell tags.\n\n% Includes can be nested, that is there can be includes inside of the files of the includes and 'preprocess' should be called recursively on these include files if recursive is True. There are two ways of having includes in the prompt:\n 1. Will check to see if the file has any angle brackets in triple backticks. If so, it will read the included file indicated in the angle brackets and replace the angle brackets with the content of the included file. This will be done recursively until there are no more angle brackets in triple backticks. The program will then remove the angle brackets but leave the contents in the triple backticks.\n 2. 
The XML 'include' mentioned above.\n\n% If double_curly_brackets is True, the program will check to see if the file has any single curly brackets and if it does and the string in the curly brackets are not in the exclude_keys list, it will check to see if the curly brackets are already doubled before doubling the curly brackets.\n\n% The program should resolve file paths using the PDD_PATH environment variable. Implement a function 'get_file_path' that takes a file name and returns the full path using this environment variable.\n\n% Keep the user informed of the progress of the program by pretty printing messages."}, {"PROMPT_NAME": "unfinished_prompt_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a python function called 'unfinished_prompt' that will determine if a given prompt is complete or needs to continue.\n\n% Here are the inputs and outputs of the function:\n Inputs:\n 'prompt_text' - A string containing the prompt text to analyze.\n 'strength' - A float that is the strength of the LLM model to use for the analysis. Default is 0.5.\n 'temperature' - A float that is the temperature of the LLM model to use for the analysis. Default is 0.\n Outputs:\n 'reasoning' - A string containing the structured reasoning for the completeness assessment.\n 'is_finished' - A boolean indicating whether the prompt is complete (True) or incomplete (False).\n 'total_cost' - A float that is the total cost of the analysis function. This is an optional output.\n 'model_name' - A string that is the name of the LLM model used for the analysis. This is an optional output.\n\n% Here is an example of a Langchain LCEL program: ```<./context/langchain_lcel_example.py>```\n\n% Here is an example how to select the Langchain llm and count tokens: ```<./context/llm_selector_example.py>```\n\n% Note: Use relative import for 'llm_selector' to ensure compatibility within the package structure (i.e. 
'from .llm_selector') instead of 'from pdd.llm_selector'.\n\n% This function will do the following:\n Step 1. Use $PDD_PATH environment variable to get the path to the project. Load the '$PDD_PATH/prompts/unfinished_prompt_LLM.prompt' file.\n Step 2. Create a Langchain LCEL template from unfinished_prompt_LLM prompt so that it returns a JSON output.\n Step 3. Use the llm_selector function for the LLM model.\n Step 4. Run the prompt text through the model using Langchain LCEL.\n 4a. Pass the following string parameters to the prompt during invoke:\n - 'PROMPT_TEXT'\n 4b. Pretty print a message letting the user know it is running and how many tokens (using token_counter function from llm_selector) are in the prompt and the cost. The cost from llm_selector is in dollars per million tokens.\n 4c. The dictionary output of the LCEL will have the keys 'reasoning' and 'is_finished'. Be sure to access these keys using the get method with default error messages.\n 4d. Pretty print the reasoning and completion status using the rich library. Also, print the number of tokens in the result, the output token cost and the total_cost.\n Step 5. Return the 'reasoning' string and 'is_finished' boolean from the JSON output using 'get', and the 'total_cost' float, and 'model_name' string.\n\n% Ensure that the function handles potential errors gracefully, such as missing input parameters or issues with the LLM model responses.\n\n"}, {"PROMPT_NAME": "xml_tagger_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a Python function, \"xml_tagger\", that will enhance a given LLM prompt by adding XML tags to improve its structure and readability.\n\n% The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name'). All output to the console will be pretty printed using the Python Rich library. 
Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.
20
- % The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```\n\n% Here are the inputs and outputs of the function:\n Input: \n 'raw_prompt' - A string containing the prompt that needs XML tagging to improve its organization and clarity.\n 'strength' - A float value representing the strength parameter for the LLM model.\n 'temperature' - A float value representing the temperature parameter for the LLM model.\n Output: \n 'xml_tagged' - A string containing the prompt with properly added XML tags.\n 'total_cost' - A float representing the total cost of running the LCELs.\n 'model_name' - A string representing the name of the selected LLM model.\n\n% Here is an example of a LangChain Expression Language (LCEL) program: <lcel_example>import os
19
+ [{"PROMPT_NAME": "change_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python Software Engineer. Your goal is to write a Python function, \"change\", that will modify an input_prompt into a modified_prompt per the change_prompt. All output to the console will be pretty printed using the Python Rich library. Ensure that the module imports are done using relative imports.\n\n% Here are the inputs and outputs of the function:\n Inputs:\n - 'input_prompt' - A string that contains the prompt that will be modified by the change_prompt.\n - 'input_code' - A string that contains the code that was generated from the input_prompt.\n - 'change_prompt' - A string that contains the instructions of how to modify the input_prompt.\n - 'strength': A float value representing the strength parameter for the LLM model, used to influence the model's behavior.\n - 'temperature': A float value representing the temperature parameter for the LLM model, used to control the randomness of the model's output.\n Outputs:\n - 'modified_prompt' - A string that contains the modified prompt that was changed based on the change_prompt.\n - 'total_cost': A float value representing the total cost of running the function.\n - 'model_name': A string representing the name of the selected LLM model.\n\n% Here is an example how to preprocess the prompt from a file: ```<./context/preprocess_example.py>```\n\n% Example usage of the Langchain LCEL program: ```<./context/langchain_lcel_example.py>```\n\n% Example of selecting a Langchain LLM and counting tokens using llm_selector: ```<./context/llm_selector_example.py>```\n\n% Steps to be followed by the function:\n 1. Load the '$PDD_PATH/prompts/xml/change_LLM.prompt' and '$PDD_PATH/prompts/extract_prompt_change_LLM.prompt' files.\n 2. Preprocess the change_LLM prompt using the preprocess function from the preprocess module and set double_curly_brackets to false.\n 3. 
Create a Langchain LCEL template from the processed change_LLM prompt to return a string output. \n 4. Use the llm_selector function for the LLM model and token counting.\n 5. Run the input_prompt through the model using Langchain LCEL:\n - a. Pass the following string parameters to the prompt during invocation: \n * 'input_prompt'\n * 'input_code'\n * 'change_prompt' (preprocess this with double_curly_brackets set to false)\n - b. Calculate the input and output token count using token_counter from llm_selector and pretty print the output of 4a, including the token count and estimated cost. The cost from llm_selector is in dollars per million tokens.\n 6. Create a Langchain LCEL template with strength .9 from the extract_prompt_change_LLM prompt that outputs JSON:\n - a. Pass the following string parameters to the prompt during invocation: 'llm_output' (this string is from Step 4).\n - b. Calculate input and output token count using token_counter from llm_selector and pretty print the running message with the token count and cost.\n - c. Use 'get' function to extract 'modified_prompt' key values using from the dictionary output.\n 7. Pretty print the extracted modified_prompt using Rich Markdown function. Include token counts and costs.\n 8. Return the 'modified_prompt' string, the total_cost of both invokes and model_name use for the change_LLM prompt.\n\n% Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages."}, {"PROMPT_NAME": "preprocess_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a Python function, 'preprocess_prompt', that will preprocess the prompt from a prompt string for a LLM. This will use regular expressions to preprocess specific XML-like tags, if any, in the prompt. 
All output to the console will be pretty print using the Python rich library.\n\n% Here are the inputs and outputs of the function:\n Input: \n 'prompt' - A string that is the prompt to preprocess\n 'recursive' - A boolean that is True if the program needs to recursively process the includes in the prompt and False if it does not need to recursively process the prompt. Default is True.\n 'double_curly_brackets' - A boolean that is True if the curly brackets need to be doubled and False if they do not need to be doubled. Default is True.\n 'exclude_keys' - An optional list of strings that are excluded from the curly bracket doubling.\n Output: returns a string that is the preprocessed prompt, with any leading or trailing whitespace removed.\n\n% Here are the XML-like tags to preprocess, other tags will remain unmodified:\n 'include' - This tag will include the content of the file indicated in the include tag. The 'include tag' will be directly replaced with the content of the file in the prompt, without wrapping it in a new tag.\n 'pdd' - This tag indicates a comment and anything in this XML will be deleted from the string including the 'pdd' tags themselves.\n 'shell' - This tag indicates that there are shell commands to run. Capture all output of the shell commands and include it in the prompt but remove the shell tags.\n\n% Includes can be nested, that is there can be includes inside of the files of the includes and 'preprocess' should be called recursively on these include files if recursive is True. There are two ways of having includes in the prompt:\n 1. Will check to see if the file has any angle brackets in triple backticks. If so, it will read the included file indicated in the angle brackets and replace the angle brackets with the content of the included file. This will be done recursively until there are no more angle brackets in triple backticks. The program will then remove the angle brackets but leave the contents in the triple backticks.\n 2. 
The XML 'include' mentioned above.\n\n% If double_curly_brackets is True, the program will check to see if the file has any single curly brackets and if it does and the string in the curly brackets are not in the exclude_keys list, it will check to see if the curly brackets are already doubled before doubling the curly brackets.\n\n% The program should resolve file paths using the PDD_PATH environment variable. Implement a function 'get_file_path' that takes a file name and returns the full path using this environment variable.\n\n% Keep the user informed of the progress of the program by pretty printing messages."}, {"PROMPT_NAME": "unfinished_prompt_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a python function called 'unfinished_prompt' that will determine if a given prompt is complete or needs to continue.\n\n% Here are the inputs and outputs of the function:\n Inputs:\n 'prompt_text' - A string containing the prompt text to analyze.\n 'strength' - A float that is the strength of the LLM model to use for the analysis. Default is 0.5.\n 'temperature' - A float that is the temperature of the LLM model to use for the analysis. Default is 0.\n Outputs:\n 'reasoning' - A string containing the structured reasoning for the completeness assessment.\n 'is_finished' - A boolean indicating whether the prompt is complete (True) or incomplete (False).\n 'total_cost' - A float that is the total cost of the analysis function. This is an optional output.\n 'model_name' - A string that is the name of the LLM model used for the analysis. This is an optional output.\n\n% Here is an example of a Langchain LCEL program: ```<./context/langchain_lcel_example.py>```\n\n% Here is an example how to select the Langchain llm and count tokens: ```<./context/llm_selector_example.py>```\n\n% Note: Use relative import for 'llm_selector' to ensure compatibility within the package structure (i.e. 
'from .llm_selector') instead of 'from pdd.llm_selector'.\n\n% This function will do the following:\n Step 1. Use $PDD_PATH environment variable to get the path to the project. Load the '$PDD_PATH/prompts/unfinished_prompt_LLM.prompt' file.\n Step 2. Create a Langchain LCEL template from unfinished_prompt_LLM prompt so that it returns a JSON output.\n Step 3. Use the llm_selector function for the LLM model.\n Step 4. Run the prompt text through the model using Langchain LCEL.\n 4a. Pass the following string parameters to the prompt during invoke:\n - 'PROMPT_TEXT'\n 4b. Pretty print a message letting the user know it is running and how many tokens (using token_counter function from llm_selector) are in the prompt and the cost. The cost from llm_selector is in dollars per million tokens.\n 4c. The dictionary output of the LCEL will have the keys 'reasoning' and 'is_finished'. Be sure to access these keys using the get method with default error messages.\n 4d. Pretty print the reasoning and completion status using the rich library. Also, print the number of tokens in the result, the output token cost and the total_cost.\n Step 5. Return the 'reasoning' string and 'is_finished' boolean from the JSON output using 'get', and the 'total_cost' float, and 'model_name' string.\n\n% Ensure that the function handles potential errors gracefully, such as missing input parameters or issues with the LLM model responses.\n\n"}, {"PROMPT_NAME": "xml_tagger_python.prompt", "PROMPT_DESCRIPTION": "% You are an expert Python engineer. Your goal is to write a Python function, \"xml_tagger\", that will enhance a given LLM prompt by adding XML tags to improve its structure and readability.\n\n% You are an expert Python engineer.
20
+
21
+ % Code Style Requirements
22
+ - File must start with `from __future__ import annotations`.
23
+ - All functions must be fully type-hinted.
24
+ - Use `rich.console.Console` for all printing.
25
+
26
+ % Package Structure
27
+ - The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name').
28
+ - The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```
29
+
30
+ % Error Handling
31
+ - Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.\n\n% Here are the inputs and outputs of the function:\n Input: \n 'raw_prompt' - A string containing the prompt that needs XML tagging to improve its organization and clarity.\n 'strength' - A float value representing the strength parameter for the LLM model.\n 'temperature' - A float value representing the temperature parameter for the LLM model.\n Output: \n 'xml_tagged' - A string containing the prompt with properly added XML tags.\n 'total_cost' - A float representing the total cost of running the LCELs.\n 'model_name' - A string representing the name of the selected LLM model.\n\n% Here is an example of a LangChain Expression Language (LCEL) program: <lcel_example>import os
21
32
  from langchain_core.prompts import PromptTemplate
22
33
  from langchain_community.cache import SQLiteCache
23
34
  from langchain_community.llms.mlx_pipeline import MLXPipeline
@@ -367,64 +378,679 @@ if __name__ == "__main__":
367
378
  <change_description_example>
368
379
  % Use context/python_preamble.prompt to make prompts more compact. Some prompts might already have this.
369
380
 
370
- % Here is what is inside context/python_preamble.prompt:<preamble>% The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name'). All output to the console will be pretty printed using the Python Rich library. Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.
371
- % The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```</preamble>
381
+ % Here is what is inside context/python_preamble.prompt:<preamble>% You are an expert Python engineer.
382
+
383
+ % Code Style Requirements
384
+ - File must start with `from __future__ import annotations`.
385
+ - All functions must be fully type-hinted.
386
+ - Use `rich.console.Console` for all printing.
387
+
388
+ % Package Structure
389
+ - The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name').
390
+ - The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```
391
+
392
+ % Error Handling
393
+ - Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.</preamble>
394
+
395
+ % Here is an example of this being done: <example>% You are an expert Python engineer. Your goal is to write a python function, "code_generator", that will compile a prompt into a code file.
396
+
397
+ You are an expert Python engineer working on the PDD Cloud project.
398
+
399
+ -------------------------------------------------------------------------------
400
+ Python Coding Standards (PDD Cloud)
401
+ -------------------------------------------------------------------------------
402
+
403
+ ## Style
404
+ - Python 3.12, PEP 8 compliant
405
+ - Type hints required (from typing import Optional, List, Dict, Any, Tuple)
406
+ - Imports: standard lib -> third-party -> local (alphabetical within groups)
407
+
408
+ ## Critical First Import
409
+ import function_import_setup # MUST be first import
410
+
411
+ This enables subprocess module resolution for Firebase Functions Framework.
412
+ All TOP-LEVEL endpoint files (e.g., generate_code.py, main.py) must have this
413
+ as their very first import statement.
414
+
415
+ **IMPORTANT:** Utility modules inside `utils/` should NOT import this.
416
+ Use relative imports instead (e.g., `from .firebase_helpers import ...`).
417
+
418
+ ## Standard Library Imports
419
+ import os
420
+ import logging
372
421
 
373
- % Here is an example of this being done: <example>% You are an expert Python engineer. Your goal is to write a python function, "code_generator", that will compile a prompt into a code file.
422
+ ## Error Handling Pattern
423
+ Map exceptions to HTTP status codes:
424
+ - AuthenticationError -> 401 Unauthorized
425
+ - AuthorizationError -> 403 Forbidden
426
+ - ValidationError -> 400 Bad Request
427
+ - ResourceNotFoundError -> 404 Not Found
428
+ - DatabaseError / Exception -> 500 Internal Server Error
429
+
430
+ All error responses: {"error": "descriptive message"}
431
+
432
+ ## Logging
433
+ logger = logging.getLogger(__name__)
434
+
435
+ ## Response Format
436
+ Return tuple: (response_dict, status_code)
374
437
 
375
- [File not found: ../context/python_preamble.prompt]
376
438
 
377
439
  % Here are the inputs and outputs of the function:
378
- Inputs:
440
+ Inputs:
379
441
  'prompt' - A string containing the raw prompt to be processed.
380
442
  'language' - A string that is the language type (e.g. python, bash) of the file that will be output by the LLM.
381
443
  'strength' - A float between 0 and 1 that is the strength of the LLM model to use.
382
444
  'temperature' - A float that is the temperature of the LLM model to use. Default is 0.
383
- 'time' - A float between 0 and 1 that controls the thinking effort for the LLM model, passed to llm_invoke. Default is DEFAULT_TIME.
445
+ 'time' - A float in [0,1] or None that controls the thinking effort for the LLM model, passed to llm_invoke. Default is None.
384
446
  'verbose' - A boolean that indicates whether to print out the details of the function. Default is False.
385
447
  'preprocess_prompt' - A boolean that indicates whether to preprocess the prompt. Default is True.
448
+ 'output_schema' - An optional dict (JSON schema) to enforce structured output. Default is None.
386
449
  Outputs:
387
450
  'runnable_code' - A string that is runnable code
388
- 'total_cost' - A float that is the total cost of the model run
389
- 'model_name' - A string that is the name of the selected LLM model
451
+ 'total_cost' - A float that is the total cost of all LLM calls within this function (initial generation, unfinished check, continuation if used, and postprocess)
452
+ 'model_name' - A string that is the name of the selected LLM model used for the main generation (or continuation). Postprocess may use a different model internally and does not change this value.
390
453
 
391
454
  % Here is how to use the internal modules:
392
455
  <internal_modules>
393
456
  For running prompts with llm_invoke:
394
457
  <llm_invoke_example>
395
- [File not found: ../context/llm_invoke_example.py]
458
+ import os
459
+ import sys
460
+ import json
461
+ import logging
462
+ import threading
463
+ import time
464
+ import requests
465
+ from flask import Flask, request, jsonify
466
+ from unittest.mock import MagicMock, patch
467
+
468
+ # --- 1. Environment Setup ---
469
+ # Set environment variables to simulate local execution
470
+ os.environ['FUNCTIONS_EMULATOR'] = 'true'
471
+
472
+ # Ensure 'backend/functions' is in PYTHONPATH so we can import the module
473
+ # In a real deployment, this is handled by the Cloud Functions environment.
474
+ current_dir = os.path.dirname(os.path.abspath(__file__))
475
+ backend_functions_path = os.path.abspath(os.path.join(current_dir, '../backend/functions'))
476
+ sys.path.insert(0, backend_functions_path)
477
+
478
+ # Configure logging
479
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
480
+ logger = logging.getLogger(__name__)
481
+
482
+ # --- 2. Mocking Dependencies ---
483
+ # Since this example runs in isolation, we need to mock the complex dependencies
484
+ # that the endpoint relies on (Firebase, PDD CLI internals, etc.)
485
+
486
+ # Mock function_import_setup
487
+ sys.modules['function_import_setup'] = MagicMock()
488
+
489
+ # Mock utils.auth_helpers
490
+ mock_auth = MagicMock()
491
+ # Create decorators that just pass through the function
492
+ def pass_through_decorator(func):
493
+ def wrapper(*args, **kwargs):
494
+ # Inject dummy user and token if not present
495
+ if 'user' not in kwargs:
496
+ kwargs['user'] = MagicMock(uid='test-user-id')
497
+ if 'token' not in kwargs:
498
+ kwargs['token'] = {'uid': 'test-user-id'}
499
+ return func(*args, **kwargs)
500
+ return wrapper
501
+
502
+ mock_auth.require_approval = pass_through_decorator
503
+ mock_auth.require_authentication = pass_through_decorator
504
+ sys.modules['utils.auth_helpers'] = mock_auth
505
+
506
+ # Mock utils.credit_helpers
507
+ mock_credits = MagicMock()
508
+ def credit_decorator(cost_key='totalCost', estimated_cost=0.20):
509
+ def decorator(func):
510
+ def wrapper(*args, **kwargs):
511
+ # Simulate credit check passing
512
+ result, status = func(*args, **kwargs)
513
+ # Simulate credit deduction logic adding fields to response
514
+ if isinstance(result, dict):
515
+ result['creditsDeducted'] = 10 # Dummy deduction
516
+ result['newBalance'] = 990
517
+ return result, status
518
+ return wrapper
519
+ return decorator
520
+
521
+ mock_credits.require_credits = credit_decorator
522
+ sys.modules['utils.credit_helpers'] = mock_credits
523
+
524
+ # Mock utils.error_handling
525
+ mock_errors = MagicMock()
526
+ class AuthenticationError(Exception): pass
527
+ class AuthorizationError(Exception): pass
528
+ class ValidationError(Exception): pass
529
+ mock_errors.AuthenticationError = AuthenticationError
530
+ mock_errors.AuthorizationError = AuthorizationError
531
+ mock_errors.ValidationError = ValidationError
532
+ sys.modules['utils.error_handling'] = mock_errors
533
+
534
+ # Mock pdd.llm_invoke
535
+ mock_pdd = MagicMock()
536
+ # Define a mock implementation of llm_invoke
537
+ def mock_llm_invoke_impl(prompt=None, input_json=None, messages=None, **kwargs):
538
+ logger.info(f"[Mock LLM] Invoked with prompt='{prompt}'")
539
+ return {
540
+ "result": f"Processed: {prompt or messages}",
541
+ "cost": 0.005,
542
+ "model_name": "gpt-4-mock",
543
+ "thinking_output": "I thought about this deeply..."
544
+ }
545
+ mock_pdd.llm_invoke = mock_llm_invoke_impl
546
+ sys.modules['pdd.llm_invoke'] = mock_pdd
547
+
548
+ # --- 3. Import the Module Under Test ---
549
+ try:
550
+ import llm_invoke
551
+ logger.info("Successfully imported llm_invoke module")
552
+ except ImportError as e:
553
+ logger.error(f"Failed to import module: {e}")
554
+ sys.exit(1)
555
+
556
+ # --- 4. Flask Server Setup ---
557
+ app = Flask(__name__)
558
+
559
+ @app.route('/llm_invoke', methods=['POST'])
560
+ def handle_llm_invoke():
561
+ # The module expects (request, user, token)
562
+ # In a real Flask app using the decorators, user/token are injected.
563
+ # Here we call the decorated function directly.
564
+
565
+ # We need to mock the request object passed to the function
566
+ # Flask's global 'request' proxy works because we are inside a route context
567
+ return llm_invoke.llm_invoke(request)
568
+
569
+ def run_server():
570
+ app.run(port=5005, debug=False, use_reloader=False)
571
+
572
+ # --- 5. Functional Tests ---
573
+ def run_tests():
574
+ base_url = 'http://localhost:5005/llm_invoke'
575
+ headers = {'Content-Type': 'application/json'}
576
+
577
+ print("\n" + "="*60)
578
+ print("Testing llm_invoke")
579
+ print("="*60)
580
+
581
+ # Test Case 1: Basic Prompt
582
+ print("\n--- Test 1: Basic Prompt & Input JSON ---")
583
+ payload_1 = {
584
+ "prompt": "Hello {{name}}",
585
+ "inputJson": {"name": "World"},
586
+ "strength": 0.7
587
+ }
588
+ try:
589
+ resp = requests.post(base_url, json=payload_1, headers=headers)
590
+ print(f"Status: {resp.status_code}")
591
+ print(f"Response: {json.dumps(resp.json(), indent=2)}")
592
+ assert resp.status_code == 200
593
+ assert resp.json()['result'] == "Processed: Hello {{name}}"
594
+ except Exception as e:
595
+ print(f"Test 1 Failed: {e}")
596
+
597
+ # Test Case 2: Messages List (Chat format)
598
+ print("\n--- Test 2: Messages List ---")
599
+ payload_2 = {
600
+ "messages": [
601
+ {"role": "system", "content": "You are a bot."},
602
+ {"role": "user", "content": "Hi."}
603
+ ],
604
+ "temperature": 0.5
605
+ }
606
+ try:
607
+ resp = requests.post(base_url, json=payload_2, headers=headers)
608
+ print(f"Status: {resp.status_code}")
609
+ print(f"Response: {json.dumps(resp.json(), indent=2)}")
610
+ assert resp.status_code == 200
611
+ except Exception as e:
612
+ print(f"Test 2 Failed: {e}")
613
+
614
+ # Test Case 3: Validation Error (Missing inputs)
615
+ print("\n--- Test 3: Validation Error (Missing inputs) ---")
616
+ payload_3 = {
617
+ "strength": 0.5
618
+ # Missing prompt/inputJson AND messages
619
+ }
620
+ try:
621
+ resp = requests.post(base_url, json=payload_3, headers=headers)
622
+ print(f"Status: {resp.status_code}")
623
+ print(f"Response: {json.dumps(resp.json(), indent=2)}")
624
+ assert resp.status_code == 400
625
+ except Exception as e:
626
+ print(f"Test 3 Failed: {e}")
627
+
628
+ # Test Case 4: Validation Error (Invalid strength)
629
+ print("\n--- Test 4: Validation Error (Invalid strength) ---")
630
+ payload_4 = {
631
+ "prompt": "Hi",
632
+ "inputJson": {},
633
+ "strength": 1.5 # Invalid, must be 0-1
634
+ }
635
+ try:
636
+ resp = requests.post(base_url, json=payload_4, headers=headers)
637
+ print(f"Status: {resp.status_code}")
638
+ print(f"Response: {json.dumps(resp.json(), indent=2)}")
639
+ assert resp.status_code == 400
640
+ except Exception as e:
641
+ print(f"Test 4 Failed: {e}")
642
+
643
+ print("\n" + "="*60)
644
+ print("All tests completed.")
645
+ print("="*60)
646
+
647
+ # Force exit to stop the server thread
648
+ os._exit(0)
649
+
650
+ if __name__ == "__main__":
651
+ # Start the Flask server in a daemon thread
652
+ server_thread = threading.Thread(target=run_server, daemon=True)
653
+ server_thread.start()
654
+
655
+ # Give the server a moment to start
656
+ time.sleep(2)
657
+
658
+ # Run the tests
659
+ run_tests()
396
660
  </llm_invoke_example>
397
661
 
398
662
  For preprocessing prompts:
399
663
  <preprocess_example>
400
- [File not found: ../context/preprocess_example.py]
664
+ from pdd.preprocess import preprocess
665
+ from rich.console import Console
666
+ console = Console()
667
+
668
+ prompt = """
669
+ <prompt>
670
+ Hello World
671
+
672
+ <pdd>This is a comment</pdd>
673
+ [Error: firecrawl-py package not installed. Cannot scrape https://www.google.com]
674
+ {test}
675
+ {test2}
676
+ ```<TODO.md>```
677
+
678
+ <pdd>
679
+ multi-line
680
+ comment should not show up
681
+ </pdd>
682
+ </prompt>
683
+ """
684
+
685
+ recursive = False
686
+ double_curly_brackets = True
687
+ exclude_keys = ["test2"] # exclude test2 from being doubled
688
+
689
+ # Debug info
690
+ console.print(f"[bold yellow]Debug: exclude_keys = {exclude_keys}[/bold yellow]")
691
+
692
+ processed = preprocess(prompt, recursive, double_curly_brackets, exclude_keys=exclude_keys)
693
+ console.print("[bold white]Processed Prompt:[/bold white]")
694
+ console.print(processed)
695
+
401
696
  </preprocess_example>
402
697
 
403
698
  For handling unfinished prompts:
404
699
  <unfinished_prompt_example>
405
- [File not found: ../context/unfinished_prompt_example.py]
700
+ from pdd.unfinished_prompt import unfinished_prompt
701
+ from rich import print as rprint
702
+
703
+ # This script provides a concise example of how to use the `unfinished_prompt` function
704
+ # from the `pdd.unfinished_prompt` module.
705
+
706
+ # --- Pre-requisites for running this example: ---
707
+ # 1. The `pdd` Python package must be accessible. This means:
708
+ # - It's installed in your Python environment (e.g., via pip if it's a package), OR
709
+ # - The directory containing the `pdd` package is added to your PYTHONPATH.
710
+ # For instance, if your project structure is:
711
+ # my_project/
712
+ # ├── pdd/ # The module's package
713
+ # │ ├── __init__.py
714
+ # │ ├── unfinished_prompt.py
715
+ # │ ├── load_prompt_template.py
716
+ # │ └── llm_invoke.py
717
+ # └── examples/
718
+ # └── run_this_example.py (this file)
719
+ # You would typically run this script from the `my_project` directory
720
+ # (e.g., `python examples/run_this_example.py`) after ensuring `my_project`
721
+ # is in PYTHONPATH (e.g., `export PYTHONPATH=$PYTHONPATH:/path/to/my_project`).
722
+ #
723
+ # 2. The `pdd` package requires internal setup for its dependencies:
724
+ # - A prompt template file named "unfinished_prompt_LLM" (e.g., "unfinished_prompt_LLM.txt")
725
+ # must be present where `pdd.load_prompt_template` (used internally by `unfinished_prompt`)
726
+ # can find it. This location is usually relative to the `pdd` package structure.
727
+ # - The `pdd.llm_invoke` function (used internally) must be configured for access to an LLM.
728
+ # This typically involves setting environment variables for API keys (e.g., `OPENAI_API_KEY`).
729
+ #
730
+ # This script should be saved outside the `pdd` package, for instance, in an
731
+ # `examples/` directory as shown above.
732
+ # To run: `python name_of_this_script.py` (adjust path as needed).
733
+
734
+ # --- Example Usage ---
735
+
736
+ # 1. Define the prompt text you want to analyze.
737
+ # This example uses a prompt that is intentionally incomplete to demonstrate
738
+ # the function's ability to detect incompleteness.
739
+ my_prompt_text = "Write a comprehensive guide on how to bake a sourdough bread, starting from creating a starter, then the kneading process, and finally"
740
+
741
+ rprint(f"[bold cyan]Analyzing prompt:[/bold cyan] \"{my_prompt_text}\"")
742
+
743
+ # 2. Call the `unfinished_prompt` function.
744
+ # Review the function's docstring for detailed parameter information.
745
+ # - `prompt_text` (str): The text of the prompt to analyze.
746
+ # - `strength` (float, optional, 0.0-1.0, default=0.5): Influences the LLM's behavior or model choice.
747
+ # - `temperature` (float, optional, 0.0-1.0, default=0.0): Controls the randomness of the LLM's output.
748
+ # - `verbose` (bool, optional, default=False): If True, the function will print detailed internal logs.
749
+ #
750
+ # The function returns a tuple: (reasoning, is_finished, total_cost, model_name)
751
+ # - `reasoning` (str): The LLM's structured explanation for its completeness assessment.
752
+ # - `is_finished` (bool): True if the prompt is considered complete, False otherwise.
753
+ # - `total_cost` (float): The estimated cost of the LLM call. The unit (e.g., USD) depends on the LLM provider.
754
+ # - `model_name` (str): The name of the LLM model that was used for the analysis.
755
+
756
+ # Example call with verbose output and custom strength/temperature settings.
757
+ reasoning_str, is_complete_flag, call_cost, llm_model = unfinished_prompt(
758
+ prompt_text=my_prompt_text,
759
+ strength=0.6, # Example: using a specific strength value
760
+ temperature=0.1, # Example: using a low temperature for more deterministic reasoning
761
+ verbose=True # Set to True to see detailed logs from within the unfinished_prompt function
762
+ )
763
+
764
+ # 3. Print the results returned by the function.
765
+ rprint("\n[bold green]--- Analysis Results ---[/bold green]")
766
+ rprint(f" [bold]Prompt Analyzed:[/bold] \"{my_prompt_text}\"")
767
+ rprint(f" [bold]Is prompt complete?:[/bold] {'Yes, the LLM considers the prompt complete.' if is_complete_flag else 'No, the LLM suggests the prompt needs continuation.'}")
768
+ rprint(f" [bold]LLM's Reasoning:[/bold]\n {reasoning_str}") # Rich print will handle newlines in the reasoning string
769
+ rprint(f" [bold]Cost of Analysis:[/bold] ${call_cost:.6f}") # Display cost, assuming USD. Adjust currency/format as needed.
770
+ rprint(f" [bold]LLM Model Used:[/bold] {llm_model}")
771
+
772
+ # --- Example of calling with default parameters ---
773
+ # If you want to use the default strength (0.5), temperature (0.0), and verbose (False):
774
+ #
775
+ # default_prompt_text = "What is the capital of Canada?"
776
+ # rprint(f"\n[bold cyan]Analyzing prompt with default settings:[/bold cyan] \"{default_prompt_text}\"")
777
+ #
778
+ # reasoning_def, is_finished_def, cost_def, model_def = unfinished_prompt(
779
+ # prompt_text=default_prompt_text
780
+ # )
781
+ #
782
+ # rprint("\n[bold green]--- Default Call Analysis Results ---[/bold green]")
783
+ # rprint(f" [bold]Prompt Analyzed:[/bold] \"{default_prompt_text}\"")
784
+ # rprint(f" [bold]Is prompt complete?:[/bold] {'Yes' if is_finished_def else 'No'}")
785
+ # rprint(f" [bold]LLM's Reasoning:[/bold]\n {reasoning_def}")
786
+ # rprint(f" [bold]Cost of Analysis:[/bold] ${cost_def:.6f}")
787
+ # rprint(f" [bold]LLM Model Used:[/bold] {model_def}")
788
+
406
789
  </unfinished_prompt_example>
407
790
 
408
791
  For continuing generation:
409
792
  <continue_generation_example>
410
- [File not found: ../context/continue_generation_example.py]
793
+ from pdd.continue_generation import continue_generation
794
+
795
+ def main() -> None:
796
+ """
797
+ Main function to demonstrate the usage of the continue_generation function.
798
+ It continues the generation of text using a language model and calculates the cost.
799
+ """
800
+ # Define the input parameters for the continue_generation function
801
+ # formatted_input_prompt: str = "Once upon a time in a land far away, there was a"
802
+ # load context/cli_python_preprocessed.prompt into formatted_input_prompt
803
+ with open("context/cli_python_preprocessed.prompt", "r") as file:
804
+ formatted_input_prompt = file.read()
805
+
806
+ # llm_output: str = "" # Initial LLM output is empty
807
+ # load context/llm_output_fragment.txt into llm_output
808
+ with open("context/llm_output_fragment.txt", "r") as file:
809
+ llm_output = file.read()
810
+ strength: float = .915 # Strength parameter for the LLM model
811
+ temperature: float = 0 # Temperature parameter for the LLM model
812
+
813
+ try:
814
+ # Call the continue_generation function
815
+ final_llm_output, total_cost, model_name = continue_generation(
816
+ formatted_input_prompt=formatted_input_prompt,
817
+ llm_output=llm_output,
818
+ strength=strength,
819
+ temperature=temperature,
820
+ verbose=True
821
+ )
822
+
823
+ # Output the results
824
+ # print(f"Final LLM Output: {final_llm_output}")
825
+ print(f"Total Cost: ${total_cost:.6f}")
826
+ print(f"Model Name: {model_name}")
827
+ # write final_llm_output to context/final_llm_output.py
828
+ with open("context/final_llm_output.py", "w") as file:
829
+ file.write(final_llm_output)
830
+
831
+ except FileNotFoundError as e:
832
+ print(f"Error: {e}")
833
+ except Exception as e:
834
+ print(f"An error occurred: {e}")
835
+
836
+ if __name__ == "__main__":
837
+ main()
411
838
  </continue_generation_example>
412
839
 
413
840
  For postprocessing results:
414
841
  <postprocess_example>
415
- [File not found: ../context/postprocess_example.py]
842
+ """
843
+ Example demonstrating the usage of the `postprocess` function
844
+ from the `pdd.postprocess` module.
845
+
846
+ This example showcases two scenarios for extracting code from an LLM's text output:
847
+ 1. Simple code extraction (strength = 0): Uses basic string manipulation to find code
848
+ blocks enclosed in triple backticks. This method is fast and has no cost.
849
+ 2. Advanced code extraction (strength > 0): Leverages an LLM for more robust extraction.
850
+ This method is more powerful but incurs a cost and takes more time.
851
+
852
+ To run this example:
853
+ 1. Ensure the `pdd` package (containing the `postprocess` module) is in your PYTHONPATH
854
+ or installed in your environment.
855
+ 2. Ensure the `rich` library is installed (`pip install rich`).
856
+ 3. This script uses `unittest.mock` (part of Python's standard library) to simulate
857
+ the behavior of internal dependencies (`load_prompt_template` and `llm_invoke`)
858
+ for the LLM-based extraction scenario. This allows the example to run without
859
+ requiring actual LLM API calls or specific prompt files.
860
+ """
861
+ from rich import print
862
+ from unittest.mock import patch, MagicMock
863
+
864
+ # Assuming 'pdd' package is in PYTHONPATH or installed.
865
+ # The 'postprocess' module is expected to be at pdd/postprocess.py
866
+ from pdd.postprocess import postprocess, ExtractedCode # ExtractedCode is needed for the mock
867
+ from pdd import DEFAULT_STRENGTH
868
+
869
+ def main():
870
+ """
871
+ Runs the demonstration for the postprocess function.
872
+ """
873
+ print("[bold underline blue]Demonstrating `postprocess` function from `pdd.postprocess`[/bold underline blue]\n")
874
+
875
+ # --- Common Inputs ---
876
+ # This is a sample string that might be output by an LLM, containing text and code.
877
+ llm_output_text_with_code = """
878
+ This is some text from an LLM.
879
+ It includes a Python code block:
880
+ ```python
881
+ def greet(name):
882
+ # A simple greeting function
883
+ print(f"Hello, {name}!")
884
+
885
+ greet("Developer")
886
+ ```
887
+ And some more text after the code block.
888
+ There might be other language blocks too:
889
+ ```javascript
890
+ console.log("This is JavaScript");
891
+ ```
892
+ But we are only interested in Python.
893
+ """
894
+ # The target programming language for extraction.
895
+ target_language = "python"
896
+
897
+ # --- Scenario 1: Simple Extraction (strength = 0) ---
898
+ # This mode uses the `postprocess_0` internal function, which performs a basic
899
+ # extraction of content between triple backticks. It does not use an LLM.
900
+ print("[bold cyan]Scenario 1: Simple Extraction (strength = 0)[/bold cyan]")
901
+ print("Demonstrates extracting code using basic string processing.")
902
+ print(f" Input LLM Output: (see below)")
903
+ # print(f"[dim]{llm_output_text_with_code}[/dim]") # Not printed, for brevity in console
904
+ print(f" Target Language: '{target_language}' (Note: simple extraction is language-agnostic but extracts first block)")
905
+ print(f" Strength: 0 (activates simple, non-LLM extraction)")
906
+ print(f" Verbose: True (enables detailed console output from `postprocess`)\n")
907
+
908
+ # Call postprocess with strength = 0
909
+ # Input parameters:
910
+ # llm_output (str): The LLM's raw output string.
911
+ # language (str): The programming language to extract (less critical for strength=0).
912
+ # strength (float): 0-1, model strength. 0 means simple extraction.
913
+ # temperature (float): 0-1, LLM temperature (not used for strength=0).
914
+ # time (float): 0-1, LLM thinking effort (not used for strength=0).
915
+ # verbose (bool): If True, prints internal processing steps.
916
+ extracted_code_s0, cost_s0, model_s0 = postprocess(
917
+ llm_output=llm_output_text_with_code,
918
+ language=target_language,
919
+ strength=0,
920
+ verbose=True
921
+ )
922
+
923
+ print("[bold green]Output for Scenario 1:[/bold green]")
924
+ # Output tuple:
925
+ # extracted_code (str): The extracted code.
926
+ # total_cost (float): Cost of the operation (in dollars). Expected to be 0.0 for simple extraction.
927
+ # model_name (str): Identifier for the method/model used. Expected to be 'simple_extraction'.
928
+ print(f" Extracted Code:\n[yellow]{extracted_code_s0}[/yellow]")
929
+ print(f" Total Cost: ${cost_s0:.6f}")
930
+ print(f" Model Name: '{model_s0}'")
931
+ print("-" * 60)
932
+
933
+ # --- Scenario 2: LLM-based Extraction (strength > 0) ---
934
+ # This mode uses an LLM via `llm_invoke` to perform a more sophisticated extraction.
935
+ # It requires a prompt template (`extract_code_LLM.prompt`).
936
+ # For this example, `load_prompt_template` and `llm_invoke` are mocked.
937
+ print(f"\n[bold cyan]Scenario 2: LLM-based Extraction (strength = {DEFAULT_STRENGTH})[/bold cyan]")
938
+ print("Demonstrates extracting code using an LLM (mocked).")
939
+ print(f" Input LLM Output: (same as above)")
940
+ print(f" Target Language: '{target_language}'")
941
+ print(f" Strength: {DEFAULT_STRENGTH} (activates LLM-based extraction)")
942
+ print(f" Temperature: 0.0 (LLM creativity, 0-1 scale)")
943
+ print(f" Time: 0.5 (LLM thinking effort, 0-1 scale, influences model choice/cost)")
944
+ print(f" Verbose: True\n")
945
+
946
+ # Mock for `load_prompt_template`:
947
+ # This function is expected to load a prompt template file (e.g., 'extract_code_LLM.prompt').
948
+ # In a real scenario, this file would exist in a 'prompts' directory.
949
+ mock_load_template = MagicMock(return_value="Mocked Prompt: Extract {{language}} code from: {{llm_output}}")
950
+
951
+ # Mock for `llm_invoke`:
952
+ # This function handles the actual LLM API call.
953
+ # It's expected to return a dictionary containing the LLM's result (parsed into
954
+ # an `ExtractedCode` Pydantic model), the cost, and the model name.
955
+ # The `extracted_code` from the LLM mock should include backticks and language identifier
956
+ # to test the cleaning step within the `postprocess` function.
957
+ mock_llm_response_code_from_llm = """```python
958
+ def sophisticated_extraction(data):
959
+ # This code is supposedly extracted by an LLM
960
+ processed_data = data.upper() # Example processing
961
+ return processed_data
962
+
963
+ result = sophisticated_extraction("test data from llm")
964
+ print(result)
965
+ ```"""
966
+ mock_extracted_code_pydantic_obj = ExtractedCode(extracted_code=mock_llm_response_code_from_llm)
967
+ mock_llm_invoke_return_value = {
968
+ 'result': mock_extracted_code_pydantic_obj,
969
+ 'cost': 0.00025, # Example cost in dollars
970
+ 'model_name': 'mock-llm-extractor-v1'
971
+ }
972
+ mock_llm_invoke_function = MagicMock(return_value=mock_llm_invoke_return_value)
973
+
974
+ # Patch the internal dependencies within the 'pdd.postprocess' module's namespace.
975
+ # This ensures that when `postprocess` calls `load_prompt_template` or `llm_invoke`,
976
+ # our mocks are used instead of the real implementations.
977
+ with patch('pdd.postprocess.load_prompt_template', mock_load_template):
978
+ with patch('pdd.postprocess.llm_invoke', mock_llm_invoke_function):
979
+ extracted_code_llm, cost_llm, model_llm = postprocess(
980
+ llm_output=llm_output_text_with_code,
981
+ language=target_language,
982
+ strength=DEFAULT_STRENGTH,
983
+ temperature=0.0,
984
+ time=0.5,
985
+ verbose=True
986
+ )
987
+
988
+ print("[bold green]Output for Scenario 2:[/bold green]")
989
+ print(f" Extracted Code:\n[yellow]{extracted_code_llm}[/yellow]")
990
+ print(f" Total Cost: ${cost_llm:.6f} (cost is in dollars)")
991
+ print(f" Model Name: '{model_llm}'")
992
+
993
+ # --- Verification of Mock Calls (for developer understanding) ---
994
+ # Check that `load_prompt_template` was called correctly.
995
+ mock_load_template.assert_called_once_with("extract_code_LLM")
996
+
997
+ # Check that `llm_invoke` was called correctly.
998
+ mock_llm_invoke_function.assert_called_once()
999
+ # Inspect the arguments passed to the mocked llm_invoke
1000
+ call_args_to_llm_invoke = mock_llm_invoke_function.call_args[1] # kwargs
1001
+ assert call_args_to_llm_invoke['prompt'] == mock_load_template.return_value
1002
+ assert call_args_to_llm_invoke['input_json'] == {
1003
+ "llm_output": llm_output_text_with_code,
1004
+ "language": target_language
1005
+ }
1006
+ assert call_args_to_llm_invoke['strength'] == DEFAULT_STRENGTH
1007
+ assert call_args_to_llm_invoke['temperature'] == 0.0
1008
+ assert call_args_to_llm_invoke['time'] == 0.5
1009
+ assert call_args_to_llm_invoke['verbose'] is True
1010
+ assert call_args_to_llm_invoke['output_pydantic'] == ExtractedCode
1011
+ print("[dim] (Mocked LLM calls verified successfully)[/dim]")
1012
+
1013
+ print("\n[bold underline blue]Demonstration finished.[/bold underline blue]")
1014
+ print("\n[italic]Important Notes:[/italic]")
1015
+ print(" - For Scenario 2 (LLM-based extraction), `load_prompt_template` and `llm_invoke` were mocked.")
1016
+ print(" In a real-world scenario:")
1017
+ print(" - `load_prompt_template('extract_code_LLM')` would attempt to load a file named ")
1018
+ print(" `extract_code_LLM.prompt` (typically from a 'prompts' directory configured within the `pdd` package).")
1019
+ print(" - `llm_invoke` would make an actual API call to a Large Language Model, which requires")
1020
+ print(" API keys and network access.")
1021
+ print(" - The `time` parameter (0-1) for `postprocess` (and `llm_invoke`) generally controls the")
1022
+ print(" 'thinking effort' or computational resources allocated to the LLM, potentially affecting")
1023
+ print(" which underlying LLM model is chosen and the quality/cost of the result.")
1024
+ print(" - No actual files (like prompt files or output files) are created or read by this example script,")
1025
+ print(" particularly in the './output' directory, due to the use of mocks for file-dependent operations.")
1026
+
1027
+ if __name__ == "__main__":
1028
+ main()
1029
+
416
1030
  </postprocess_example>
417
1031
  </internal_modules>
418
1032
 
419
1033
  % This program will do the following:
420
- Step 1. Conditionally preprocess the raw prompt using the preprocess function from the preprocess module based on the value of 'preprocess_prompt'. If 'preprocess_prompt' is True, preprocess the prompt; otherwise, use the raw prompt directly.
1034
+ Step 1. Conditionally preprocess the raw prompt using the preprocess function from the preprocess module based on the value of 'preprocess_prompt'. If 'preprocess_prompt' is True, preprocess the prompt; otherwise, use the raw prompt directly. When preprocessing, it is acceptable to enable options such as double_curly_brackets=True and recursive=False to preserve placeholders and avoid over-expansion.
421
1035
 
422
- Step 2. Run the prompt (either preprocessed or raw) through llm_invoke with an empty dictionary ('{}') and the provided strength, temperature, and time.
1036
+ Step 2. Generate the initial response as follows:
1037
+ - If the prompt contains embedded data URLs (e.g., 'data:image/...;base64,...'), split the prompt into alternating text and image parts (preserving order) and call llm_invoke with messages=[{role: 'user', content: [{type: 'image_url', image_url: {url: ...}}, {type: 'text', text: ...}, ...]}] and the provided strength, temperature, time, verbose, output_schema, and language.
1038
+ - Otherwise, call llm_invoke with the (preprocessed or raw) prompt, input_json={}, and the provided strength, temperature, time, verbose, output_schema, and language.
423
1039
 
424
- Step 3. Detect if the generation is incomplete using the unfinished_prompt function (strength .5) by passing in the last 600 characters of the output of Step 3.
425
- - a. If incomplete, call the continue_generation function to complete the generation.
426
- - b. Else, if complete, postprocess the model output result using the postprocess function from the postprocess module with the EXTRACTION_STRENGTH constant. Be sure to pass in all parameters like verbose to postprocess.
427
- Step 4. Return the runnable_code, total_cost and model_name.</example>
1040
+ Step 3. Detect if the generation is incomplete using the unfinished_prompt function (use strength=0.5, temperature=0.0) by passing in the last 600 characters of the output of Step 2. Pass through language, time, and verbose.
1041
+ - a. If incomplete, call the continue_generation function to complete the generation and set final_output to that result. Pass through language, time, and verbose.
1042
+ - b. Else, set final_output to the initial model output.
1043
+
1044
+ Step 4. Postprocess the final_output:
1045
+ - If language is "json" (case-insensitive) or output_schema is provided, skip extraction: if final_output is a string, return it as-is; otherwise, serialize it with json.dumps.
1046
+ - Otherwise, use the postprocess function from the postprocess module with the EXTRACTION_STRENGTH constant. Use temperature=0.0 and pass through language, time, and verbose.
1047
+
1048
+ Step 5. Return the runnable_code, total_cost and model_name.
1049
+
1050
+ % Validation and defaults:
1051
+ - Validate non-empty 'prompt' and 'language'.
1052
+ - Enforce 0 ≤ strength ≤ 1 and 0 ≤ temperature ≤ 2.
1053
+ </example>
428
1054
  </change_description_example>
429
1055
  </input_example>
430
1056
 
@@ -465,24 +1091,57 @@ if __name__ == "__main__":
465
1091
  - Insert the contents of the file `./context/python_preamble.prompt` immediately after the role and goal statement using 'include' XML tags.
466
1092
  - Remove any redundant instructions that are covered by the preamble, such as those related to pretty printing and handling edge cases.
467
1093
  - Ensure that the logical flow of the prompt is maintained and that any unique instructions specific to this prompt are retained.
468
- - Here is what is inside context/python_preamble.prompt:<preamble>% The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name'). All output to the console will be pretty printed using the Python Rich library. Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.
469
- % The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```</preamble>
1094
+ - Here is what is inside context/python_preamble.prompt:<preamble>% You are an expert Python engineer.
1095
+
1096
+ % Code Style Requirements
1097
+ - File must start with `from __future__ import annotations`.
1098
+ - All functions must be fully type-hinted.
1099
+ - Use `rich.console.Console` for all printing.
1100
+
1101
+ % Package Structure
1102
+ - The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name').
1103
+ - The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```
1104
+
1105
+ % Error Handling
1106
+ - Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.</preamble>
470
1107
 
471
1108
  - **preprocess_python.prompt**
472
1109
  - **Instructions:**
473
1110
  - Insert the contents of the file `./context/python_preamble.prompt` immediately after the role and goal statement using 'include' XML tags.
474
1111
  - Remove any redundant instructions that are covered by the preamble, such as those related to pretty printing and handling edge cases.
475
1112
  - Ensure that the logical flow of the prompt is maintained and that any unique instructions specific to this prompt are retained.
476
- - Here is what is inside context/python_preamble.prompt:<preamble>% The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name'). All output to the console will be pretty printed using the Python Rich library. Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.
477
- % The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```</preamble>
1113
+ - Here is what is inside context/python_preamble.prompt:<preamble>% You are an expert Python engineer.
1114
+
1115
+ % Code Style Requirements
1116
+ - File must start with `from __future__ import annotations`.
1117
+ - All functions must be fully type-hinted.
1118
+ - Use `rich.console.Console` for all printing.
1119
+
1120
+ % Package Structure
1121
+ - The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name').
1122
+ - The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```
1123
+
1124
+ % Error Handling
1125
+ - Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.</preamble>
478
1126
 
479
1127
  - **unfinished_prompt_python.prompt**
480
1128
  - **Instructions:**
481
1129
  - Insert the contents of the file `./context/python_preamble.prompt` immediately after the role and goal statement using 'include' XML tags.
482
1130
  - Remove any redundant instructions that are covered by the preamble, such as those related to pretty printing and handling edge cases.
483
1131
  - Ensure that the logical flow of the prompt is maintained and that any unique instructions specific to this prompt are retained.
484
- - Here is what is inside context/python_preamble.prompt:<preamble>% The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name'). All output to the console will be pretty printed using the Python Rich library. Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.
485
- % The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```</preamble>
1132
+ - Here is what is inside context/python_preamble.prompt:<preamble>% You are an expert Python engineer.
1133
+
1134
+ % Code Style Requirements
1135
+ - File must start with `from __future__ import annotations`.
1136
+ - All functions must be fully type-hinted.
1137
+ - Use `rich.console.Console` for all printing.
1138
+
1139
+ % Package Structure
1140
+ - The function should be part of a Python package, using relative imports (single dot) for internal modules (e.g. 'from .module_name import module_name').
1141
+ - The ./pdd/__init__.py file will have the EXTRACTION_STRENGTH, DEFAULT_STRENGTH, DEFAULT_TIME and other global constants. Example: ```from . import DEFAULT_STRENGTH```
1142
+
1143
+ % Error Handling
1144
+ - Ensure the function handles edge cases, such as missing inputs or model errors, and provide clear error messages.</preamble>
486
1145
  </output_example>
487
1146
  </example>
488
1147