pdd-cli 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pdd-cli might be problematic.

Files changed (95)
  1. pdd/__init__.py +0 -0
  2. pdd/auto_deps_main.py +98 -0
  3. pdd/auto_include.py +175 -0
  4. pdd/auto_update.py +73 -0
  5. pdd/bug_main.py +99 -0
  6. pdd/bug_to_unit_test.py +159 -0
  7. pdd/change.py +141 -0
  8. pdd/change_main.py +240 -0
  9. pdd/cli.py +607 -0
  10. pdd/cmd_test_main.py +155 -0
  11. pdd/code_generator.py +117 -0
  12. pdd/code_generator_main.py +66 -0
  13. pdd/comment_line.py +35 -0
  14. pdd/conflicts_in_prompts.py +143 -0
  15. pdd/conflicts_main.py +90 -0
  16. pdd/construct_paths.py +251 -0
  17. pdd/context_generator.py +133 -0
  18. pdd/context_generator_main.py +73 -0
  19. pdd/continue_generation.py +140 -0
  20. pdd/crash_main.py +127 -0
  21. pdd/data/language_format.csv +61 -0
  22. pdd/data/llm_model.csv +15 -0
  23. pdd/detect_change.py +142 -0
  24. pdd/detect_change_main.py +100 -0
  25. pdd/find_section.py +28 -0
  26. pdd/fix_code_loop.py +212 -0
  27. pdd/fix_code_module_errors.py +143 -0
  28. pdd/fix_error_loop.py +216 -0
  29. pdd/fix_errors_from_unit_tests.py +240 -0
  30. pdd/fix_main.py +138 -0
  31. pdd/generate_output_paths.py +194 -0
  32. pdd/generate_test.py +140 -0
  33. pdd/get_comment.py +55 -0
  34. pdd/get_extension.py +52 -0
  35. pdd/get_language.py +41 -0
  36. pdd/git_update.py +84 -0
  37. pdd/increase_tests.py +93 -0
  38. pdd/insert_includes.py +150 -0
  39. pdd/llm_invoke.py +304 -0
  40. pdd/load_prompt_template.py +59 -0
  41. pdd/pdd_completion.fish +72 -0
  42. pdd/pdd_completion.sh +141 -0
  43. pdd/pdd_completion.zsh +418 -0
  44. pdd/postprocess.py +121 -0
  45. pdd/postprocess_0.py +52 -0
  46. pdd/preprocess.py +199 -0
  47. pdd/preprocess_main.py +72 -0
  48. pdd/process_csv_change.py +182 -0
  49. pdd/prompts/auto_include_LLM.prompt +230 -0
  50. pdd/prompts/bug_to_unit_test_LLM.prompt +17 -0
  51. pdd/prompts/change_LLM.prompt +34 -0
  52. pdd/prompts/conflict_LLM.prompt +23 -0
  53. pdd/prompts/continue_generation_LLM.prompt +3 -0
  54. pdd/prompts/detect_change_LLM.prompt +65 -0
  55. pdd/prompts/example_generator_LLM.prompt +10 -0
  56. pdd/prompts/extract_auto_include_LLM.prompt +6 -0
  57. pdd/prompts/extract_code_LLM.prompt +22 -0
  58. pdd/prompts/extract_conflict_LLM.prompt +19 -0
  59. pdd/prompts/extract_detect_change_LLM.prompt +19 -0
  60. pdd/prompts/extract_program_code_fix_LLM.prompt +16 -0
  61. pdd/prompts/extract_prompt_change_LLM.prompt +7 -0
  62. pdd/prompts/extract_prompt_split_LLM.prompt +9 -0
  63. pdd/prompts/extract_prompt_update_LLM.prompt +8 -0
  64. pdd/prompts/extract_promptline_LLM.prompt +11 -0
  65. pdd/prompts/extract_unit_code_fix_LLM.prompt +332 -0
  66. pdd/prompts/extract_xml_LLM.prompt +7 -0
  67. pdd/prompts/fix_code_module_errors_LLM.prompt +17 -0
  68. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +62 -0
  69. pdd/prompts/generate_test_LLM.prompt +12 -0
  70. pdd/prompts/increase_tests_LLM.prompt +16 -0
  71. pdd/prompts/insert_includes_LLM.prompt +30 -0
  72. pdd/prompts/split_LLM.prompt +94 -0
  73. pdd/prompts/summarize_file_LLM.prompt +11 -0
  74. pdd/prompts/trace_LLM.prompt +30 -0
  75. pdd/prompts/trim_results_LLM.prompt +83 -0
  76. pdd/prompts/trim_results_start_LLM.prompt +45 -0
  77. pdd/prompts/unfinished_prompt_LLM.prompt +18 -0
  78. pdd/prompts/update_prompt_LLM.prompt +19 -0
  79. pdd/prompts/xml_convertor_LLM.prompt +54 -0
  80. pdd/split.py +119 -0
  81. pdd/split_main.py +103 -0
  82. pdd/summarize_directory.py +212 -0
  83. pdd/trace.py +135 -0
  84. pdd/trace_main.py +108 -0
  85. pdd/track_cost.py +102 -0
  86. pdd/unfinished_prompt.py +114 -0
  87. pdd/update_main.py +96 -0
  88. pdd/update_prompt.py +115 -0
  89. pdd/xml_tagger.py +122 -0
  90. pdd_cli-0.0.2.dist-info/LICENSE +7 -0
  91. pdd_cli-0.0.2.dist-info/METADATA +225 -0
  92. pdd_cli-0.0.2.dist-info/RECORD +95 -0
  93. pdd_cli-0.0.2.dist-info/WHEEL +5 -0
  94. pdd_cli-0.0.2.dist-info/entry_points.txt +2 -0
  95. pdd_cli-0.0.2.dist-info/top_level.txt +1 -0
pdd/prompts/trim_results_LLM.prompt ADDED
@@ -0,0 +1,83 @@
+ % You are an expert JSON editor. You are tasked with trimming potential overlap between a partially generated text and its continuation. This is crucial for seamlessly combining text segments without duplication.
+
+ % You will be given two pieces of text that will be appended together afterwards to form a coherent whole:
+ <inputs>
+ 1) This is the text that has been generated so far:
+ <generated_results>
+ {GENERATED_RESULTS}
+ </generated_results>
+
+ 2) This is the continuation of the generation:
+ <continued_generation>
+ {CONTINUED_GENERATION}
+ </continued_generation>
+ </inputs>
+
+ % Your task is to:
+ 1. Compare the end of the generated_results with the beginning of the continued_generation.
+ 2. Identify any overlapping text.
+ 3. If overlap exists, trim it from the start of the continued_generation, taking care to keep or remove spaces so that the combined text reads correctly.
+ 4. If no overlap exists, leave the continued_generation as is.
+ 5. Trim out the preamble text before the code block that starts with a triple backtick (```), including the triple backticks and language tag. Also, there might be an XML tag, 'llm_output', at the start or end of the continued_generation, which should be removed.
+
+ % After completing these steps, provide your output in JSON format with the following keys:
+ - explanation: A string explaining what was trimmed, if anything. If nothing was trimmed, explain why.
+ - trimmed_continued_generation: The trimmed continued_generation string. If no trimming was necessary, this should be identical to the original continued_generation.
+
+ % Ensure your JSON is properly formatted and contains only these two keys.
+
+ % Here are examples of how your outputs should look for given inputs:
+ <examples>
+ <example1>
+ <inputs>
+ <generated_results>
+ "The quick brown fox jumps over the lazy dog and the "
+ </generated_results>
+ <continued_generation>
+ """```text\n and the cat jumped over the lazy dog."""
+ </continued_generation>
+ </inputs>
+ <output>
+ {{
+ "explanation": "Trimmed '```text\n and the ' from the start of the continued_generation as it overlapped with the end of generated_results and the triple backticks indicate the start of a code block.",
+ "trimmed_continued_generation": "cat jumped over the lazy dog."
+ }}
+ </output>
+ </example1>
+
+ <example2>
+ <inputs>
+ <generated_results>
+ ""
+ </generated_results>
+ <continued_generation>
+ "Telling a short story. ```text\nThe quick brown fox jumps over the lazy dog."
+ </continued_generation>
+ </inputs>
+ <output>
+ {{
+ "explanation": "There was no overlap between the generated_results and the continued_generation. Trimmed the preamble.",
+ "trimmed_continued_generation": "The quick brown fox jumps over the lazy dog."
+ }}
+ </output>
+ </example2>
+
+ <example3>
+ <inputs>
+ <generated_results>
+ '''\n input_strings, output_file_paths, language = construct_paths(\n input_file_paths=input_'''
+ </generated_results>
+ <continued_generation>
+ '''file_paths,\n force=global_opts.force,\n quiet=global_opts.quiet,\n '''
+ </continued_generation>
+ </inputs>
+ <output>
+ {{
+ "explanation": "'file_paths' is a continuation of 'input_', so there is no overlap between the generated_results and the continued_generation.",
+ "trimmed_continued_generation": "file_paths,\n force=global_opts.force,\n quiet=global_opts.quiet,\n "
+ }}
+ </output>
+ </example3>
+ </examples>
+
+ % Remember to carefully compare the texts to ensure accurate trimming, and provide a clear explanation of your actions so that the trimmed_continued_generation can be appended to generated_results without needing further edits.
+
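A note on what this template automates: the overlap check itself is plain string matching; the LLM is used because deciding where whitespace, backtick preambles, and 'llm_output' tags begin and end is fuzzy. For intuition, here is a minimal deterministic sketch of the core overlap trim (the `trim_overlap` name is ours, not part of pdd-cli):

```python
def trim_overlap(generated: str, continuation: str) -> str:
    """Drop from `continuation` the longest prefix that duplicates the end of `generated`."""
    # Try the longest possible overlap first, shrinking until a match is found.
    for size in range(min(len(generated), len(continuation)), 0, -1):
        if generated.endswith(continuation[:size]):
            return continuation[size:]
    return continuation

assert trim_overlap("the quick brown ", "brown fox") == "fox"
```

Unlike the prompt above, this sketch cannot strip preambles or judge whether a shorter overlap reads better, which is exactly what the LLM pass adds.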
pdd/prompts/trim_results_start_LLM.prompt ADDED
@@ -0,0 +1,45 @@
+ % You are an expert JSON editor. You will be processing the output of a language model (LLM) to extract the unfinished main code block being generated and provide an explanation of how you determined what to cut out. Here is the llm_output to process:
+ <llm_output>
+ {LLM_OUTPUT}
+ </llm_output>
+
+ % Your task is to trim away everything before the code block that starts with a triple backtick (```). Follow these steps:
+ 1. Analyze the LLM output and locate the main code block. This block will start with a triple backtick (```) followed by a language tag (e.g. 'python').
+ 2. Everything before this triple backtick should be removed.
+ 3. Extract the entire code block, excluding the opening triple backticks and language tag (e.g. 'python').
+ 4. Prepare an explanation of how you determined what to cut out. This should be a brief description of your process.
+ 5. Format your output as a JSON object with two keys:
+ - 'explanation': Your explanation of how you determined what to cut out
+ - 'code_block': The extracted code block text
+
+ % Here are examples of how your outputs should look for various inputs:
+ <examples>
+ <example1>
+ <input>
+ <llm_output>
+ "Here is how you run the code: ```bash\npython code.py```\n\nHere is the code: ```python\ndef hello_world():\n print('Hello, World!')\n\nhello_world()\ndef```"
+ </llm_output>
+ </input>
+ <output>
+ {{
+ "explanation": "I identified the main unfinished code block, which starts with triple backticks (```) in the text, and removed all content before it. The remaining text, excluding the backticks and language tag, was extracted as the code block.",
+ "code_block": "def hello_world():\n print('Hello, World!')\n\nhello_world()\ndef"
+ }}
+ </output>
+ </example1>
+ <example2>
+ <input>
+ <llm_output>
+ ""
+ </llm_output>
+ </input>
+ <output>
+ {{
+ "explanation": "The llm_output is empty, so there is nothing to extract.",
+ "code_block": ""
+ }}
+ </output>
+ </example2>
+ </examples>
+
+ % Please provide your output in this JSON format.
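For contrast with the LLM-based extraction this template specifies, the mechanical part, finding the last fenced block, can be approximated with a regular expression. A hedged sketch, not part of the package; it picks the last fence and tolerates a missing closing fence, matching example1 above:

```python
import re

# Capture the body after ```lang\n, up to the closing ``` or end of string.
FENCE = re.compile(r"```[\w+-]*\n(.*?)(?:```|\Z)", re.DOTALL)

def extract_last_code_block(llm_output: str) -> str:
    """Return the body of the last fenced code block, or '' if none is present."""
    blocks = FENCE.findall(llm_output)
    return blocks[-1] if blocks else ""
```

The LLM version earns its cost when the "main" block is not simply the last one, or when the fences are malformed.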
pdd/prompts/unfinished_prompt_LLM.prompt ADDED
@@ -0,0 +1,18 @@
+ % You are tasked with determining whether a given prompt has finished outputting everything or if it still needs to continue. This is crucial for ensuring that all necessary information has been provided before proceeding with further actions. You will often be provided only the last few hundred characters of the prompt_text to analyze; the beginning of the prompt_text is not always included, so you will need to make a judgment based on the text you are given. You are looking only at the prompt_text, not the entire prompt file.
+
+ % Here is the prompt text to analyze:
+ <prompt_text>
+ {PROMPT_TEXT}
+ </prompt_text>
+
+ % Carefully examine the provided prompt text and determine whether it appears to be complete or seems to be cut off or unfinished. Consider the following factors:
+ 1. Sentence structure: Are all sentences grammatically complete?
+ 2. Content flow: Does the text end abruptly, or does it have a natural conclusion?
+ 3. Context: Based on the content, does it seem like all necessary information has been provided?
+ 4. Formatting: Are there any unclosed parentheses, quotation marks, or other formatting issues that suggest incompleteness?
+
+ % Provide your reasoning for why you believe the prompt is complete or incomplete.
+
+ % Output a JSON object with two keys:
+ 1. "reasoning": A string containing your structured reasoning
+ 2. "is_finished": A boolean value (true if the prompt is complete, false if it's incomplete)
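The four factors above are inherently fuzzy, which is why this check is delegated to an LLM. A crude heuristic version, shown only to make the task concrete (`looks_unfinished` is illustrative, not a pdd-cli function):

```python
def looks_unfinished(text: str) -> bool:
    """Rough stand-in for the completeness judgment described above."""
    stripped = text.rstrip()
    if not stripped:
        return True
    # An odd number of code fences suggests a truncated block (factor 4).
    if stripped.count("```") % 2 == 1:
        return True
    # More openers than closers suggests truncation mid-expression (factor 4).
    for pair in ("()", "[]", "{}"):
        if stripped.count(pair[0]) > stripped.count(pair[1]):
            return True
    # Ending without terminal punctuation hints at a cut-off sentence (factor 1).
    return stripped[-1] not in ".!?:;\"')]}`"
```

A heuristic like this misses semantic incompleteness (factor 3), which is the case the prompt is really aimed at.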
pdd/prompts/update_prompt_LLM.prompt ADDED
@@ -0,0 +1,19 @@
+ % You are an expert LLM Prompt Engineer. Your goal is to take the original code and the modified code, and to update the prompt that generated the original code.
+
+ % Here are the inputs and outputs of this prompt:
+ Input:
+ 'input_prompt' - A string that contains the prompt that generated the original code.
+ 'input_code' - A string that contains the original code that was generated from the input_prompt.
+ 'modified_code' - A string that contains the code that was modified by the user.
+ Output:
+ 'modified_prompt' - A string that contains the updated prompt that will generate the modified code.
+
+ % Here is the input_prompt to change: ```{input_prompt}```
+ % Here is the input_code: ```{input_code}```
+ % Here is the modified_code: ```{modified_code}```
+
+ % To generate the modified prompt, perform the following sequence of steps:
+ 1. Using the provided input_code and input_prompt, identify what the code does and how it was generated.
+ 2. Compare the input_code and modified_code to determine the changes made by the user.
+ 3. Identify what the modified_code does differently from the input_code.
+ 4. Generate a modified_prompt that will guide the generation of the modified_code based on the identified changes.
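Judging from the call pattern in pdd/split.py (shown later in this diff), a template like this is loaded, preprocessed, and sent through `llm_invoke`, which fills the curly-bracket placeholders from `input_json`. A hedged sketch of that wiring; the real implementation lives in pdd/update_prompt.py and may differ in details:

```python
# Assumed usage, based on the llm_invoke signature visible in pdd/split.py.
from pdd.load_prompt_template import load_prompt_template
from pdd.llm_invoke import llm_invoke

template = load_prompt_template("update_prompt_LLM")
response = llm_invoke(
    prompt=template,
    input_json={
        "input_prompt": "Write a function that adds two numbers.",
        "input_code": "def add(a, b):\n    return a + b\n",
        "modified_code": "def add(a: int, b: int) -> int:\n    return a + b\n",
    },
    strength=0.5,
    temperature=0.0,
    verbose=False,
)
modified_prompt = response["result"]  # response also carries "cost"
```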
pdd/prompts/xml_convertor_LLM.prompt ADDED
@@ -0,0 +1,54 @@
+ % Here are examples of inserting XML tags to improve the structure and readability of a prompt.
+ <examples>
+ <example_1>
+ % Here is an example_raw_prompt that needs XML tagging:
+ <example_raw_prompt>
+ <include>context/xml/1/split_LLM.prompt</include>
+ </example_raw_prompt>
+
+ % Here is an example_tagged_prompt from the example_raw_prompt above:
+ <example_tagged_prompt>
+ <include>context/xml/1/split_xml_llm.prompt</include>
+ </example_tagged_prompt>
+ </example_1>
+
+ <example_2>
+ % Here is an example_raw_prompt that needs XML tagging:
+ <example_raw_prompt>
+ <include>context/xml/2/xml_convertor_LLM.prompt</include>
+ </example_raw_prompt>
+
+ % Here is an example_tagged_prompt from the example_raw_prompt above:
+ <example_tagged_prompt>
+ <include>context/xml/2/xml_convertor_xml_LLM.prompt</include>
+ </example_tagged_prompt>
+ </example_2>
+ </examples>
+
+ <role>You are an expert Prompt Engineer.</role>
+
+ <task>Your goal is to enhance a given prompt by only adding XML tags where necessary to improve its structure and readability. Do not add any additional content or XML tags unless it is clearly required by the structure of the input_raw_prompt.</task>
+
+ <context>
+ Here is the input_raw_prompt that needs XML tagging to improve its organization: <input_raw_prompt>{raw_prompt}</input_raw_prompt>
+ </context>
+
+ <output_instructions>
+ Output a string with the `input_raw_prompt` properly tagged, using XML as metadata and structural elements to enhance clarity and organization. The output may include, but is not limited to:
+ 1. `<instructions>`: Guidelines or directives for the model's output.
+ 2. `<context>`: Background information or relevant data for understanding the task.
+ 3. `<examples>`: Specific instances that guide the model's response.
+ 4. `<formatting>`: Special formatting instructions for the output.
+ </output_instructions>
+
+ <steps>
+ Follow these steps to tag the prompt:
+ Step 1. Write out an analysis of the input_raw_prompt, identifying components like instructions, context, and examples.
+ Step 2. Discuss what would be appropriate XML tags for this input_raw_prompt.
+ Step 3. Insert the XML tags at the correct locations in the input_raw_prompt without introducing any new content. Only add tags to existing content. The XML tags should enhance the input_raw_prompt's format, structure, and readability.
+ </steps>
+
+ <general_points>
+ - With triple-backtick includes and curly-bracket placeholders, there could be a lot of text in them after preprocessing, so XML tags will help to organize the content.
+ - No need to include the initial and ending triple backticks for the XML code block.
+ </general_points>
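Before any of these templates reach the model, the `<include>` directives and curly-bracket placeholders are expanded by `pdd.preprocess`. A sketch of that step, inferred from how pdd/split.py (next in this diff) calls it; the exact semantics live in pdd/preprocess.py:

```python
from pdd.load_prompt_template import load_prompt_template
from pdd.preprocess import preprocess

# Load the raw template, then expand includes without recursing;
# double_curly_brackets=False leaves {raw_prompt} as a fillable placeholder.
raw = load_prompt_template("xml_convertor_LLM")
ready = preprocess(raw, recursive=False, double_curly_brackets=False)
```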
pdd/split.py ADDED
@@ -0,0 +1,119 @@
+ from typing import Tuple
+ from rich import print as rprint
+ from rich.markdown import Markdown
+ from pydantic import BaseModel, Field
+ from .load_prompt_template import load_prompt_template
+ from .preprocess import preprocess
+ from .llm_invoke import llm_invoke
+
+ class PromptSplit(BaseModel):
+     sub_prompt: str = Field(description="The extracted sub-prompt")
+     modified_prompt: str = Field(description="The modified original prompt")
+
+ def split(
+     input_prompt: str,
+     input_code: str,
+     example_code: str,
+     strength: float,
+     temperature: float,
+     verbose: bool = False
+ ) -> Tuple[str, str, float]:
+     """
+     Split a prompt into a sub_prompt and modified_prompt.
+
+     Args:
+         input_prompt (str): The prompt to split
+         input_code (str): The code generated from the input_prompt
+         example_code (str): Example code showing usage
+         strength (float): LLM strength parameter (0-1)
+         temperature (float): LLM temperature parameter (0-1)
+         verbose (bool): Whether to print detailed information
+
+     Returns:
+         Tuple[str, str, float]: (sub_prompt, modified_prompt, total_cost)
+     """
+     total_cost = 0.0
+
+     # Input validation
+     if not all([input_prompt, input_code, example_code]):
+         raise ValueError("All input parameters (input_prompt, input_code, example_code) must be provided")
+
+     if not 0 <= strength <= 1 or not 0 <= temperature <= 1:
+         raise ValueError("Strength and temperature must be between 0 and 1")
+
+     try:
+         # 1. Load prompt templates
+         split_prompt = load_prompt_template("split_LLM")
+         extract_prompt = load_prompt_template("extract_prompt_split_LLM")
+
+         if not split_prompt or not extract_prompt:
+             raise ValueError("Failed to load prompt templates")
+
+         # 2. Preprocess prompts
+         processed_split_prompt = preprocess(
+             split_prompt,
+             recursive=False,
+             double_curly_brackets=True,
+             exclude_keys=['input_prompt', 'input_code', 'example_code']
+         )
+
+         processed_extract_prompt = preprocess(
+             extract_prompt,
+             recursive=False,
+             double_curly_brackets=False
+         )
+
+         # 3. First LLM invocation
+         if verbose:
+             rprint("[bold blue]Running initial prompt split...[/bold blue]")
+
+         split_response = llm_invoke(
+             prompt=processed_split_prompt,
+             input_json={
+                 "input_prompt": input_prompt,
+                 "input_code": input_code,
+                 "example_code": example_code
+             },
+             strength=strength,
+             temperature=temperature,
+             verbose=verbose
+         )
+
+         total_cost += split_response["cost"]
+
+         # 4. Extract JSON with second LLM invocation
+         if verbose:
+             rprint("[bold blue]Extracting split prompts...[/bold blue]")
+
+         extract_response = llm_invoke(
+             prompt=processed_extract_prompt,
+             input_json={"llm_output": split_response["result"]},
+             strength=0.89,  # Fixed strength for extraction
+             temperature=temperature,
+             output_pydantic=PromptSplit,
+             verbose=verbose
+         )
+
+         total_cost += extract_response["cost"]
+
+         # Extract results
+         result: PromptSplit = extract_response["result"]
+         sub_prompt = result.sub_prompt
+         modified_prompt = result.modified_prompt
+
+         # 5. Print verbose output if requested
+         if verbose:
+             rprint("\n[bold green]Final Results:[/bold green]")
+             rprint(Markdown(f"### Sub Prompt\n{sub_prompt}"))
+             rprint(Markdown(f"### Modified Prompt\n{modified_prompt}"))
+             rprint(f"[bold cyan]Total Cost: ${total_cost:.6f}[/bold cyan]")
+
+         # 6. Return results
+         return sub_prompt, modified_prompt, total_cost
+
+     except Exception as e:
+         # Print an error message, then raise an exception that includes
+         # the prefix "Error in split function: ..." in its final message.
+         rprint(f"[bold red]Error in split function: {str(e)}[/bold red]")
+         # Re-raise using the same exception type but with a modified message.
+         raise type(e)(f"Error in split function: {str(e)}") from e
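Taken together, `split` is a plain function: give it the three strings and two tuning knobs and it returns the two new prompts plus the cost. An illustrative call, assuming the environment is configured with credentials for the models listed in pdd/data/llm_model.csv (the file paths here are made up):

```python
from pdd.split import split

sub_prompt, modified_prompt, cost = split(
    input_prompt=open("prompts/calculator_python.prompt").read(),
    input_code=open("src/calculator.py").read(),
    example_code=open("examples/calculator_demo.py").read(),
    strength=0.7,
    temperature=0.0,
    verbose=True,
)
print(f"split cost: ${cost:.6f}")
```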
pdd/split_main.py ADDED
@@ -0,0 +1,103 @@
+ import sys
+ from typing import Tuple, Optional
+ import click
+ from rich import print as rprint
+
+ from .construct_paths import construct_paths
+ from .split import split
+
+ def split_main(
+     ctx: click.Context,
+     input_prompt_file: str,
+     input_code_file: str,
+     example_code_file: str,
+     output_sub: Optional[str],
+     output_modified: Optional[str]
+ ) -> Tuple[str, str, float]:
+     """
+     CLI wrapper for splitting a prompt into a sub_prompt and modified_prompt.
+
+     Args:
+         ctx: Click context containing command-line parameters.
+         input_prompt_file: Path to the input prompt file to be split.
+         input_code_file: Path to the code file generated from the input prompt.
+         example_code_file: Path to the example code file showing usage.
+         output_sub: Optional path where to save the sub-prompt.
+         output_modified: Optional path where to save the modified prompt.
+
+     Returns:
+         Tuple containing:
+             - str: The sub-prompt content
+             - str: The modified prompt content
+             - float: The total cost of the operation
+
+     Raises:
+         SystemExit: If any error occurs during execution.
+     """
+     try:
+         # Construct file paths
+         input_file_paths = {
+             "input_prompt": input_prompt_file,
+             "input_code": input_code_file,
+             "example_code": example_code_file
+         }
+         command_options = {
+             "output_sub": output_sub,
+             "output_modified": output_modified
+         }
+
+         # Get input strings and output paths
+         input_strings, output_file_paths, _ = construct_paths(
+             input_file_paths=input_file_paths,
+             force=ctx.obj.get('force', False),
+             quiet=ctx.obj.get('quiet', False),
+             command="split",
+             command_options=command_options
+         )
+
+         # Get parameters from context
+         strength = ctx.obj.get('strength', 0.5)
+         temperature = ctx.obj.get('temperature', 0)
+
+         # Call the split function
+         sub_prompt, modified_prompt, total_cost = split(
+             input_prompt=input_strings["input_prompt"],
+             input_code=input_strings["input_code"],
+             example_code=input_strings["example_code"],
+             strength=strength,
+             temperature=temperature,
+             verbose=not ctx.obj.get('quiet', False)
+         )
+
+         # Save the output files
+         try:
+             with open(output_file_paths["output_sub"], 'w') as f:
+                 f.write(sub_prompt)
+             with open(output_file_paths["output_modified"], 'w') as f:
+                 f.write(modified_prompt)
+         except IOError as e:
+             raise IOError(f"Failed to save output files: {str(e)}")
+
+         # Provide user feedback if not in quiet mode
+         if not ctx.obj.get('quiet', False):
+             rprint("[bold green]Successfully split the prompt![/bold green]")
+             rprint(f"[bold]Sub-prompt saved to:[/bold] {output_file_paths['output_sub']}")
+             rprint(f"[bold]Modified prompt saved to:[/bold] {output_file_paths['output_modified']}")
+             rprint(f"[bold]Total cost:[/bold] ${total_cost:.6f}")
+
+         return sub_prompt, modified_prompt, total_cost
+
+     except Exception as e:
+         # Handle errors and provide appropriate feedback
+         if not ctx.obj.get('quiet', False):
+             rprint(f"[bold red]Error:[/bold red] {str(e)}")
+
+         # Provide more specific error messages based on the exception type
+         if isinstance(e, FileNotFoundError):
+             rprint("[yellow]Hint: Check if all input files exist and are accessible.[/yellow]")
+         elif isinstance(e, IOError):
+             rprint("[yellow]Hint: Check file permissions and disk space.[/yellow]")
+         elif isinstance(e, ValueError):
+             rprint("[yellow]Hint: Check if input files have valid content.[/yellow]")
+
+         sys.exit(1)
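Because `split_main` reads everything through `ctx.obj`, it can be exercised without the full CLI by handing it a minimal Click context. A test-style sketch (pdd/cli.py is the real caller; the paths are made up):

```python
import click
from pdd.split_main import split_main

# Build a bare Context carrying the keys split_main expects in ctx.obj.
ctx = click.Context(click.Command("split"))
ctx.obj = {"force": True, "quiet": False, "strength": 0.5, "temperature": 0}

sub, modified, cost = split_main(
    ctx,
    input_prompt_file="prompts/calculator_python.prompt",
    input_code_file="src/calculator.py",
    example_code_file="examples/calculator_demo.py",
    output_sub=None,   # construct_paths derives default output locations
    output_modified=None,
)
```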
pdd/summarize_directory.py ADDED
@@ -0,0 +1,212 @@
+ from typing import Optional, Tuple
+ from datetime import datetime, UTC
+ from io import StringIO
+ import os
+ import glob
+ import csv
+
+ from pydantic import BaseModel, Field
+ from rich import print
+ from rich.progress import track
+
+ from .load_prompt_template import load_prompt_template
+ from .llm_invoke import llm_invoke
+
+ class FileSummary(BaseModel):
+     file_summary: str = Field(description="The summary of the file")
+
+ def validate_csv_format(csv_content: str) -> bool:
+     """Validate CSV has required columns and proper format."""
+     try:
+         if not csv_content or csv_content.isspace():
+             return False
+         reader = csv.DictReader(StringIO(csv_content.lstrip()))
+         if not reader.fieldnames:
+             return False
+         required_columns = {'full_path', 'file_summary', 'date'}
+         if not all(col in reader.fieldnames for col in required_columns):
+             return False
+         try:
+             first_row = next(reader, None)
+             if not first_row:
+                 return True
+             return all(key in first_row for key in required_columns)
+         except csv.Error:
+             return False
+     except Exception:
+         return False
+
+ def normalize_path(path: str) -> str:
+     """Normalize path for consistent comparison."""
+     return os.path.normpath(path.strip().strip('"').strip())
+
+ def parse_date(date_str: str) -> datetime:
+     """Parse date string to datetime with proper error handling."""
+     try:
+         dt = datetime.fromisoformat(date_str.strip())
+         return dt if dt.tzinfo else dt.replace(tzinfo=UTC)
+     except Exception:
+         return datetime.now(UTC)
+
+ def parse_existing_csv(csv_content: str, verbose: bool = False) -> dict:
+     """Parse existing CSV file and return normalized data."""
+     existing_data = {}
+     try:
+         # Clean the CSV content by removing leading/trailing whitespace from each line
+         cleaned_lines = [line.strip() for line in csv_content.splitlines()]
+         cleaned_content = '\n'.join(cleaned_lines)
+
+         reader = csv.DictReader(StringIO(cleaned_content))
+         for row in reader:
+             try:
+                 normalized_path = normalize_path(row['full_path'])
+                 existing_data[normalized_path] = {
+                     'file_summary': row['file_summary'].strip().strip('"'),
+                     'date': row['date'].strip()
+                 }
+                 if verbose:
+                     print(f"[green]Parsed existing entry for: {normalized_path}[/green]")
+             except Exception as e:
+                 if verbose:
+                     print(f"[yellow]Warning: Skipping invalid CSV row: {str(e)}[/yellow]")
+     except Exception as e:
+         if verbose:
+             print(f"[yellow]Warning: Error parsing CSV: {str(e)}[/yellow]")
+         raise ValueError("Invalid CSV file format.")
+     return existing_data
+
+ def summarize_directory(
+     directory_path: str,
+     strength: float,
+     temperature: float,
+     verbose: bool,
+     csv_file: Optional[str] = None
+ ) -> Tuple[str, float, str]:
+     """
+     Summarize files in a directory and generate a CSV containing the summaries.
+
+     Parameters:
+         directory_path (str): Path to the directory to summarize, with wildcard (e.g., /path/to/directory/*.py)
+         strength (float): Between 0 and 1; the strength of the LLM model to use.
+         temperature (float): Controls the randomness of the LLM's output.
+         verbose (bool): Whether to print out the details of the function.
+         csv_file (Optional[str]): Current CSV file contents if it already exists.
+
+     Returns:
+         Tuple[str, float, str]: A tuple containing:
+             - csv_output (str): Updated CSV content with 'full_path', 'file_summary', and 'date'.
+             - total_cost (float): Total cost of the LLM runs.
+             - model_name (str): Name of the LLM model used.
+     """
+     try:
+         if not isinstance(directory_path, str) or not directory_path:
+             raise ValueError("Invalid 'directory_path'.")
+         if not (0.0 <= strength <= 1.0):
+             raise ValueError("Invalid 'strength' value.")
+         if not isinstance(temperature, (int, float)) or temperature < 0:
+             raise ValueError("Invalid 'temperature' value.")
+         if not isinstance(verbose, bool):
+             raise ValueError("Invalid 'verbose' value.")
+
+         prompt_template = load_prompt_template("summarize_file_LLM")
+         if not prompt_template:
+             raise FileNotFoundError("Prompt template 'summarize_file_LLM.prompt' not found.")
+
+         csv_output = "full_path,file_summary,date\n"
+         total_cost = 0.0
+         model_name = "None"
+
+         existing_data = {}
+         if csv_file:
+             if not validate_csv_format(csv_file):
+                 raise ValueError("Invalid CSV file format.")
+             existing_data = parse_existing_csv(csv_file, verbose)
+
+         # Get list of files first to ensure consistent order
+         files = sorted(glob.glob(directory_path, recursive=True))
+         if not files:
+             if verbose:
+                 print("[yellow]No files found.[/yellow]")
+             return csv_output, total_cost, model_name
+
+         # Get all modification times at once to ensure consistent order
+         file_mod_times = {f: os.path.getmtime(f) for f in files}
+
+         for file_path in track(files, description="Processing files..."):
+             try:
+                 relative_path = os.path.relpath(file_path)
+                 normalized_path = normalize_path(relative_path)
+                 file_mod_time = file_mod_times[file_path]
+                 date_generated = datetime.now(UTC).isoformat()
+
+                 if verbose:
+                     print(f"\nProcessing file: {normalized_path}")
+                     print(f"Modification time: {datetime.fromtimestamp(file_mod_time, UTC)}")
+
+                 needs_summary = True
+                 if normalized_path in existing_data:
+                     try:
+                         existing_entry = existing_data[normalized_path]
+                         existing_date = parse_date(existing_entry['date'])
+                         file_date = datetime.fromtimestamp(file_mod_time, UTC)
+
+                         if verbose:
+                             print(f"Existing date: {existing_date}")
+                             print(f"File date: {file_date}")
+
+                         # Explicitly check if file is newer
+                         if file_date > existing_date:
+                             if verbose:
+                                 print("[blue]File modified, generating new summary[/blue]")
+                             needs_summary = True
+                         else:
+                             needs_summary = False
+                             file_summary = existing_entry['file_summary']
+                             date_generated = existing_entry['date']
+                             if verbose:
+                                 print("[green]Reusing existing summary[/green]")
+                     except Exception as e:
+                         if verbose:
+                             print(f"[yellow]Warning: Date comparison error: {str(e)}[/yellow]")
+                         needs_summary = True
+                 elif verbose:
+                     print("[blue]New file, generating summary[/blue]")
+
+                 if needs_summary:
+                     if verbose:
+                         print(f"[blue]Generating summary for: {normalized_path}[/blue]")
+                     with open(file_path, 'r', encoding='utf-8') as f:
+                         file_contents = f.read()
+
+                     input_params = {"file_contents": file_contents}
+                     response = llm_invoke(
+                         prompt=prompt_template,
+                         input_json=input_params,
+                         strength=strength,
+                         temperature=temperature,
+                         verbose=verbose,
+                         output_pydantic=FileSummary
+                     )
+
+                     if response.get('error'):
+                         file_summary = "Error in summarization."
+                         if verbose:
+                             print(f"[red]Error summarizing file: {response['error']}[/red]")
+                     else:
+                         file_summary = response['result'].file_summary
+                         total_cost += response.get('cost', 0.0)
+                         model_name = response.get('model_name', model_name)
+
+                 csv_output += f'"{relative_path}","{file_summary.replace(chr(34), "")}",{date_generated}\n'
+
+             except Exception as e:
+                 if verbose:
+                     print(f"[red]Error processing file: {str(e)}[/red]")
+                 date_generated = datetime.now(UTC).isoformat()
+                 # Use file_path here: relative_path may be unbound if os.path.relpath raised.
+                 csv_output += f'"{file_path}","Error processing file",{date_generated}\n'
+
+         return csv_output, total_cost, model_name
+
+     except Exception as e:
+         print(f"[red]An error occurred: {str(e)}[/red]")
+         raise
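The incremental behavior is the point of the `csv_file` parameter: feeding the previous run's CSV back in lets unmodified files keep their cached summaries, so only changed files cost LLM calls. An illustrative round-trip (the summaries.csv path is made up):

```python
import os
from pdd.summarize_directory import summarize_directory

# Reuse the last run's output, if any, so unchanged files are not re-summarized.
previous = open("summaries.csv").read() if os.path.exists("summaries.csv") else None
csv_text, cost, model = summarize_directory(
    directory_path="pdd/*.py",
    strength=0.6,
    temperature=0.0,
    verbose=False,
    csv_file=previous,
)
with open("summaries.csv", "w") as f:
    f.write(csv_text)
print(f"model: {model}, cost: ${cost:.6f}")
```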