pdd-cli 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of pdd-cli might be problematic.
- pdd/__init__.py +10 -2
- pdd/cli.py +26 -4
- pdd/code_generator_main.py +335 -105
- pdd/fix_verification_errors_loop.py +57 -31
- pdd/fix_verification_main.py +76 -48
- pdd/incremental_code_generator.py +198 -0
- pdd/prompts/code_patcher_LLM.prompt +63 -0
- pdd/prompts/diff_analyzer_LLM.prompt +69 -0
- pdd/prompts/find_verification_errors_LLM.prompt +13 -13
- pdd/prompts/fix_verification_errors_LLM.prompt +17 -16
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/RECORD +16 -13
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/WHEEL +1 -1
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.30.dist-info → pdd_cli-0.0.31.dist-info}/top_level.txt +0 -0
pdd/fix_verification_main.py
CHANGED
@@ -2,12 +2,14 @@ import sys
 import os
 import subprocess
 import click
+import logging
 from typing import Optional, Tuple, List, Dict, Any
 
 # Use Rich for pretty printing to the console
 from rich import print as rich_print
 from rich.panel import Panel
 from rich.syntax import Syntax
+from rich.text import Text
 
 # Internal imports using relative paths
 from .construct_paths import construct_paths
@@ -17,9 +19,15 @@ from .fix_verification_errors_loop import fix_verification_errors_loop
 from . import DEFAULT_STRENGTH
 
 # Default values from the README
+DEFAULT_TEMPERATURE = 0.0
 DEFAULT_MAX_ATTEMPTS = 3
 DEFAULT_BUDGET = 5.0
-
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+# Define a constant for the verification program name
+VERIFICATION_PROGRAM_NAME = "verification_program.py" # Example, adjust if needed
 
 def run_program(program_path: str, args: List[str] = []) -> Tuple[bool, str, str]:
     """
@@ -78,8 +86,8 @@ def run_program(program_path: str, args: List[str] = []) -> Tuple[bool, str, str
         rich_print(f"[bold red]Error:[/bold red] Program execution timed out: '{program_path}'")
         return False, "", f"Program execution timed out: {program_path}"
     except Exception as e:
-
-        return False, "", f"
+        logger.error(f"An unexpected error occurred while running {program_path}: {e}")
+        return False, "", f"An unexpected error occurred: {e}"
 
 def fix_verification_main(
     ctx: click.Context,
@@ -122,10 +130,9 @@ def fix_verification_main(
         - model_name (str): Name of the LLM used.
     """
     # Extract global options from context
-
-    strength: float = ctx.obj.get('strength', DEFAULT_STRENGTH) # Get globals from obj
+    strength: float = ctx.obj.get('strength', DEFAULT_STRENGTH)
     temperature: float = ctx.obj.get('temperature', DEFAULT_TEMPERATURE)
-    force: bool = ctx.obj.get('force', False)
+    force: bool = ctx.obj.get('force', False)
     quiet: bool = ctx.obj.get('quiet', False)
     verbose: bool = ctx.obj.get('verbose', False)
 
@@ -220,14 +227,9 @@ def fix_verification_main(
             base_prog, ext_prog = os.path.splitext(program_file)
             output_program_path = f"{base_prog}_verified{ext_prog}"
 
-
-        if program_file.endswith(".py"):
-            language = "python"
-        elif program_file.endswith(".js"):
-            language = "javascript"
-        elif program_file.endswith(".sh"):
-            language = "bash"
-
+        if program_file.endswith(".py"): language = "python"
+        elif program_file.endswith(".js"): language = "javascript"
+        elif program_file.endswith(".sh"): language = "bash"
     else:
         # Some other error – re‑raise / abort
         rich_print(f"[bold red]Error:[/bold red] Failed during path construction: {e}")
@@ -238,12 +240,12 @@ def fix_verification_main(
 
     # --- Core Logic ---
     success: bool = False
-    final_program: str = input_strings.get("program_file", "")
-    final_code: str = input_strings.get("code_file", "")
+    final_program: str = input_strings.get("program_file", "")
+    final_code: str = input_strings.get("code_file", "")
    attempts: int = 0
    total_cost: float = 0.0
    model_name: str = "N/A"
-    results_log_content: str = ""
+    results_log_content: str = ""
 
    try:
        if loop:
@@ -258,19 +260,19 @@ def fix_verification_main(
             loop_results = fix_verification_errors_loop(
                 program_file=program_file,
                 code_file=code_file,
-                prompt=input_strings["prompt_file"],
-                verification_program=verification_program,
+                prompt=input_strings["prompt_file"],
+                verification_program=verification_program,
                 strength=strength,
                 temperature=temperature,
                 max_attempts=max_attempts,
                 budget=budget,
-                verification_log_file=output_results_path,
+                verification_log_file=output_results_path,
                 verbose=verbose,
                 program_args=program_args
             )
             success = loop_results['success']
-            final_program = loop_results['final_program']
-            final_code = loop_results['final_code']
+            final_program = loop_results['final_program']
+            final_code = loop_results['final_code']
             attempts = loop_results['total_attempts']
             total_cost = loop_results['total_cost']
             model_name = loop_results['model_name']
@@ -300,12 +302,11 @@ def fix_verification_main(
             # 2. Call fix_verification_errors with content and program output
             if not quiet:
                 rich_print("Calling LLM to verify program output against prompt...")
-
             fix_results = fix_verification_errors(
                 program=input_strings["program_file"],
                 prompt=input_strings["prompt_file"],
                 code=input_strings["code_file"],
-                output=program_output,
+                output=program_output,
                 strength=strength,
                 temperature=temperature,
                 verbose=verbose
@@ -319,19 +320,15 @@ def fix_verification_main(
 
             if not issues_found:
                 success = True
-                if not quiet:
-                    rich_print("[green]Verification Passed:[/green] LLM found no discrepancies.")
+                if not quiet: rich_print("[green]Verification Passed:[/green] LLM found no discrepancies.")
             elif code_updated or program_updated:
                 # If issues were found AND fixes were made, assume success for this single pass.
                 # A more robust check might re-run the program with fixed code, but that's the loop's job.
                 success = True
-                if not quiet:
-                    rich_print("[yellow]Verification Issues Found:[/yellow] LLM proposed fixes.")
+                if not quiet: rich_print("[yellow]Verification Issues Found:[/yellow] LLM proposed fixes.")
             else:
-                success = False
-                if not quiet:
-                    rich_print("[red]Verification Failed:[/red] LLM found discrepancies but proposed no fixes.")
-
+                success = False
+                if not quiet: rich_print("[red]Verification Failed:[/red] LLM found discrepancies but proposed no fixes.")
 
             final_program = fix_results['fixed_program']
             final_code = fix_results['fixed_code']
@@ -340,7 +337,7 @@ def fix_verification_main(
 
             # Build results log content for single pass
             results_log_content = "PDD Verify Results (Single Pass)\n"
-            results_log_content += f"Timestamp: {os.path.getmtime(prompt_file)}\n"
+            results_log_content += f"Timestamp: {os.path.getmtime(prompt_file)}\n"
             results_log_content += f"Prompt File: {prompt_file}\n"
             results_log_content += f"Code File: {code_file}\n"
             results_log_content += f"Program File: {program_file}\n"
@@ -371,27 +368,46 @@ def fix_verification_main(
     saved_results_path: Optional[str] = None
     saved_program_path: Optional[str] = None
 
-    if
+    if verbose:
+        rich_print(f"[cyan bold DEBUG] In fix_verification_main, BEFORE save attempt for CODE:")
+        rich_print(f" success: {success}")
+        rich_print(f" output_code_path: {output_code_path!r}")
+        rich_print(f" final_code is None: {final_code is None}")
+        if final_code is not None:
+            rich_print(f" len(final_code): {len(final_code)}")
+
+    if success and output_code_path and final_code is not None:
         try:
+            if verbose:
+                rich_print(f"[cyan bold DEBUG] In fix_verification_main, ATTEMPTING to write code to: {output_code_path!r}")
             with open(output_code_path, "w") as f:
                 f.write(final_code)
             saved_code_path = output_code_path
             if not quiet:
                 rich_print(f"Successfully verified code saved to: [green]{output_code_path}[/green]")
-        except
-            rich_print(f"[bold red]Error:[/bold red] Failed to write
+        except Exception as e:
+            rich_print(f"[bold red]Error:[/bold red] Failed to write code file '{output_code_path}': {type(e).__name__} - {e}")
-
-    if
+
+    if verbose:
+        rich_print(f"[cyan bold DEBUG] In fix_verification_main, BEFORE save attempt for PROGRAM:")
+        rich_print(f" success: {success}")
+        rich_print(f" output_program_path: {output_program_path!r}")
+        rich_print(f" final_program is None: {final_program is None}")
+        if final_program is not None:
+            rich_print(f" len(final_program): {len(final_program)}")
+
+    if success and output_program_path and final_program is not None:
         try:
+            if verbose:
+                rich_print(f"[cyan bold DEBUG] In fix_verification_main, ATTEMPTING to write program to: {output_program_path!r}")
             with open(output_program_path, "w") as f:
                 f.write(final_program)
             saved_program_path = output_program_path
             if not quiet:
                 rich_print(f"Successfully verified program saved to: [green]{output_program_path}[/green]")
-        except
-            rich_print(f"[bold red]Error:[/bold red] Failed to write
+        except Exception as e:
+            rich_print(f"[bold red]Error:[/bold red] Failed to write program file '{output_program_path}': {type(e).__name__} - {e}")
 
-    # Write results log (only for single pass, loop writes its own)
     if not loop and output_results_path:
         try:
             with open(output_results_path, "w") as f:
@@ -405,28 +421,40 @@ def fix_verification_main(
         # For loop, just confirm the path where the loop function *should* have saved the log
         saved_results_path = output_results_path
         if not quiet:
-
+            # We assume fix_verification_errors_loop handles its own logging confirmation.
+            # This message confirms the path was passed.
+            rich_print(f"Verification results log (from loop) expected at: [green]{output_results_path}[/green]")
+
 
     # --- Final User Feedback ---
+    if verbose:
+        rich_print(f"[cyan bold DEBUG] Before summary - saved_code_path: {saved_code_path!r}, output_code_path: {output_code_path!r}[/cyan bold DEBUG]")
+        rich_print(f"[cyan bold DEBUG] Before summary - saved_program_path: {saved_program_path!r}, output_program_path: {output_program_path!r}[/cyan bold DEBUG]")
+
     if not quiet:
         rich_print("\n" + "="*40)
         title = "[bold green]Verification Complete[/bold green]" if success else "[bold red]Verification Failed[/bold red]"
-        summary_panel = Panel(
+        summary_panel_content = (
             f"Status: {'[green]Success[/green]' if success else '[red]Failure[/red]'}\n"
             f"Attempts: {attempts}\n"
             f"Total Cost: ${total_cost:.6f}\n"
             f"Model Used: {model_name}\n"
-            f"Verified Code Saved: {saved_code_path or 'N/A'}\n"
-            f"Verified Program Saved: {saved_program_path or 'N/A'}\n"
-            f"Results Log Saved: {saved_results_path or 'N/A'}"
+            f"Verified Code Saved: {saved_code_path or 'N/A (Not saved on failure or no path)'}\n"
+            f"Verified Program Saved: {saved_program_path or 'N/A (Not saved on failure or no path)'}\n"
+            f"Results Log Saved: {saved_results_path or 'N/A'}"
+        )
+        summary_panel = Panel(
+            summary_panel_content,
             title=title,
             border_style="green" if success else "red"
         )
         rich_print(summary_panel)
 
-        if verbose and not success and not loop:
-            rich_print("[bold yellow]Final Code (after failed single pass):[/bold yellow]")
+        if verbose and not success and not loop: # Only show final code if verbose, failed, and single pass
+            rich_print("[bold yellow]Final Code (after failed single pass, not saved):[/bold yellow]")
             rich_print(Syntax(final_code, language or "python", theme="default", line_numbers=True))
+            rich_print("[bold yellow]Final Program (after failed single pass, not saved):[/bold yellow]")
+            rich_print(Syntax(final_program, language or "python", theme="default", line_numbers=True))
 
 
     return success, final_program, final_code, attempts, total_cost, model_name
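The reworked exception path above means run_program now reports unexpected failures through the module logger instead of a bare return. A minimal usage sketch, assuming pdd is installed and that the tuple is (success, stdout, stderr), which the diff implies but never states; the "--check" argument is purely illustrative:

import logging

from pdd.fix_verification_main import run_program

logging.basicConfig(level=logging.ERROR)  # surface logger.error output from run_program

ok, out, err = run_program("verification_program.py", args=["--check"])  # args list per the signature shown above
if not ok:
    print(f"verification run failed: {err}")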
pdd/incremental_code_generator.py
ADDED
@@ -0,0 +1,198 @@
+from typing import Optional, Tuple
+from pydantic import BaseModel, Field
+from rich.console import Console
+from rich.markdown import Markdown
+
+from .llm_invoke import llm_invoke
+from .load_prompt_template import load_prompt_template
+from .preprocess import preprocess
+from . import DEFAULT_STRENGTH # Removed unused EXTRACTION_STRENGTH
+
+console = Console()
+
+# Pydantic models for structured output
+class DiffAnalysis(BaseModel):
+    is_big_change: bool = Field(description="Whether the change is considered significant enough to require full regeneration")
+    change_description: str = Field(description="A description of the changes between the original and new prompts")
+    analysis: str = Field(description="Detailed analysis of the differences and recommendation")
+
+class CodePatchResult(BaseModel):
+    patched_code: str = Field(description="The updated code with incremental patches applied")
+    analysis: str = Field(description="Analysis of the patching process")
+    planned_modifications: str = Field(description="Description of the modifications planned and applied")
+
+def incremental_code_generator(
+    original_prompt: str,
+    new_prompt: str,
+    existing_code: str,
+    language: str,
+    strength: float = DEFAULT_STRENGTH,
+    temperature: float = 0.0,
+    time: float = 0.25,
+    force_incremental: bool = False,
+    verbose: bool = False,
+    preprocess_prompt: bool = True
+) -> Tuple[Optional[str], bool, float, str]:
+    """
+    Analyzes changes to a prompt and either incrementally patches existing code or suggests full regeneration.
+
+    Args:
+        original_prompt (str): The original prompt used to generate the existing code.
+        new_prompt (str): The updated prompt that needs to be processed.
+        existing_code (str): The existing code generated from the original prompt.
+        language (str): The programming language of the output code (e.g., 'python', 'bash').
+        strength (float): Strength parameter for the LLM model (0 to 1). Defaults to DEFAULT_STRENGTH.
+        temperature (float): Temperature parameter for randomness in LLM output (0 to 1). Defaults to 0.0.
+        time (float): Thinking time or reasoning effort for the LLM model (0 to 1). Defaults to 0.25.
+        force_incremental (bool): Forces incremental patching even if full regeneration is suggested. Defaults to False.
+        verbose (bool): If True, prints detailed information about the process. Defaults to False.
+        preprocess_prompt (bool): If True, preprocesses the prompt before invocation. Defaults to True.
+
+    Returns:
+        Tuple[Optional[str], bool, float, str]: A tuple containing:
+            - updated_code (Optional[str]): The updated code if incremental patching is applied, None if full regeneration is needed.
+            - is_incremental (bool): True if incremental patching was applied, False if full regeneration is needed.
+            - total_cost (float): The total cost of all LLM invocations.
+            - model_name (str): The name of the LLM model used for the main operation.
+    """
+    # Validate inputs (moved outside the main try-except block)
+    if not original_prompt or not new_prompt or not existing_code or not language:
+        raise ValueError("All required inputs (original_prompt, new_prompt, existing_code, language) must be provided.")
+
+    if not 0 <= strength <= 1 or not 0 <= temperature <= 1 or not 0 <= time <= 1:
+        raise ValueError("Strength, temperature, and time must be between 0 and 1.")
+
+    try:
+        total_cost = 0.0
+        model_name = ""
+
+        # Step 1: Load and preprocess the diff_analyzer_LLM prompt template
+        diff_analyzer_template = load_prompt_template("diff_analyzer_LLM")
+        if preprocess_prompt:
+            diff_analyzer_template = preprocess(
+                diff_analyzer_template,
+                recursive=False,
+                double_curly_brackets=True,
+                exclude_keys=["ORIGINAL_PROMPT", "NEW_PROMPT", "EXISTING_CODE"]
+            )
+
+        if verbose:
+            console.print("[bold cyan]Step 1: Loaded diff_analyzer_LLM template[/bold cyan]")
+
+        # Step 2: Run diff_analyzer_LLM through llm_invoke
+        input_json = {
+            "ORIGINAL_PROMPT": original_prompt,
+            "NEW_PROMPT": new_prompt,
+            "EXISTING_CODE": existing_code
+        }
+        diff_response = llm_invoke(
+            prompt=diff_analyzer_template,
+            input_json=input_json,
+            strength=strength,
+            temperature=temperature,
+            time=time,
+            verbose=verbose,
+            output_pydantic=DiffAnalysis
+        )
+        diff_result: DiffAnalysis = diff_response['result']
+        total_cost += diff_response['cost']
+        model_name = diff_response['model_name'] # Initial model name
+
+        if verbose:
+            console.print("[bold green]Diff Analyzer Results:[/bold green]")
+            console.print(f"Is Big Change: {diff_result.is_big_change}")
+            console.print(Markdown(f"**Analysis:**\n{diff_result.analysis}"))
+            console.print(f"Cost so far: ${total_cost:.6f}")
+
+        # Step 3: Determine whether to use incremental patching or full regeneration
+        should_regenerate = not force_incremental and diff_result.is_big_change
+
+        if verbose and force_incremental and diff_result.is_big_change:
+            console.print("[bold yellow]Forcing incremental patching despite major change detection[/bold yellow]")
+
+        # Step 4: Handle regeneration or incremental patching
+        if should_regenerate:
+            if verbose:
+                console.print("[bold red]Major change detected. Recommending full regeneration.[/bold red]")
+            return None, False, total_cost, model_name
+        else:
+            # Load and preprocess the code_patcher_LLM prompt template
+            patcher_template = load_prompt_template("code_patcher_LLM")
+            if preprocess_prompt:
+                patcher_template = preprocess(
+                    patcher_template,
+                    recursive=False,
+                    double_curly_brackets=True,
+                    exclude_keys=["ORIGINAL_PROMPT", "NEW_PROMPT", "EXISTING_CODE", "CHANGE_DESCRIPTION"]
+                )
+
+            if verbose:
+                console.print("[bold cyan]Step 4: Loaded code_patcher_LLM template for incremental patching[/bold cyan]")
+
+            # Run code_patcher_LLM through llm_invoke
+            patch_input_json = {
+                "ORIGINAL_PROMPT": original_prompt,
+                "NEW_PROMPT": new_prompt,
+                "EXISTING_CODE": existing_code,
+                "CHANGE_DESCRIPTION": diff_result.change_description
+            }
+            patch_response = llm_invoke(
+                prompt=patcher_template,
+                input_json=patch_input_json,
+                strength=strength,
+                temperature=temperature,
+                time=time,
+                verbose=verbose,
+                output_pydantic=CodePatchResult
+            )
+            patch_result: CodePatchResult = patch_response['result']
+            total_cost += patch_response['cost']
+            model_name = patch_response['model_name'] # Update model_name to patcher's model
+
+            if verbose:
+                console.print("[bold green]Code Patcher Results:[/bold green]")
+                console.print(Markdown(f"**Analysis:**\n{patch_result.analysis}"))
+                console.print(Markdown(f"**Planned Modifications:**\n{patch_result.planned_modifications}"))
+                console.print(f"Total Cost: ${total_cost:.6f}")
+
+            return patch_result.patched_code, True, total_cost, model_name
+
+    except Exception as e:
+        # This will now catch errors from LLM calls or other unexpected runtime issues,
+        # not the initial input validation ValueErrors.
+        console.print(f"[bold red]Error in incremental_code_generator: {str(e)}[/bold red]")
+        raise RuntimeError(f"Failed to process incremental code generation: {str(e)}")
+
+if __name__ == "__main__":
+    # Example usage for testing purposes
+    try:
+        original_prompt = "Write a Python function to calculate the factorial of a number."
+        new_prompt = "Write a Python function to calculate the factorial of a number with input validation."
+        existing_code = """
+def factorial(n):
+    if n == 0 or n == 1:
+        return 1
+    return n * factorial(n - 1)
+"""
+        language = "python"
+        updated_code, is_incremental, total_cost, model_name = incremental_code_generator(
+            original_prompt=original_prompt,
+            new_prompt=new_prompt,
+            existing_code=existing_code,
+            language=language,
+            strength=DEFAULT_STRENGTH,
+            temperature=0.0,
+            time=0.25,
+            force_incremental=False,
+            verbose=True
+        )
+        console.print("[bold magenta]Final Results:[/bold magenta]")
+        if is_incremental:
+            console.print("[bold green]Incremental Patch Applied[/bold green]")
+            console.print(Markdown(f"**Updated Code:**\n```python\n{updated_code}\n```"))
+        else:
+            console.print("[bold red]Full Regeneration Recommended[/bold red]")
+        console.print(f"Total Cost: ${total_cost:.6f}")
+        console.print(f"Model Used: {model_name}")
+    except Exception as e:
+        console.print(f"[bold red]Test Error: {str(e)}[/bold red]")
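The new module returns (updated_code, is_incremental, total_cost, model_name). A hypothetical caller sketch showing the intended branch; the real integration lives in pdd/code_generator_main.py (whose hunks are not included above), and regenerate_from_scratch below is a placeholder, not a pdd API. Running it requires pdd's configured LLM backend:

from pdd.incremental_code_generator import incremental_code_generator

def regenerate_from_scratch(prompt: str) -> str:
    raise NotImplementedError  # placeholder for the full-generation path

updated_code, is_incremental, cost, model = incremental_code_generator(
    original_prompt="Write a factorial function.",
    new_prompt="Write a factorial function with input validation.",
    existing_code="def factorial(n): ...",
    language="python",
)
# None + is_incremental=False signals that a full regeneration is recommended.
final_code = updated_code if is_incremental else regenerate_from_scratch(
    "Write a factorial function with input validation."
)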
pdd/prompts/code_patcher_LLM.prompt
ADDED
@@ -0,0 +1,63 @@
+% You are an expert code patcher, tasked with incrementally updating existing code based on changes between two prompts.
+
+% Given:
+- The original prompt used to generate code
+- A new, updated prompt
+- The existing code generated from the original prompt
+- A description of the changes needed to update the code
+% Your task is to:
+1. Apply the necessary changes to the existing code while maintaining its overall structure
+2. Make only the modifications required to address the changes between the prompts
+3. Ensure the updated code is complete, correct, and meets all requirements in the new prompt
+4. Preserve the style, variable naming conventions, and organization of the original code
+
+% Output a JSON object with:
+- analysis: string - a detailed analysis of the changes needed
+- planned_modifications: string - a detailed description of the modifications needed and why this change should not break the existing code
+- patched_code: string - the complete updated code after applying all necessary changes
+
+ORIGINAL_PROMPT:
+<original_prompt>
+{ORIGINAL_PROMPT}
+</original_prompt>
+
+NEW_PROMPT:
+<new_prompt>
+{NEW_PROMPT}
+</new_prompt>
+
+EXISTING_CODE:
+<existing_code>
+{EXISTING_CODE}
+</existing_code>
+
+CHANGE_DESCRIPTION:
+<change_description>
+{CHANGE_DESCRIPTION}
+</change_description>
+
+Based on the change description and differences between the prompts, carefully update the existing code.
+
+Remember to:
+1. Only modify what's necessary to implement the changes
+2. Maintain consistency with the existing code style and structure
+3. Ensure all functionality from the original code still works correctly
+4. Implement all new requirements from the updated prompt
+5. Test the logic of your changes to ensure correctness
+6. Include appropriate error handling for any new code
+
+First, analyze the existing code to understand its structure and functionality:
+[Your code structure analysis]
+
+Next, determine specific modifications needed based on the change description:
+[Your planned modifications]
+
+Now, implement those changes to create the updated code:
+
+<output_json_example>
+{{
+  "analysis": "A detailed analysis of the changes needed",
+  "planned_modifications": "A detailed description of the modifications needed and why this change should not break the existing code",
+  "patched_code": "YOUR COMPLETE UPDATED CODE HERE"
+}}
+</output_json_example>
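The doubled braces in the example JSON above survive placeholder substitution as literal braces, while {NAME} placeholders are filled in. A small sketch of that convention, assuming str.format-style rendering (the actual rendering is done by pdd's preprocess/llm_invoke, whose internals are not shown in this diff):

# {{ and }} render as literal braces; {LANGUAGE} is substituted.
template = 'Return JSON like: {{"patched_code": "..."}} for this {LANGUAGE} module.'
print(template.format(LANGUAGE="python"))
# -> Return JSON like: {"patched_code": "..."} for this python module.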
pdd/prompts/diff_analyzer_LLM.prompt
ADDED
@@ -0,0 +1,69 @@
+% You are an expert code analyzer, tasked with determining if changes between two prompts are significant enough to warrant complete code regeneration.
+
+% Given:
+- The original prompt used to generate code
+- A new, updated prompt
+- The existing code generated from the original prompt
+% Your task is to:
+1. Analyze the differences between the original and new prompts
+2. Evaluate how significantly these differences would impact the code implementation
+3. Decide if the changes require completely regenerating the code or if incremental patching would be sufficient
+4. Provide a clear description of the changes needed, which will be used for patching if applicable
+
+% Output a JSON object with:
+- is_big_change: boolean - true if changes are significant enough to require complete regeneration, false if incremental patching is sufficient
+- change_description: string - detailed description of changes needed, especially helpful for incremental patching
+
+% Guidelines for determining significant changes:
+- Major architectural changes (significantly different data structures, algorithms, or patterns)
+- Addition of entirely new functionalities that alter the core purpose
+- Changes to return types or critical input parameters
+- Major changes to the programming paradigm or approach
+- Substantial adjustments to business logic
+
+% Examples of changes that can be handled incrementally:
+- Bug fixes or minor logic corrections
+- Adding or modifying non-critical validation
+- Adding simple helper functions
+- Renaming variables or functions
+- Adding documentation or comments
+- Minor enhancements to existing functionality
+- Small adjustments to error handling
+
+% Be conservative in determining if a change is "big" - if there's significant uncertainty, prefer complete regeneration.
+
+ORIGINAL_PROMPT:
+<original_prompt>
+{ORIGINAL_PROMPT}
+</original_prompt>
+
+NEW_PROMPT:
+<new_prompt>
+{NEW_PROMPT}
+</new_prompt>
+
+EXISTING_CODE:
+<existing_code>
+{EXISTING_CODE}
+</existing_code>
+
+Analyze the differences between the original and new prompts. Consider how these differences would impact the implementation of the code.
+
+First, identify and list the specific changes between the prompts:
+
+1. [List each significant change]
+
+Next, evaluate if these changes fundamentally alter the approach needed in the code implementation:
+
+2. [Your detailed analysis]
+
+Based on your analysis, determine whether these changes require completely regenerating the code or if they can be addressed with targeted modifications to the existing code.
+
+Output your conclusion as a JSON object with the following format:
+<output_json_example>
+{{
+  "analysis": "A detailed analysis of the changes needed 1 and 2 above",
+  "change_description": "A detailed description of the changes needed...",
+  "is_big_change": true/false
+}}
+</output_json_example>
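The analyzer's JSON maps onto the DiffAnalysis model added in incremental_code_generator.py. A sketch of that validation, assuming Pydantic v2 (llm_invoke presumably does the equivalent when called with output_pydantic=DiffAnalysis); the sample payload is invented for illustration:

from pydantic import BaseModel

class DiffAnalysis(BaseModel):  # mirrors the model defined in this release
    is_big_change: bool
    change_description: str
    analysis: str

raw = (
    '{"analysis": "Only adds input validation.",'
    ' "change_description": "Guard factorial() against negative inputs.",'
    ' "is_big_change": false}'
)
result = DiffAnalysis.model_validate_json(raw)  # Pydantic v2 API
assert result.is_big_change is False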
pdd/prompts/find_verification_errors_LLM.prompt
CHANGED
@@ -6,13 +6,13 @@
 
 % Here is the code_module that is being used by the program: <code_module>{code}</code_module>
 
-% Here are the output logs from the program run: <
+% Here are the output logs from the program run: <output_logs>{output}</output_logs>
 
 % IMPORTANT CONSIDERATIONS:
-
-
-
-
+1. The prompt may only describe part of the functionality needed by the program.
+2. Always consider compatibility between the program and code_module as the highest priority.
+3. Functions used by the program must exist in the code_module, even if not mentioned in the prompt.
+4. The prompt might only request new functionality to be added to existing code.
 
 % Follow these steps to identify any issues:
 Step 1. First, identify all functions and features in the code_module that are used by the program, as these must be preserved.
@@ -28,11 +28,11 @@
 
 % After your analysis, determine the number of distinct issues found. If no issues are found, the count should be 0.
 
-% Return your response as a single, valid JSON object.
-
-
-
-
-
-
-% Ensure the "issues_count" is an integer representing the total number of distinct problems you've identified in your details.
+% Return your response as a single, valid JSON object. The JSON object must conform to the following structure:
+<example_output>
+{{
+  "details": "A detailed explanation of all steps taken during your analysis, including any discrepancies, bugs, or potential issues identified. If no issues are found, this can be a brief confirmation.",
+  "issues_count": <integer_count_of_issues_found>
+}}
+</example_output>
+% Ensure the "details" field contains your complete textual analysis from Steps 1-7 and ensure the "issues_count" is an integer representing the total number of distinct problems you've identified in your details.