pdd-cli 0.0.90__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- pdd/__init__.py +38 -6
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +521 -786
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +118 -3
- pdd/agentic_update.py +25 -8
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +63 -53
- pdd/auto_include.py +185 -3
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +195 -23
- pdd/cmd_test_main.py +345 -197
- pdd/code_generator.py +4 -2
- pdd/code_generator_main.py +118 -32
- pdd/commands/__init__.py +6 -0
- pdd/commands/analysis.py +87 -29
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +136 -113
- pdd/commands/maintenance.py +3 -2
- pdd/commands/misc.py +8 -0
- pdd/commands/modify.py +190 -164
- pdd/commands/sessions.py +284 -0
- pdd/construct_paths.py +334 -32
- pdd/context_generator_main.py +167 -170
- pdd/continue_generation.py +6 -3
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +27 -3
- pdd/core/cloud.py +237 -0
- pdd/core/errors.py +4 -0
- pdd/core/remote_session.py +61 -0
- pdd/crash_main.py +219 -23
- pdd/data/llm_model.csv +4 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +208 -34
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +291 -38
- pdd/fix_main.py +204 -4
- pdd/fix_verification_errors_loop.py +235 -26
- pdd/fix_verification_main.py +269 -83
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +46 -5
- pdd/generate_test.py +212 -151
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +309 -20
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +7 -5
- pdd/insert_includes.py +2 -1
- pdd/llm_invoke.py +459 -95
- pdd/load_prompt_template.py +15 -34
- pdd/path_resolution.py +140 -0
- pdd/postprocess.py +4 -1
- pdd/preprocess.py +68 -12
- pdd/preprocess_main.py +33 -1
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
- pdd/prompts/agentic_update_LLM.prompt +192 -338
- pdd/prompts/auto_include_LLM.prompt +22 -0
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +571 -14
- pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
- pdd/prompts/generate_test_LLM.prompt +20 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/insert_includes_LLM.prompt +262 -252
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/remote_session.py +876 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/summarize_directory.py +236 -237
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +329 -47
- pdd/sync_main.py +272 -28
- pdd/sync_orchestration.py +136 -75
- pdd/template_expander.py +161 -0
- pdd/templates/architecture/architecture_json.prompt +41 -46
- pdd/trace.py +1 -1
- pdd/track_cost.py +0 -13
- pdd/unfinished_prompt.py +2 -1
- pdd/update_main.py +23 -5
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +15 -10
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- pdd_cli-0.0.90.dist-info/RECORD +0 -153
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -63,142 +63,147 @@ if __name__ == "__main__":
 
 For running prompts with llm_invoke:
 <llm_invoke_example>
-
-
-from typing import List,
+import os
+import sys
+from typing import List, Optional
+from pydantic import BaseModel, Field
+from rich.console import Console
+
+# Ensure the package is in the python path for this example
+# In a real installation, this would just be 'from pdd.llm_invoke import llm_invoke'
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from pdd.llm_invoke import llm_invoke
+
+console = Console()
+
+# --- Example 1: Simple Text Generation ---
+def example_simple_text():
+    console.print("[bold blue]--- Example 1: Simple Text Generation ---[/bold blue]")
+
+    # Define a prompt template
+    prompt_template = "Explain the concept of {concept} to a {audience} in one sentence."
+
+    # Define input variables
+    input_data = {
+        "concept": "quantum entanglement",
+        "audience": "5-year-old"
+    }
+
+    # Invoke the LLM
+    # strength=0.5 targets the 'base' model (usually a balance of cost/performance)
+    result = llm_invoke(
+        prompt=prompt_template,
+        input_json=input_data,
+        strength=0.5,
+        temperature=0.7,
+        verbose=True  # Set to True to see detailed logs about model selection and cost
+    )
 
-
-
-    setup: str = Field(description="The setup of the joke")
-    punchline: str = Field(description="The punchline of the joke")
+    console.print(f"[green]Result:[/green] {result['result']}")
+    console.print(f"[dim]Model used: {result['model_name']} | Cost: ${result['cost']:.6f}[/dim]\n")
 
 
-
-
-
+# --- Example 2: Structured Output with Pydantic ---
+class MovieReview(BaseModel):
+    title: str = Field(..., description="The title of the movie")
+    rating: int = Field(..., description="Rating out of 10")
+    summary: str = Field(..., description="A brief summary of the plot")
+    tags: List[str] = Field(..., description="List of genre tags")
 
-
-
+def example_structured_output():
+    console.print("[bold blue]--- Example 2: Structured Output (Pydantic) ---[/bold blue]")
 
-
-
-    """
-    model_df = _load_model_data(LLM_MODEL_CSV_PATH)
-
-    ranges = []
-    current_model = None
-    range_start = 0.0
-
-    # Sample strength values to find model boundaries
-    strength = 0.0
-    while strength <= 1.0:
-        candidates = _select_model_candidates(strength, DEFAULT_BASE_MODEL, model_df)
-        selected_model = candidates[0]['model'] if candidates else None
-
-        if current_model != selected_model:
-            if current_model is not None:
-                ranges.append({
-                    'model': current_model,
-                    'start': range_start,
-                    'end': round(strength - step, 3),
-                    'midpoint': round((range_start + strength - step) / 2, 3)
-                })
-            current_model = selected_model
-            range_start = strength
-
-        strength = round(strength + step, 3)
-
-    # Add the final range
-    if current_model is not None:
-        ranges.append({
-            'model': current_model,
-            'start': range_start,
-            'end': 1.0,
-            'midpoint': round((range_start + 1.0) / 2, 3)
-        })
-
-    return ranges
+    prompt = "Generate a review for a fictional sci-fi movie about {topic}."
+    input_data = {"topic": "time traveling cats"}
 
+    # Invoke with output_pydantic to enforce a schema
+    # strength=0.8 targets a higher-performance model (better at following schemas)
+    result = llm_invoke(
+        prompt=prompt,
+        input_json=input_data,
+        strength=0.8,
+        output_pydantic=MovieReview,
+        temperature=0.5
+    )
 
-
-
-
+    # The 'result' key will contain an instance of the Pydantic model
+    review: MovieReview = result['result']
+
+    console.print(f"[green]Title:[/green] {review.title}")
+    console.print(f"[green]Rating:[/green] {review.rating}/10")
+    console.print(f"[green]Tags:[/green] {', '.join(review.tags)}")
+    console.print(f"[dim]Model used: {result['model_name']}[/dim]\n")
+
+
+# --- Example 3: Batch Processing ---
+def example_batch_processing():
+    console.print("[bold blue]--- Example 3: Batch Processing ---[/bold blue]")
+
+    prompt = "What is the capital of {country}?"
+
+    # List of inputs triggers batch mode
+    batch_inputs = [
+        {"country": "France"},
+        {"country": "Japan"},
+        {"country": "Brazil"}
+    ]
+
+    # use_batch_mode=True uses the provider's batch API if available/supported by LiteLLM
+    # strength=0.2 targets a cheaper/faster model
+    results = llm_invoke(
+        prompt=prompt,
+        input_json=batch_inputs,
+        use_batch_mode=True,
+        strength=0.2,
+        temperature=0.1
+    )
+
+    # In batch mode, 'result' is a list of strings (or objects)
+    for i, res in enumerate(results['result']):
+        console.print(f"[green]Input:[/green] {batch_inputs[i]['country']} -> [green]Output:[/green] {res}")
+
+    console.print(f"[dim]Model used: {results['model_name']} | Total Cost: ${results['cost']:.6f}[/dim]\n")
+
+
+# --- Example 4: Reasoning / Thinking Time ---
+def example_reasoning():
+    console.print("[bold blue]--- Example 4: Reasoning / Thinking Time ---[/bold blue]")
+
+    # Some models (like Claude 3.7 or OpenAI o1/o3) support explicit thinking steps.
+    # Setting time > 0 enables this behavior based on the model's configuration in llm_model.csv.
+
+    prompt = "Solve this riddle: {riddle}"
+    input_data = {"riddle": "I speak without a mouth and hear without ears. I have no body, but I come alive with wind. What am I?"}
+
+    result = llm_invoke(
+        prompt=prompt,
+        input_json=input_data,
+        strength=1.0,  # Target highest capability model
+        time=0.5,      # Request moderate thinking time/budget
+        verbose=True
+    )
+
+    console.print(f"[green]Answer:[/green] {result['result']}")
+
+    # If the model supports it, thinking output is captured separately
+    if result.get('thinking_output'):
+        console.print(f"[yellow]Thinking Process:[/yellow] {result['thinking_output']}")
+    else:
+        console.print("[dim]No separate thinking output returned for this model.[/dim]")
 
-    Automatically calculates model ranges and runs each model once
-    at its midpoint strength value.
-    """
-    # Calculate model ranges automatically
-    print("Calculating model strength ranges...")
-    model_ranges = calculate_model_ranges()
-
-    # Print the calculated ranges
-    print("\n=== Model Strength Ranges ===")
-    for range_info in model_ranges:
-        print(f"{range_info['model']}: {range_info['start']:.3f} to {range_info['end']:.3f} (midpoint: {range_info['midpoint']:.3f})")
-
-    prompt = "Tell me a joke about {topic}"
-    input_json = {"topic": "programmers"}
-    temperature = 1
-    verbose = False
-
-    # Run each model once at its midpoint strength
-    print("\n=== Running Each Model Once ===")
-    for range_info in model_ranges:
-        model_name = range_info['model']
-        midpoint = range_info['midpoint']
-
-        print(f"\n--- Model: {model_name} (strength: {midpoint}) ---")
-
-        # Example 1: Unstructured Output
-        print("\n Unstructured Output:")
-        response = llm_invoke(
-            prompt=prompt,
-            input_json=input_json,
-            strength=midpoint,
-            temperature=temperature,
-            verbose=verbose
-        )
-
-        print(f" Result: {response['result']}")
-        print(f" Cost: ${response['cost']:.6f}")
-        print(f" Model Used: {response['model_name']}")
-
-        # Example 2: Structured Output with Pydantic Model
-        prompt_structured = (
-            "Generate a joke about {topic}. \n"
-            "Return it in this exact JSON format:\n"
-            "{{ \n"
-            ' "setup": "your setup here",\n'
-            ' "punchline": "your punchline here"\n'
-            "}}\n"
-            "Return ONLY the JSON with no additional text or explanation."
-        )
-        input_json_structured = {"topic": "data scientists"}
-        output_pydantic = Joke
-
-        print("\n Structured Output:")
-        try:
-            response_structured = llm_invoke(
-                prompt=prompt_structured,
-                input_json=input_json_structured,
-                strength=midpoint,
-                temperature=temperature,
-                verbose=verbose,
-                output_pydantic=output_pydantic
-            )
-            print(f" Result: {response_structured['result']}")
-            print(f" Cost: ${response_structured['cost']:.6f}")
-            print(f" Model Used: {response_structured['model_name']}")
-
-            # Access structured data
-            joke: Joke = response_structured['result']
-            print(f"\n Joke Setup: {joke.setup}")
-            print(f" Joke Punchline: {joke.punchline}")
-        except Exception as e:
-            print(f" Error encountered during structured output: {e}")
 
 if __name__ == "__main__":
-
+    # Ensure you have a valid .env file or environment variables set for API keys
+    # (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)
+
+    try:
+        example_simple_text()
+        example_structured_output()
+        example_batch_processing()
+        example_reasoning()
+    except Exception as e:
+        console.print(f"[bold red]Error running examples:[/bold red] {e}")
 </llm_invoke_example>
 </internal_modules>
 </dependencies_to_insert>
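Note: the new example above only implies the shape of the value returned by `llm_invoke` through the keys it reads (`result`, `cost`, `model_name`, and optionally `thinking_output`). The sketch below restates that inferred shape for readability; it is illustrative only, and the actual return value in `pdd/llm_invoke.py` may carry additional fields.

```python
# Illustrative only: inferred from how the diffed example consumes the return value.
from typing import Any, Optional, TypedDict

class LLMInvokeResult(TypedDict, total=False):
    result: Any                     # str, Pydantic instance, or a list in batch mode
    cost: float                     # accumulated cost in USD for the call
    model_name: str                 # the model actually selected for this strength
    thinking_output: Optional[str]  # only for models that expose a reasoning trace

def print_summary(res: LLMInvokeResult) -> None:
    # Mirrors the access pattern used in the example above.
    print(res.get("result"))
    print(f"{res.get('model_name')} cost=${res.get('cost', 0.0):.6f}")
    if res.get("thinking_output"):
        print("thinking:", res["thinking_output"])
```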
@@ -252,142 +257,147 @@
 
 For running prompts with llm_invoke:
 <llm_invoke_example>
-
-
-from typing import List,
+import os
+import sys
+from typing import List, Optional
+from pydantic import BaseModel, Field
+from rich.console import Console
+
+# Ensure the package is in the python path for this example
+# In a real installation, this would just be 'from pdd.llm_invoke import llm_invoke'
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from pdd.llm_invoke import llm_invoke
+
+console = Console()
+
+# --- Example 1: Simple Text Generation ---
+def example_simple_text():
+    console.print("[bold blue]--- Example 1: Simple Text Generation ---[/bold blue]")
+
+    # Define a prompt template
+    prompt_template = "Explain the concept of {concept} to a {audience} in one sentence."
+
+    # Define input variables
+    input_data = {
+        "concept": "quantum entanglement",
+        "audience": "5-year-old"
+    }
+
+    # Invoke the LLM
+    # strength=0.5 targets the 'base' model (usually a balance of cost/performance)
+    result = llm_invoke(
+        prompt=prompt_template,
+        input_json=input_data,
+        strength=0.5,
+        temperature=0.7,
+        verbose=True  # Set to True to see detailed logs about model selection and cost
+    )
 
-
-
-    setup: str = Field(description="The setup of the joke")
-    punchline: str = Field(description="The punchline of the joke")
+    console.print(f"[green]Result:[/green] {result['result']}")
+    console.print(f"[dim]Model used: {result['model_name']} | Cost: ${result['cost']:.6f}[/dim]\n")
 
 
-
-
-
+# --- Example 2: Structured Output with Pydantic ---
+class MovieReview(BaseModel):
+    title: str = Field(..., description="The title of the movie")
+    rating: int = Field(..., description="Rating out of 10")
+    summary: str = Field(..., description="A brief summary of the plot")
+    tags: List[str] = Field(..., description="List of genre tags")
 
-
-
+def example_structured_output():
+    console.print("[bold blue]--- Example 2: Structured Output (Pydantic) ---[/bold blue]")
 
-
-
-    """
-    model_df = _load_model_data(LLM_MODEL_CSV_PATH)
-
-    ranges = []
-    current_model = None
-    range_start = 0.0
-
-    # Sample strength values to find model boundaries
-    strength = 0.0
-    while strength <= 1.0:
-        candidates = _select_model_candidates(strength, DEFAULT_BASE_MODEL, model_df)
-        selected_model = candidates[0]['model'] if candidates else None
-
-        if current_model != selected_model:
-            if current_model is not None:
-                ranges.append({
-                    'model': current_model,
-                    'start': range_start,
-                    'end': round(strength - step, 3),
-                    'midpoint': round((range_start + strength - step) / 2, 3)
-                })
-            current_model = selected_model
-            range_start = strength
-
-        strength = round(strength + step, 3)
-
-    # Add the final range
-    if current_model is not None:
-        ranges.append({
-            'model': current_model,
-            'start': range_start,
-            'end': 1.0,
-            'midpoint': round((range_start + 1.0) / 2, 3)
-        })
-
-    return ranges
+    prompt = "Generate a review for a fictional sci-fi movie about {topic}."
+    input_data = {"topic": "time traveling cats"}
 
+    # Invoke with output_pydantic to enforce a schema
+    # strength=0.8 targets a higher-performance model (better at following schemas)
+    result = llm_invoke(
+        prompt=prompt,
+        input_json=input_data,
+        strength=0.8,
+        output_pydantic=MovieReview,
+        temperature=0.5
+    )
 
-
-
-
+    # The 'result' key will contain an instance of the Pydantic model
+    review: MovieReview = result['result']
+
+    console.print(f"[green]Title:[/green] {review.title}")
+    console.print(f"[green]Rating:[/green] {review.rating}/10")
+    console.print(f"[green]Tags:[/green] {', '.join(review.tags)}")
+    console.print(f"[dim]Model used: {result['model_name']}[/dim]\n")
+
+
+# --- Example 3: Batch Processing ---
+def example_batch_processing():
+    console.print("[bold blue]--- Example 3: Batch Processing ---[/bold blue]")
+
+    prompt = "What is the capital of {country}?"
+
+    # List of inputs triggers batch mode
+    batch_inputs = [
+        {"country": "France"},
+        {"country": "Japan"},
+        {"country": "Brazil"}
+    ]
+
+    # use_batch_mode=True uses the provider's batch API if available/supported by LiteLLM
+    # strength=0.2 targets a cheaper/faster model
+    results = llm_invoke(
+        prompt=prompt,
+        input_json=batch_inputs,
+        use_batch_mode=True,
+        strength=0.2,
+        temperature=0.1
+    )
+
+    # In batch mode, 'result' is a list of strings (or objects)
+    for i, res in enumerate(results['result']):
+        console.print(f"[green]Input:[/green] {batch_inputs[i]['country']} -> [green]Output:[/green] {res}")
+
+    console.print(f"[dim]Model used: {results['model_name']} | Total Cost: ${results['cost']:.6f}[/dim]\n")
+
+
+# --- Example 4: Reasoning / Thinking Time ---
+def example_reasoning():
+    console.print("[bold blue]--- Example 4: Reasoning / Thinking Time ---[/bold blue]")
+
+    # Some models (like Claude 3.7 or OpenAI o1/o3) support explicit thinking steps.
+    # Setting time > 0 enables this behavior based on the model's configuration in llm_model.csv.
+
+    prompt = "Solve this riddle: {riddle}"
+    input_data = {"riddle": "I speak without a mouth and hear without ears. I have no body, but I come alive with wind. What am I?"}
+
+    result = llm_invoke(
+        prompt=prompt,
+        input_json=input_data,
+        strength=1.0,  # Target highest capability model
+        time=0.5,      # Request moderate thinking time/budget
+        verbose=True
+    )
+
+    console.print(f"[green]Answer:[/green] {result['result']}")
+
+    # If the model supports it, thinking output is captured separately
+    if result.get('thinking_output'):
+        console.print(f"[yellow]Thinking Process:[/yellow] {result['thinking_output']}")
+    else:
+        console.print("[dim]No separate thinking output returned for this model.[/dim]")
 
-    Automatically calculates model ranges and runs each model once
-    at its midpoint strength value.
-    """
-    # Calculate model ranges automatically
-    print("Calculating model strength ranges...")
-    model_ranges = calculate_model_ranges()
-
-    # Print the calculated ranges
-    print("\n=== Model Strength Ranges ===")
-    for range_info in model_ranges:
-        print(f"{range_info['model']}: {range_info['start']:.3f} to {range_info['end']:.3f} (midpoint: {range_info['midpoint']:.3f})")
-
-    prompt = "Tell me a joke about {topic}"
-    input_json = {"topic": "programmers"}
-    temperature = 1
-    verbose = False
-
-    # Run each model once at its midpoint strength
-    print("\n=== Running Each Model Once ===")
-    for range_info in model_ranges:
-        model_name = range_info['model']
-        midpoint = range_info['midpoint']
-
-        print(f"\n--- Model: {model_name} (strength: {midpoint}) ---")
-
-        # Example 1: Unstructured Output
-        print("\n Unstructured Output:")
-        response = llm_invoke(
-            prompt=prompt,
-            input_json=input_json,
-            strength=midpoint,
-            temperature=temperature,
-            verbose=verbose
-        )
-
-        print(f" Result: {response['result']}")
-        print(f" Cost: ${response['cost']:.6f}")
-        print(f" Model Used: {response['model_name']}")
-
-        # Example 2: Structured Output with Pydantic Model
-        prompt_structured = (
-            "Generate a joke about {topic}. \n"
-            "Return it in this exact JSON format:\n"
-            "{{ \n"
-            ' "setup": "your setup here",\n'
-            ' "punchline": "your punchline here"\n'
-            "}}\n"
-            "Return ONLY the JSON with no additional text or explanation."
-        )
-        input_json_structured = {"topic": "data scientists"}
-        output_pydantic = Joke
-
-        print("\n Structured Output:")
-        try:
-            response_structured = llm_invoke(
-                prompt=prompt_structured,
-                input_json=input_json_structured,
-                strength=midpoint,
-                temperature=temperature,
-                verbose=verbose,
-                output_pydantic=output_pydantic
-            )
-            print(f" Result: {response_structured['result']}")
-            print(f" Cost: ${response_structured['cost']:.6f}")
-            print(f" Model Used: {response_structured['model_name']}")
-
-            # Access structured data
-            joke: Joke = response_structured['result']
-            print(f"\n Joke Setup: {joke.setup}")
-            print(f" Joke Punchline: {joke.punchline}")
-        except Exception as e:
-            print(f" Error encountered during structured output: {e}")
 
 if __name__ == "__main__":
-
+    # Ensure you have a valid .env file or environment variables set for API keys
+    # (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)
+
+    try:
+        example_simple_text()
+        example_structured_output()
+        example_batch_processing()
+        example_reasoning()
+    except Exception as e:
+        console.print(f"[bold red]Error running examples:[/bold red] {e}")
 </llm_invoke_example>
 </internal_modules>
 
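Note: the examples above pass templates such as `"What is the capital of {country}?"` alongside `input_json`, but this diff does not show how `llm_invoke` performs the substitution internally. The sketch below only illustrates the `{placeholder}` fill-in the examples rely on, assuming simple `str.format`-style substitution; the `render_prompt` helper is hypothetical and not part of pdd's API.

```python
# Hypothetical helper, for illustration only; not part of pdd.
def render_prompt(template: str, input_json: dict) -> str:
    # "{country}"-style placeholders are filled from matching keys in input_json.
    return template.format(**input_json)

print(render_prompt("What is the capital of {country}?", {"country": "France"}))
# -> "What is the capital of France?"
```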
@@ -0,0 +1,119 @@
+# prompt_code_diff_LLM.prompt
+
+You are a strict code analyst evaluating whether a PROMPT can REGENERATE the CODE.
+
+CRITICAL QUESTION: If an LLM only had this prompt, could it produce code that passes the same tests?
+
+PROMPT/REQUIREMENTS (with line numbers):
+```
+{prompt_numbered}
+```
+
+CODE (with line numbers):
+```
+{code_numbered}
+```
+
+## Analysis Focus
+
+**Be STRICT and PESSIMISTIC.** Your job is to find gaps that would cause regeneration failures.
+
+1. **Regeneration Risk Analysis**: Identify ALL code knowledge NOT in the prompt:
+   - Magic values, constants, thresholds (e.g., timeout=30, retry=3, buffer_size=4096)
+   - Specific algorithms or implementation approaches chosen
+   - Edge case handling not mentioned in prompt
+   - Error messages, status codes, specific exceptions
+   - API contracts, data formats, field names
+   - Dependencies, imports, library-specific patterns
+   - Performance optimizations or workarounds
+   - Business logic details embedded in code
+
+2. **Hidden Knowledge Detection**: Code often contains "tribal knowledge" that developers added but never documented:
+   - Why was THIS approach chosen over alternatives?
+   - What bugs or edge cases does this code handle that aren't obvious?
+   - What assumptions does the code make about inputs/environment?
+
+3. **Test Failure Prediction**: Would regenerated code likely fail tests because:
+   - Exact values/strings don't match expectations?
+   - Edge cases aren't handled the same way?
+   - API contracts differ from what tests expect?
+
+## Response Format
+
+Respond with a JSON object:
+
+1. "overallScore": integer 0-100
+   - 90-100: Prompt could regenerate code that passes tests
+   - 70-89: Minor details missing, regeneration might work with luck
+   - 50-69: Significant gaps, regeneration would likely fail some tests
+   - 0-49: Major knowledge missing, regeneration would definitely fail
+
+2. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+
+3. "regenerationRisk": "low", "medium", "high", or "critical"
+   - "low": Prompt captures all essential details
+   - "medium": Some implementation details missing but core logic documented
+   - "high": Significant undocumented behavior that would differ on regeneration
+   - "critical": Code has major features/logic not in prompt at all
+
+4. "summary": 1-2 sentences on regeneration viability, be direct about risks
+
+5. "sections": array of PROMPT requirement sections, each with:
+   - "id": unique string like "req_1", "req_2"
+   - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
+   - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
+   - "status": "matched", "partial", or "missing"
+   - "matchConfidence": 0-100
+   - "semanticLabel": descriptive label like "Error Handling", "Input Validation"
+   - "notes": REQUIRED explanation - be specific about what's missing or at risk
+
+6. "codeSections": array of CODE sections NOT adequately documented in prompt:
+   - "id": unique string like "code_1", "code_2"
+   - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
+   - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
+   - "status": "matched", "partial", or "extra"
+   - "matchConfidence": 0-100
+   - "semanticLabel": descriptive label
+   - "notes": REQUIRED - explain what knowledge would be LOST on regeneration
+     * For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
+     * For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
+
+7. "hiddenKnowledge": array of objects describing undocumented code knowledge:
+   - "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
+   - "location": {{"startLine": int, "endLine": int}}
+   - "description": what the code knows that the prompt doesn't say
+   - "regenerationImpact": "would_differ" | "would_fail" | "might_work"
+   - "suggestedPromptAddition": what to add to the prompt to capture this
+
+8. "lineMappings": array of line-level mappings:
+   - "promptLine": int
+   - "codeLines": array of ints
+   - "matchType": "exact", "semantic", "partial", "none"
+
+9. "stats": {{
+   "totalRequirements": int,
+   "matchedRequirements": int,
+   "missingRequirements": int,
+   "totalCodeFeatures": int,
+   "documentedFeatures": int,
+   "undocumentedFeatures": int,
+   "promptToCodeCoverage": float,
+   "codeToPromptCoverage": float,
+   "hiddenKnowledgeCount": int,
+   "criticalGaps": int
+}}
+
+10. "missing": array of strings - requirements in prompt not implemented
+11. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
+12. "suggestions": array of specific additions to make to the prompt to enable regeneration
+
+## Strictness Guidelines
+
+- **Assume regeneration WILL differ** unless the prompt explicitly specifies behavior
+- A function that "handles errors" in the prompt might handle them DIFFERENTLY on regeneration
+- Constants, timeouts, retry counts, buffer sizes - if not in prompt, they WILL be different
+- Specific error messages, log formats, status codes - WILL be different unless specified
+- Algorithm choices (e.g., quicksort vs mergesort, BFS vs DFS) - WILL be different unless specified
+- The goal is to make the prompt complete enough that ANY competent LLM would produce equivalent code
+- Mark as "extra" anything in code that prompt doesn't EXPLICITLY require
+- When in doubt, mark it as a gap - false positives are better than missed risks