pdd-cli 0.0.90__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (144)
  1. pdd/__init__.py +38 -6
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +521 -786
  7. pdd/agentic_e2e_fix.py +319 -0
  8. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  9. pdd/agentic_fix.py +118 -3
  10. pdd/agentic_update.py +25 -8
  11. pdd/architecture_sync.py +565 -0
  12. pdd/auth_service.py +210 -0
  13. pdd/auto_deps_main.py +63 -53
  14. pdd/auto_include.py +185 -3
  15. pdd/auto_update.py +125 -47
  16. pdd/bug_main.py +195 -23
  17. pdd/cmd_test_main.py +345 -197
  18. pdd/code_generator.py +4 -2
  19. pdd/code_generator_main.py +118 -32
  20. pdd/commands/__init__.py +6 -0
  21. pdd/commands/analysis.py +87 -29
  22. pdd/commands/auth.py +309 -0
  23. pdd/commands/connect.py +290 -0
  24. pdd/commands/fix.py +136 -113
  25. pdd/commands/maintenance.py +3 -2
  26. pdd/commands/misc.py +8 -0
  27. pdd/commands/modify.py +190 -164
  28. pdd/commands/sessions.py +284 -0
  29. pdd/construct_paths.py +334 -32
  30. pdd/context_generator_main.py +167 -170
  31. pdd/continue_generation.py +6 -3
  32. pdd/core/__init__.py +33 -0
  33. pdd/core/cli.py +27 -3
  34. pdd/core/cloud.py +237 -0
  35. pdd/core/errors.py +4 -0
  36. pdd/core/remote_session.py +61 -0
  37. pdd/crash_main.py +219 -23
  38. pdd/data/llm_model.csv +4 -4
  39. pdd/docs/prompting_guide.md +864 -0
  40. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  41. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  42. pdd/fix_code_loop.py +208 -34
  43. pdd/fix_code_module_errors.py +6 -2
  44. pdd/fix_error_loop.py +291 -38
  45. pdd/fix_main.py +204 -4
  46. pdd/fix_verification_errors_loop.py +235 -26
  47. pdd/fix_verification_main.py +269 -83
  48. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  49. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  50. pdd/frontend/dist/index.html +376 -0
  51. pdd/frontend/dist/logo.svg +33 -0
  52. pdd/generate_output_paths.py +46 -5
  53. pdd/generate_test.py +212 -151
  54. pdd/get_comment.py +19 -44
  55. pdd/get_extension.py +8 -9
  56. pdd/get_jwt_token.py +309 -20
  57. pdd/get_language.py +8 -7
  58. pdd/get_run_command.py +7 -5
  59. pdd/insert_includes.py +2 -1
  60. pdd/llm_invoke.py +459 -95
  61. pdd/load_prompt_template.py +15 -34
  62. pdd/path_resolution.py +140 -0
  63. pdd/postprocess.py +4 -1
  64. pdd/preprocess.py +68 -12
  65. pdd/preprocess_main.py +33 -1
  66. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  67. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  68. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  69. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  70. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  71. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  72. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  73. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  74. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  75. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  76. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  77. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  78. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  79. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  80. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  81. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  82. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  83. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  84. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  85. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  86. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  87. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  88. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  89. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  90. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  91. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  92. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  93. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  94. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  95. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  96. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  97. pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
  98. pdd/prompts/agentic_update_LLM.prompt +192 -338
  99. pdd/prompts/auto_include_LLM.prompt +22 -0
  100. pdd/prompts/change_LLM.prompt +3093 -1
  101. pdd/prompts/detect_change_LLM.prompt +571 -14
  102. pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
  103. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
  104. pdd/prompts/generate_test_LLM.prompt +20 -1
  105. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  106. pdd/prompts/insert_includes_LLM.prompt +262 -252
  107. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  108. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  109. pdd/remote_session.py +876 -0
  110. pdd/server/__init__.py +52 -0
  111. pdd/server/app.py +335 -0
  112. pdd/server/click_executor.py +587 -0
  113. pdd/server/executor.py +338 -0
  114. pdd/server/jobs.py +661 -0
  115. pdd/server/models.py +241 -0
  116. pdd/server/routes/__init__.py +31 -0
  117. pdd/server/routes/architecture.py +451 -0
  118. pdd/server/routes/auth.py +364 -0
  119. pdd/server/routes/commands.py +929 -0
  120. pdd/server/routes/config.py +42 -0
  121. pdd/server/routes/files.py +603 -0
  122. pdd/server/routes/prompts.py +1322 -0
  123. pdd/server/routes/websocket.py +473 -0
  124. pdd/server/security.py +243 -0
  125. pdd/server/terminal_spawner.py +209 -0
  126. pdd/server/token_counter.py +222 -0
  127. pdd/summarize_directory.py +236 -237
  128. pdd/sync_animation.py +8 -4
  129. pdd/sync_determine_operation.py +329 -47
  130. pdd/sync_main.py +272 -28
  131. pdd/sync_orchestration.py +136 -75
  132. pdd/template_expander.py +161 -0
  133. pdd/templates/architecture/architecture_json.prompt +41 -46
  134. pdd/trace.py +1 -1
  135. pdd/track_cost.py +0 -13
  136. pdd/unfinished_prompt.py +2 -1
  137. pdd/update_main.py +23 -5
  138. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +15 -10
  139. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  140. pdd_cli-0.0.90.dist-info/RECORD +0 -153
  141. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  142. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  143. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +0 -0
  144. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -63,142 +63,147 @@ if __name__ == "__main__":

  For running prompts with llm_invoke:
  <llm_invoke_example>
- from pydantic import BaseModel, Field
- from pdd.llm_invoke import llm_invoke, _load_model_data, _select_model_candidates, LLM_MODEL_CSV_PATH, DEFAULT_BASE_MODEL
- from typing import List, Dict, Any
+ import os
+ import sys
+ from typing import List, Optional
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+
+ # Ensure the package is in the python path for this example
+ # In a real installation, this would just be 'from pdd.llm_invoke import llm_invoke'
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+ from pdd.llm_invoke import llm_invoke
+
+ console = Console()
+
+ # --- Example 1: Simple Text Generation ---
+ def example_simple_text():
+     console.print("[bold blue]--- Example 1: Simple Text Generation ---[/bold blue]")
+
+     # Define a prompt template
+     prompt_template = "Explain the concept of {concept} to a {audience} in one sentence."
+
+     # Define input variables
+     input_data = {
+         "concept": "quantum entanglement",
+         "audience": "5-year-old"
+     }
+
+     # Invoke the LLM
+     # strength=0.5 targets the 'base' model (usually a balance of cost/performance)
+     result = llm_invoke(
+         prompt=prompt_template,
+         input_json=input_data,
+         strength=0.5,
+         temperature=0.7,
+         verbose=True # Set to True to see detailed logs about model selection and cost
+     )

- # Define a Pydantic model for structured output
- class Joke(BaseModel):
-     setup: str = Field(description="The setup of the joke")
-     punchline: str = Field(description="The punchline of the joke")
+     console.print(f"[green]Result:[/green] {result['result']}")
+     console.print(f"[dim]Model used: {result['model_name']} | Cost: ${result['cost']:.6f}[/dim]\n")


- def calculate_model_ranges(step: float = 0.001) -> List[Dict[str, Any]]:
-     """
-     Calculate the strength ranges for each model by sampling strength values.
+ # --- Example 2: Structured Output with Pydantic ---
+ class MovieReview(BaseModel):
+     title: str = Field(..., description="The title of the movie")
+     rating: int = Field(..., description="Rating out of 10")
+     summary: str = Field(..., description="A brief summary of the plot")
+     tags: List[str] = Field(..., description="List of genre tags")

-     Args:
-         step: The step size for sampling strength values (default 0.001)
+ def example_structured_output():
+     console.print("[bold blue]--- Example 2: Structured Output (Pydantic) ---[/bold blue]")

-     Returns:
-         List of dicts with 'model', 'start', 'end', and 'midpoint' keys
-     """
-     model_df = _load_model_data(LLM_MODEL_CSV_PATH)
-
-     ranges = []
-     current_model = None
-     range_start = 0.0
-
-     # Sample strength values to find model boundaries
-     strength = 0.0
-     while strength <= 1.0:
-         candidates = _select_model_candidates(strength, DEFAULT_BASE_MODEL, model_df)
-         selected_model = candidates[0]['model'] if candidates else None
-
-         if current_model != selected_model:
-             if current_model is not None:
-                 ranges.append({
-                     'model': current_model,
-                     'start': range_start,
-                     'end': round(strength - step, 3),
-                     'midpoint': round((range_start + strength - step) / 2, 3)
-                 })
-             current_model = selected_model
-             range_start = strength
-
-         strength = round(strength + step, 3)
-
-     # Add the final range
-     if current_model is not None:
-         ranges.append({
-             'model': current_model,
-             'start': range_start,
-             'end': 1.0,
-             'midpoint': round((range_start + 1.0) / 2, 3)
-         })
-
-     return ranges
+     prompt = "Generate a review for a fictional sci-fi movie about {topic}."
+     input_data = {"topic": "time traveling cats"}

+     # Invoke with output_pydantic to enforce a schema
+     # strength=0.8 targets a higher-performance model (better at following schemas)
+     result = llm_invoke(
+         prompt=prompt,
+         input_json=input_data,
+         strength=0.8,
+         output_pydantic=MovieReview,
+         temperature=0.5
+     )

- def main():
-     """
-     Main function to demonstrate the usage of `llm_invoke`.
+     # The 'result' key will contain an instance of the Pydantic model
+     review: MovieReview = result['result']
+
+     console.print(f"[green]Title:[/green] {review.title}")
+     console.print(f"[green]Rating:[/green] {review.rating}/10")
+     console.print(f"[green]Tags:[/green] {', '.join(review.tags)}")
+     console.print(f"[dim]Model used: {result['model_name']}[/dim]\n")
+
+
+ # --- Example 3: Batch Processing ---
+ def example_batch_processing():
+     console.print("[bold blue]--- Example 3: Batch Processing ---[/bold blue]")
+
+     prompt = "What is the capital of {country}?"
+
+     # List of inputs triggers batch mode
+     batch_inputs = [
+         {"country": "France"},
+         {"country": "Japan"},
+         {"country": "Brazil"}
+     ]
+
+     # use_batch_mode=True uses the provider's batch API if available/supported by LiteLLM
+     # strength=0.2 targets a cheaper/faster model
+     results = llm_invoke(
+         prompt=prompt,
+         input_json=batch_inputs,
+         use_batch_mode=True,
+         strength=0.2,
+         temperature=0.1
+     )
+
+     # In batch mode, 'result' is a list of strings (or objects)
+     for i, res in enumerate(results['result']):
+         console.print(f"[green]Input:[/green] {batch_inputs[i]['country']} -> [green]Output:[/green] {res}")
+
+     console.print(f"[dim]Model used: {results['model_name']} | Total Cost: ${results['cost']:.6f}[/dim]\n")
+
+
+ # --- Example 4: Reasoning / Thinking Time ---
+ def example_reasoning():
+     console.print("[bold blue]--- Example 4: Reasoning / Thinking Time ---[/bold blue]")
+
+     # Some models (like Claude 3.7 or OpenAI o1/o3) support explicit thinking steps.
+     # Setting time > 0 enables this behavior based on the model's configuration in llm_model.csv.
+
+     prompt = "Solve this riddle: {riddle}"
+     input_data = {"riddle": "I speak without a mouth and hear without ears. I have no body, but I come alive with wind. What am I?"}
+
+     result = llm_invoke(
+         prompt=prompt,
+         input_json=input_data,
+         strength=1.0, # Target highest capability model
+         time=0.5, # Request moderate thinking time/budget
+         verbose=True
+     )
+
+     console.print(f"[green]Answer:[/green] {result['result']}")
+
+     # If the model supports it, thinking output is captured separately
+     if result.get('thinking_output'):
+         console.print(f"[yellow]Thinking Process:[/yellow] {result['thinking_output']}")
+     else:
+         console.print("[dim]No separate thinking output returned for this model.[/dim]")

-     Automatically calculates model ranges and runs each model once
-     at its midpoint strength value.
-     """
-     # Calculate model ranges automatically
-     print("Calculating model strength ranges...")
-     model_ranges = calculate_model_ranges()
-
-     # Print the calculated ranges
-     print("\n=== Model Strength Ranges ===")
-     for range_info in model_ranges:
-         print(f"{range_info['model']}: {range_info['start']:.3f} to {range_info['end']:.3f} (midpoint: {range_info['midpoint']:.3f})")
-
-     prompt = "Tell me a joke about {topic}"
-     input_json = {"topic": "programmers"}
-     temperature = 1
-     verbose = False
-
-     # Run each model once at its midpoint strength
-     print("\n=== Running Each Model Once ===")
-     for range_info in model_ranges:
-         model_name = range_info['model']
-         midpoint = range_info['midpoint']
-
-         print(f"\n--- Model: {model_name} (strength: {midpoint}) ---")
-
-         # Example 1: Unstructured Output
-         print("\n Unstructured Output:")
-         response = llm_invoke(
-             prompt=prompt,
-             input_json=input_json,
-             strength=midpoint,
-             temperature=temperature,
-             verbose=verbose
-         )
-
-         print(f" Result: {response['result']}")
-         print(f" Cost: ${response['cost']:.6f}")
-         print(f" Model Used: {response['model_name']}")
-
-         # Example 2: Structured Output with Pydantic Model
-         prompt_structured = (
-             "Generate a joke about {topic}. \n"
-             "Return it in this exact JSON format:\n"
-             "{{ \n"
-             ' "setup": "your setup here",\n'
-             ' "punchline": "your punchline here"\n'
-             "}}\n"
-             "Return ONLY the JSON with no additional text or explanation."
-         )
-         input_json_structured = {"topic": "data scientists"}
-         output_pydantic = Joke
-
-         print("\n Structured Output:")
-         try:
-             response_structured = llm_invoke(
-                 prompt=prompt_structured,
-                 input_json=input_json_structured,
-                 strength=midpoint,
-                 temperature=temperature,
-                 verbose=verbose,
-                 output_pydantic=output_pydantic
-             )
-             print(f" Result: {response_structured['result']}")
-             print(f" Cost: ${response_structured['cost']:.6f}")
-             print(f" Model Used: {response_structured['model_name']}")
-
-             # Access structured data
-             joke: Joke = response_structured['result']
-             print(f"\n Joke Setup: {joke.setup}")
-             print(f" Joke Punchline: {joke.punchline}")
-         except Exception as e:
-             print(f" Error encountered during structured output: {e}")

  if __name__ == "__main__":
-     main()
+     # Ensure you have a valid .env file or environment variables set for API keys
+     # (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)
+
+     try:
+         example_simple_text()
+         example_structured_output()
+         example_batch_processing()
+         example_reasoning()
+     except Exception as e:
+         console.print(f"[bold red]Error running examples:[/bold red] {e}")
  </llm_invoke_example>
  </internal_modules>
  </dependencies_to_insert>
@@ -252,142 +257,147 @@ if __name__ == "__main__":

  For running prompts with llm_invoke:
  <llm_invoke_example>
- from pydantic import BaseModel, Field
- from pdd.llm_invoke import llm_invoke, _load_model_data, _select_model_candidates, LLM_MODEL_CSV_PATH, DEFAULT_BASE_MODEL
- from typing import List, Dict, Any
+ import os
+ import sys
+ from typing import List, Optional
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+
+ # Ensure the package is in the python path for this example
+ # In a real installation, this would just be 'from pdd.llm_invoke import llm_invoke'
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+ from pdd.llm_invoke import llm_invoke
+
+ console = Console()
+
+ # --- Example 1: Simple Text Generation ---
+ def example_simple_text():
+     console.print("[bold blue]--- Example 1: Simple Text Generation ---[/bold blue]")
+
+     # Define a prompt template
+     prompt_template = "Explain the concept of {concept} to a {audience} in one sentence."
+
+     # Define input variables
+     input_data = {
+         "concept": "quantum entanglement",
+         "audience": "5-year-old"
+     }
+
+     # Invoke the LLM
+     # strength=0.5 targets the 'base' model (usually a balance of cost/performance)
+     result = llm_invoke(
+         prompt=prompt_template,
+         input_json=input_data,
+         strength=0.5,
+         temperature=0.7,
+         verbose=True # Set to True to see detailed logs about model selection and cost
+     )

- # Define a Pydantic model for structured output
- class Joke(BaseModel):
-     setup: str = Field(description="The setup of the joke")
-     punchline: str = Field(description="The punchline of the joke")
+     console.print(f"[green]Result:[/green] {result['result']}")
+     console.print(f"[dim]Model used: {result['model_name']} | Cost: ${result['cost']:.6f}[/dim]\n")


- def calculate_model_ranges(step: float = 0.001) -> List[Dict[str, Any]]:
-     """
-     Calculate the strength ranges for each model by sampling strength values.
+ # --- Example 2: Structured Output with Pydantic ---
+ class MovieReview(BaseModel):
+     title: str = Field(..., description="The title of the movie")
+     rating: int = Field(..., description="Rating out of 10")
+     summary: str = Field(..., description="A brief summary of the plot")
+     tags: List[str] = Field(..., description="List of genre tags")

-     Args:
-         step: The step size for sampling strength values (default 0.001)
+ def example_structured_output():
+     console.print("[bold blue]--- Example 2: Structured Output (Pydantic) ---[/bold blue]")

-     Returns:
-         List of dicts with 'model', 'start', 'end', and 'midpoint' keys
-     """
-     model_df = _load_model_data(LLM_MODEL_CSV_PATH)
-
-     ranges = []
-     current_model = None
-     range_start = 0.0
-
-     # Sample strength values to find model boundaries
-     strength = 0.0
-     while strength <= 1.0:
-         candidates = _select_model_candidates(strength, DEFAULT_BASE_MODEL, model_df)
-         selected_model = candidates[0]['model'] if candidates else None
-
-         if current_model != selected_model:
-             if current_model is not None:
-                 ranges.append({
-                     'model': current_model,
-                     'start': range_start,
-                     'end': round(strength - step, 3),
-                     'midpoint': round((range_start + strength - step) / 2, 3)
-                 })
-             current_model = selected_model
-             range_start = strength
-
-         strength = round(strength + step, 3)
-
-     # Add the final range
-     if current_model is not None:
-         ranges.append({
-             'model': current_model,
-             'start': range_start,
-             'end': 1.0,
-             'midpoint': round((range_start + 1.0) / 2, 3)
-         })
-
-     return ranges
+     prompt = "Generate a review for a fictional sci-fi movie about {topic}."
+     input_data = {"topic": "time traveling cats"}

+     # Invoke with output_pydantic to enforce a schema
+     # strength=0.8 targets a higher-performance model (better at following schemas)
+     result = llm_invoke(
+         prompt=prompt,
+         input_json=input_data,
+         strength=0.8,
+         output_pydantic=MovieReview,
+         temperature=0.5
+     )

- def main():
-     """
-     Main function to demonstrate the usage of `llm_invoke`.
+     # The 'result' key will contain an instance of the Pydantic model
+     review: MovieReview = result['result']
+
+     console.print(f"[green]Title:[/green] {review.title}")
+     console.print(f"[green]Rating:[/green] {review.rating}/10")
+     console.print(f"[green]Tags:[/green] {', '.join(review.tags)}")
+     console.print(f"[dim]Model used: {result['model_name']}[/dim]\n")
+
+
+ # --- Example 3: Batch Processing ---
+ def example_batch_processing():
+     console.print("[bold blue]--- Example 3: Batch Processing ---[/bold blue]")
+
+     prompt = "What is the capital of {country}?"
+
+     # List of inputs triggers batch mode
+     batch_inputs = [
+         {"country": "France"},
+         {"country": "Japan"},
+         {"country": "Brazil"}
+     ]
+
+     # use_batch_mode=True uses the provider's batch API if available/supported by LiteLLM
+     # strength=0.2 targets a cheaper/faster model
+     results = llm_invoke(
+         prompt=prompt,
+         input_json=batch_inputs,
+         use_batch_mode=True,
+         strength=0.2,
+         temperature=0.1
+     )
+
+     # In batch mode, 'result' is a list of strings (or objects)
+     for i, res in enumerate(results['result']):
+         console.print(f"[green]Input:[/green] {batch_inputs[i]['country']} -> [green]Output:[/green] {res}")
+
+     console.print(f"[dim]Model used: {results['model_name']} | Total Cost: ${results['cost']:.6f}[/dim]\n")
+
+
+ # --- Example 4: Reasoning / Thinking Time ---
+ def example_reasoning():
+     console.print("[bold blue]--- Example 4: Reasoning / Thinking Time ---[/bold blue]")
+
+     # Some models (like Claude 3.7 or OpenAI o1/o3) support explicit thinking steps.
+     # Setting time > 0 enables this behavior based on the model's configuration in llm_model.csv.
+
+     prompt = "Solve this riddle: {riddle}"
+     input_data = {"riddle": "I speak without a mouth and hear without ears. I have no body, but I come alive with wind. What am I?"}
+
+     result = llm_invoke(
+         prompt=prompt,
+         input_json=input_data,
+         strength=1.0, # Target highest capability model
+         time=0.5, # Request moderate thinking time/budget
+         verbose=True
+     )
+
+     console.print(f"[green]Answer:[/green] {result['result']}")
+
+     # If the model supports it, thinking output is captured separately
+     if result.get('thinking_output'):
+         console.print(f"[yellow]Thinking Process:[/yellow] {result['thinking_output']}")
+     else:
+         console.print("[dim]No separate thinking output returned for this model.[/dim]")

-     Automatically calculates model ranges and runs each model once
-     at its midpoint strength value.
-     """
-     # Calculate model ranges automatically
-     print("Calculating model strength ranges...")
-     model_ranges = calculate_model_ranges()
-
-     # Print the calculated ranges
-     print("\n=== Model Strength Ranges ===")
-     for range_info in model_ranges:
-         print(f"{range_info['model']}: {range_info['start']:.3f} to {range_info['end']:.3f} (midpoint: {range_info['midpoint']:.3f})")
-
-     prompt = "Tell me a joke about {topic}"
-     input_json = {"topic": "programmers"}
-     temperature = 1
-     verbose = False
-
-     # Run each model once at its midpoint strength
-     print("\n=== Running Each Model Once ===")
-     for range_info in model_ranges:
-         model_name = range_info['model']
-         midpoint = range_info['midpoint']
-
-         print(f"\n--- Model: {model_name} (strength: {midpoint}) ---")
-
-         # Example 1: Unstructured Output
-         print("\n Unstructured Output:")
-         response = llm_invoke(
-             prompt=prompt,
-             input_json=input_json,
-             strength=midpoint,
-             temperature=temperature,
-             verbose=verbose
-         )
-
-         print(f" Result: {response['result']}")
-         print(f" Cost: ${response['cost']:.6f}")
-         print(f" Model Used: {response['model_name']}")
-
-         # Example 2: Structured Output with Pydantic Model
-         prompt_structured = (
-             "Generate a joke about {topic}. \n"
-             "Return it in this exact JSON format:\n"
-             "{{ \n"
-             ' "setup": "your setup here",\n'
-             ' "punchline": "your punchline here"\n'
-             "}}\n"
-             "Return ONLY the JSON with no additional text or explanation."
-         )
-         input_json_structured = {"topic": "data scientists"}
-         output_pydantic = Joke
-
-         print("\n Structured Output:")
-         try:
-             response_structured = llm_invoke(
-                 prompt=prompt_structured,
-                 input_json=input_json_structured,
-                 strength=midpoint,
-                 temperature=temperature,
-                 verbose=verbose,
-                 output_pydantic=output_pydantic
-             )
-             print(f" Result: {response_structured['result']}")
-             print(f" Cost: ${response_structured['cost']:.6f}")
-             print(f" Model Used: {response_structured['model_name']}")
-
-             # Access structured data
-             joke: Joke = response_structured['result']
-             print(f"\n Joke Setup: {joke.setup}")
-             print(f" Joke Punchline: {joke.punchline}")
-         except Exception as e:
-             print(f" Error encountered during structured output: {e}")

  if __name__ == "__main__":
-     main()
+     # Ensure you have a valid .env file or environment variables set for API keys
+     # (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)
+
+     try:
+         example_simple_text()
+         example_structured_output()
+         example_batch_processing()
+         example_reasoning()
+     except Exception as e:
+         console.print(f"[bold red]Error running examples:[/bold red] {e}")
  </llm_invoke_example>
  </internal_modules>

@@ -0,0 +1,119 @@
+ # prompt_code_diff_LLM.prompt
+
+ You are a strict code analyst evaluating whether a PROMPT can REGENERATE the CODE.
+
+ CRITICAL QUESTION: If an LLM only had this prompt, could it produce code that passes the same tests?
+
+ PROMPT/REQUIREMENTS (with line numbers):
+ ```
+ {prompt_numbered}
+ ```
+
+ CODE (with line numbers):
+ ```
+ {code_numbered}
+ ```
+
+ ## Analysis Focus
+
+ **Be STRICT and PESSIMISTIC.** Your job is to find gaps that would cause regeneration failures.
+
+ 1. **Regeneration Risk Analysis**: Identify ALL code knowledge NOT in the prompt:
+    - Magic values, constants, thresholds (e.g., timeout=30, retry=3, buffer_size=4096)
+    - Specific algorithms or implementation approaches chosen
+    - Edge case handling not mentioned in prompt
+    - Error messages, status codes, specific exceptions
+    - API contracts, data formats, field names
+    - Dependencies, imports, library-specific patterns
+    - Performance optimizations or workarounds
+    - Business logic details embedded in code
+
+ 2. **Hidden Knowledge Detection**: Code often contains "tribal knowledge" that developers added but never documented:
+    - Why was THIS approach chosen over alternatives?
+    - What bugs or edge cases does this code handle that aren't obvious?
+    - What assumptions does the code make about inputs/environment?
+
+ 3. **Test Failure Prediction**: Would regenerated code likely fail tests because:
+    - Exact values/strings don't match expectations?
+    - Edge cases aren't handled the same way?
+    - API contracts differ from what tests expect?
+
+ ## Response Format
+
+ Respond with a JSON object:
+
+ 1. "overallScore": integer 0-100
+    - 90-100: Prompt could regenerate code that passes tests
+    - 70-89: Minor details missing, regeneration might work with luck
+    - 50-69: Significant gaps, regeneration would likely fail some tests
+    - 0-49: Major knowledge missing, regeneration would definitely fail
+
+ 2. "canRegenerate": boolean - Conservative assessment: could this prompt produce working code?
+
+ 3. "regenerationRisk": "low", "medium", "high", or "critical"
+    - "low": Prompt captures all essential details
+    - "medium": Some implementation details missing but core logic documented
+    - "high": Significant undocumented behavior that would differ on regeneration
+    - "critical": Code has major features/logic not in prompt at all
+
+ 4. "summary": 1-2 sentences on regeneration viability, be direct about risks
+
+ 5. "sections": array of PROMPT requirement sections, each with:
+    - "id": unique string like "req_1", "req_2"
+    - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}}
+    - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if missing)
+    - "status": "matched", "partial", or "missing"
+    - "matchConfidence": 0-100
+    - "semanticLabel": descriptive label like "Error Handling", "Input Validation"
+    - "notes": REQUIRED explanation - be specific about what's missing or at risk
+
+ 6. "codeSections": array of CODE sections NOT adequately documented in prompt:
+    - "id": unique string like "code_1", "code_2"
+    - "promptRange": {{"startLine": int, "endLine": int, "text": "excerpt"}} (empty if undocumented)
+    - "codeRanges": array of {{"startLine": int, "endLine": int, "text": "excerpt"}}
+    - "status": "matched", "partial", or "extra"
+    - "matchConfidence": 0-100
+    - "semanticLabel": descriptive label
+    - "notes": REQUIRED - explain what knowledge would be LOST on regeneration
+      * For "extra": "REGENERATION RISK: [specific feature/value/logic] is not in prompt and would be lost or different"
+      * For "partial": "INCOMPLETE: Prompt mentions [X] but doesn't specify [critical detail Y]"
+
+ 7. "hiddenKnowledge": array of objects describing undocumented code knowledge:
+    - "type": "magic_value" | "algorithm_choice" | "edge_case" | "error_handling" | "api_contract" | "optimization" | "business_logic" | "assumption"
+    - "location": {{"startLine": int, "endLine": int}}
+    - "description": what the code knows that the prompt doesn't say
+    - "regenerationImpact": "would_differ" | "would_fail" | "might_work"
+    - "suggestedPromptAddition": what to add to the prompt to capture this
+
+ 8. "lineMappings": array of line-level mappings:
+    - "promptLine": int
+    - "codeLines": array of ints
+    - "matchType": "exact", "semantic", "partial", "none"
+
+ 9. "stats": {{
+      "totalRequirements": int,
+      "matchedRequirements": int,
+      "missingRequirements": int,
+      "totalCodeFeatures": int,
+      "documentedFeatures": int,
+      "undocumentedFeatures": int,
+      "promptToCodeCoverage": float,
+      "codeToPromptCoverage": float,
+      "hiddenKnowledgeCount": int,
+      "criticalGaps": int
+    }}
+
+ 10. "missing": array of strings - requirements in prompt not implemented
+ 11. "extra": array of strings - CRITICAL: code features that would be LOST on regeneration
+ 12. "suggestions": array of specific additions to make to the prompt to enable regeneration
+
+ ## Strictness Guidelines
+
+ - **Assume regeneration WILL differ** unless the prompt explicitly specifies behavior
+ - A function that "handles errors" in the prompt might handle them DIFFERENTLY on regeneration
+ - Constants, timeouts, retry counts, buffer sizes - if not in prompt, they WILL be different
+ - Specific error messages, log formats, status codes - WILL be different unless specified
+ - Algorithm choices (e.g., quicksort vs mergesort, BFS vs DFS) - WILL be different unless specified
+ - The goal is to make the prompt complete enough that ANY competent LLM would produce equivalent code
+ - Mark as "extra" anything in code that prompt doesn't EXPLICITLY require
+ - When in doubt, mark it as a gap - false positives are better than missed risks
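
For orientation, the sketch below shows what a response obeying the schema described by the new prompt_code_diff_LLM.prompt might look like. It is a hypothetical illustration only: the scores, line numbers, and findings are invented and do not describe any file in this package, and it assumes the doubled braces in the prompt are `str.format` escapes, so the model's actual output is plain JSON with single braces.

```python
# Hypothetical example only: a minimal response matching the JSON schema that
# prompt_code_diff_LLM.prompt asks the analyst model to return. All values
# below are invented for illustration.
import json

example_response = {
    "overallScore": 55,
    "canRegenerate": False,
    "regenerationRisk": "high",
    "summary": "Illustrative placeholder: the prompt covers the core flow but "
               "omits the retry count and timeout, so regenerated code would "
               "likely fail the existing tests.",
    "sections": [{
        "id": "req_1",
        "promptRange": {"startLine": 3, "endLine": 5, "text": "Retry failed downloads"},
        "codeRanges": [{"startLine": 40, "endLine": 52, "text": "for attempt in range(3):"}],
        "status": "partial",
        "matchConfidence": 70,
        "semanticLabel": "Retry Logic",
        "notes": "INCOMPLETE: Prompt mentions retries but doesn't specify the count of 3 or the backoff delay.",
    }],
    "codeSections": [{
        "id": "code_1",
        "promptRange": {},
        "codeRanges": [{"startLine": 60, "endLine": 60, "text": "timeout=30"}],
        "status": "extra",
        "matchConfidence": 90,
        "semanticLabel": "Request Timeout",
        "notes": "REGENERATION RISK: the 30-second timeout is not in prompt and would be lost or different.",
    }],
    "hiddenKnowledge": [{
        "type": "magic_value",
        "location": {"startLine": 60, "endLine": 60},
        "description": "HTTP timeout is hard-coded to 30 seconds.",
        "regenerationImpact": "would_differ",
        "suggestedPromptAddition": "Specify a 30-second request timeout.",
    }],
    "lineMappings": [{"promptLine": 4, "codeLines": [40, 41], "matchType": "semantic"}],
    "stats": {
        "totalRequirements": 6, "matchedRequirements": 4, "missingRequirements": 2,
        "totalCodeFeatures": 9, "documentedFeatures": 5, "undocumentedFeatures": 4,
        "promptToCodeCoverage": 0.67, "codeToPromptCoverage": 0.56,
        "hiddenKnowledgeCount": 1, "criticalGaps": 1,
    },
    "missing": ["Input validation for empty URLs"],
    "extra": ["Hard-coded 30-second timeout"],
    "suggestions": ["State the retry count (3) and the 30-second timeout explicitly in the prompt."],
}

# Serialize to the plain-JSON form the prompt asks the model to emit.
print(json.dumps(example_response, indent=2))
```

Each array is shown with a single element; a real analysis would contain one entry per requirement and per undocumented code feature.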