pdd-cli 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

Files changed (49) hide show
  1. pdd/__init__.py +14 -1
  2. pdd/bug_main.py +5 -1
  3. pdd/bug_to_unit_test.py +16 -5
  4. pdd/change.py +2 -1
  5. pdd/change_main.py +407 -189
  6. pdd/cli.py +853 -301
  7. pdd/code_generator.py +2 -1
  8. pdd/conflicts_in_prompts.py +2 -1
  9. pdd/construct_paths.py +377 -222
  10. pdd/context_generator.py +2 -1
  11. pdd/continue_generation.py +5 -2
  12. pdd/crash_main.py +55 -20
  13. pdd/data/llm_model.csv +18 -17
  14. pdd/detect_change.py +2 -1
  15. pdd/fix_code_loop.py +465 -160
  16. pdd/fix_code_module_errors.py +7 -4
  17. pdd/fix_error_loop.py +9 -9
  18. pdd/fix_errors_from_unit_tests.py +207 -365
  19. pdd/fix_main.py +32 -4
  20. pdd/fix_verification_errors.py +148 -77
  21. pdd/fix_verification_errors_loop.py +842 -768
  22. pdd/fix_verification_main.py +412 -0
  23. pdd/generate_output_paths.py +427 -189
  24. pdd/generate_test.py +3 -2
  25. pdd/increase_tests.py +2 -2
  26. pdd/llm_invoke.py +1167 -343
  27. pdd/preprocess.py +3 -3
  28. pdd/process_csv_change.py +466 -154
  29. pdd/prompts/bug_to_unit_test_LLM.prompt +11 -11
  30. pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
  31. pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
  32. pdd/prompts/find_verification_errors_LLM.prompt +11 -9
  33. pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
  34. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
  35. pdd/prompts/fix_verification_errors_LLM.prompt +8 -1
  36. pdd/prompts/generate_test_LLM.prompt +9 -3
  37. pdd/prompts/trim_results_start_LLM.prompt +1 -1
  38. pdd/prompts/update_prompt_LLM.prompt +3 -3
  39. pdd/split.py +6 -5
  40. pdd/split_main.py +13 -4
  41. pdd/trace_main.py +7 -0
  42. pdd/update_model_costs.py +446 -0
  43. pdd/xml_tagger.py +2 -1
  44. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/METADATA +8 -16
  45. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/RECORD +49 -47
  46. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/WHEEL +1 -1
  47. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/entry_points.txt +0 -0
  48. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/licenses/LICENSE +0 -0
  49. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,446 @@
1
+ # update_model_costs.py
2
+
3
+ import argparse
4
+ import os
5
+ import pandas as pd
6
+ import litellm
7
+ from rich.console import Console
8
+ from rich.table import Table
9
+ import math # For isnan check, although pd.isna is preferred
10
+ from pathlib import Path
11
+ from rich.text import Text # Import Text for explicit string conversion if needed
12
+
13
# Initialize Rich Console for pretty printing
console = Console()

# Define expected columns in the CSV, including the manually maintained one.
# 'max_reasoning_tokens' is never auto-populated by this script (see the
# reminder printed at the end of update_model_data); it must be edited by hand.
EXPECTED_COLUMNS = [
    'provider', 'model', 'input', 'output', 'coding_arena_elo', 'base_url',
    'api_key',
    'max_reasoning_tokens', 'structured_output'
]

# Define columns that should be nullable integers (pandas 'Int64' dtype, so
# missing values stay as <NA> instead of forcing a float column).
INT_COLUMNS = ['coding_arena_elo', 'max_reasoning_tokens']

# Placeholder for missing numeric values (optional, pd.NA is generally better)
# MISSING_VALUE_PLACEHOLDER = -1.0 # Not used in current logic, pd.NA preferred
28
+
29
def update_model_data(csv_path: str) -> None:
    """
    Reads the llm_model.csv file, updates missing costs and structured output
    support using LiteLLM, and saves the updated file.

    Behaviour (as implemented below):
      * Missing 'input'/'output' costs are filled from ``litellm.model_cost``,
        scaling per-token costs by 1,000,000 (i.e. stored as cost per million
        tokens).
      * Costs already present in the CSV are only *compared* against LiteLLM;
        mismatches are reported in the table but never overwritten.
      * A missing 'structured_output' flag is filled from
        ``litellm.supports_response_schema``.
      * The CSV is rewritten only when columns were added to the schema or a
        cell value actually changed.

    Args:
        csv_path (str): The path to the llm_model.csv file.
    """
    console.print(f"[bold blue]Starting model data update for:[/bold blue] {csv_path}")

    # --- 1. Load CSV and Handle Initial Errors ---
    try:
        df = pd.read_csv(csv_path)
        console.print(f"[green]Successfully loaded:[/green] {csv_path}")
    except FileNotFoundError:
        console.print(f"[bold red]Error:[/bold red] CSV file not found at {csv_path}")
        return
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] Failed to load CSV file: {e}")
        return

    # Keep a copy for comparison later to determine if actual data changed
    # Do this *before* schema changes and type enforcement
    # NOTE(review): original_df is captured but never read afterwards — the
    # save decision below relies on the `data_changed` flag instead. Candidate
    # for removal.
    original_df = df.copy()

    # --- 2. Check and Add Missing Columns ---
    updated_schema = False
    for col in EXPECTED_COLUMNS:
        if col not in df.columns:
            updated_schema = True
            console.print(f"[yellow]Warning:[/yellow] Column '{col}' missing. Adding it.")
            # Initialize with pd.NA regardless of type, enforcement happens next
            df[col] = pd.NA
    if updated_schema:
        console.print("[cyan]CSV schema updated with missing columns.[/cyan]")
        # Reorder columns to match expected order if schema was updated.
        # NOTE(review): reindex drops any extra columns not in EXPECTED_COLUMNS
        # when the schema was updated — confirm that is intended.
        df = df.reindex(columns=EXPECTED_COLUMNS)


    # --- 3. Enforce Correct Data Types ---
    # Do this *after* loading and adding any missing columns
    console.print("\n[bold blue]Enforcing data types...[/bold blue]")
    try:
        # Floats (allow NA)
        if 'input' in df.columns:
            df['input'] = pd.to_numeric(df['input'], errors='coerce')
        if 'output' in df.columns:
            df['output'] = pd.to_numeric(df['output'], errors='coerce')

        # Boolean/Object (allow NA)
        if 'structured_output' in df.columns:
            # Convert common string representations to bool or NA
            df['structured_output'] = df['structured_output'].apply(
                lambda x: pd.NA if pd.isna(x) or str(x).strip().lower() in ['', 'na', 'nan', '<na>'] else (
                    True if str(x).strip().lower() == 'true' else (
                        False if str(x).strip().lower() == 'false' else pd.NA
                    )
                )
            ).astype('object')  # Keep as object to hold True, False, pd.NA

        # Integers (allow NA)
        for col in INT_COLUMNS:
            if col in df.columns:
                # Convert to numeric first (handles strings like '123', errors become NA),
                # then cast to nullable Int64.
                df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
                console.print(f"[cyan]Ensured '{col}' is nullable integer (Int64).[/cyan]")

        console.print("[green]Data types enforced successfully.[/green]")

    except Exception as e:
        console.print(f"[bold red]Error during type enforcement:[/bold red] {e}")
        return  # Exit if types can't be enforced correctly

    # --- 4. Iterate Through Models and Update ---
    models_updated_count = 0  # Tracks rows where data was actually changed
    # models_failed_count = 0 # Replaced by unique_failed_models later
    mismatched_cost_count = 0  # Track mismatches
    # Add a temporary column to track failures directly (dropped before save)
    df['_failed'] = False
    # Track if any actual data change happened beyond schema or type coercion
    data_changed = False

    console.print("\n[bold blue]Processing models...[/bold blue]")
    table = Table(title="Model Update Status", show_lines=True)
    table.add_column("Model Identifier", style="cyan")
    table.add_column("Cost Update", style="magenta")
    table.add_column("Struct. Output Update", style="yellow")
    table.add_column("Cost Match", style="blue")  # New column for matching status
    table.add_column("Status", style="green")

    # Pre-fetch all model costs from LiteLLM once
    all_model_costs = {}  # Initialize as empty
    try:
        # Access the property/attribute inside the try block
        cost_data_dict = litellm.model_cost
        # Ensure it's a dictionary, handle None case
        all_model_costs = cost_data_dict if isinstance(cost_data_dict, dict) else {}
        if not all_model_costs:
            console.print("[yellow]Warning:[/yellow] `litellm.model_cost` returned empty or None. Cost updates might be skipped.")
        else:
            console.print("[green]Successfully fetched LiteLLM model cost data.[/green]")
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] Could not fetch LiteLLM model cost data: {e}")
        # all_model_costs remains {}

    for index, row in df.iterrows():
        model_identifier = row['model']
        # Ensure model_identifier is treated as a string for consistency
        model_identifier_str = str(model_identifier) if not pd.isna(model_identifier) else None

        if not model_identifier_str:
            console.print(f"[yellow]Warning:[/yellow] Skipping row {index} due to missing model identifier.")
            continue

        # --- Cost Comparison Variables ---
        fetched_input_cost = None
        fetched_output_cost = None
        cost_match_status = "[grey]N/A[/grey]"  # Default if no litellm data or comparison not possible
        cost_data_available = False

        # --- 5. Initial Model Validation & Schema Check ---
        is_valid_model = True
        schema_check_result = None  # Store result if check succeeds
        struct_check_error = None  # Store potential error details

        try:
            # Use the string version for the LiteLLM call
            schema_check_result = litellm.supports_response_schema(model=model_identifier_str)
        except ValueError as ve:
            is_valid_model = False
            struct_check_error = ve  # Store the specific error
            row_status = "[red]Fail (Invalid/Unknown Model?)[/red]"
            cost_update_msg = "[red]Skipped[/red]"
            struct_update_msg = f"[red]Validation Failed: {ve}[/red]"
            df.loc[index, '_failed'] = True
            cost_match_status = "[red]Skipped (Validation Failed)[/red]"  # Also skip matching
        except Exception as e:
            # Catch other potential errors during the initial check
            is_valid_model = False  # Treat other errors as validation failure too
            struct_check_error = e
            row_status = "[red]Fail (Schema Check Error)[/red]"
            cost_update_msg = "[red]Skipped[/red]"
            struct_update_msg = f"[red]Check Error: {e}[/red]"
            df.loc[index, '_failed'] = True
            cost_match_status = "[red]Skipped (Schema Check Error)[/red]"  # Also skip matching

        # If initial validation failed, skip further processing for this row
        if not is_valid_model:
            # Use string identifier for table
            table.add_row(model_identifier_str, cost_update_msg, struct_update_msg, cost_match_status, row_status)
            continue

        # --- If validation passed, proceed with cost and struct updates ---
        cost_update_msg = "Checked"
        struct_update_msg = "Checked"
        row_status = "[green]OK[/green]"
        row_needs_update = False  # Track if this specific row's data changed

        # --- 6. Check and Update Costs ---
        # Use string identifier to look up cost data
        cost_data = all_model_costs.get(model_identifier_str)
        cost_fetch_error = None

        if cost_data and isinstance(cost_data, dict):  # Ensure cost_data is a dict
            cost_data_available = True
            try:
                input_cost_per_token = cost_data.get('input_cost_per_token')
                output_cost_per_token = cost_data.get('output_cost_per_token')

                # Ensure costs are numeric before calculation.
                # Scale per-token cost to cost per million tokens.
                if input_cost_per_token is not None:
                    try:
                        fetched_input_cost = float(input_cost_per_token) * 1_000_000
                    except (ValueError, TypeError):
                        console.print(f"[yellow]Warning ({model_identifier_str}):[/yellow] Invalid input_cost_per_token format: {input_cost_per_token}")
                        fetched_input_cost = None
                        cost_fetch_error = cost_fetch_error or ValueError("Invalid input cost format")
                if output_cost_per_token is not None:
                    try:
                        fetched_output_cost = float(output_cost_per_token) * 1_000_000
                    except (ValueError, TypeError):
                        console.print(f"[yellow]Warning ({model_identifier_str}):[/yellow] Invalid output_cost_per_token format: {output_cost_per_token}")
                        fetched_output_cost = None
                        cost_fetch_error = cost_fetch_error or ValueError("Invalid output cost format")


            except Exception as e:
                # Catch errors during the .get or multiplication
                cost_fetch_error = e
                cost_update_msg = f"[red]Error processing costs: {e}[/red]"
                if "Fail" not in row_status: row_status = "[red]Fail (Cost Error)[/red]"
                df.loc[index, '_failed'] = True  # Mark failure

        # Decide action based on fetched data and existing values
        # Use .loc for robust NA check after type enforcement
        input_cost_missing = pd.isna(df.loc[index, 'input'])
        output_cost_missing = pd.isna(df.loc[index, 'output'])

        updated_costs_messages = []
        mismatched_costs_messages = []
        matched_costs_messages = []

        if cost_data_available and not cost_fetch_error:
            current_input_cost = df.loc[index, 'input']
            current_output_cost = df.loc[index, 'output']

            # Update Input Cost if missing
            if input_cost_missing and fetched_input_cost is not None:
                df.loc[index, 'input'] = fetched_input_cost
                updated_costs_messages.append(f"Input: {fetched_input_cost:.4f}")
                row_needs_update = True
            # Compare Input Cost if not missing
            elif not input_cost_missing and fetched_input_cost is not None:
                # Use isclose for float comparison
                if not math.isclose(current_input_cost, fetched_input_cost, rel_tol=1e-6):
                    mismatched_costs_messages.append(f"Input (CSV: {current_input_cost:.4f}, LLM: {fetched_input_cost:.4f})")
                else:
                    matched_costs_messages.append("Input")
            elif not input_cost_missing and fetched_input_cost is None:
                # CSV has cost, but LiteLLM doesn't provide input cost
                matched_costs_messages.append("Input (CSV Only)")


            # Update Output Cost if missing
            if output_cost_missing and fetched_output_cost is not None:
                df.loc[index, 'output'] = fetched_output_cost
                updated_costs_messages.append(f"Output: {fetched_output_cost:.4f}")
                row_needs_update = True
            # Compare Output Cost if not missing
            elif not output_cost_missing and fetched_output_cost is not None:
                # Use isclose for float comparison
                if not math.isclose(current_output_cost, fetched_output_cost, rel_tol=1e-6):
                    mismatched_costs_messages.append(f"Output (CSV: {current_output_cost:.4f}, LLM: {fetched_output_cost:.4f})")
                else:
                    matched_costs_messages.append("Output")
            elif not output_cost_missing and fetched_output_cost is None:
                # CSV has cost, but LiteLLM doesn't provide output cost
                matched_costs_messages.append("Output (CSV Only)")


            # Set Cost Update Message
            if updated_costs_messages:
                cost_update_msg = f"[green]Updated ({', '.join(updated_costs_messages)})[/green]"
            elif mismatched_costs_messages or matched_costs_messages:  # If compared, even if no update
                cost_update_msg = "[blue]Checked (No missing values)[/blue]"
            else:  # No cost data in litellm for either input/output that could be processed
                cost_update_msg = "[yellow]No comparable cost data in LiteLLM[/yellow]"
                if row_status == "[green]OK[/green]": row_status = "[yellow]Info (No Cost Data)[/yellow]"

            # Set Cost Match Status Message
            if mismatched_costs_messages:
                cost_match_status = f"[bold red]Mismatch! ({', '.join(mismatched_costs_messages)})[/bold red]"
                mismatched_cost_count += 1  # Increment mismatch counter
            elif matched_costs_messages == ["Input (CSV Only)", "Output (CSV Only)"]:
                cost_match_status = "[grey]N/A (No LLM Data)[/grey]"
            elif matched_costs_messages:
                # Mix of matched and CSV only is still a match for available data
                match_details = ', '.join(m for m in matched_costs_messages if 'CSV Only' not in m)
                if match_details:
                    cost_match_status = f"[green]Match ({match_details})[/green]"
                else:  # Only CSV Only messages
                    cost_match_status = "[grey]N/A (No LLM Data)[/grey]"
            elif updated_costs_messages:  # If costs were updated, they now 'match'
                cost_match_status = f"[blue]N/A (Updated)[/blue]"
            else:  # If no costs existed to compare (e.g., LLM has no cost data)
                cost_match_status = "[grey]N/A (No LLM Data)[/grey]"

        elif cost_fetch_error:
            cost_match_status = "[red]Error during fetch/process[/red]"
            # Ensure row status reflects failure if not already set
            if "Fail" not in row_status: row_status = "[red]Fail (Cost Error)[/red]"
            df.loc[index, '_failed'] = True  # Mark failure

        elif not cost_data_available:
            cost_update_msg = "[yellow]Cost data not found in LiteLLM[/yellow]"
            cost_match_status = "[grey]N/A (No LLM Data)[/grey]"
            if row_status == "[green]OK[/green]": row_status = "[yellow]Info (No Cost Data)[/yellow]"
        else:  # Should not happen, but catchall
            cost_update_msg = "[orange]Unknown Cost State[/orange]"
            cost_match_status = "[orange]Unknown[/orange]"

        # --- 7. Check and Update Structured Output Support ---
        # Use .loc for robust NA check
        struct_output_missing = pd.isna(df.loc[index, 'structured_output'])

        if struct_output_missing:
            # Use the result from the initial check if it succeeded
            if schema_check_result is not None:
                new_value = bool(schema_check_result)
                df.loc[index, 'structured_output'] = new_value  # Store as True/False
                struct_update_msg = f"[green]Updated ({new_value})[/green]"
                row_needs_update = True
            else:
                # This case means initial validation passed, but schema_check_result is None (shouldn't happen)
                # or initial validation failed (handled earlier, but double-check struct_check_error)
                if struct_check_error:
                    # Error already reported during validation phase
                    struct_update_msg = f"[red]Update Failed (Initial Check Error)[/red]"
                    df.loc[index, 'structured_output'] = pd.NA  # Ensure NA on error
                    if "Fail" not in row_status:
                        row_status = "[red]Fail (Struct Error)[/red]"
                    df.loc[index, '_failed'] = True  # Mark failure
                else:
                    # Fallback if validation passed but result is missing
                    struct_update_msg = "[orange]Update Skipped (Unknown State)[/orange]"
                    df.loc[index, 'structured_output'] = pd.NA
        else:
            # Value already exists, no need to update
            struct_update_msg = "Checked (Exists)"

        # Tally updates and failures
        if df.loc[index, '_failed']:  # Check the failure flag
            pass  # Failure already marked, status set earlier
        elif row_needs_update:  # Only count as updated if no failure occurred and data changed
            models_updated_count += 1
            data_changed = True  # Mark that some data was actually modified
            if row_status == "[green]OK[/green]":  # Status was OK before update checks
                row_status = "[blue]Updated[/blue]"
            elif "[yellow]" in row_status:  # Update happened alongside info
                row_status = "[blue]Updated (Info)[/blue]"

        # Add the row to the table using the string identifier
        table.add_row(model_identifier_str, cost_update_msg, struct_update_msg, cost_match_status, row_status)

    console.print(table)
    console.print(f"\n[bold]Summary:[/bold]")
    console.print(f"- Models processed: {len(df)}")
    # Count unique models with failures for better reporting
    unique_failed_models = df[df['_failed']]['model'].nunique()
    console.print(f"- Models with fetch/check errors: {unique_failed_models}")
    console.print(f"- Rows potentially updated (data changed): {models_updated_count}")
    console.print(f"- Models with cost mismatches: {mismatched_cost_count}")
    if mismatched_cost_count > 0:
        console.print(f"  [bold red](Note: Mismatched costs were NOT automatically updated. Check CSV vs LiteLLM.)[/bold red]")

    # Add confirmation if all models passed initial validation
    if unique_failed_models == 0 and len(df) > 0:
        console.print(f"[green]All {len(df)} model identifiers passed initial validation.[/green]")

    # --- 8. Save Updated DataFrame ---
    # Save if schema was updated OR if actual data values changed
    # Compare current df (without _failed col) to original df (before type enforcement/updates)
    df_to_save = df.drop(columns=['_failed'])

    # Use df.equals() for robust comparison, requires identical types and values
    # Note: Type enforcement might change dtypes (e.g., int to Int64) causing equals() to be false
    # even if values look the same. A more lenient check might be needed if saving only on value change is critical.
    # For now, save if schema changed OR data changed (tracked by data_changed flag).
    should_save = updated_schema or data_changed

    # Add logging for save condition
    console.print(f"\n[grey]Save check: updated_schema={updated_schema}, data_changed={data_changed}[/grey]")

    if should_save:
        try:
            console.print(f"[cyan]Attempting to save updates to {csv_path}...[/cyan]")
            # Ensure NA values are saved correctly (as empty strings by default)
            df_to_save.to_csv(csv_path, index=False, na_rep='')  # Save NA as empty string
            console.print(f"[bold green]Successfully saved updated data to:[/bold green] {csv_path}")
        except Exception as e:
            console.print(f"[bold red]Error:[/bold red] Failed to save updated CSV file: {e}")
    else:
        console.print("\n[bold blue]No schema changes or data updates needed. CSV file not saved.[/bold blue]")

    # --- 9. Reminder about Manual Column ---
    console.print(f"\n[bold yellow]Reminder:[/bold yellow] The '{'max_reasoning_tokens'}' column is not automatically updated by this script and requires manual maintenance.")
    console.print(f"[bold blue]Model data update process finished.[/bold blue]")
399
def main():
    """Parse CLI arguments, resolve the target CSV location, and run the update.

    Resolution order: a per-user override at ``~/.pdd/llm_model.csv`` wins if
    it exists; otherwise the ``--csv-path`` argument (resolved to an absolute
    path) is used, creating its parent directory when necessary.
    """
    arg_parser = argparse.ArgumentParser(
        description="Update LLM model costs and structured output support in a CSV file using LiteLLM."
    )
    arg_parser.add_argument(
        "--csv-path",
        type=str,
        default="data/llm_model.csv",
        help="Path to the llm_model.csv file (default: data/llm_model.csv)"
    )
    cli_args = arg_parser.parse_args()

    # --- Determine final CSV path ---
    user_override = Path.home() / ".pdd" / "llm_model.csv"
    # Start from the resolved default/provided path; the override replaces it.
    resolved_target = Path(cli_args.csv_path).resolve()

    if user_override.is_file():
        resolved_target = user_override
        console.print(f"[bold cyan]Found user-specific config, using:[/bold cyan] {resolved_target}")
    else:
        console.print(f"[cyan]User-specific config not found. Using default/provided path:[/cyan] {resolved_target}")
        # Only the non-override path may need its directory created.
        parent_dir = resolved_target.parent
        if not parent_dir.exists():
            try:
                # exist_ok=True avoids a race if the directory appears meanwhile.
                os.makedirs(parent_dir, exist_ok=True)
                console.print(f"[cyan]Created directory:[/cyan] {parent_dir}")
            except OSError as e:
                console.print(f"[bold red]Error:[/bold red] Could not create directory {parent_dir}: {e}")
                return  # Exit if directory cannot be created
        # Note: update_model_data handles a missing *file* at the target path.

    # Hand the absolute path (as a string) to the update routine.
    update_model_data(str(resolved_target))
440
if __name__ == "__main__":
    # --- Crucial Note ---
    # Warn the operator up front: every lookup below (litellm.model_cost,
    # litellm.supports_response_schema) is keyed by the CSV's 'model' column,
    # so invalid identifiers fail per-row during processing.
    console.print("[bold yellow]Important:[/bold yellow] This script assumes the 'model' column in the CSV contains")
    console.print("           [bold yellow]valid LiteLLM model identifiers[/bold yellow] (e.g., 'openai/gpt-4o-mini',")
    console.print("           'ollama/llama3', 'anthropic/claude-3-haiku-20240307').")
    console.print("           Please verify the identifiers before running.\n")
    main()
pdd/xml_tagger.py CHANGED
@@ -4,6 +4,7 @@ from rich.markdown import Markdown
4
4
  from pydantic import BaseModel, Field
5
5
  from .load_prompt_template import load_prompt_template
6
6
  from .llm_invoke import llm_invoke
7
+ from . import EXTRACTION_STRENGTH
7
8
 
8
9
  class XMLOutput(BaseModel):
9
10
  xml_tagged: str = Field(description="The XML-tagged version of the prompt")
@@ -72,7 +73,7 @@ def xml_tagger(
72
73
  extraction_response = llm_invoke(
73
74
  prompt=extract_xml_prompt,
74
75
  input_json={"xml_generated_analysis": xml_generated_analysis},
75
- strength=0.97, # Fixed strength as specified
76
+ strength=EXTRACTION_STRENGTH, # Fixed strength for extraction
76
77
  temperature=temperature,
77
78
  verbose=verbose,
78
79
  output_pydantic=XMLOutput
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdd-cli
3
- Version: 0.0.24
3
+ Version: 0.0.26
4
4
  Summary: PDD (Prompt-Driven Development) Command Line Interface
5
5
  Author: Greg Tanaka
6
6
  Author-email: glt@alumni.caltech.edu
@@ -26,34 +26,26 @@ Requires-Dist: click==8.1.7
26
26
  Requires-Dist: firecrawl-py
27
27
  Requires-Dist: firebase_admin==6.6.0
28
28
  Requires-Dist: keyring==25.6.0
29
- Requires-Dist: langchain==0.3.23
30
- Requires-Dist: langchain_anthropic==0.3.10
31
- Requires-Dist: langchain_community==0.3.21
32
- Requires-Dist: langchain_core==0.3.51
33
- Requires-Dist: langchain_fireworks==0.2.9
34
- Requires-Dist: langchain_google_genai==2.1.2
35
- Requires-Dist: langchain_google_vertexai==2.0.19
36
- Requires-Dist: langchain_groq==0.3.2
37
- Requires-Dist: langchain_mcp_adapters==0.0.7
38
- Requires-Dist: langchain_ollama==0.3.0
39
- Requires-Dist: langchain_openai==0.3.12
40
- Requires-Dist: langchain_together==0.3.0
41
- Requires-Dist: langgraph==0.3.25
42
29
  Requires-Dist: nest_asyncio==1.6.0
43
30
  Requires-Dist: pandas==2.2.3
44
31
  Requires-Dist: psutil==5.9.0
45
32
  Requires-Dist: pydantic==2.11.2
33
+ Requires-Dist: litellm
46
34
  Requires-Dist: rich==14.0.0
47
35
  Requires-Dist: semver==3.0.2
48
36
  Requires-Dist: setuptools
37
+ Requires-Dist: boto3==1.35.99
49
38
  Requires-Dist: python-Levenshtein
50
39
  Provides-Extra: dev
51
40
  Requires-Dist: commitizen; extra == "dev"
52
41
  Requires-Dist: pytest; extra == "dev"
53
42
  Requires-Dist: pytest-cov; extra == "dev"
43
+ Requires-Dist: pytest-mock; extra == "dev"
44
+ Requires-Dist: pytest-asyncio; extra == "dev"
45
+ Requires-Dist: z3-solver; extra == "dev"
54
46
  Dynamic: license-file
55
47
 
56
- .. image:: https://img.shields.io/badge/pdd--cli-v0.0.24-blue
48
+ .. image:: https://img.shields.io/badge/pdd--cli-v0.0.26-blue
57
49
  :alt: PDD-CLI Version
58
50
 
59
51
  PDD (Prompt-Driven Development) Command Line Interface
@@ -114,7 +106,7 @@ After installation, verify:
114
106
 
115
107
  pdd --version
116
108
 
117
- You'll see the current PDD version (e.g., 0.0.24).
109
+ You'll see the current PDD version (e.g., 0.0.26).
118
110
 
119
111
  Advanced Installation Tips
120
112
  --------------------------