pdd_cli-0.0.40-py3-none-any.whl → pdd_cli-0.0.42-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (43)
  1. pdd/__init__.py +1 -1
  2. pdd/auto_deps_main.py +1 -1
  3. pdd/auto_update.py +73 -78
  4. pdd/bug_main.py +3 -3
  5. pdd/bug_to_unit_test.py +46 -38
  6. pdd/change.py +20 -13
  7. pdd/change_main.py +223 -163
  8. pdd/cli.py +192 -95
  9. pdd/cmd_test_main.py +51 -36
  10. pdd/code_generator_main.py +3 -2
  11. pdd/conflicts_main.py +1 -1
  12. pdd/construct_paths.py +221 -19
  13. pdd/context_generator_main.py +27 -12
  14. pdd/crash_main.py +44 -50
  15. pdd/data/llm_model.csv +1 -1
  16. pdd/detect_change_main.py +1 -1
  17. pdd/fix_code_module_errors.py +12 -0
  18. pdd/fix_main.py +2 -2
  19. pdd/fix_verification_errors.py +13 -0
  20. pdd/fix_verification_main.py +3 -3
  21. pdd/generate_output_paths.py +113 -21
  22. pdd/generate_test.py +53 -16
  23. pdd/llm_invoke.py +162 -0
  24. pdd/logo_animation.py +455 -0
  25. pdd/preprocess_main.py +1 -1
  26. pdd/process_csv_change.py +1 -1
  27. pdd/prompts/extract_program_code_fix_LLM.prompt +2 -1
  28. pdd/prompts/sync_analysis_LLM.prompt +82 -0
  29. pdd/split_main.py +1 -1
  30. pdd/sync_animation.py +643 -0
  31. pdd/sync_determine_operation.py +1039 -0
  32. pdd/sync_main.py +333 -0
  33. pdd/sync_orchestration.py +639 -0
  34. pdd/trace_main.py +1 -1
  35. pdd/update_main.py +7 -2
  36. pdd/xml_tagger.py +15 -6
  37. pdd_cli-0.0.42.dist-info/METADATA +307 -0
  38. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/RECORD +42 -36
  39. pdd_cli-0.0.40.dist-info/METADATA +0 -269
  40. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/WHEEL +0 -0
  41. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/entry_points.txt +0 -0
  42. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/licenses/LICENSE +0 -0
  43. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/top_level.txt +0 -0
pdd/generate_output_paths.py CHANGED
@@ -26,6 +26,7 @@ COMMAND_OUTPUT_KEYS: Dict[str, List[str]] = {
  'bug': ['output'],
  'auto-deps': ['output'],
  'verify': ['output_results', 'output_code', 'output_program'],
+ 'sync': ['generate_output_path', 'test_output_path', 'example_output_path'],
  }

  # Define default filename patterns for each output key
@@ -48,8 +49,8 @@ DEFAULT_FILENAMES: Dict[str, Dict[str, str]] = {
  },
  'change': {'output': 'modified_{basename}.prompt'},
  'update': {'output': 'modified_{basename}.prompt'}, # Consistent with change/split default
- 'detect': {'output': '{basename}_detect.csv'}, # Using basename as change_file_basename isn't available here
- 'conflicts': {'output': '{basename}_conflict.csv'}, # Using basename as prompt1/2 basenames aren't available
+ 'detect': {'output': '{basename}_detect.csv'}, # basename here is from change_file per construct_paths logic
+ 'conflicts': {'output': '{basename}_conflict.csv'}, # basename here is combined sorted prompt basenames per construct_paths logic
  'crash': {
  'output': '{basename}_fixed{ext}',
  # Using basename as program_basename isn't available here
@@ -63,6 +64,11 @@ DEFAULT_FILENAMES: Dict[str, Dict[str, str]] = {
  'output_code': '{basename}_verified{ext}',
  'output_program': '{basename}_program_verified{ext}',
  },
+ 'sync': {
+ 'generate_output_path': '{basename}{ext}',
+ 'test_output_path': 'test_{basename}{ext}',
+ 'example_output_path': '{basename}_example{ext}',
+ },
  }

  # Define the mapping from command/output key to environment variables
@@ -96,6 +102,50 @@ ENV_VAR_MAP: Dict[str, Dict[str, str]] = {
  'output_code': 'PDD_VERIFY_CODE_OUTPUT_PATH',
  'output_program': 'PDD_VERIFY_PROGRAM_OUTPUT_PATH',
  },
+ 'sync': {
+ 'generate_output_path': 'PDD_GENERATE_OUTPUT_PATH',
+ 'test_output_path': 'PDD_TEST_OUTPUT_PATH',
+ 'example_output_path': 'PDD_EXAMPLE_OUTPUT_PATH',
+ },
+ }
+
+ # Define mapping from context config keys to output keys for different commands
+ CONTEXT_CONFIG_MAP: Dict[str, Dict[str, str]] = {
+ 'generate': {'output': 'generate_output_path'},
+ 'example': {'output': 'example_output_path'},
+ 'test': {'output': 'test_output_path'},
+ 'sync': {
+ 'generate_output_path': 'generate_output_path',
+ 'test_output_path': 'test_output_path',
+ 'example_output_path': 'example_output_path',
+ },
+ # For other commands, they can use the general mapping if needed
+ 'preprocess': {'output': 'generate_output_path'}, # fallback
+ 'fix': {
+ 'output_test': 'test_output_path',
+ 'output_code': 'generate_output_path',
+ 'output_results': 'generate_output_path', # fallback for results
+ },
+ 'split': {
+ 'output_sub': 'generate_output_path', # fallback
+ 'output_modified': 'generate_output_path', # fallback
+ },
+ 'change': {'output': 'generate_output_path'},
+ 'update': {'output': 'generate_output_path'},
+ 'detect': {'output': 'generate_output_path'},
+ 'conflicts': {'output': 'generate_output_path'},
+ 'crash': {
+ 'output': None, # Use default CWD behavior, not context paths
+ 'output_program': None, # Use default CWD behavior, not context paths
+ },
+ 'trace': {'output': 'generate_output_path'},
+ 'bug': {'output': 'test_output_path'},
+ 'auto-deps': {'output': 'generate_output_path'},
+ 'verify': {
+ 'output_results': 'generate_output_path',
+ 'output_code': 'generate_output_path',
+ 'output_program': 'generate_output_path',
+ },
  }

  # --- Helper Function ---
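Editor's note: the new CONTEXT_CONFIG_MAP translates each command's output key into the key a .pddrc context section is expected to supply. A minimal illustration of that lookup, using a hypothetical context dictionary (the key names follow the map above; the directory values are made up):

    context_config = {
        'generate_output_path': 'src/',
        'test_output_path': 'tests/',
        'example_output_path': 'examples/',
    }
    command, output_key = 'test', 'output'
    context_key = CONTEXT_CONFIG_MAP.get(command, {}).get(output_key)         # -> 'test_output_path'
    context_path = context_config.get(context_key) if context_key else None  # -> 'tests/'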
@@ -127,14 +177,15 @@ def generate_output_paths(
  output_locations: Dict[str, Optional[str]],
  basename: str,
  language: str,
- file_extension: str
+ file_extension: str,
+ context_config: Optional[Dict[str, str]] = None
  ) -> Dict[str, str]:
  """
  Generates the full, absolute output paths for a given PDD command.

- It prioritizes user-specified paths (--output options), then environment
- variables, and finally falls back to default naming conventions in the
- current working directory.
+ It prioritizes user-specified paths (--output options), then context
+ configuration from .pddrc, then environment variables, and finally
+ falls back to default naming conventions in the current working directory.

  Args:
  command: The PDD command being executed (e.g., 'generate', 'fix').
@@ -146,6 +197,8 @@ def generate_output_paths(
  language: The programming language associated with the operation.
  file_extension: The file extension (including '.') for the language,
  used when default patterns require it.
+ context_config: Optional dictionary with context-specific paths from .pddrc
+ configuration (e.g., {'generate_output_path': 'src/'}).

  Returns:
  A dictionary where keys are the standardized output identifiers
@@ -155,8 +208,10 @@
  """
  logger.debug(f"Generating output paths for command: {command}")
  logger.debug(f"User output locations: {output_locations}")
+ logger.debug(f"Context config: {context_config}")
  logger.debug(f"Basename: {basename}, Language: {language}, Extension: {file_extension}")

+ context_config = context_config or {}
  result_paths: Dict[str, str] = {}

  if not basename:
@@ -183,6 +238,11 @@
  logger.debug(f"Processing output key: {output_key}")

  user_path: Optional[str] = processed_output_locations.get(output_key)
+
+ # Get context configuration path for this output key
+ context_config_key = CONTEXT_CONFIG_MAP.get(command, {}).get(output_key)
+ context_path: Optional[str] = context_config.get(context_config_key) if context_config_key else None
+
  env_var_name: Optional[str] = ENV_VAR_MAP.get(command, {}).get(output_key)
  env_path: Optional[str] = os.environ.get(env_var_name) if env_var_name else None

@@ -215,7 +275,26 @@
  logger.debug(f"User path '{user_path}' identified as a specific file path.")
  final_path = user_path # Assume it's a full path or filename

- # 2. Check Environment Variable Path
+ # 2. Check Context Configuration Path (.pddrc)
+ elif context_path:
+ source = "context"
+ # Check if the context path is a directory
+ is_dir = context_path.endswith(os.path.sep) or context_path.endswith('/')
+ if not is_dir:
+ try:
+ if os.path.exists(context_path) and os.path.isdir(context_path):
+ is_dir = True
+ except Exception as e:
+ logger.warning(f"Could not check if context path '{context_path}' is a directory: {e}")
+
+ if is_dir:
+ logger.debug(f"Context path '{context_path}' identified as a directory.")
+ final_path = os.path.join(context_path, default_filename)
+ else:
+ logger.debug(f"Context path '{context_path}' identified as a specific file path.")
+ final_path = context_path
+
+ # 3. Check Environment Variable Path
  elif env_path:
  source = "environment"
  # Check if the environment variable points to a directory
@@ -234,7 +313,7 @@
  logger.debug(f"Env path '{env_path}' identified as a specific file path.")
  final_path = env_path # Assume it's a full path or filename

- # 3. Use Default Naming Convention in CWD
+ # 4. Use Default Naming Convention in CWD
  else:
  source = "default"
  logger.debug(f"Using default filename '{default_filename}' in current directory.")
@@ -273,7 +352,8 @@ if __name__ == '__main__':
  output_locations={}, # No user input
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths1}")
  # Expected: {'output': '/path/to/cwd/my_module.py'}
@@ -285,7 +365,8 @@ if __name__ == '__main__':
  output_locations={'output': 'generated_code.py'},
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths2}")
  # Expected: {'output': '/path/to/cwd/generated_code.py'}
@@ -300,7 +381,8 @@ if __name__ == '__main__':
  output_locations={'output': test_dir_gen + os.path.sep}, # Explicit directory
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths3}")
  # Expected: {'output': '/path/to/cwd/temp_gen_output/my_module.py'}
@@ -319,7 +401,8 @@ if __name__ == '__main__':
  },
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths4}")
  # Expected: {
@@ -344,7 +427,8 @@ if __name__ == '__main__':
  output_locations={}, # No user input
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths5}")
  # Expected: {
@@ -365,7 +449,8 @@ if __name__ == '__main__':
  output_locations={},
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension # This extension is ignored for preprocess default
+ file_extension=mock_extension, # This extension is ignored for preprocess default
+ context_config={}
  )
  print(f"Result: {paths6}")
  # Expected: {'output': '/path/to/cwd/my_module_python_preprocessed.prompt'}
@@ -377,7 +462,8 @@ if __name__ == '__main__':
  output_locations={},
  basename=mock_basename,
  language=mock_language,
- file_extension=mock_extension
+ file_extension=mock_extension,
+ context_config={}
  )
  print(f"Result: {paths7}")
  # Expected: {}
@@ -389,7 +475,8 @@ if __name__ == '__main__':
  output_locations={},
  basename="complex_prompt",
  language="javascript",
- file_extension=".js" # Ignored for split defaults
+ file_extension=".js", # Ignored for split defaults
+ context_config={}
  )
  print(f"Result: {paths8}")
  # Expected: {
@@ -404,7 +491,8 @@ if __name__ == '__main__':
  output_locations={},
  basename="feature_analysis", # Used instead of change_file_basename
  language="", # Not relevant for detect default
- file_extension="" # Not relevant for detect default
+ file_extension="", # Not relevant for detect default
+ context_config={}
  )
  print(f"Result: {paths9}")
  # Expected: {'output': '/path/to/cwd/feature_analysis_detect.csv'}
@@ -416,7 +504,8 @@ if __name__ == '__main__':
  output_locations={},
  basename="crashed_module", # Used for both code and program defaults
  language="java",
- file_extension=".java"
+ file_extension=".java",
+ context_config={}
  )
  print(f"Result: {paths10}")
  # Expected: {
@@ -431,7 +520,8 @@ if __name__ == '__main__':
  output_locations={},
  basename="module_to_verify",
  language="python",
- file_extension=".py"
+ file_extension=".py",
+ context_config={}
  )
  print(f"Result: {paths11}")
  # Expected: {
@@ -449,7 +539,8 @@ if __name__ == '__main__':
  output_locations={'output_program': test_dir_verify_prog + os.path.sep},
  basename="module_to_verify",
  language="python",
- file_extension=".py"
+ file_extension=".py",
+ context_config={}
  )
  print(f"Result: {paths12}")
  # Expected: {
@@ -468,7 +559,8 @@ if __name__ == '__main__':
  output_locations={},
  basename="another_module_verify",
  language="python",
- file_extension=".py"
+ file_extension=".py",
+ context_config={}
  )
  print(f"Result: {paths13}")
  # Expected: {
pdd/generate_test.py CHANGED
@@ -72,19 +72,34 @@ def generate_test(
  model_name = response['model_name']
  result = response['result']

+ # Validate that we got a non-empty result
+ if not result or not result.strip():
+ raise ValueError(f"LLM test generation returned empty result. Model: {model_name}, Cost: ${response['cost']:.6f}")
+
  if verbose:
  console.print(Markdown(result))
  console.print(f"[bold green]Initial generation cost: ${total_cost:.6f}[/bold green]")

  # Step 4: Check if generation is complete
  last_600_chars = result[-600:] if len(result) > 600 else result
- reasoning, is_finished, check_cost, check_model = unfinished_prompt(
- prompt_text=last_600_chars,
- strength=strength,
- temperature=temperature,
- time=time,
- verbose=verbose
- )
+
+ # Validate that the last_600_chars is not empty after stripping
+ if not last_600_chars.strip():
+ # If the tail is empty, assume generation is complete
+ if verbose:
+ console.print("[bold yellow]Last 600 chars are empty, assuming generation is complete[/bold yellow]")
+ reasoning = "Generation appears complete (tail is empty)"
+ is_finished = True
+ check_cost = 0.0
+ check_model = model_name
+ else:
+ reasoning, is_finished, check_cost, check_model = unfinished_prompt(
+ prompt_text=last_600_chars,
+ strength=strength,
+ temperature=temperature,
+ time=time,
+ verbose=verbose
+ )
  total_cost += check_cost

  if not is_finished:
@@ -104,15 +119,37 @@ def generate_test(
  model_name = continue_model

  # Process the final result
- processed_result, post_cost, post_model = postprocess(
- result,
- language=language,
- strength=EXTRACTION_STRENGTH,
- temperature=temperature,
- time=time,
- verbose=verbose
- )
- total_cost += post_cost
+ try:
+ processed_result, post_cost, post_model = postprocess(
+ result,
+ language=language,
+ strength=EXTRACTION_STRENGTH,
+ temperature=temperature,
+ time=time,
+ verbose=verbose
+ )
+ total_cost += post_cost
+ except Exception as e:
+ console.print(f"[bold red]Postprocess failed: {str(e)}[/bold red]")
+ console.print(f"[bold yellow]Falling back to raw result[/bold yellow]")
+
+ # Try to extract code blocks directly from the raw result
+ import re
+ code_blocks = re.findall(r'```(?:python)?\s*(.*?)```', result, re.DOTALL | re.IGNORECASE)
+
+ if code_blocks:
+ # Use the first substantial code block
+ for block in code_blocks:
+ if len(block.strip()) > 100 and ('def test_' in block or 'import' in block):
+ processed_result = block.strip()
+ break
+ else:
+ processed_result = code_blocks[0].strip() if code_blocks else result
+ else:
+ # No code blocks found, use raw result
+ processed_result = result
+
+ post_cost = 0.0

  # Step 5: Print total cost if verbose
  if verbose:
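Editor's note: when postprocess raises, the fallback above pulls fenced code blocks straight out of the raw LLM response. A standalone demonstration of the same regular expression (the sample response text is invented):

    import re

    raw = "Here are the tests:\n```python\nimport pytest\n\ndef test_add():\n    assert 1 + 1 == 2\n```"
    code_blocks = re.findall(r'```(?:python)?\s*(.*?)```', raw, re.DOTALL | re.IGNORECASE)
    print(code_blocks[0].strip())   # prints the test module body without the surrounding fences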
pdd/llm_invoke.py CHANGED
@@ -81,6 +81,58 @@ from pdd import DEFAULT_LLM_MODEL
  # Opt-in to future pandas behavior regarding downcasting
  pd.set_option('future.no_silent_downcasting', True)

+
+ def _is_wsl_environment() -> bool:
+ """
+ Detect if we're running in WSL (Windows Subsystem for Linux) environment.
+
+ Returns:
+ True if running in WSL, False otherwise
+ """
+ try:
+ # Check for WSL-specific indicators
+ if os.path.exists('/proc/version'):
+ with open('/proc/version', 'r') as f:
+ version_info = f.read().lower()
+ return 'microsoft' in version_info or 'wsl' in version_info
+
+ # Alternative check: WSL_DISTRO_NAME environment variable
+ if os.getenv('WSL_DISTRO_NAME'):
+ return True
+
+ # Check for Windows-style paths in PATH
+ path_env = os.getenv('PATH', '')
+ return '/mnt/c/' in path_env.lower()
+
+ except Exception:
+ return False
+
+
+ def _get_environment_info() -> Dict[str, str]:
+ """
+ Get environment information for debugging and error reporting.
+
+ Returns:
+ Dictionary containing environment details
+ """
+ import platform
+
+ info = {
+ 'platform': platform.system(),
+ 'platform_release': platform.release(),
+ 'platform_version': platform.version(),
+ 'architecture': platform.machine(),
+ 'is_wsl': str(_is_wsl_environment()),
+ 'python_version': platform.python_version(),
+ }
+
+ # Add WSL-specific information
+ if _is_wsl_environment():
+ info['wsl_distro'] = os.getenv('WSL_DISTRO_NAME', 'unknown')
+ info['wsl_interop'] = os.getenv('WSL_INTEROP', 'not_set')
+
+ return info
+
  # <<< SET LITELLM DEBUG LOGGING >>>
  # os.environ['LITELLM_LOG'] = 'DEBUG' # Keep commented out unless debugging LiteLLM itself

@@ -164,6 +216,12 @@ GCS_REGION_NAME = os.getenv("GCS_REGION_NAME", "auto") # Often 'auto' works for
  GCS_HMAC_ACCESS_KEY_ID = os.getenv("GCS_HMAC_ACCESS_KEY_ID") # Load HMAC Key ID
  GCS_HMAC_SECRET_ACCESS_KEY = os.getenv("GCS_HMAC_SECRET_ACCESS_KEY") # Load HMAC Secret

+ # Sanitize GCS credentials to handle WSL environment issues
+ if GCS_HMAC_ACCESS_KEY_ID:
+ GCS_HMAC_ACCESS_KEY_ID = GCS_HMAC_ACCESS_KEY_ID.strip()
+ if GCS_HMAC_SECRET_ACCESS_KEY:
+ GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
+
  cache_configured = False

  if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
@@ -448,6 +506,54 @@ def _select_model_candidates(
  return candidates


+ def _sanitize_api_key(key_value: str) -> str:
+ """
+ Sanitize API key by removing whitespace and carriage returns.
+
+ This fixes WSL environment issues where API keys may contain trailing \r characters
+ that make them invalid for HTTP headers.
+
+ Args:
+ key_value: The raw API key value from environment
+
+ Returns:
+ Sanitized API key with whitespace and carriage returns removed
+
+ Raises:
+ ValueError: If the API key format is invalid after sanitization
+ """
+ if not key_value:
+ return key_value
+
+ # Strip all whitespace including carriage returns, newlines, etc.
+ sanitized = key_value.strip()
+
+ # Additional validation: ensure no remaining control characters
+ if any(ord(c) < 32 for c in sanitized):
+ logger.warning("API key contains control characters that may cause issues")
+ # Remove any remaining control characters
+ sanitized = ''.join(c for c in sanitized if ord(c) >= 32)
+
+ # Validate API key format (basic checks)
+ if sanitized:
+ # Check for common API key patterns
+ if len(sanitized) < 10:
+ logger.warning(f"API key appears too short ({len(sanitized)} characters) - may be invalid")
+
+ # Check for invalid characters in API keys (should be printable ASCII)
+ if not all(32 <= ord(c) <= 126 for c in sanitized):
+ logger.warning("API key contains non-printable characters")
+
+ # Check for WSL-specific issues (detect if original had carriage returns)
+ if key_value != sanitized and '\r' in key_value:
+ if _is_wsl_environment():
+ logger.info("Detected and fixed WSL line ending issue in API key")
+ else:
+ logger.info("Detected and fixed line ending issue in API key")
+
+ return sanitized
+
+
  def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
  """Checks for API key in env, prompts user if missing, and updates .env."""
  key_name = model_info.get('api_key')
@@ -458,6 +564,8 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
  return True # Assume key is handled elsewhere or not needed

  key_value = os.getenv(key_name)
+ if key_value:
+ key_value = _sanitize_api_key(key_value)

  if key_value:
  if verbose:
@@ -473,6 +581,9 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
  logger.error("No API key provided. Cannot proceed with this model.")
  return False

+ # Sanitize the user-provided key
+ user_provided_key = _sanitize_api_key(user_provided_key)
+
  # Set environment variable for the current process
  os.environ[key_name] = user_provided_key
  logger.info(f"API key '{key_name}' set for the current session.")
@@ -767,6 +878,7 @@ def llm_invoke(
  elif api_key_name_from_csv: # For other api_key_names specified in CSV (e.g., OPENAI_API_KEY, or a direct VERTEX_AI_API_KEY string)
  key_value = os.getenv(api_key_name_from_csv)
  if key_value:
+ key_value = _sanitize_api_key(key_value)
  litellm_kwargs["api_key"] = key_value
  if verbose:
  logger.info(f"[INFO] Explicitly passing API key from env var '{api_key_name_from_csv}' as 'api_key' parameter to LiteLLM.")
@@ -932,6 +1044,46 @@ def llm_invoke(
  # Result (String or Pydantic)
  try:
  raw_result = resp_item.choices[0].message.content
+
+ # Check if raw_result is None (likely cached corrupted data)
+ if raw_result is None:
+ logger.warning(f"[WARNING] LLM returned None content for item {i}, likely due to corrupted cache. Retrying with cache bypass...")
+ # Retry with cache bypass by modifying the prompt slightly
+ if not use_batch_mode and prompt and input_json is not None:
+ # Add a small space to bypass cache
+ modified_prompt = prompt + " "
+ try:
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+ # Disable cache for retry
+ litellm.cache = None
+ retry_response = litellm.completion(
+ model=model_name_litellm,
+ messages=retry_messages,
+ temperature=temperature,
+ response_format=response_format,
+ max_completion_tokens=max_tokens,
+ **time_kwargs
+ )
+ # Re-enable cache
+ litellm.cache = Cache()
+ # Extract result from retry
+ retry_raw_result = retry_response.choices[0].message.content
+ if retry_raw_result is not None:
+ logger.info(f"[SUCCESS] Cache bypass retry succeeded for item {i}")
+ raw_result = retry_raw_result
+ else:
+ logger.error(f"[ERROR] Cache bypass retry also returned None for item {i}")
+ results.append("ERROR: LLM returned None content even after cache bypass")
+ continue
+ except Exception as retry_e:
+ logger.error(f"[ERROR] Cache bypass retry failed for item {i}: {retry_e}")
+ results.append(f"ERROR: LLM returned None content and retry failed: {retry_e}")
+ continue
+ else:
+ logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
+ results.append("ERROR: LLM returned None content and cannot retry")
+ continue
+
  if output_pydantic:
  parsed_result = None
  json_string_to_parse = None
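Editor's note: the retry above works around a cache entry whose stored completion is None by perturbing the prompt (which changes the cache key) and temporarily disabling the LiteLLM cache. The same idea in miniature, with a plain dict standing in for the cache and a stub standing in for the model call:

    cache = {"prompt": None}   # corrupted entry: the cached completion is None

    def complete(prompt, use_cache=True):
        if use_cache and prompt in cache:
            return cache[prompt]
        return f"fresh completion for {prompt!r}"   # stand-in for a real LLM call

    result = complete("prompt")
    if result is None:                                        # cached value is unusable
        result = complete("prompt" + " ", use_cache=False)    # perturb the key and skip the cache
    print(result)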
@@ -1064,6 +1216,16 @@ def llm_invoke(
  # --- 6b. Handle Invocation Errors ---
  except openai.AuthenticationError as e:
  last_exception = e
+ error_message = str(e)
+
+ # Check for WSL-specific issues in authentication errors
+ if _is_wsl_environment() and ('Illegal header value' in error_message or '\r' in error_message):
+ logger.warning(f"[WSL AUTH ERROR] Authentication failed for {model_name_litellm} - detected WSL line ending issue")
+ logger.warning("[WSL AUTH ERROR] This is likely caused by API key environment variables containing carriage returns")
+ logger.warning("[WSL AUTH ERROR] Try setting your API key again or check your .env file for line ending issues")
+ env_info = _get_environment_info()
+ logger.debug(f"Environment info: {env_info}")
+
  if newly_acquired_keys.get(api_key_name):
  logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} with the newly provided key for '{api_key_name}'. Please check the key and try again.")
  # Invalidate the key in env for this session to force re-prompt on retry