pdd-cli 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

pdd/llm_invoke.py CHANGED
@@ -81,6 +81,58 @@ from pdd import DEFAULT_LLM_MODEL
81
81
  # Opt-in to future pandas behavior regarding downcasting
82
82
  pd.set_option('future.no_silent_downcasting', True)
83
83
 
84
+
85
def _is_wsl_environment() -> bool:
    """
    Detect whether we are running under WSL (Windows Subsystem for Linux).

    Three indicators are checked in order; the first positive one wins:

    1. ``/proc/version`` mentions "microsoft" or "wsl".
    2. The ``WSL_DISTRO_NAME`` environment variable is set and non-empty.
    3. The ``PATH`` environment variable contains a ``/mnt/c/`` entry.

    Returns:
        True if any indicator matches, False otherwise. Detection is
        best-effort and never raises.
    """
    # /proc/version exists on every Linux system, so a negative (or failed)
    # read must fall through to the remaining checks instead of returning.
    try:
        with open('/proc/version', 'r') as f:
            version_info = f.read().lower()
    except (OSError, ValueError):
        # Missing/unreadable file or undecodable content: no information here.
        version_info = ''

    if 'microsoft' in version_info or 'wsl' in version_info:
        return True

    # Alternative check: WSL_DISTRO_NAME environment variable
    if os.getenv('WSL_DISTRO_NAME'):
        return True

    # Last resort: look for Windows drive mounts in PATH
    path_env = os.getenv('PATH', '')
    return '/mnt/c/' in path_env.lower()
109
+
110
+
111
def _get_environment_info() -> Dict[str, str]:
    """
    Collect environment information for debugging and error reporting.

    Returns:
        Dictionary of string values with the keys ``platform``,
        ``platform_release``, ``platform_version``, ``architecture``,
        ``is_wsl`` and ``python_version``. When WSL is detected, the keys
        ``wsl_distro`` and ``wsl_interop`` are added as well.
    """
    import platform

    # Run detection once so 'is_wsl' and the WSL-specific keys below always
    # agree, and /proc/version is not read twice.
    is_wsl = _is_wsl_environment()

    info = {
        'platform': platform.system(),
        'platform_release': platform.release(),
        'platform_version': platform.version(),
        'architecture': platform.machine(),
        'is_wsl': str(is_wsl),
        'python_version': platform.python_version(),
    }

    # Add WSL-specific information
    if is_wsl:
        info['wsl_distro'] = os.getenv('WSL_DISTRO_NAME', 'unknown')
        info['wsl_interop'] = os.getenv('WSL_INTEROP', 'not_set')

    return info
135
+
84
136
  # <<< SET LITELLM DEBUG LOGGING >>>
85
137
  # os.environ['LITELLM_LOG'] = 'DEBUG' # Keep commented out unless debugging LiteLLM itself
86
138
 
@@ -164,6 +216,12 @@ GCS_REGION_NAME = os.getenv("GCS_REGION_NAME", "auto") # Often 'auto' works for
164
216
  GCS_HMAC_ACCESS_KEY_ID = os.getenv("GCS_HMAC_ACCESS_KEY_ID") # Load HMAC Key ID
165
217
  GCS_HMAC_SECRET_ACCESS_KEY = os.getenv("GCS_HMAC_SECRET_ACCESS_KEY") # Load HMAC Secret
166
218
 
219
+ # Sanitize GCS credentials to handle WSL environment issues
220
+ if GCS_HMAC_ACCESS_KEY_ID:
221
+ GCS_HMAC_ACCESS_KEY_ID = GCS_HMAC_ACCESS_KEY_ID.strip()
222
+ if GCS_HMAC_SECRET_ACCESS_KEY:
223
+ GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
224
+
167
225
  cache_configured = False
168
226
 
169
227
  if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
@@ -448,6 +506,54 @@ def _select_model_candidates(
448
506
  return candidates
449
507
 
450
508
 
509
def _sanitize_api_key(key_value: str) -> str:
    """
    Sanitize an API key by removing surrounding whitespace and control characters.

    This fixes WSL environment issues where API keys may contain trailing \\r
    characters that make them invalid for HTTP headers.

    Args:
        key_value: The raw API key value from the environment or user input.

    Returns:
        The key with leading/trailing whitespace and all ASCII control
        characters (code points below 32, and DEL) removed. Falsy input
        (``None`` or ``""``) is returned unchanged.

    Note:
        This function never raises. Suspicious keys (too short, or containing
        characters outside printable ASCII) are only reported via log warnings.
    """
    if not key_value:
        return key_value

    # Strip all surrounding whitespace including carriage returns, newlines, etc.
    sanitized = key_value.strip()

    # Remove embedded control characters. DEL (0x7f) is a control character
    # too, even though it sits above the 0-31 range.
    if any(ord(c) < 32 or ord(c) == 127 for c in sanitized):
        logger.warning("API key contains control characters that may cause issues")
        sanitized = ''.join(c for c in sanitized if ord(c) >= 32 and ord(c) != 127)
        # Dropping a trailing/leading control character can expose whitespace
        # that the first strip() could not reach.
        sanitized = sanitized.strip()

    # Validate API key format (basic checks, warnings only)
    if sanitized:
        if len(sanitized) < 10:
            logger.warning(f"API key appears too short ({len(sanitized)} characters) - may be invalid")

        # Anything left outside printable ASCII is kept but flagged.
        if not all(32 <= ord(c) <= 126 for c in sanitized):
            logger.warning("API key contains non-printable characters")

    # Report when the original value carried carriage returns that were removed
    if key_value != sanitized and '\r' in key_value:
        if _is_wsl_environment():
            logger.info("Detected and fixed WSL line ending issue in API key")
        else:
            logger.info("Detected and fixed line ending issue in API key")

    return sanitized
555
+
556
+
451
557
  def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
452
558
  """Checks for API key in env, prompts user if missing, and updates .env."""
453
559
  key_name = model_info.get('api_key')
@@ -458,6 +564,8 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
458
564
  return True # Assume key is handled elsewhere or not needed
459
565
 
460
566
  key_value = os.getenv(key_name)
567
+ if key_value:
568
+ key_value = _sanitize_api_key(key_value)
461
569
 
462
570
  if key_value:
463
571
  if verbose:
@@ -473,6 +581,9 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
473
581
  logger.error("No API key provided. Cannot proceed with this model.")
474
582
  return False
475
583
 
584
+ # Sanitize the user-provided key
585
+ user_provided_key = _sanitize_api_key(user_provided_key)
586
+
476
587
  # Set environment variable for the current process
477
588
  os.environ[key_name] = user_provided_key
478
589
  logger.info(f"API key '{key_name}' set for the current session.")
@@ -767,6 +878,7 @@ def llm_invoke(
767
878
  elif api_key_name_from_csv: # For other api_key_names specified in CSV (e.g., OPENAI_API_KEY, or a direct VERTEX_AI_API_KEY string)
768
879
  key_value = os.getenv(api_key_name_from_csv)
769
880
  if key_value:
881
+ key_value = _sanitize_api_key(key_value)
770
882
  litellm_kwargs["api_key"] = key_value
771
883
  if verbose:
772
884
  logger.info(f"[INFO] Explicitly passing API key from env var '{api_key_name_from_csv}' as 'api_key' parameter to LiteLLM.")
@@ -932,6 +1044,46 @@ def llm_invoke(
932
1044
  # Result (String or Pydantic)
933
1045
  try:
934
1046
  raw_result = resp_item.choices[0].message.content
1047
+
1048
+ # Check if raw_result is None (likely cached corrupted data)
1049
+ if raw_result is None:
1050
+ logger.warning(f"[WARNING] LLM returned None content for item {i}, likely due to corrupted cache. Retrying with cache bypass...")
1051
+ # Retry with cache bypass by modifying the prompt slightly
1052
+ if not use_batch_mode and prompt and input_json is not None:
1053
+ # Add a small space to bypass cache
1054
+ modified_prompt = prompt + " "
1055
+ try:
1056
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
1057
+ # Disable cache for retry
1058
+ litellm.cache = None
1059
+ retry_response = litellm.completion(
1060
+ model=model_name_litellm,
1061
+ messages=retry_messages,
1062
+ temperature=temperature,
1063
+ response_format=response_format,
1064
+ max_completion_tokens=max_tokens,
1065
+ **time_kwargs
1066
+ )
1067
+ # Re-enable cache
1068
+ litellm.cache = Cache()
1069
+ # Extract result from retry
1070
+ retry_raw_result = retry_response.choices[0].message.content
1071
+ if retry_raw_result is not None:
1072
+ logger.info(f"[SUCCESS] Cache bypass retry succeeded for item {i}")
1073
+ raw_result = retry_raw_result
1074
+ else:
1075
+ logger.error(f"[ERROR] Cache bypass retry also returned None for item {i}")
1076
+ results.append("ERROR: LLM returned None content even after cache bypass")
1077
+ continue
1078
+ except Exception as retry_e:
1079
+ logger.error(f"[ERROR] Cache bypass retry failed for item {i}: {retry_e}")
1080
+ results.append(f"ERROR: LLM returned None content and retry failed: {retry_e}")
1081
+ continue
1082
+ else:
1083
+ logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
1084
+ results.append("ERROR: LLM returned None content and cannot retry")
1085
+ continue
1086
+
935
1087
  if output_pydantic:
936
1088
  parsed_result = None
937
1089
  json_string_to_parse = None
@@ -1064,6 +1216,16 @@ def llm_invoke(
1064
1216
  # --- 6b. Handle Invocation Errors ---
1065
1217
  except openai.AuthenticationError as e:
1066
1218
  last_exception = e
1219
+ error_message = str(e)
1220
+
1221
+ # Check for WSL-specific issues in authentication errors
1222
+ if _is_wsl_environment() and ('Illegal header value' in error_message or '\r' in error_message):
1223
+ logger.warning(f"[WSL AUTH ERROR] Authentication failed for {model_name_litellm} - detected WSL line ending issue")
1224
+ logger.warning("[WSL AUTH ERROR] This is likely caused by API key environment variables containing carriage returns")
1225
+ logger.warning("[WSL AUTH ERROR] Try setting your API key again or check your .env file for line ending issues")
1226
+ env_info = _get_environment_info()
1227
+ logger.debug(f"Environment info: {env_info}")
1228
+
1067
1229
  if newly_acquired_keys.get(api_key_name):
1068
1230
  logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} with the newly provided key for '{api_key_name}'. Please check the key and try again.")
1069
1231
  # Invalidate the key in env for this session to force re-prompt on retry
pdd/preprocess_main.py CHANGED
@@ -27,7 +27,7 @@ def preprocess_main(
27
27
  # Construct file paths
28
28
  input_file_paths = {"prompt_file": prompt_file}
29
29
  command_options = {"output": output}
30
- input_strings, output_file_paths, _ = construct_paths(
30
+ resolved_config, input_strings, output_file_paths, _ = construct_paths(
31
31
  input_file_paths=input_file_paths,
32
32
  force=ctx.obj.get("force", False),
33
33
  quiet=ctx.obj.get("quiet", False),
@@ -57,19 +57,19 @@ Based on the fingerprint and the provided diffs, determine the most logical and
57
57
  Respond **only** with a single JSON object. Do not add any explanatory text before or after the JSON block.
58
58
 
59
59
  ```json
60
- {
60
+ {{
61
61
  "next_operation": "generate|update|fix|test|verify|fail_and_request_manual_merge",
62
62
  "reason": "A clear, concise explanation of the situation and the rationale for your chosen operation.",
63
- "merge_strategy": {
63
+ "merge_strategy": {{
64
64
  "type": "preserve_code_and_regenerate|update_prompt_from_code|three_way_merge_safe|three_way_merge_unsafe|none",
65
65
  "description": "A human-readable description of the merge plan.",
66
66
  "preservation_notes": [
67
67
  "A list of specific, actionable notes for the merge process. For example: 'Preserve the body of the `calculate_total` function in the code file.' or 'Merge the new tests from the user, then regenerate the rest of the test file.'"
68
68
  ]
69
- },
69
+ }},
70
70
  "confidence": 0.9,
71
71
  "follow_up_operations": ["A list of likely PDD operations to run after this one succeeds (e.g., 'test', 'verify')."]
72
- }
72
+ }}
73
73
  ```
74
74
 
75
75
  ### `merge_strategy.type` Definitions:
pdd/split_main.py CHANGED
@@ -48,7 +48,7 @@ def split_main(
48
48
  }
49
49
 
50
50
  # Get input strings and output paths
51
- input_strings, output_file_paths, _ = construct_paths(
51
+ resolved_config, input_strings, output_file_paths, _ = construct_paths(
52
52
  input_file_paths=input_file_paths,
53
53
  force=ctx.obj.get('force', False),
54
54
  quiet=ctx.obj.get('quiet', False),