pdd-cli 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +1 -1
- pdd/auto_deps_main.py +1 -1
- pdd/bug_main.py +1 -1
- pdd/change_main.py +1 -1
- pdd/cli.py +81 -3
- pdd/cmd_test_main.py +3 -3
- pdd/code_generator_main.py +3 -2
- pdd/conflicts_main.py +1 -1
- pdd/construct_paths.py +245 -19
- pdd/context_generator_main.py +27 -12
- pdd/crash_main.py +44 -51
- pdd/detect_change_main.py +1 -1
- pdd/fix_code_module_errors.py +12 -0
- pdd/fix_main.py +2 -2
- pdd/fix_verification_main.py +1 -1
- pdd/generate_output_paths.py +113 -21
- pdd/generate_test.py +53 -16
- pdd/llm_invoke.py +162 -0
- pdd/preprocess_main.py +1 -1
- pdd/prompts/sync_analysis_LLM.prompt +4 -4
- pdd/split_main.py +1 -1
- pdd/sync_determine_operation.py +921 -456
- pdd/sync_main.py +333 -0
- pdd/sync_orchestration.py +639 -0
- pdd/trace_main.py +1 -1
- pdd/update_main.py +7 -2
- pdd_cli-0.0.43.dist-info/METADATA +307 -0
- {pdd_cli-0.0.41.dist-info → pdd_cli-0.0.43.dist-info}/RECORD +32 -30
- pdd_cli-0.0.41.dist-info/METADATA +0 -269
- {pdd_cli-0.0.41.dist-info → pdd_cli-0.0.43.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.41.dist-info → pdd_cli-0.0.43.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.41.dist-info → pdd_cli-0.0.43.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.41.dist-info → pdd_cli-0.0.43.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
|
@@ -81,6 +81,58 @@ from pdd import DEFAULT_LLM_MODEL
|
|
|
81
81
|
# Opt-in to future pandas behavior regarding downcasting
|
|
82
82
|
pd.set_option('future.no_silent_downcasting', True)
|
|
83
83
|
|
|
84
|
+
|
|
85
|
+
def _is_wsl_environment() -> bool:
    """
    Detect whether we're running in a WSL (Windows Subsystem for Linux) environment.

    Three indicators are consulted in order; the first positive one wins:

    1. ``/proc/version`` mentions "microsoft" or "wsl" (case-insensitive).
    2. The ``WSL_DISTRO_NAME`` environment variable is set to a non-empty value.
    3. ``PATH`` contains a ``/mnt/c/`` entry (Windows drive mounted in WSL).

    Returns:
        True if any indicator points to WSL, False otherwise. Detection is
        best-effort and never raises.
    """
    try:
        try:
            # errors='replace' keeps an unexpected byte from aborting detection.
            with open('/proc/version', 'r', encoding='utf-8', errors='replace') as f:
                version_info = f.read().lower()
        except OSError:
            # Missing or unreadable file (e.g. macOS, Windows): rely on the
            # environment-based checks below instead of giving up.
            version_info = ''

        if 'microsoft' in version_info or 'wsl' in version_info:
            return True

        # Alternative check: WSL_DISTRO_NAME environment variable.
        # A negative /proc/version result must NOT short-circuit this,
        # otherwise the fallbacks are unreachable on every Linux system.
        if os.getenv('WSL_DISTRO_NAME'):
            return True

        # Check for Windows-style paths in PATH
        path_env = os.getenv('PATH', '')
        return '/mnt/c/' in path_env.lower()

    except Exception:
        # Deliberate best-effort: environment detection must never crash the caller.
        return False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _get_environment_info() -> Dict[str, str]:
    """
    Get environment information for debugging and error reporting.

    Returns:
        Dictionary of string values with the keys ``platform``,
        ``platform_release``, ``platform_version``, ``architecture``,
        ``is_wsl`` (``"True"`` or ``"False"``) and ``python_version``.
        When WSL is detected, ``wsl_distro`` and ``wsl_interop`` are added
        from the ``WSL_DISTRO_NAME`` and ``WSL_INTEROP`` environment
        variables, defaulting to ``'unknown'`` and ``'not_set'``.
    """
    import platform

    # Probe once so the 'is_wsl' value and the WSL-only keys below can never
    # disagree, and so the detection work is not repeated.
    is_wsl = _is_wsl_environment()

    info = {
        'platform': platform.system(),
        'platform_release': platform.release(),
        'platform_version': platform.version(),
        'architecture': platform.machine(),
        'is_wsl': str(is_wsl),
        'python_version': platform.python_version(),
    }

    # Add WSL-specific information
    if is_wsl:
        info['wsl_distro'] = os.getenv('WSL_DISTRO_NAME', 'unknown')
        info['wsl_interop'] = os.getenv('WSL_INTEROP', 'not_set')

    return info
|
|
135
|
+
|
|
84
136
|
# <<< SET LITELLM DEBUG LOGGING >>>
|
|
85
137
|
# os.environ['LITELLM_LOG'] = 'DEBUG' # Keep commented out unless debugging LiteLLM itself
|
|
86
138
|
|
|
@@ -164,6 +216,12 @@ GCS_REGION_NAME = os.getenv("GCS_REGION_NAME", "auto") # Often 'auto' works for
|
|
|
164
216
|
GCS_HMAC_ACCESS_KEY_ID = os.getenv("GCS_HMAC_ACCESS_KEY_ID") # Load HMAC Key ID
|
|
165
217
|
GCS_HMAC_SECRET_ACCESS_KEY = os.getenv("GCS_HMAC_SECRET_ACCESS_KEY") # Load HMAC Secret
|
|
166
218
|
|
|
219
|
+
# Sanitize GCS credentials to handle WSL environment issues
|
|
220
|
+
if GCS_HMAC_ACCESS_KEY_ID:
|
|
221
|
+
GCS_HMAC_ACCESS_KEY_ID = GCS_HMAC_ACCESS_KEY_ID.strip()
|
|
222
|
+
if GCS_HMAC_SECRET_ACCESS_KEY:
|
|
223
|
+
GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
|
|
224
|
+
|
|
167
225
|
cache_configured = False
|
|
168
226
|
|
|
169
227
|
if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
|
|
@@ -448,6 +506,54 @@ def _select_model_candidates(
|
|
|
448
506
|
return candidates
|
|
449
507
|
|
|
450
508
|
|
|
509
|
+
def _sanitize_api_key(key_value: str) -> str:
    """
    Sanitize an API key by removing surrounding whitespace and control characters.

    This fixes WSL environment issues where API keys may carry a trailing
    carriage-return (CR) character that makes them invalid for HTTP headers.

    Args:
        key_value: The raw API key value from the environment. Falsy values
            (``None`` or an empty string) are returned unchanged.

    Returns:
        The key with leading/trailing whitespace and every ASCII control
        character (code point below 32) removed. Keys that merely look
        suspicious (shorter than 10 characters, or containing characters
        outside printable ASCII) are logged as warnings but still returned;
        this function does not raise on a malformed key.
    """
    if not key_value:
        return key_value

    # Strip all surrounding whitespace including carriage returns, newlines, etc.
    sanitized = key_value.strip()

    # Additional validation: ensure no remaining control characters
    if any(ord(c) < 32 for c in sanitized):
        logger.warning("API key contains control characters that may cause issues")
        # Drop the control characters, then strip again: a control character
        # at either end shields adjacent whitespace from the first strip().
        sanitized = ''.join(c for c in sanitized if ord(c) >= 32).strip()

    # Validate API key format (basic checks; warnings only)
    if sanitized:
        if len(sanitized) < 10:
            logger.warning(f"API key appears too short ({len(sanitized)} characters) - may be invalid")

        # API keys are expected to be printable ASCII
        if not all(32 <= ord(c) <= 126 for c in sanitized):
            logger.warning("API key contains non-printable characters")

    # Report line-ending repairs. A CR is always removed above, so its
    # presence in the input implies the key was changed.
    if '\r' in key_value:
        if _is_wsl_environment():
            logger.info("Detected and fixed WSL line ending issue in API key")
        else:
            logger.info("Detected and fixed line ending issue in API key")

    return sanitized
|
|
555
|
+
|
|
556
|
+
|
|
451
557
|
def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
|
|
452
558
|
"""Checks for API key in env, prompts user if missing, and updates .env."""
|
|
453
559
|
key_name = model_info.get('api_key')
|
|
@@ -458,6 +564,8 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
|
|
|
458
564
|
return True # Assume key is handled elsewhere or not needed
|
|
459
565
|
|
|
460
566
|
key_value = os.getenv(key_name)
|
|
567
|
+
if key_value:
|
|
568
|
+
key_value = _sanitize_api_key(key_value)
|
|
461
569
|
|
|
462
570
|
if key_value:
|
|
463
571
|
if verbose:
|
|
@@ -473,6 +581,9 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
|
|
|
473
581
|
logger.error("No API key provided. Cannot proceed with this model.")
|
|
474
582
|
return False
|
|
475
583
|
|
|
584
|
+
# Sanitize the user-provided key
|
|
585
|
+
user_provided_key = _sanitize_api_key(user_provided_key)
|
|
586
|
+
|
|
476
587
|
# Set environment variable for the current process
|
|
477
588
|
os.environ[key_name] = user_provided_key
|
|
478
589
|
logger.info(f"API key '{key_name}' set for the current session.")
|
|
@@ -767,6 +878,7 @@ def llm_invoke(
|
|
|
767
878
|
elif api_key_name_from_csv: # For other api_key_names specified in CSV (e.g., OPENAI_API_KEY, or a direct VERTEX_AI_API_KEY string)
|
|
768
879
|
key_value = os.getenv(api_key_name_from_csv)
|
|
769
880
|
if key_value:
|
|
881
|
+
key_value = _sanitize_api_key(key_value)
|
|
770
882
|
litellm_kwargs["api_key"] = key_value
|
|
771
883
|
if verbose:
|
|
772
884
|
logger.info(f"[INFO] Explicitly passing API key from env var '{api_key_name_from_csv}' as 'api_key' parameter to LiteLLM.")
|
|
@@ -932,6 +1044,46 @@ def llm_invoke(
|
|
|
932
1044
|
# Result (String or Pydantic)
|
|
933
1045
|
try:
|
|
934
1046
|
raw_result = resp_item.choices[0].message.content
|
|
1047
|
+
|
|
1048
|
+
# Check if raw_result is None (likely cached corrupted data)
|
|
1049
|
+
if raw_result is None:
|
|
1050
|
+
logger.warning(f"[WARNING] LLM returned None content for item {i}, likely due to corrupted cache. Retrying with cache bypass...")
|
|
1051
|
+
# Retry with cache bypass by modifying the prompt slightly
|
|
1052
|
+
if not use_batch_mode and prompt and input_json is not None:
|
|
1053
|
+
# Add a small space to bypass cache
|
|
1054
|
+
modified_prompt = prompt + " "
|
|
1055
|
+
try:
|
|
1056
|
+
retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
|
|
1057
|
+
# Disable cache for retry
|
|
1058
|
+
litellm.cache = None
|
|
1059
|
+
retry_response = litellm.completion(
|
|
1060
|
+
model=model_name_litellm,
|
|
1061
|
+
messages=retry_messages,
|
|
1062
|
+
temperature=temperature,
|
|
1063
|
+
response_format=response_format,
|
|
1064
|
+
max_completion_tokens=max_tokens,
|
|
1065
|
+
**time_kwargs
|
|
1066
|
+
)
|
|
1067
|
+
# Re-enable cache
|
|
1068
|
+
litellm.cache = Cache()
|
|
1069
|
+
# Extract result from retry
|
|
1070
|
+
retry_raw_result = retry_response.choices[0].message.content
|
|
1071
|
+
if retry_raw_result is not None:
|
|
1072
|
+
logger.info(f"[SUCCESS] Cache bypass retry succeeded for item {i}")
|
|
1073
|
+
raw_result = retry_raw_result
|
|
1074
|
+
else:
|
|
1075
|
+
logger.error(f"[ERROR] Cache bypass retry also returned None for item {i}")
|
|
1076
|
+
results.append("ERROR: LLM returned None content even after cache bypass")
|
|
1077
|
+
continue
|
|
1078
|
+
except Exception as retry_e:
|
|
1079
|
+
logger.error(f"[ERROR] Cache bypass retry failed for item {i}: {retry_e}")
|
|
1080
|
+
results.append(f"ERROR: LLM returned None content and retry failed: {retry_e}")
|
|
1081
|
+
continue
|
|
1082
|
+
else:
|
|
1083
|
+
logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
|
|
1084
|
+
results.append("ERROR: LLM returned None content and cannot retry")
|
|
1085
|
+
continue
|
|
1086
|
+
|
|
935
1087
|
if output_pydantic:
|
|
936
1088
|
parsed_result = None
|
|
937
1089
|
json_string_to_parse = None
|
|
@@ -1064,6 +1216,16 @@ def llm_invoke(
|
|
|
1064
1216
|
# --- 6b. Handle Invocation Errors ---
|
|
1065
1217
|
except openai.AuthenticationError as e:
|
|
1066
1218
|
last_exception = e
|
|
1219
|
+
error_message = str(e)
|
|
1220
|
+
|
|
1221
|
+
# Check for WSL-specific issues in authentication errors
|
|
1222
|
+
if _is_wsl_environment() and ('Illegal header value' in error_message or '\r' in error_message):
|
|
1223
|
+
logger.warning(f"[WSL AUTH ERROR] Authentication failed for {model_name_litellm} - detected WSL line ending issue")
|
|
1224
|
+
logger.warning("[WSL AUTH ERROR] This is likely caused by API key environment variables containing carriage returns")
|
|
1225
|
+
logger.warning("[WSL AUTH ERROR] Try setting your API key again or check your .env file for line ending issues")
|
|
1226
|
+
env_info = _get_environment_info()
|
|
1227
|
+
logger.debug(f"Environment info: {env_info}")
|
|
1228
|
+
|
|
1067
1229
|
if newly_acquired_keys.get(api_key_name):
|
|
1068
1230
|
logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} with the newly provided key for '{api_key_name}'. Please check the key and try again.")
|
|
1069
1231
|
# Invalidate the key in env for this session to force re-prompt on retry
|
pdd/preprocess_main.py
CHANGED
|
@@ -27,7 +27,7 @@ def preprocess_main(
|
|
|
27
27
|
# Construct file paths
|
|
28
28
|
input_file_paths = {"prompt_file": prompt_file}
|
|
29
29
|
command_options = {"output": output}
|
|
30
|
-
input_strings, output_file_paths, _ = construct_paths(
|
|
30
|
+
resolved_config, input_strings, output_file_paths, _ = construct_paths(
|
|
31
31
|
input_file_paths=input_file_paths,
|
|
32
32
|
force=ctx.obj.get("force", False),
|
|
33
33
|
quiet=ctx.obj.get("quiet", False),
|
|
pdd/prompts/sync_analysis_LLM.prompt
CHANGED
|
@@ -57,19 +57,19 @@ Based on the fingerprint and the provided diffs, determine the most logical and
|
|
|
57
57
|
Respond **only** with a single JSON object. Do not add any explanatory text before or after the JSON block.
|
|
58
58
|
|
|
59
59
|
```json
|
|
60
|
-
{
|
|
60
|
+
{{
|
|
61
61
|
"next_operation": "generate|update|fix|test|verify|fail_and_request_manual_merge",
|
|
62
62
|
"reason": "A clear, concise explanation of the situation and the rationale for your chosen operation.",
|
|
63
|
-
"merge_strategy": {
|
|
63
|
+
"merge_strategy": {{
|
|
64
64
|
"type": "preserve_code_and_regenerate|update_prompt_from_code|three_way_merge_safe|three_way_merge_unsafe|none",
|
|
65
65
|
"description": "A human-readable description of the merge plan.",
|
|
66
66
|
"preservation_notes": [
|
|
67
67
|
"A list of specific, actionable notes for the merge process. For example: 'Preserve the body of the `calculate_total` function in the code file.' or 'Merge the new tests from the user, then regenerate the rest of the test file.'"
|
|
68
68
|
]
|
|
69
|
-
},
|
|
69
|
+
}},
|
|
70
70
|
"confidence": 0.9,
|
|
71
71
|
"follow_up_operations": ["A list of likely PDD operations to run after this one succeeds (e.g., 'test', 'verify')."]
|
|
72
|
-
}
|
|
72
|
+
}}
|
|
73
73
|
```
|
|
74
74
|
|
|
75
75
|
### `merge_strategy.type` Definitions:
|
pdd/split_main.py
CHANGED
|
@@ -48,7 +48,7 @@ def split_main(
|
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
# Get input strings and output paths
|
|
51
|
-
input_strings, output_file_paths, _ = construct_paths(
|
|
51
|
+
resolved_config, input_strings, output_file_paths, _ = construct_paths(
|
|
52
52
|
input_file_paths=input_file_paths,
|
|
53
53
|
force=ctx.obj.get('force', False),
|
|
54
54
|
quiet=ctx.obj.get('quiet', False),
|