pdd-cli 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +1 -1
- pdd/auto_deps_main.py +1 -1
- pdd/auto_update.py +73 -78
- pdd/bug_main.py +3 -3
- pdd/bug_to_unit_test.py +46 -38
- pdd/change.py +20 -13
- pdd/change_main.py +223 -163
- pdd/cli.py +192 -95
- pdd/cmd_test_main.py +51 -36
- pdd/code_generator_main.py +3 -2
- pdd/conflicts_main.py +1 -1
- pdd/construct_paths.py +221 -19
- pdd/context_generator_main.py +27 -12
- pdd/crash_main.py +44 -50
- pdd/data/llm_model.csv +1 -1
- pdd/detect_change_main.py +1 -1
- pdd/fix_code_module_errors.py +12 -0
- pdd/fix_main.py +2 -2
- pdd/fix_verification_errors.py +13 -0
- pdd/fix_verification_main.py +3 -3
- pdd/generate_output_paths.py +113 -21
- pdd/generate_test.py +53 -16
- pdd/llm_invoke.py +162 -0
- pdd/logo_animation.py +455 -0
- pdd/preprocess_main.py +1 -1
- pdd/process_csv_change.py +1 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +2 -1
- pdd/prompts/sync_analysis_LLM.prompt +82 -0
- pdd/split_main.py +1 -1
- pdd/sync_animation.py +643 -0
- pdd/sync_determine_operation.py +1039 -0
- pdd/sync_main.py +333 -0
- pdd/sync_orchestration.py +639 -0
- pdd/trace_main.py +1 -1
- pdd/update_main.py +7 -2
- pdd/xml_tagger.py +15 -6
- pdd_cli-0.0.42.dist-info/METADATA +307 -0
- {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/RECORD +42 -36
- pdd_cli-0.0.40.dist-info/METADATA +0 -269
- {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/top_level.txt +0 -0
pdd/generate_output_paths.py
CHANGED
|
@@ -26,6 +26,7 @@ COMMAND_OUTPUT_KEYS: Dict[str, List[str]] = {
|
|
|
26
26
|
'bug': ['output'],
|
|
27
27
|
'auto-deps': ['output'],
|
|
28
28
|
'verify': ['output_results', 'output_code', 'output_program'],
|
|
29
|
+
'sync': ['generate_output_path', 'test_output_path', 'example_output_path'],
|
|
29
30
|
}
|
|
30
31
|
|
|
31
32
|
# Define default filename patterns for each output key
|
|
@@ -48,8 +49,8 @@ DEFAULT_FILENAMES: Dict[str, Dict[str, str]] = {
|
|
|
48
49
|
},
|
|
49
50
|
'change': {'output': 'modified_{basename}.prompt'},
|
|
50
51
|
'update': {'output': 'modified_{basename}.prompt'}, # Consistent with change/split default
|
|
51
|
-
'detect': {'output': '{basename}_detect.csv'}, #
|
|
52
|
-
'conflicts': {'output': '{basename}_conflict.csv'}, #
|
|
52
|
+
'detect': {'output': '{basename}_detect.csv'}, # basename here is from change_file per construct_paths logic
|
|
53
|
+
'conflicts': {'output': '{basename}_conflict.csv'}, # basename here is combined sorted prompt basenames per construct_paths logic
|
|
53
54
|
'crash': {
|
|
54
55
|
'output': '{basename}_fixed{ext}',
|
|
55
56
|
# Using basename as program_basename isn't available here
|
|
@@ -63,6 +64,11 @@ DEFAULT_FILENAMES: Dict[str, Dict[str, str]] = {
|
|
|
63
64
|
'output_code': '{basename}_verified{ext}',
|
|
64
65
|
'output_program': '{basename}_program_verified{ext}',
|
|
65
66
|
},
|
|
67
|
+
'sync': {
|
|
68
|
+
'generate_output_path': '{basename}{ext}',
|
|
69
|
+
'test_output_path': 'test_{basename}{ext}',
|
|
70
|
+
'example_output_path': '{basename}_example{ext}',
|
|
71
|
+
},
|
|
66
72
|
}
|
|
67
73
|
|
|
68
74
|
# Define the mapping from command/output key to environment variables
|
|
@@ -96,6 +102,50 @@ ENV_VAR_MAP: Dict[str, Dict[str, str]] = {
|
|
|
96
102
|
'output_code': 'PDD_VERIFY_CODE_OUTPUT_PATH',
|
|
97
103
|
'output_program': 'PDD_VERIFY_PROGRAM_OUTPUT_PATH',
|
|
98
104
|
},
|
|
105
|
+
'sync': {
|
|
106
|
+
'generate_output_path': 'PDD_GENERATE_OUTPUT_PATH',
|
|
107
|
+
'test_output_path': 'PDD_TEST_OUTPUT_PATH',
|
|
108
|
+
'example_output_path': 'PDD_EXAMPLE_OUTPUT_PATH',
|
|
109
|
+
},
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
# Define mapping from context config keys to output keys for different commands
|
|
113
|
+
# Maps, per command, each output key to the .pddrc context-config key that
# supplies its path. A value of None means "this output never takes its path
# from the context configuration", so the inner value type is Optional[str].
CONTEXT_CONFIG_MAP: Dict[str, Dict[str, Optional[str]]] = {
    'generate': {'output': 'generate_output_path'},
    'example': {'output': 'example_output_path'},
    'test': {'output': 'test_output_path'},
    'sync': {
        'generate_output_path': 'generate_output_path',
        'test_output_path': 'test_output_path',
        'example_output_path': 'example_output_path',
    },
    # For other commands, they can use the general mapping if needed
    'preprocess': {'output': 'generate_output_path'},  # fallback
    'fix': {
        'output_test': 'test_output_path',
        'output_code': 'generate_output_path',
        'output_results': 'generate_output_path',  # fallback for results
    },
    'split': {
        'output_sub': 'generate_output_path',  # fallback
        'output_modified': 'generate_output_path',  # fallback
    },
    'change': {'output': 'generate_output_path'},
    'update': {'output': 'generate_output_path'},
    'detect': {'output': 'generate_output_path'},
    'conflicts': {'output': 'generate_output_path'},
    'crash': {
        'output': None,  # Use default CWD behavior, not context paths
        'output_program': None,  # Use default CWD behavior, not context paths
    },
    'trace': {'output': 'generate_output_path'},
    'bug': {'output': 'test_output_path'},
    'auto-deps': {'output': 'generate_output_path'},
    'verify': {
        'output_results': 'generate_output_path',
        'output_code': 'generate_output_path',
        'output_program': 'generate_output_path',
    },
}
|
|
100
150
|
|
|
101
151
|
# --- Helper Function ---
|
|
@@ -127,14 +177,15 @@ def generate_output_paths(
|
|
|
127
177
|
output_locations: Dict[str, Optional[str]],
|
|
128
178
|
basename: str,
|
|
129
179
|
language: str,
|
|
130
|
-
file_extension: str
|
|
180
|
+
file_extension: str,
|
|
181
|
+
context_config: Optional[Dict[str, str]] = None
|
|
131
182
|
) -> Dict[str, str]:
|
|
132
183
|
"""
|
|
133
184
|
Generates the full, absolute output paths for a given PDD command.
|
|
134
185
|
|
|
135
|
-
It prioritizes user-specified paths (--output options), then
|
|
136
|
-
|
|
137
|
-
current working directory.
|
|
186
|
+
It prioritizes user-specified paths (--output options), then context
|
|
187
|
+
configuration from .pddrc, then environment variables, and finally
|
|
188
|
+
falls back to default naming conventions in the current working directory.
|
|
138
189
|
|
|
139
190
|
Args:
|
|
140
191
|
command: The PDD command being executed (e.g., 'generate', 'fix').
|
|
@@ -146,6 +197,8 @@ def generate_output_paths(
|
|
|
146
197
|
language: The programming language associated with the operation.
|
|
147
198
|
file_extension: The file extension (including '.') for the language,
|
|
148
199
|
used when default patterns require it.
|
|
200
|
+
context_config: Optional dictionary with context-specific paths from .pddrc
|
|
201
|
+
configuration (e.g., {'generate_output_path': 'src/'}).
|
|
149
202
|
|
|
150
203
|
Returns:
|
|
151
204
|
A dictionary where keys are the standardized output identifiers
|
|
@@ -155,8 +208,10 @@ def generate_output_paths(
|
|
|
155
208
|
"""
|
|
156
209
|
logger.debug(f"Generating output paths for command: {command}")
|
|
157
210
|
logger.debug(f"User output locations: {output_locations}")
|
|
211
|
+
logger.debug(f"Context config: {context_config}")
|
|
158
212
|
logger.debug(f"Basename: {basename}, Language: {language}, Extension: {file_extension}")
|
|
159
213
|
|
|
214
|
+
context_config = context_config or {}
|
|
160
215
|
result_paths: Dict[str, str] = {}
|
|
161
216
|
|
|
162
217
|
if not basename:
|
|
@@ -183,6 +238,11 @@ def generate_output_paths(
|
|
|
183
238
|
logger.debug(f"Processing output key: {output_key}")
|
|
184
239
|
|
|
185
240
|
user_path: Optional[str] = processed_output_locations.get(output_key)
|
|
241
|
+
|
|
242
|
+
# Get context configuration path for this output key
|
|
243
|
+
context_config_key = CONTEXT_CONFIG_MAP.get(command, {}).get(output_key)
|
|
244
|
+
context_path: Optional[str] = context_config.get(context_config_key) if context_config_key else None
|
|
245
|
+
|
|
186
246
|
env_var_name: Optional[str] = ENV_VAR_MAP.get(command, {}).get(output_key)
|
|
187
247
|
env_path: Optional[str] = os.environ.get(env_var_name) if env_var_name else None
|
|
188
248
|
|
|
@@ -215,7 +275,26 @@ def generate_output_paths(
|
|
|
215
275
|
logger.debug(f"User path '{user_path}' identified as a specific file path.")
|
|
216
276
|
final_path = user_path # Assume it's a full path or filename
|
|
217
277
|
|
|
218
|
-
# 2. Check
|
|
278
|
+
# 2. Check Context Configuration Path (.pddrc)
|
|
279
|
+
elif context_path:
|
|
280
|
+
source = "context"
|
|
281
|
+
# Check if the context path is a directory
|
|
282
|
+
is_dir = context_path.endswith(os.path.sep) or context_path.endswith('/')
|
|
283
|
+
if not is_dir:
|
|
284
|
+
try:
|
|
285
|
+
if os.path.exists(context_path) and os.path.isdir(context_path):
|
|
286
|
+
is_dir = True
|
|
287
|
+
except Exception as e:
|
|
288
|
+
logger.warning(f"Could not check if context path '{context_path}' is a directory: {e}")
|
|
289
|
+
|
|
290
|
+
if is_dir:
|
|
291
|
+
logger.debug(f"Context path '{context_path}' identified as a directory.")
|
|
292
|
+
final_path = os.path.join(context_path, default_filename)
|
|
293
|
+
else:
|
|
294
|
+
logger.debug(f"Context path '{context_path}' identified as a specific file path.")
|
|
295
|
+
final_path = context_path
|
|
296
|
+
|
|
297
|
+
# 3. Check Environment Variable Path
|
|
219
298
|
elif env_path:
|
|
220
299
|
source = "environment"
|
|
221
300
|
# Check if the environment variable points to a directory
|
|
@@ -234,7 +313,7 @@ def generate_output_paths(
|
|
|
234
313
|
logger.debug(f"Env path '{env_path}' identified as a specific file path.")
|
|
235
314
|
final_path = env_path # Assume it's a full path or filename
|
|
236
315
|
|
|
237
|
-
#
|
|
316
|
+
# 4. Use Default Naming Convention in CWD
|
|
238
317
|
else:
|
|
239
318
|
source = "default"
|
|
240
319
|
logger.debug(f"Using default filename '{default_filename}' in current directory.")
|
|
@@ -273,7 +352,8 @@ if __name__ == '__main__':
|
|
|
273
352
|
output_locations={}, # No user input
|
|
274
353
|
basename=mock_basename,
|
|
275
354
|
language=mock_language,
|
|
276
|
-
file_extension=mock_extension
|
|
355
|
+
file_extension=mock_extension,
|
|
356
|
+
context_config={}
|
|
277
357
|
)
|
|
278
358
|
print(f"Result: {paths1}")
|
|
279
359
|
# Expected: {'output': '/path/to/cwd/my_module.py'}
|
|
@@ -285,7 +365,8 @@ if __name__ == '__main__':
|
|
|
285
365
|
output_locations={'output': 'generated_code.py'},
|
|
286
366
|
basename=mock_basename,
|
|
287
367
|
language=mock_language,
|
|
288
|
-
file_extension=mock_extension
|
|
368
|
+
file_extension=mock_extension,
|
|
369
|
+
context_config={}
|
|
289
370
|
)
|
|
290
371
|
print(f"Result: {paths2}")
|
|
291
372
|
# Expected: {'output': '/path/to/cwd/generated_code.py'}
|
|
@@ -300,7 +381,8 @@ if __name__ == '__main__':
|
|
|
300
381
|
output_locations={'output': test_dir_gen + os.path.sep}, # Explicit directory
|
|
301
382
|
basename=mock_basename,
|
|
302
383
|
language=mock_language,
|
|
303
|
-
file_extension=mock_extension
|
|
384
|
+
file_extension=mock_extension,
|
|
385
|
+
context_config={}
|
|
304
386
|
)
|
|
305
387
|
print(f"Result: {paths3}")
|
|
306
388
|
# Expected: {'output': '/path/to/cwd/temp_gen_output/my_module.py'}
|
|
@@ -319,7 +401,8 @@ if __name__ == '__main__':
|
|
|
319
401
|
},
|
|
320
402
|
basename=mock_basename,
|
|
321
403
|
language=mock_language,
|
|
322
|
-
file_extension=mock_extension
|
|
404
|
+
file_extension=mock_extension,
|
|
405
|
+
context_config={}
|
|
323
406
|
)
|
|
324
407
|
print(f"Result: {paths4}")
|
|
325
408
|
# Expected: {
|
|
@@ -344,7 +427,8 @@ if __name__ == '__main__':
|
|
|
344
427
|
output_locations={}, # No user input
|
|
345
428
|
basename=mock_basename,
|
|
346
429
|
language=mock_language,
|
|
347
|
-
file_extension=mock_extension
|
|
430
|
+
file_extension=mock_extension,
|
|
431
|
+
context_config={}
|
|
348
432
|
)
|
|
349
433
|
print(f"Result: {paths5}")
|
|
350
434
|
# Expected: {
|
|
@@ -365,7 +449,8 @@ if __name__ == '__main__':
|
|
|
365
449
|
output_locations={},
|
|
366
450
|
basename=mock_basename,
|
|
367
451
|
language=mock_language,
|
|
368
|
-
file_extension=mock_extension # This extension is ignored for preprocess default
|
|
452
|
+
file_extension=mock_extension, # This extension is ignored for preprocess default
|
|
453
|
+
context_config={}
|
|
369
454
|
)
|
|
370
455
|
print(f"Result: {paths6}")
|
|
371
456
|
# Expected: {'output': '/path/to/cwd/my_module_python_preprocessed.prompt'}
|
|
@@ -377,7 +462,8 @@ if __name__ == '__main__':
|
|
|
377
462
|
output_locations={},
|
|
378
463
|
basename=mock_basename,
|
|
379
464
|
language=mock_language,
|
|
380
|
-
file_extension=mock_extension
|
|
465
|
+
file_extension=mock_extension,
|
|
466
|
+
context_config={}
|
|
381
467
|
)
|
|
382
468
|
print(f"Result: {paths7}")
|
|
383
469
|
# Expected: {}
|
|
@@ -389,7 +475,8 @@ if __name__ == '__main__':
|
|
|
389
475
|
output_locations={},
|
|
390
476
|
basename="complex_prompt",
|
|
391
477
|
language="javascript",
|
|
392
|
-
file_extension=".js" # Ignored for split defaults
|
|
478
|
+
file_extension=".js", # Ignored for split defaults
|
|
479
|
+
context_config={}
|
|
393
480
|
)
|
|
394
481
|
print(f"Result: {paths8}")
|
|
395
482
|
# Expected: {
|
|
@@ -404,7 +491,8 @@ if __name__ == '__main__':
|
|
|
404
491
|
output_locations={},
|
|
405
492
|
basename="feature_analysis", # Used instead of change_file_basename
|
|
406
493
|
language="", # Not relevant for detect default
|
|
407
|
-
file_extension="" # Not relevant for detect default
|
|
494
|
+
file_extension="", # Not relevant for detect default
|
|
495
|
+
context_config={}
|
|
408
496
|
)
|
|
409
497
|
print(f"Result: {paths9}")
|
|
410
498
|
# Expected: {'output': '/path/to/cwd/feature_analysis_detect.csv'}
|
|
@@ -416,7 +504,8 @@ if __name__ == '__main__':
|
|
|
416
504
|
output_locations={},
|
|
417
505
|
basename="crashed_module", # Used for both code and program defaults
|
|
418
506
|
language="java",
|
|
419
|
-
file_extension=".java"
|
|
507
|
+
file_extension=".java",
|
|
508
|
+
context_config={}
|
|
420
509
|
)
|
|
421
510
|
print(f"Result: {paths10}")
|
|
422
511
|
# Expected: {
|
|
@@ -431,7 +520,8 @@ if __name__ == '__main__':
|
|
|
431
520
|
output_locations={},
|
|
432
521
|
basename="module_to_verify",
|
|
433
522
|
language="python",
|
|
434
|
-
file_extension=".py"
|
|
523
|
+
file_extension=".py",
|
|
524
|
+
context_config={}
|
|
435
525
|
)
|
|
436
526
|
print(f"Result: {paths11}")
|
|
437
527
|
# Expected: {
|
|
@@ -449,7 +539,8 @@ if __name__ == '__main__':
|
|
|
449
539
|
output_locations={'output_program': test_dir_verify_prog + os.path.sep},
|
|
450
540
|
basename="module_to_verify",
|
|
451
541
|
language="python",
|
|
452
|
-
file_extension=".py"
|
|
542
|
+
file_extension=".py",
|
|
543
|
+
context_config={}
|
|
453
544
|
)
|
|
454
545
|
print(f"Result: {paths12}")
|
|
455
546
|
# Expected: {
|
|
@@ -468,7 +559,8 @@ if __name__ == '__main__':
|
|
|
468
559
|
output_locations={},
|
|
469
560
|
basename="another_module_verify",
|
|
470
561
|
language="python",
|
|
471
|
-
file_extension=".py"
|
|
562
|
+
file_extension=".py",
|
|
563
|
+
context_config={}
|
|
472
564
|
)
|
|
473
565
|
print(f"Result: {paths13}")
|
|
474
566
|
# Expected: {
|
pdd/generate_test.py
CHANGED
|
@@ -72,19 +72,34 @@ def generate_test(
|
|
|
72
72
|
model_name = response['model_name']
|
|
73
73
|
result = response['result']
|
|
74
74
|
|
|
75
|
+
# Validate that we got a non-empty result
|
|
76
|
+
if not result or not result.strip():
|
|
77
|
+
raise ValueError(f"LLM test generation returned empty result. Model: {model_name}, Cost: ${response['cost']:.6f}")
|
|
78
|
+
|
|
75
79
|
if verbose:
|
|
76
80
|
console.print(Markdown(result))
|
|
77
81
|
console.print(f"[bold green]Initial generation cost: ${total_cost:.6f}[/bold green]")
|
|
78
82
|
|
|
79
83
|
# Step 4: Check if generation is complete
|
|
80
84
|
last_600_chars = result[-600:] if len(result) > 600 else result
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
|
|
86
|
+
# Validate that the last_600_chars is not empty after stripping
|
|
87
|
+
if not last_600_chars.strip():
|
|
88
|
+
# If the tail is empty, assume generation is complete
|
|
89
|
+
if verbose:
|
|
90
|
+
console.print("[bold yellow]Last 600 chars are empty, assuming generation is complete[/bold yellow]")
|
|
91
|
+
reasoning = "Generation appears complete (tail is empty)"
|
|
92
|
+
is_finished = True
|
|
93
|
+
check_cost = 0.0
|
|
94
|
+
check_model = model_name
|
|
95
|
+
else:
|
|
96
|
+
reasoning, is_finished, check_cost, check_model = unfinished_prompt(
|
|
97
|
+
prompt_text=last_600_chars,
|
|
98
|
+
strength=strength,
|
|
99
|
+
temperature=temperature,
|
|
100
|
+
time=time,
|
|
101
|
+
verbose=verbose
|
|
102
|
+
)
|
|
88
103
|
total_cost += check_cost
|
|
89
104
|
|
|
90
105
|
if not is_finished:
|
|
@@ -104,15 +119,37 @@ def generate_test(
|
|
|
104
119
|
model_name = continue_model
|
|
105
120
|
|
|
106
121
|
# Process the final result
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
122
|
+
try:
|
|
123
|
+
processed_result, post_cost, post_model = postprocess(
|
|
124
|
+
result,
|
|
125
|
+
language=language,
|
|
126
|
+
strength=EXTRACTION_STRENGTH,
|
|
127
|
+
temperature=temperature,
|
|
128
|
+
time=time,
|
|
129
|
+
verbose=verbose
|
|
130
|
+
)
|
|
131
|
+
total_cost += post_cost
|
|
132
|
+
except Exception as e:
|
|
133
|
+
console.print(f"[bold red]Postprocess failed: {str(e)}[/bold red]")
|
|
134
|
+
console.print(f"[bold yellow]Falling back to raw result[/bold yellow]")
|
|
135
|
+
|
|
136
|
+
# Try to extract code blocks directly from the raw result
|
|
137
|
+
import re
|
|
138
|
+
code_blocks = re.findall(r'```(?:python)?\s*(.*?)```', result, re.DOTALL | re.IGNORECASE)
|
|
139
|
+
|
|
140
|
+
if code_blocks:
|
|
141
|
+
# Use the first substantial code block
|
|
142
|
+
for block in code_blocks:
|
|
143
|
+
if len(block.strip()) > 100 and ('def test_' in block or 'import' in block):
|
|
144
|
+
processed_result = block.strip()
|
|
145
|
+
break
|
|
146
|
+
else:
|
|
147
|
+
processed_result = code_blocks[0].strip() if code_blocks else result
|
|
148
|
+
else:
|
|
149
|
+
# No code blocks found, use raw result
|
|
150
|
+
processed_result = result
|
|
151
|
+
|
|
152
|
+
post_cost = 0.0
|
|
116
153
|
|
|
117
154
|
# Step 5: Print total cost if verbose
|
|
118
155
|
if verbose:
|
pdd/llm_invoke.py
CHANGED
|
@@ -81,6 +81,58 @@ from pdd import DEFAULT_LLM_MODEL
|
|
|
81
81
|
# Opt-in to future pandas behavior regarding downcasting
|
|
82
82
|
pd.set_option('future.no_silent_downcasting', True)
|
|
83
83
|
|
|
84
|
+
|
|
85
|
+
def _is_wsl_environment() -> bool:
    """
    Detect if we're running in WSL (Windows Subsystem for Linux) environment.

    Three indicators are checked in order; the first positive one wins:

    1. ``/proc/version`` exists and mentions "microsoft" or "wsl".
    2. The ``WSL_DISTRO_NAME`` environment variable is set and non-empty.
    3. The ``PATH`` environment variable contains ``/mnt/c/``.

    Returns:
        True if any indicator matches, False otherwise. Detection is
        best-effort: any exception raised while probing yields False.
    """
    try:
        # Check for WSL-specific indicators
        if os.path.exists('/proc/version'):
            with open('/proc/version', 'r') as f:
                version_info = f.read().lower()
            # Only a positive match is conclusive. A miss must fall through,
            # otherwise the fallback checks below are unreachable on every
            # system that has /proc/version.
            if 'microsoft' in version_info or 'wsl' in version_info:
                return True

        # Alternative check: WSL_DISTRO_NAME environment variable
        if os.getenv('WSL_DISTRO_NAME'):
            return True

        # Check for Windows-style paths in PATH
        path_env = os.getenv('PATH', '')
        return '/mnt/c/' in path_env.lower()

    except Exception:
        # Deliberately broad: environment detection must never break callers.
        return False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _get_environment_info() -> Dict[str, str]:
    """
    Get environment information for debugging and error reporting.

    Returns:
        Dictionary of string values with the keys 'platform',
        'platform_release', 'platform_version', 'architecture', 'is_wsl'
        and 'python_version'. When WSL is detected, 'wsl_distro' and
        'wsl_interop' are added from the WSL_DISTRO_NAME and WSL_INTEROP
        environment variables ('unknown' / 'not_set' when absent).
    """
    import platform

    # Probe once and reuse the answer, so the 'is_wsl' value and the presence
    # of the WSL-only keys always agree and the probe is not repeated.
    is_wsl = _is_wsl_environment()

    info = {
        'platform': platform.system(),
        'platform_release': platform.release(),
        'platform_version': platform.version(),
        'architecture': platform.machine(),
        'is_wsl': str(is_wsl),
        'python_version': platform.python_version(),
    }

    # Add WSL-specific information
    if is_wsl:
        info['wsl_distro'] = os.getenv('WSL_DISTRO_NAME', 'unknown')
        info['wsl_interop'] = os.getenv('WSL_INTEROP', 'not_set')

    return info
|
|
135
|
+
|
|
84
136
|
# <<< SET LITELLM DEBUG LOGGING >>>
|
|
85
137
|
# os.environ['LITELLM_LOG'] = 'DEBUG' # Keep commented out unless debugging LiteLLM itself
|
|
86
138
|
|
|
@@ -164,6 +216,12 @@ GCS_REGION_NAME = os.getenv("GCS_REGION_NAME", "auto") # Often 'auto' works for
|
|
|
164
216
|
GCS_HMAC_ACCESS_KEY_ID = os.getenv("GCS_HMAC_ACCESS_KEY_ID") # Load HMAC Key ID
|
|
165
217
|
GCS_HMAC_SECRET_ACCESS_KEY = os.getenv("GCS_HMAC_SECRET_ACCESS_KEY") # Load HMAC Secret
|
|
166
218
|
|
|
219
|
+
# Sanitize GCS credentials to handle WSL environment issues
|
|
220
|
+
if GCS_HMAC_ACCESS_KEY_ID:
|
|
221
|
+
GCS_HMAC_ACCESS_KEY_ID = GCS_HMAC_ACCESS_KEY_ID.strip()
|
|
222
|
+
if GCS_HMAC_SECRET_ACCESS_KEY:
|
|
223
|
+
GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
|
|
224
|
+
|
|
167
225
|
cache_configured = False
|
|
168
226
|
|
|
169
227
|
if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
|
|
@@ -448,6 +506,54 @@ def _select_model_candidates(
|
|
|
448
506
|
return candidates
|
|
449
507
|
|
|
450
508
|
|
|
509
|
+
def _sanitize_api_key(key_value: str) -> str:
    """
    Sanitize API key by removing whitespace and carriage returns.

    This fixes WSL environment issues where API keys may contain trailing \\r
    characters that make them invalid for HTTP headers.

    Processing steps:
      1. Strip leading/trailing whitespace (including CR/LF).
      2. Remove any remaining characters with code point below 32.
      3. Log warnings (never raise) if the result is shorter than 10
         characters or contains characters outside printable ASCII (32-126).
      4. Log an info message when the raw value contained a carriage return.

    The key itself is never written to the log.

    Args:
        key_value: The raw API key value from environment

    Returns:
        Sanitized API key with whitespace and carriage returns removed.
        Falsy input (empty string or None) is returned unchanged.
    """
    if not key_value:
        return key_value

    # Strip all whitespace including carriage returns, newlines, etc.
    sanitized = key_value.strip()

    # Additional validation: ensure no remaining control characters
    if any(ord(c) < 32 for c in sanitized):
        logger.warning("API key contains control characters that may cause issues")
        # Remove any remaining control characters
        sanitized = ''.join(c for c in sanitized if ord(c) >= 32)

    # Validate API key format (basic checks); these only warn, by design.
    if sanitized:
        if len(sanitized) < 10:
            logger.warning(f"API key appears too short ({len(sanitized)} characters) - may be invalid")

        # Check for invalid characters in API keys (should be printable ASCII)
        if not all(32 <= ord(c) <= 126 for c in sanitized):
            logger.warning("API key contains non-printable characters")

    # A carriage return in the raw value is always removed by the steps above
    # (edge ones by strip(), interior ones by the control-character filter),
    # so its presence alone means a line-ending problem was fixed.
    if '\r' in key_value:
        if _is_wsl_environment():
            logger.info("Detected and fixed WSL line ending issue in API key")
        else:
            logger.info("Detected and fixed line ending issue in API key")

    return sanitized
|
|
555
|
+
|
|
556
|
+
|
|
451
557
|
def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
|
|
452
558
|
"""Checks for API key in env, prompts user if missing, and updates .env."""
|
|
453
559
|
key_name = model_info.get('api_key')
|
|
@@ -458,6 +564,8 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
|
|
|
458
564
|
return True # Assume key is handled elsewhere or not needed
|
|
459
565
|
|
|
460
566
|
key_value = os.getenv(key_name)
|
|
567
|
+
if key_value:
|
|
568
|
+
key_value = _sanitize_api_key(key_value)
|
|
461
569
|
|
|
462
570
|
if key_value:
|
|
463
571
|
if verbose:
|
|
@@ -473,6 +581,9 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
|
|
|
473
581
|
logger.error("No API key provided. Cannot proceed with this model.")
|
|
474
582
|
return False
|
|
475
583
|
|
|
584
|
+
# Sanitize the user-provided key
|
|
585
|
+
user_provided_key = _sanitize_api_key(user_provided_key)
|
|
586
|
+
|
|
476
587
|
# Set environment variable for the current process
|
|
477
588
|
os.environ[key_name] = user_provided_key
|
|
478
589
|
logger.info(f"API key '{key_name}' set for the current session.")
|
|
@@ -767,6 +878,7 @@ def llm_invoke(
|
|
|
767
878
|
elif api_key_name_from_csv: # For other api_key_names specified in CSV (e.g., OPENAI_API_KEY, or a direct VERTEX_AI_API_KEY string)
|
|
768
879
|
key_value = os.getenv(api_key_name_from_csv)
|
|
769
880
|
if key_value:
|
|
881
|
+
key_value = _sanitize_api_key(key_value)
|
|
770
882
|
litellm_kwargs["api_key"] = key_value
|
|
771
883
|
if verbose:
|
|
772
884
|
logger.info(f"[INFO] Explicitly passing API key from env var '{api_key_name_from_csv}' as 'api_key' parameter to LiteLLM.")
|
|
@@ -932,6 +1044,46 @@ def llm_invoke(
|
|
|
932
1044
|
# Result (String or Pydantic)
|
|
933
1045
|
try:
|
|
934
1046
|
raw_result = resp_item.choices[0].message.content
|
|
1047
|
+
|
|
1048
|
+
# Check if raw_result is None (likely cached corrupted data)
|
|
1049
|
+
if raw_result is None:
|
|
1050
|
+
logger.warning(f"[WARNING] LLM returned None content for item {i}, likely due to corrupted cache. Retrying with cache bypass...")
|
|
1051
|
+
# Retry with cache bypass by modifying the prompt slightly
|
|
1052
|
+
if not use_batch_mode and prompt and input_json is not None:
|
|
1053
|
+
# Add a small space to bypass cache
|
|
1054
|
+
modified_prompt = prompt + " "
|
|
1055
|
+
try:
|
|
1056
|
+
retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
|
|
1057
|
+
# Disable cache for retry
|
|
1058
|
+
litellm.cache = None
|
|
1059
|
+
retry_response = litellm.completion(
|
|
1060
|
+
model=model_name_litellm,
|
|
1061
|
+
messages=retry_messages,
|
|
1062
|
+
temperature=temperature,
|
|
1063
|
+
response_format=response_format,
|
|
1064
|
+
max_completion_tokens=max_tokens,
|
|
1065
|
+
**time_kwargs
|
|
1066
|
+
)
|
|
1067
|
+
# Re-enable cache
|
|
1068
|
+
litellm.cache = Cache()
|
|
1069
|
+
# Extract result from retry
|
|
1070
|
+
retry_raw_result = retry_response.choices[0].message.content
|
|
1071
|
+
if retry_raw_result is not None:
|
|
1072
|
+
logger.info(f"[SUCCESS] Cache bypass retry succeeded for item {i}")
|
|
1073
|
+
raw_result = retry_raw_result
|
|
1074
|
+
else:
|
|
1075
|
+
logger.error(f"[ERROR] Cache bypass retry also returned None for item {i}")
|
|
1076
|
+
results.append("ERROR: LLM returned None content even after cache bypass")
|
|
1077
|
+
continue
|
|
1078
|
+
except Exception as retry_e:
|
|
1079
|
+
logger.error(f"[ERROR] Cache bypass retry failed for item {i}: {retry_e}")
|
|
1080
|
+
results.append(f"ERROR: LLM returned None content and retry failed: {retry_e}")
|
|
1081
|
+
continue
|
|
1082
|
+
else:
|
|
1083
|
+
logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
|
|
1084
|
+
results.append("ERROR: LLM returned None content and cannot retry")
|
|
1085
|
+
continue
|
|
1086
|
+
|
|
935
1087
|
if output_pydantic:
|
|
936
1088
|
parsed_result = None
|
|
937
1089
|
json_string_to_parse = None
|
|
@@ -1064,6 +1216,16 @@ def llm_invoke(
|
|
|
1064
1216
|
# --- 6b. Handle Invocation Errors ---
|
|
1065
1217
|
except openai.AuthenticationError as e:
|
|
1066
1218
|
last_exception = e
|
|
1219
|
+
error_message = str(e)
|
|
1220
|
+
|
|
1221
|
+
# Check for WSL-specific issues in authentication errors
|
|
1222
|
+
if _is_wsl_environment() and ('Illegal header value' in error_message or '\r' in error_message):
|
|
1223
|
+
logger.warning(f"[WSL AUTH ERROR] Authentication failed for {model_name_litellm} - detected WSL line ending issue")
|
|
1224
|
+
logger.warning("[WSL AUTH ERROR] This is likely caused by API key environment variables containing carriage returns")
|
|
1225
|
+
logger.warning("[WSL AUTH ERROR] Try setting your API key again or check your .env file for line ending issues")
|
|
1226
|
+
env_info = _get_environment_info()
|
|
1227
|
+
logger.debug(f"Environment info: {env_info}")
|
|
1228
|
+
|
|
1067
1229
|
if newly_acquired_keys.get(api_key_name):
|
|
1068
1230
|
logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} with the newly provided key for '{api_key_name}'. Please check the key and try again.")
|
|
1069
1231
|
# Invalidate the key in env for this session to force re-prompt on retry
|