lam-cli 0.1.7__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lam/engines/python.py ADDED
@@ -0,0 +1,839 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import re
5
+ import subprocess
6
+ import sys
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Dict, Optional, Set, Tuple, Union
10
+
11
+ from ..core import Stats, check_resource_limits
12
+ from .base import Engine
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class PythonEngine(Engine):
18
+ """Python execution engine with improved sandboxing and logging"""
19
def __init__(self, *args, **kwargs):
    """Initialise the engine and its module allow/block lists.

    All positional and keyword arguments are forwarded unchanged to the
    base ``Engine`` initialiser.
    """
    super().__init__(*args, **kwargs)

    # Scratch directory under the system temp dir; created if missing.
    self.modules_dir = Path(tempfile.gettempdir()) / "lam_python_modules"
    self.modules_dir.mkdir(exist_ok=True)

    # Modules a user script may import (top-level package names are what
    # the checks actually compare against).
    self.allowed_modules = {
        # Core modules
        "json", "datetime", "math", "statistics", "collections",
        "itertools", "functools", "re", "copy", "decimal",
        "csv", "io", "dataclasses", "typing", "enum",
        # Additional useful modules
        "time", "random", "uuid", "hashlib", "base64",
        "urllib", "urllib.parse", "html", "xml", "xml.etree",
        "xml.etree.ElementTree", "string", "textwrap",
        "operator", "bisect", "heapq", "array",
        "unicodedata", "locale", "calendar", "zoneinfo",
        # Data processing
        "struct", "binascii", "codecs", "difflib",
        "pprint", "reprlib", "abc", "contextlib",
        "secrets", "fractions", "numbers",
    }

    # Modules that are always rejected. The blocked set is consulted
    # before the allowed set, so a name must never appear in both:
    # "uuid" used to be listed here as well, which made `import uuid`
    # fail even though it is advertised as allowed and is pre-injected
    # into the sandbox globals by create_wrapper().
    self.blocked_modules = {
        "subprocess", "os", "sys", "shutil", "pathlib",
        "socket", "pickle", "multiprocessing", "threading",
        "importlib", "builtins", "_thread", "ctypes",
        "marshal", "code", "codeop", "ast", "compile",
        "__builtin__", "__main__", "gc", "inspect",
        "asyncio", "concurrent", "signal", "select",
        "mmap", "sqlite3", "dbm", "shelve",
        "tempfile", "glob", "fnmatch", "fileinput",
        "zipfile", "tarfile", "gzip", "bz2", "lzma",
        "webbrowser", "cgi", "cgitb", "wsgiref",
        "http", "ftplib", "poplib", "imaplib", "smtplib",
        "telnetlib", "socketserver", "xmlrpc",
        "ipaddress", "platform", "errno", "pwd", "grp",
        "resource", "pty", "termios", "tty", "fcntl",
        "pipes", "syslog", "ssl", "email",
    }
60
+
61
def validate_environment(self) -> bool:
    """Report whether a Python interpreter path is available.

    Returns:
        True when ``sys.executable`` is a non-empty path. It can be
        ``None`` *or* an empty string when the interpreter cannot work
        out its own location, and neither can be launched by execute().
    """
    logger.debug("Validating Python environment")
    return bool(sys.executable)
64
+
65
def check_for_dangerous_code(self, code: str) -> Optional[str]:
    """Statically scan a user script for constructs the sandbox forbids.

    This is a textual (regex) pre-check, not a parser: it is a first line
    of defence in front of the restricted builtins/importer installed by
    the generated wrapper.

    Args:
        code: Full source text of the user script.

    Returns:
        A human-readable reason for the first violation found, or
        ``None`` when nothing suspicious was detected.
    """
    # getattr() is only rejected when the attribute name is a string
    # literal dunder. The pattern deliberately does not require the call
    # to end after the second argument, so the three-argument form
    # getattr(obj, "__x__", default) is caught as well.
    getattr_dunder = r"getattr\s*\([^,]+,\s*['\"](__[^'\"]+__)['\"]"

    # (pattern, message) pairs, checked in order. A message of None marks
    # the getattr rule, whose message is built from the match.
    dangerous_patterns = [
        (r"__import__\s*\(", "Use of __import__ is not allowed. Use regular import statements instead."),
        (r"eval\s*\(", "Use of eval() is not allowed for security reasons."),
        (r"exec\s*\(", "Use of exec() is not allowed for security reasons."),
        # `re` is an allowed module, so re.compile(...) must not trip the
        # check for the compile() builtin.
        (r"(?<!\bre\.)compile\s*\(", "Use of compile() is not allowed for security reasons."),
        (r"open\s*\(", "Use of open() is not allowed. File system access is restricted."),
        (r"__subclasses__", "Access to __subclasses__ is not allowed for security reasons."),
        (r"__bases__", "Access to __bases__ is not allowed for security reasons."),
        (r"__code__", "Access to __code__ is not allowed for security reasons."),
        (r"__class__\s*\.\s*__", "Access to class internals is restricted."),
        (r"globals\s*\(\s*\)", "Access to globals() is not allowed."),
        (r"locals\s*\(\s*\)", "Access to locals() is not allowed."),
        (r"vars\s*\(\s*\)", "Access to vars() is not allowed."),
        (getattr_dunder, None),
        (r"setattr\s*\(", "Use of setattr() is not allowed."),
        (r"delattr\s*\(", "Use of delattr() is not allowed."),
        (r"__dict__", "Direct access to __dict__ is not allowed."),
        (r"__module__", "Access to __module__ is not allowed."),
    ]

    for pattern, message in dangerous_patterns:
        found = re.search(pattern, code)
        if not found:
            continue
        if message is None:
            return (
                f"Use of getattr() to access special methods "
                f"({found.group(1)}) is not allowed."
            )
        return message

    # Import statements at the start of a line. The clause runs to the end
    # of the statement (newline, ';' or a comment) so that every module in
    # `import a, b as c` can be checked, not just the first token.
    import_pattern = r"^[ \t]*(import|from)[ \t]+([^\n;#]*)"
    for statement in re.finditer(import_pattern, code, re.MULTILINE):
        keyword, clause = statement.groups()
        # `from X import ...` names exactly one module; `import` may list
        # several, separated by commas.
        targets = [clause] if keyword == "from" else clause.split(",")

        for target in targets:
            words = target.split()
            # Only the top-level package is compared against the lists.
            # A relative import (`from . import x`) yields "" here and is
            # rejected as not allowed.
            module = words[0].split(".")[0] if words else ""

            if module in self.blocked_modules:
                return f"Import of '{module}' is not allowed for security reasons."

            # NOTE(review): names starting with '_' bypass the allow-list
            # here (unless explicitly blocked) — confirm this is intended.
            if module not in self.allowed_modules and not module.startswith('_'):
                # Sort before slicing so the hint is stable between runs.
                shown = sorted(self.allowed_modules)[:10]
                remaining = len(self.allowed_modules) - len(shown)
                return (f"Import of '{module}' is not allowed. "
                        f"Available modules include: {', '.join(shown)}... "
                        f"(and {remaining} more)")

    return None
116
+
117
+ def create_wrapper(self, input_data: str, user_script: str) -> str:
118
+ """Create the wrapper script with enhanced logging and debugging"""
119
+ # Perform safety checks before creating wrapper
120
+ safety_issue = self.check_for_dangerous_code(user_script)
121
+ if safety_issue:
122
+ # Return a wrapper that will immediately exit with the safety error
123
+ return f'''
124
+ import json
125
+ import sys
126
+
127
+ sys.stderr.write(json.dumps({{
128
+ "error": "Security violation: {safety_issue}",
129
+ "type": "SecurityError",
130
+ "stack": ["Security check failed", "{safety_issue}"]
131
+ }}))
132
+ sys.exit(1)
133
+ '''
134
+
135
+ return f'''
136
+ import json
137
+ import sys
138
+ import traceback
139
+ import io
140
+ import contextlib
141
+ from datetime import datetime
142
+ import time
143
+
144
+ # Import all allowed modules
145
+ import re
146
+ import math
147
+ import statistics
148
+ import collections
149
+ import itertools
150
+ import functools
151
+ import copy
152
+ import decimal
153
+ import csv
154
+ import dataclasses
155
+ import typing
156
+ import enum
157
+ import random
158
+ import uuid
159
+ import hashlib
160
+ import base64
161
+ import urllib
162
+ import urllib.parse
163
+ import html
164
+ import xml
165
+ import xml.etree.ElementTree
166
+ import string
167
+ import textwrap
168
+ import operator
169
+ import bisect
170
+ import heapq
171
+ import array
172
+ import unicodedata
173
+ import locale
174
+ import calendar
175
+ import struct
176
+ import binascii
177
+ import codecs
178
+ import difflib
179
+ import pprint
180
+ import reprlib
181
+ import abc
182
+ import contextlib as contextlib_module
183
+ import secrets
184
+ import fractions
185
+ import numbers
186
+
187
+ # Try to import zoneinfo (Python 3.9+)
188
+ try:
189
+ import zoneinfo
190
+ except ImportError:
191
+ zoneinfo = None
192
+
193
+ # Resource limiting
194
+ try:
195
+ import resource
196
+ import signal
197
+ HAS_RESOURCE_LIMITS = True
198
+ except ImportError:
199
+ HAS_RESOURCE_LIMITS = False
200
+
201
+ # Enhanced logging system
202
+ class LogCapture:
203
+ def __init__(self):
204
+ self.logs = []
205
+ self.buffer = []
206
+
207
+ def write(self, message):
208
+ if message.strip():
209
+ self.logs.append({{
210
+ "type": "log",
211
+ "message": message.strip(),
212
+ "timestamp": time.time()
213
+ }})
214
+ return len(message)
215
+
216
+ def flush(self):
217
+ pass
218
+
219
+ class ErrorCapture:
220
+ def __init__(self):
221
+ self.logs = []
222
+
223
+ def write(self, message):
224
+ if message.strip():
225
+ self.logs.append({{
226
+ "type": "error",
227
+ "message": message.strip(),
228
+ "timestamp": time.time()
229
+ }})
230
+ return len(message)
231
+
232
+ def flush(self):
233
+ pass
234
+
235
+ # Create log captures
236
+ stdout_capture = LogCapture()
237
+ stderr_capture = ErrorCapture()
238
+
239
+ # Enhanced console object for better logging
240
+ class Console:
241
+ def __init__(self, stdout, stderr):
242
+ self.stdout = stdout
243
+ self.stderr = stderr
244
+
245
+ def log(self, *args, **kwargs):
246
+ message = ' '.join(str(arg) for arg in args)
247
+ self.stdout.write(message + '\\n')
248
+
249
+ def error(self, *args, **kwargs):
250
+ message = ' '.join(str(arg) for arg in args)
251
+ self.stderr.write(message + '\\n')
252
+
253
+ def warn(self, *args, **kwargs):
254
+ message = ' '.join(str(arg) for arg in args)
255
+ self.stderr.write(f"WARNING: {{message}}\\n")
256
+
257
+ def info(self, *args, **kwargs):
258
+ self.log(*args, **kwargs)
259
+
260
+ def debug(self, *args, **kwargs):
261
+ message = ' '.join(str(arg) for arg in args)
262
+ self.stdout.write(f"DEBUG: {{message}}\\n")
263
+
264
+ def table(self, data):
265
+ # Simple table formatting
266
+ if isinstance(data, dict):
267
+ for k, v in data.items():
268
+ self.log(f"{{k}}: {{v}}")
269
+ elif isinstance(data, list):
270
+ for item in data:
271
+ self.log(str(item))
272
+ else:
273
+ self.log(str(data))
274
+
275
+ console = Console(stdout_capture, stderr_capture)
276
+
277
+ # Set resource limits if available
278
+ if HAS_RESOURCE_LIMITS:
279
+ def set_resource_limits():
280
+ # 10 seconds CPU time (more generous)
281
+ resource.setrlimit(resource.RLIMIT_CPU, (10, 10))
282
+
283
+ # 200MB memory limit
284
+ memory_limit = 200 * 1024 * 1024 # 200MB in bytes
285
+ resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))
286
+
287
+ # Set timeout handler
288
+ def timeout_handler(signum, frame):
289
+ error_info = {{
290
+ "error": "Execution timed out (10 seconds)",
291
+ "type": "TimeoutError",
292
+ "stack": ["Timeout handler triggered", "Maximum execution time exceeded"]
293
+ }}
294
+ sys.stderr.write(json.dumps(error_info))
295
+ sys.exit(1)
296
+
297
+ signal.signal(signal.SIGALRM, timeout_handler)
298
+ signal.alarm(10) # 10 second timeout
299
+
300
+ try:
301
+ set_resource_limits()
302
+ except Exception as e:
303
+ # Continue if resource limiting fails (e.g., on Windows)
304
+ pass
305
+
306
+ # Capture stdout and stderr
307
+ original_stdout = sys.stdout
308
+ original_stderr = sys.stderr
309
+ sys.stdout = stdout_capture
310
+ sys.stderr = stderr_capture
311
+
312
+ # Enhanced print function that goes to our logging
313
+ def print(*args, sep=' ', end='\\n', file=None, flush=False):
314
+ output = sep.join(str(arg) for arg in args) + end
315
+ if file is sys.stderr:
316
+ stderr_capture.write(output)
317
+ else:
318
+ stdout_capture.write(output)
319
+
320
+ # Parse input data
321
+ input_data = None
322
+ try:
323
+ input_data_raw = r"""{input_data}"""
324
+ input_data = json.loads(input_data_raw)
325
+ except json.JSONDecodeError as e:
326
+ error_info = {{
327
+ "error": f"Failed to parse input data: {{e}}",
328
+ "type": "JSONDecodeError",
329
+ "stack": []
330
+ }}
331
+ original_stderr.write(json.dumps(error_info))
332
+ sys.exit(1)
333
+
334
+ # Custom import hook for better error messages
335
+ class RestrictedImporter:
336
+ def __init__(self, allowed_modules, blocked_modules):
337
+ self.allowed_modules = allowed_modules
338
+ self.blocked_modules = blocked_modules
339
+
340
+ def __call__(self, name, globals=None, locals=None, fromlist=(), level=0):
341
+ base_module = name.split('.')[0]
342
+
343
+ if base_module in self.blocked_modules:
344
+ raise ImportError(
345
+ f"Import of '{{name}}' is blocked for security reasons. "
346
+ f"This module can access system resources that are not allowed in the sandbox."
347
+ )
348
+
349
+ if base_module not in self.allowed_modules and not base_module.startswith('_'):
350
+ available = sorted(list(self.allowed_modules))[:15]
351
+ raise ImportError(
352
+ f"Import of '{{name}}' is not in the allowed module list. "
353
+ f"Available modules include: {{', '.join(available)}}... "
354
+ f"({{len(self.allowed_modules)}} total modules available)"
355
+ )
356
+
357
+ return __import__(name, globals, locals, fromlist, level)
358
+
359
+ # Create restricted builtins
360
+ restricted_builtins = {{
361
+ # Safe built-in functions
362
+ "abs": abs, "all": all, "any": any, "ascii": ascii,
363
+ "bin": bin, "bool": bool, "bytearray": bytearray, "bytes": bytes,
364
+ "callable": callable, "chr": chr, "classmethod": classmethod,
365
+ "complex": complex, "dict": dict, "dir": dir, "divmod": divmod,
366
+ "enumerate": enumerate, "filter": filter, "float": float,
367
+ "format": format, "frozenset": frozenset, "getattr": getattr,
368
+ "hasattr": hasattr, "hash": hash, "hex": hex, "id": id,
369
+ "int": int, "isinstance": isinstance, "issubclass": issubclass,
370
+ "iter": iter, "len": len, "list": list, "map": map,
371
+ "max": max, "memoryview": memoryview, "min": min, "next": next,
372
+ "object": object, "oct": oct, "ord": ord, "pow": pow,
373
+ "print": print, "property": property, "range": range,
374
+ "repr": repr, "reversed": reversed, "round": round,
375
+ "set": set, "slice": slice, "sorted": sorted,
376
+ "staticmethod": staticmethod, "str": str, "sum": sum,
377
+ "super": super, "tuple": tuple, "type": type, "zip": zip,
378
+
379
+ # Exception types
380
+ "BaseException": BaseException, "Exception": Exception,
381
+ "ArithmeticError": ArithmeticError, "AssertionError": AssertionError,
382
+ "AttributeError": AttributeError, "EOFError": EOFError,
383
+ "ImportError": ImportError, "IndexError": IndexError,
384
+ "KeyError": KeyError, "KeyboardInterrupt": KeyboardInterrupt,
385
+ "LookupError": LookupError, "MemoryError": MemoryError,
386
+ "NameError": NameError, "NotImplementedError": NotImplementedError,
387
+ "OSError": OSError, "OverflowError": OverflowError,
388
+ "RecursionError": RecursionError, "ReferenceError": ReferenceError,
389
+ "RuntimeError": RuntimeError, "StopIteration": StopIteration,
390
+ "SyntaxError": SyntaxError, "SystemError": SystemError,
391
+ "TypeError": TypeError, "UnboundLocalError": UnboundLocalError,
392
+ "UnicodeError": UnicodeError, "ValueError": ValueError,
393
+ "ZeroDivisionError": ZeroDivisionError,
394
+
395
+ # Restricted import function
396
+ "__import__": RestrictedImporter(
397
+ {json.dumps(list(self.allowed_modules))},
398
+ {json.dumps(list(self.blocked_modules))}
399
+ ),
400
+
401
+ # Constants
402
+ "True": True, "False": False, "None": None,
403
+ "NotImplemented": NotImplemented, "Ellipsis": Ellipsis,
404
+ }}
405
+
406
+ # Create safe globals with pre-imported modules
407
+ safe_globals = {{
408
+ "__builtins__": restricted_builtins,
409
+ "console": console,
410
+
411
+ # Pre-imported safe modules
412
+ "json": json, "datetime": datetime, "time": time,
413
+ "math": math, "statistics": statistics,
414
+ "collections": collections, "itertools": itertools,
415
+ "functools": functools, "re": re, "copy": copy,
416
+ "decimal": decimal, "csv": csv, "io": io,
417
+ "dataclasses": dataclasses, "typing": typing,
418
+ "enum": enum, "random": random, "uuid": uuid,
419
+ "hashlib": hashlib, "base64": base64,
420
+ "urllib": urllib, "html": html, "xml": xml,
421
+ "string": string, "textwrap": textwrap,
422
+ "operator": operator, "bisect": bisect,
423
+ "heapq": heapq, "array": array,
424
+ "unicodedata": unicodedata, "locale": locale,
425
+ "calendar": calendar, "struct": struct,
426
+ "binascii": binascii, "codecs": codecs,
427
+ "difflib": difflib, "pprint": pprint,
428
+ "reprlib": reprlib, "abc": abc,
429
+ "contextlib": contextlib_module,
430
+ "secrets": secrets, "fractions": fractions,
431
+ "numbers": numbers,
432
+ }}
433
+
434
+ if zoneinfo:
435
+ safe_globals["zoneinfo"] = zoneinfo
436
+
437
+ # Create safe locals with input data
438
+ safe_locals = {{"input_data": input_data}}
439
+
440
+ # Helper function for better error formatting
441
+ def format_error_context(exc_type, exc_value, exc_tb):
442
+ """Extract detailed context about an error"""
443
+ error_msg = str(exc_value)
444
+
445
+ # Special handling for KeyError to provide more context
446
+ if exc_type.__name__ == 'KeyError':
447
+ # The key that wasn't found
448
+ missing_key = str(exc_value).strip("'")
449
+
450
+ # Try to get the code that caused the error
451
+ tb_frames = list(traceback.extract_tb(exc_tb))
452
+ if tb_frames:
453
+ last_frame = tb_frames[-1]
454
+ code_line = last_frame.line
455
+
456
+ if code_line:
457
+ # Look for dictionary access patterns to provide context
458
+ import re
459
+
460
+ # Check if it's a nested access - use simpler pattern
461
+ if f"['{{missing_key}}']" in code_line:
462
+ # Extract the object being accessed
463
+ match = re.search(r'(\w+(?:\[[^\]]+\])*)\[[^\]]+\]', code_line)
464
+ if match:
465
+ accessed_obj = match.group(1)
466
+ error_msg = f"KeyError: '{{missing_key}}' not found in {{accessed_obj}}"
467
+ else:
468
+ error_msg = f"KeyError: dictionary key '{{missing_key}}' not found"
469
+ else:
470
+ error_msg = f"KeyError: dictionary key '{{missing_key}}' not found"
471
+
472
+ elif exc_type.__name__ == 'AttributeError':
473
+ # Try to extract what object and attribute were involved
474
+ import re
475
+ match = re.search(r"'(\w+)' object has no attribute '(\w+)'", error_msg)
476
+ if match:
477
+ obj_type, attr = match.groups()
478
+ error_msg = f"AttributeError: '{{obj_type}}' object has no attribute '{{attr}}'"
479
+
480
+ elif exc_type.__name__ == 'IndexError':
481
+ # Enhance index errors
482
+ if "list index out of range" in error_msg:
483
+ tb_frames = list(traceback.extract_tb(exc_tb))
484
+ if tb_frames:
485
+ last_frame = tb_frames[-1]
486
+ code_line = last_frame.line
487
+ if code_line and '[' in code_line:
488
+ # Try to extract what list was being accessed
489
+ match = re.search(r"(\w+)\[", code_line)
490
+ if match:
491
+ list_name = match.group(1)
492
+ error_msg = f"IndexError: list index out of range when accessing {{list_name}}"
493
+
494
+ elif exc_type.__name__ == 'TypeError':
495
+ # Keep TypeError messages as they usually have good context
496
+ pass
497
+
498
+ return error_msg
499
+
500
+ # Execute user script
501
+ transform_fn = None
502
+ execution_start = time.time()
503
+
504
+ try:
505
+ # Compile the user script
506
+ compiled_code = compile(r"""{user_script}""", "<user_script>", "exec")
507
+
508
+ # Execute in restricted environment
509
+ exec(compiled_code, safe_globals, safe_locals)
510
+
511
+ # Validate transform function
512
+ if 'transform' not in safe_locals:
513
+ raise NameError(
514
+ "No 'transform' function defined. Your script must define a function "
515
+ "named 'transform' that takes the input data as its parameter."
516
+ )
517
+
518
+ transform_fn = safe_locals['transform']
519
+
520
+ if not callable(transform_fn):
521
+ raise TypeError(
522
+ f"'transform' must be a function, but got {{type(transform_fn).__name__}}. "
523
+ "Make sure you define it as: def transform(data): ..."
524
+ )
525
+
526
+ # Check function signature (basic check)
527
+ import inspect
528
+ try:
529
+ sig = inspect.signature(transform_fn)
530
+ param_count = len(sig.parameters)
531
+ if param_count != 1:
532
+ raise TypeError(
533
+ f"'transform' function must accept exactly 1 parameter, "
534
+ f"but it accepts {{param_count}}. "
535
+ f"Define it as: def transform(data): ..."
536
+ )
537
+ except Exception:
538
+ # If signature inspection fails, continue anyway
539
+ pass
540
+
541
+ except SyntaxError as e:
542
+ # Enhanced syntax error reporting
543
+ error_info = {{
544
+ "error": f"SyntaxError: {{e.msg}}",
545
+ "type": "SyntaxError",
546
+ "stack": [],
547
+ "traceback": [{{
548
+ "file": "user_script.py",
549
+ "line": e.lineno,
550
+ "function": "<module>",
551
+ "code": e.text.strip() if e.text else ""
552
+ }}]
553
+ }}
554
+ original_stderr.write(json.dumps(error_info))
555
+ sys.exit(1)
556
+
557
+ except Exception as e:
558
+ # Extract detailed traceback information
559
+ import traceback as tb_module
560
+ tb_info = []
561
+
562
+ # Get the traceback
563
+ exc_type, exc_value, exc_tb = sys.exc_info()
564
+
565
+ # Format traceback with more details
566
+ for frame in tb_module.extract_tb(exc_tb):
567
+ if '<user_script>' in frame.filename or '<string>' in frame.filename:
568
+ tb_info.append({{
569
+ "file": "user_script.py",
570
+ "line": frame.lineno,
571
+ "function": frame.name,
572
+ "code": frame.line.strip() if frame.line else ""
573
+ }})
574
+
575
+ error_info = {{
576
+ "error": str(e),
577
+ "type": type(e).__name__,
578
+ "traceback": tb_info,
579
+ "stack": []
580
+ }}
581
+
582
+ original_stderr.write(json.dumps(error_info))
583
+ sys.exit(1)
584
+
585
+ # Execute transform function
586
+ transform_start = time.time()
587
+ result = None
588
+
589
+ try:
590
+ # Cancel alarm if set
591
+ if HAS_RESOURCE_LIMITS:
592
+ signal.alarm(0)
593
+
594
+ # Call transform with input data
595
+ result = transform_fn(input_data)
596
+
597
+ # Validate result is JSON serializable
598
+ try:
599
+ json.dumps(result)
600
+ except (TypeError, ValueError) as e:
601
+ # Try to find what's not serializable
602
+ def find_non_serializable(obj, path="result"):
603
+ if isinstance(obj, (str, int, float, bool, type(None))):
604
+ return None
605
+ elif isinstance(obj, dict):
606
+ for k, v in obj.items():
607
+ issue = find_non_serializable(v, f"{{path}}[{{repr(k)}}]")
608
+ if issue:
609
+ return issue
610
+ elif isinstance(obj, (list, tuple)):
611
+ for i, v in enumerate(obj):
612
+ issue = find_non_serializable(v, f"{{path}}[{{i}}]")
613
+ if issue:
614
+ return issue
615
+ else:
616
+ return f"{{path}} contains non-serializable type: {{type(obj).__name__}}"
617
+
618
+ issue = find_non_serializable(result)
619
+ raise TypeError(
620
+ f"Transform result is not JSON serializable. {{issue or str(e)}}. "
621
+ "Make sure your transform function returns only JSON-compatible types "
622
+ "(dict, list, str, int, float, bool, None)."
623
+ )
624
+
625
+ except Exception as e:
626
+ # Get detailed error information
627
+ import traceback as tb_module
628
+ exc_type, exc_value, exc_tb = sys.exc_info()
629
+
630
+ # Get the original exception if this is a wrapped exception
631
+ original_error = str(e)
632
+ error_context = ""
633
+
634
+ # If this is a wrapped ValueError, try to get the original error
635
+ if isinstance(e, ValueError) and "Failed to process:" in str(e):
636
+ # Use exception chaining to get the cause
637
+ if hasattr(e, '__context__') and e.__context__:
638
+ # Get the original exception that was caught
639
+ orig_exc = e.__context__
640
+ orig_type = type(orig_exc).__name__
641
+
642
+ # Format the original error with context
643
+ enhanced_msg = format_error_context(type(orig_exc), orig_exc, orig_exc.__traceback__)
644
+ original_error = f"Failed to process: {{enhanced_msg}}"
645
+
646
+ # Build traceback info focusing on user code
647
+ tb_info = []
648
+ for frame in tb_module.extract_tb(exc_tb):
649
+ if '<user_script>' in frame.filename or '<string>' in frame.filename or 'transform' in frame.name:
650
+ tb_info.append({{
651
+ "file": "user_script.py",
652
+ "line": frame.lineno,
653
+ "function": frame.name,
654
+ "code": frame.line.strip() if frame.line else ""
655
+ }})
656
+
657
+ stack_trace = []
658
+
659
+ # First line: Error type and message (like JS)
660
+ stack_trace.append(f"{{type(e).__name__}}: {{original_error}}")
661
+
662
+ # Add traceback entries
663
+ for tb_entry in tb_info:
664
+ stack_trace.append(f" at {{tb_entry['function']}} ({{tb_entry['file']}}:{{tb_entry['line']}})")
665
+
666
+ error_info = {{
667
+ "error": original_error,
668
+ "type": type(e).__name__,
669
+ "traceback": tb_info,
670
+ "stack": stack_trace
671
+ }}
672
+
673
+ original_stderr.write(json.dumps(error_info))
674
+ sys.exit(1)
675
+
676
+ before_write = {{
677
+ "result": result,
678
+ "logs": stdout_capture.logs + stderr_capture.logs
679
+ }}
680
+
681
+ stdout_capture.write(f"Writing output: {{json.dumps(before_write, indent=2)}}")
682
+
683
+ # Prepare final output
684
+ output = {{
685
+ "result": result,
686
+ "logs": stdout_capture.logs + stderr_capture.logs
687
+ }}
688
+
689
+ # Write successful result
690
+ original_stdout.write(json.dumps(output))
691
+ '''
692
+
693
def execute(self, program_file: str, input_data: str) -> Tuple[Union[Dict, str], Optional[str]]:
    """Run a user script against ``input_data`` in a child interpreter.

    The script is statically checked, wrapped by create_wrapper(), written
    to a temporary directory and run with ``sys.executable -I`` under a
    10 second timeout.

    Args:
        program_file: Path of the user's Python script (read as UTF-8).
        input_data: JSON text passed to the script's ``transform``.

    Returns:
        ``(result, None)`` on success, where ``result`` is the value of
        the ``result`` key in the wrapper's output. On failure,
        ``(error_dict, message)`` where ``error_dict`` carries at least
        a ``"lam.error"`` key. Exceptions are not propagated; they are
        converted into the failure form.
    """
    logger.info("Executing Python script: %s", program_file)
    # NOTE(review): never read in this method; kept in case constructing
    # Stats has side effects — confirm and remove if not.
    stats = Stats()

    try:
        check_resource_limits(self.modules_dir)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_dir = Path(temp_dir)

            # Read user script. Python source is UTF-8 by default, so do
            # not depend on the platform's locale encoding.
            with open(program_file, 'r', encoding='utf-8') as f:
                user_script = f.read()
            logger.debug("Loaded user Python script: %d characters", len(user_script))

            # Check for dangerous code
            safety_issue = self.check_for_dangerous_code(user_script)
            if safety_issue:
                logger.warning("Security violation detected in script: %s", safety_issue)
                return {
                    "lam.error": f"Security violation: {safety_issue}",
                    "type": "SecurityError"
                }, f"Security violation: {safety_issue}"

            # Create wrapper script with enhanced error handling
            wrapper = self.create_wrapper(input_data, user_script)
            script_path = temp_dir / "script.py"
            # The child interpreter decodes the file as UTF-8.
            with open(script_path, 'w', encoding='utf-8') as f:
                f.write(wrapper)
            logger.debug("Generated Python wrapper script: %s", script_path)

            # Execute with Python in isolated environment
            process = subprocess.Popen(
                [
                    sys.executable,
                    "-I",  # Isolated mode
                    str(script_path)
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                cwd=temp_dir,
                env={"PATH": os.environ.get("PATH", "")}
            )
            logger.info("Started Python process PID %d", process.pid)

            try:
                output, error = process.communicate(timeout=10)
                logger.debug("Process completed with code %d", process.returncode)
            except subprocess.TimeoutExpired:
                logger.warning("Process timeout after 10 seconds")
                process.kill()
                # Reap the child and close its pipes; kill() alone leaves
                # a zombie process and open file descriptors behind.
                process.communicate()
                return {
                    "lam.error": "Script execution timed out",
                    "type": "TimeoutError"
                }, "Execution timed out after 10 seconds"

            # Handle process errors with enhanced logging
            if process.returncode != 0:
                # The wrapper reports failures as one JSON object on
                # stderr; anything else is passed through as raw text.
                try:
                    error_data = json.loads(error.strip())
                except json.JSONDecodeError:
                    error_data = None

                if not isinstance(error_data, dict):
                    error_msg = error.strip() or "Unknown error"
                    logger.error("Failed to parse error JSON, raw error: %s", error_msg)
                    return {"lam.error": error_msg}, error_msg

                error_msg = error_data.get('error', 'Unknown error')
                error_type = error_data.get('type', 'Error')
                traceback_entries = error_data.get('traceback') or []

                # Format error response similar to JS engine
                error_details = {
                    "lam.error": error_msg,
                }

                if error_data.get('stack'):
                    # Use the pre-formatted stack trace if available
                    error_details["stack_trace"] = error_data['stack']
                else:
                    # Fallback: format stack trace similar to JS
                    stack_lines = [f"{error_type}: {error_msg}"]
                    for tb in traceback_entries:
                        file = tb.get('file', 'unknown')
                        line = tb.get('line', 0)
                        func = tb.get('function', '<module>')
                        stack_lines.append(f"    at {func} ({file}:{line})")
                    error_details["stack_trace"] = stack_lines

                # Log the error details
                logger.error("Python execution error: %s (%s)", error_msg, error_type)
                for tb_entry in traceback_entries:
                    # %s rather than %d: the wrapper can report a line of
                    # None (e.g. a SyntaxError without a line number).
                    logger.error("    at %s:%s in %s()",
                                 tb_entry.get('file', 'unknown'),
                                 tb_entry.get('line', 0),
                                 tb_entry.get('function', 'unknown'))

                return error_details, error_msg

            # Handle successful output
            try:
                output_data = json.loads(output)
            except json.JSONDecodeError as e:
                logger.error("Failed to parse output as JSON: %s", str(e))
                logger.error("Raw output: %s", output[:500])  # Log first 500 chars
                return {
                    "lam.error": "Invalid JSON output from transform",
                    "parse_error": str(e),
                    "raw_output": output.strip()[:1000]  # Include some of the output
                }, "Output format error"

            # Process Python logs
            for log_entry in output_data.get('logs', []):
                log_type = log_entry.get('type', 'log')
                message = log_entry.get('message', '')

                # The wrapper's own dump of the final payload is debug
                # noise. It is emitted with the "Writing output:" prefix;
                # "Writing result:" is accepted as well.
                if message.startswith(('Writing result:', 'Writing output:')):
                    logger.debug("[Python] %s", message)
                elif log_type == 'error':
                    logger.error("[Python] %s", message)
                elif 'WARNING:' in message:
                    logger.warning("[Python] %s", message)
                elif 'DEBUG:' in message:
                    logger.debug("[Python] %s", message)
                else:
                    logger.info("[Python] %s", message)

            result = output_data.get('result', {})
            return result, None

    except Exception as e:
        # Boundary handler: callers receive the error tuple, never a raise.
        logger.exception("Python engine execution failed")
        return {
            "lam.error": str(e),
            "type": e.__class__.__name__
        }, str(e)