lam-cli 0.1.7__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lam/__init__.py +1 -0
- lam/core.py +95 -0
- lam/engines/__init__.py +23 -0
- lam/engines/base.py +56 -0
- lam/engines/javascript.py +283 -0
- lam/engines/jq.py +65 -0
- lam/engines/python.py +839 -0
- lam/lam.py +18 -863
- lam/utils.py +22 -0
- lam_cli-1.0.0.dist-info/METADATA +229 -0
- lam_cli-1.0.0.dist-info/RECORD +15 -0
- {lam_cli-0.1.7.dist-info → lam_cli-1.0.0.dist-info}/WHEEL +1 -1
- lam_cli-0.1.7.dist-info/METADATA +0 -53
- lam_cli-0.1.7.dist-info/RECORD +0 -8
- {lam_cli-0.1.7.dist-info → lam_cli-1.0.0.dist-info}/entry_points.txt +0 -0
- {lam_cli-0.1.7.dist-info → lam_cli-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {lam_cli-0.1.7.dist-info → lam_cli-1.0.0.dist-info}/top_level.txt +0 -0
lam/engines/python.py
ADDED
|
@@ -0,0 +1,839 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Optional, Set, Tuple, Union
|
|
10
|
+
|
|
11
|
+
from ..core import Stats, check_resource_limits
|
|
12
|
+
from .base import Engine
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PythonEngine(Engine):
|
|
18
|
+
"""Python execution engine with improved sandboxing and logging"""
|
|
19
|
+
def __init__(self, *args, **kwargs):
    """Initialise the engine and its import allow/block lists.

    All positional and keyword arguments are forwarded unchanged to the
    base ``Engine`` initialiser.

    Attributes set here:
        modules_dir: ``<system temp dir>/lam_python_modules``; created if it
            does not exist yet.
        allowed_modules: top-level module names a user script may import.
        blocked_modules: top-level module names that are always rejected.
            The blocked check runs before the allowed check, so the two
            sets must stay disjoint.
    """
    super().__init__(*args, **kwargs)
    self.modules_dir = Path(tempfile.gettempdir()) / "lam_python_modules"
    self.modules_dir.mkdir(exist_ok=True)

    # Define allowed modules - more permissive list
    self.allowed_modules = {
        # Core modules
        "json", "datetime", "math", "statistics", "collections",
        "itertools", "functools", "re", "copy", "decimal",
        "csv", "io", "dataclasses", "typing", "enum",
        # Additional useful modules
        "time", "random", "uuid", "hashlib", "base64",
        "urllib", "urllib.parse", "html", "xml", "xml.etree",
        "xml.etree.ElementTree", "string", "textwrap",
        "operator", "bisect", "heapq", "array",
        "unicodedata", "locale", "calendar", "zoneinfo",
        # Data processing
        "struct", "binascii", "codecs", "difflib",
        "pprint", "reprlib", "abc", "contextlib",
        "secrets", "fractions", "numbers",
    }

    # Dangerous modules that are always blocked.
    # "uuid" used to be listed here as well as in allowed_modules; because
    # the blocked check wins, `import uuid` was rejected even though the
    # wrapper pre-imports uuid and hands it to user code. It is therefore
    # no longer blocked.
    self.blocked_modules = {
        "subprocess", "os", "sys", "shutil", "pathlib",
        "socket", "pickle", "multiprocessing", "threading",
        "importlib", "builtins", "_thread", "ctypes",
        "marshal", "code", "codeop", "ast", "compile",
        "__builtin__", "__main__", "gc", "inspect",
        "asyncio", "concurrent", "signal", "select",
        "mmap", "sqlite3", "dbm", "shelve",
        "tempfile", "glob", "fnmatch", "fileinput",
        "zipfile", "tarfile", "gzip", "bz2", "lzma",
        "webbrowser", "cgi", "cgitb", "wsgiref",
        "http", "ftplib", "poplib", "imaplib", "smtplib",
        "telnetlib", "socketserver", "xmlrpc",
        "ipaddress", "platform", "errno", "pwd", "grp",
        "resource", "pty", "termios", "tty", "fcntl",
        "pipes", "syslog", "ssl", "email",
    }
|
|
60
|
+
|
|
61
|
+
def validate_environment(self) -> bool:
    """Report whether a Python interpreter path is known for running scripts.

    Returns:
        True when ``sys.executable`` holds a non-empty path, False otherwise.
    """
    logger.debug("Validating Python environment")
    # sys.executable can be None *or* an empty string when the interpreter
    # path cannot be determined, so test truthiness rather than `is not None`.
    return bool(sys.executable)
|
|
64
|
+
|
|
65
|
+
def check_for_dangerous_code(self, code: str) -> Optional[str]:
    """Statically scan a user script for disallowed constructs.

    The scan is purely textual (regular expressions over the source), so it
    also fires on matches inside strings and comments.

    Args:
        code: Source text of the user script.

    Returns:
        A human-readable description of the first violation found, or
        ``None`` when nothing objectionable was detected.
    """
    dangerous_patterns = [
        (r"__import__\s*\(", "Use of __import__ is not allowed. Use regular import statements instead."),
        (r"eval\s*\(", "Use of eval() is not allowed for security reasons."),
        (r"exec\s*\(", "Use of exec() is not allowed for security reasons."),
        # Only the bare builtin is rejected: attribute calls such as
        # re.compile(...) and names such as precompile(...) are legitimate.
        (r"(?<![\w.])compile\s*\(", "Use of compile() is not allowed for security reasons."),
        (r"open\s*\(", "Use of open() is not allowed. File system access is restricted."),
        (r"__subclasses__", "Access to __subclasses__ is not allowed for security reasons."),
        (r"__bases__", "Access to __bases__ is not allowed for security reasons."),
        (r"__code__", "Access to __code__ is not allowed for security reasons."),
        (r"__class__\s*\.\s*__", "Access to class internals is restricted."),
        (r"globals\s*\(\s*\)", "Access to globals() is not allowed."),
        (r"locals\s*\(\s*\)", "Access to locals() is not allowed."),
        (r"vars\s*\(\s*\)", "Access to vars() is not allowed."),
        (r"getattr\s*\([^,]+,[^,]+\)", None),  # We'll check this more carefully
        (r"setattr\s*\(", "Use of setattr() is not allowed."),
        (r"delattr\s*\(", "Use of delattr() is not allowed."),
        (r"__dict__", "Direct access to __dict__ is not allowed."),
        (r"__module__", "Access to __module__ is not allowed."),
    ]

    for pattern, message in dangerous_patterns:
        if not re.search(pattern, code):
            continue

        # Special case for getattr - allow for normal attribute access
        if "getattr" in pattern:
            # Check if it's trying to access dunder methods
            getattr_matches = re.findall(r"getattr\s*\([^,]+,\s*['\"](__[^'\"]+__)['\"]", code)
            if getattr_matches:
                return f"Use of getattr() to access special methods ({getattr_matches[0]}) is not allowed."
            continue

        if message:
            return message

    # Check for imports. A statement may start a line or follow a ';'.
    # `from X ...` yields X; `import a.b as c, d` yields every listed module.
    import_pattern = r"(?:^|;)\s*(?:from\s+(\S+)|import\s+([^\n;#]+))"
    for match in re.finditer(import_pattern, code, re.MULTILINE):
        if match.group(1) is not None:
            targets = [match.group(1)]
        else:
            # Drop "as alias" parts and surrounding whitespace.
            targets = [
                piece.split()[0]
                for piece in match.group(2).split(",")
                if piece.strip()
            ]

        for target in targets:
            module = target.split('.')[0]  # Get base module

            if module in self.blocked_modules:
                return f"Import of '{module}' is not allowed for security reasons."

            # Check if it's trying to import something not in allowed list
            if module not in self.allowed_modules and not module.startswith('_'):
                # Give a helpful message about what is allowed. Sort before
                # slicing so the hint is stable across runs.
                preview = ', '.join(sorted(self.allowed_modules)[:10])
                remaining = max(len(self.allowed_modules) - 10, 0)
                return (f"Import of '{module}' is not allowed. "
                        f"Available modules include: {preview}... "
                        f"(and {remaining} more)")

    return None
|
|
116
|
+
|
|
117
|
+
def create_wrapper(self, input_data: str, user_script: str) -> str:
    """Build the source of the stand-alone script that runs a user transform.

    The generated script redirects stdout/stderr into in-memory log
    captures, executes ``user_script`` with restricted builtins, calls its
    ``transform`` function with the parsed ``input_data`` and writes
    ``{"result": ..., "logs": [...]}`` as JSON to the real stdout. On
    failure it writes a JSON error object to the real stderr and exits
    with status 1.

    Args:
        input_data: JSON text handed to ``transform`` after parsing.
        user_script: Source of the user script; must define ``transform``.

    Returns:
        Python source text. If ``check_for_dangerous_code`` reports a
        problem, the returned script only emits a ``SecurityError`` object
        and exits.
    """
    # Perform safety checks before creating wrapper
    safety_issue = self.check_for_dangerous_code(user_script)
    if safety_issue:
        # Serialise the error here and embed it as one escaped literal, so
        # quotes or backslashes in the message cannot break the script.
        error_payload = json.dumps({
            "error": f"Security violation: {safety_issue}",
            "type": "SecurityError",
            "stack": ["Security check failed", safety_issue],
        })
        error_literal = ascii(error_payload)
        # Return a wrapper that will immediately exit with the safety error
        return f'''
import sys

sys.stderr.write({error_literal})
sys.exit(1)
'''

    # Untrusted text is embedded as escaped, pure-ASCII string literals.
    # The previous r"""...""" splicing broke (or let code escape the
    # sandbox) whenever the text contained a triple quote or ended in '"'.
    input_literal = ascii(str(input_data))
    script_literal = ascii(str(user_script))
    # Sorted so the generated script is identical from run to run.
    allowed_literal = json.dumps(sorted(self.allowed_modules))
    blocked_literal = json.dumps(sorted(self.blocked_modules))

    # Template notes: '{{' / '}}' are literal braces in the generated code,
    # and backslashes are doubled so the generated text keeps a single one.
    return f'''
import json
import sys
import traceback
import io
import contextlib
from datetime import datetime
import time

# Import all allowed modules
import re
import math
import statistics
import collections
import itertools
import functools
import copy
import decimal
import csv
import dataclasses
import typing
import enum
import random
import uuid
import hashlib
import base64
import urllib
import urllib.parse
import html
import xml
import xml.etree.ElementTree
import string
import textwrap
import operator
import bisect
import heapq
import array
import unicodedata
import locale
import calendar
import struct
import binascii
import codecs
import difflib
import pprint
import reprlib
import abc
import contextlib as contextlib_module
import secrets
import fractions
import numbers

# Try to import zoneinfo (Python 3.9+)
try:
    import zoneinfo
except ImportError:
    zoneinfo = None

# Resource limiting
try:
    import resource
    import signal
    HAS_RESOURCE_LIMITS = True
except ImportError:
    HAS_RESOURCE_LIMITS = False

# Enhanced logging system
class LogCapture:
    def __init__(self):
        self.logs = []
        self.buffer = []

    def write(self, message):
        if message.strip():
            self.logs.append({{
                "type": "log",
                "message": message.strip(),
                "timestamp": time.time()
            }})
        return len(message)

    def flush(self):
        pass

class ErrorCapture:
    def __init__(self):
        self.logs = []

    def write(self, message):
        if message.strip():
            self.logs.append({{
                "type": "error",
                "message": message.strip(),
                "timestamp": time.time()
            }})
        return len(message)

    def flush(self):
        pass

# Create log captures
stdout_capture = LogCapture()
stderr_capture = ErrorCapture()

# Enhanced console object for better logging
class Console:
    def __init__(self, stdout, stderr):
        self.stdout = stdout
        self.stderr = stderr

    def log(self, *args, **kwargs):
        message = ' '.join(str(arg) for arg in args)
        self.stdout.write(message + '\\n')

    def error(self, *args, **kwargs):
        message = ' '.join(str(arg) for arg in args)
        self.stderr.write(message + '\\n')

    def warn(self, *args, **kwargs):
        message = ' '.join(str(arg) for arg in args)
        self.stderr.write(f"WARNING: {{message}}\\n")

    def info(self, *args, **kwargs):
        self.log(*args, **kwargs)

    def debug(self, *args, **kwargs):
        message = ' '.join(str(arg) for arg in args)
        self.stdout.write(f"DEBUG: {{message}}\\n")

    def table(self, data):
        # Simple table formatting
        if isinstance(data, dict):
            for k, v in data.items():
                self.log(f"{{k}}: {{v}}")
        elif isinstance(data, list):
            for item in data:
                self.log(str(item))
        else:
            self.log(str(data))

console = Console(stdout_capture, stderr_capture)

# Set resource limits if available
if HAS_RESOURCE_LIMITS:
    def set_resource_limits():
        # 10 seconds CPU time (more generous)
        resource.setrlimit(resource.RLIMIT_CPU, (10, 10))

        # 200MB memory limit
        memory_limit = 200 * 1024 * 1024 # 200MB in bytes
        resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))

        # Set timeout handler
        def timeout_handler(signum, frame):
            error_info = {{
                "error": "Execution timed out (10 seconds)",
                "type": "TimeoutError",
                "stack": ["Timeout handler triggered", "Maximum execution time exceeded"]
            }}
            # sys.stderr is redirected to the log capture by the time this
            # can fire, so report on the interpreter's real stderr.
            sys.__stderr__.write(json.dumps(error_info))
            sys.exit(1)

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(10) # 10 second timeout

    try:
        set_resource_limits()
    except Exception as e:
        # Continue if resource limiting fails (e.g., on Windows)
        pass

# Capture stdout and stderr
original_stdout = sys.stdout
original_stderr = sys.stderr
sys.stdout = stdout_capture
sys.stderr = stderr_capture

# Enhanced print function that goes to our logging
def print(*args, sep=' ', end='\\n', file=None, flush=False):
    output = sep.join(str(arg) for arg in args) + end
    if file is sys.stderr:
        stderr_capture.write(output)
    else:
        stdout_capture.write(output)

# Parse input data
input_data = None
try:
    input_data_raw = {input_literal}
    input_data = json.loads(input_data_raw)
except json.JSONDecodeError as e:
    error_info = {{
        "error": f"Failed to parse input data: {{e}}",
        "type": "JSONDecodeError",
        "stack": []
    }}
    original_stderr.write(json.dumps(error_info))
    sys.exit(1)

# Custom import hook for better error messages
class RestrictedImporter:
    def __init__(self, allowed_modules, blocked_modules):
        self.allowed_modules = allowed_modules
        self.blocked_modules = blocked_modules

    def __call__(self, name, globals=None, locals=None, fromlist=(), level=0):
        base_module = name.split('.')[0]

        if base_module in self.blocked_modules:
            raise ImportError(
                f"Import of '{{name}}' is blocked for security reasons. "
                f"This module can access system resources that are not allowed in the sandbox."
            )

        if base_module not in self.allowed_modules and not base_module.startswith('_'):
            available = sorted(list(self.allowed_modules))[:15]
            raise ImportError(
                f"Import of '{{name}}' is not in the allowed module list. "
                f"Available modules include: {{', '.join(available)}}... "
                f"({{len(self.allowed_modules)}} total modules available)"
            )

        return __import__(name, globals, locals, fromlist, level)

# Create restricted builtins
restricted_builtins = {{
    # Safe built-in functions
    "abs": abs, "all": all, "any": any, "ascii": ascii,
    "bin": bin, "bool": bool, "bytearray": bytearray, "bytes": bytes,
    "callable": callable, "chr": chr, "classmethod": classmethod,
    "complex": complex, "dict": dict, "dir": dir, "divmod": divmod,
    "enumerate": enumerate, "filter": filter, "float": float,
    "format": format, "frozenset": frozenset, "getattr": getattr,
    "hasattr": hasattr, "hash": hash, "hex": hex, "id": id,
    "int": int, "isinstance": isinstance, "issubclass": issubclass,
    "iter": iter, "len": len, "list": list, "map": map,
    "max": max, "memoryview": memoryview, "min": min, "next": next,
    "object": object, "oct": oct, "ord": ord, "pow": pow,
    "print": print, "property": property, "range": range,
    "repr": repr, "reversed": reversed, "round": round,
    "set": set, "slice": slice, "sorted": sorted,
    "staticmethod": staticmethod, "str": str, "sum": sum,
    "super": super, "tuple": tuple, "type": type, "zip": zip,

    # Exception types
    "BaseException": BaseException, "Exception": Exception,
    "ArithmeticError": ArithmeticError, "AssertionError": AssertionError,
    "AttributeError": AttributeError, "EOFError": EOFError,
    "ImportError": ImportError, "IndexError": IndexError,
    "KeyError": KeyError, "KeyboardInterrupt": KeyboardInterrupt,
    "LookupError": LookupError, "MemoryError": MemoryError,
    "NameError": NameError, "NotImplementedError": NotImplementedError,
    "OSError": OSError, "OverflowError": OverflowError,
    "RecursionError": RecursionError, "ReferenceError": ReferenceError,
    "RuntimeError": RuntimeError, "StopIteration": StopIteration,
    "SyntaxError": SyntaxError, "SystemError": SystemError,
    "TypeError": TypeError, "UnboundLocalError": UnboundLocalError,
    "UnicodeError": UnicodeError, "ValueError": ValueError,
    "ZeroDivisionError": ZeroDivisionError,

    # Restricted import function
    "__import__": RestrictedImporter(
        {allowed_literal},
        {blocked_literal}
    ),

    # Constants
    "True": True, "False": False, "None": None,
    "NotImplemented": NotImplemented, "Ellipsis": Ellipsis,
}}

# Create safe globals with pre-imported modules
safe_globals = {{
    "__builtins__": restricted_builtins,
    "console": console,

    # Pre-imported safe modules
    "json": json, "datetime": datetime, "time": time,
    "math": math, "statistics": statistics,
    "collections": collections, "itertools": itertools,
    "functools": functools, "re": re, "copy": copy,
    "decimal": decimal, "csv": csv, "io": io,
    "dataclasses": dataclasses, "typing": typing,
    "enum": enum, "random": random, "uuid": uuid,
    "hashlib": hashlib, "base64": base64,
    "urllib": urllib, "html": html, "xml": xml,
    "string": string, "textwrap": textwrap,
    "operator": operator, "bisect": bisect,
    "heapq": heapq, "array": array,
    "unicodedata": unicodedata, "locale": locale,
    "calendar": calendar, "struct": struct,
    "binascii": binascii, "codecs": codecs,
    "difflib": difflib, "pprint": pprint,
    "reprlib": reprlib, "abc": abc,
    "contextlib": contextlib_module,
    "secrets": secrets, "fractions": fractions,
    "numbers": numbers,
}}

if zoneinfo:
    safe_globals["zoneinfo"] = zoneinfo

# Create safe locals with input data
safe_locals = {{"input_data": input_data}}

# Helper function for better error formatting
def format_error_context(exc_type, exc_value, exc_tb):
    """Extract detailed context about an error"""
    # Uses the module-level "re" import; a function-local import here would
    # make "re" unbound in the branches that do not execute it.
    error_msg = str(exc_value)

    # Special handling for KeyError to provide more context
    if exc_type.__name__ == 'KeyError':
        # The key that wasn't found
        missing_key = str(exc_value).strip("'")

        # Try to get the code that caused the error
        tb_frames = list(traceback.extract_tb(exc_tb))
        if tb_frames:
            last_frame = tb_frames[-1]
            code_line = last_frame.line

            if code_line:
                # Look for dictionary access patterns to provide context
                # Check if it's a nested access - use simpler pattern
                if f"['{{missing_key}}']" in code_line:
                    # Extract the object being accessed
                    match = re.search(r'(\\w+(?:\\[[^\\]]+\\])*)\\[[^\\]]+\\]', code_line)
                    if match:
                        accessed_obj = match.group(1)
                        error_msg = f"KeyError: '{{missing_key}}' not found in {{accessed_obj}}"
                    else:
                        error_msg = f"KeyError: dictionary key '{{missing_key}}' not found"
                else:
                    error_msg = f"KeyError: dictionary key '{{missing_key}}' not found"

    elif exc_type.__name__ == 'AttributeError':
        # Try to extract what object and attribute were involved
        match = re.search(r"'(\\w+)' object has no attribute '(\\w+)'", error_msg)
        if match:
            obj_type, attr = match.groups()
            error_msg = f"AttributeError: '{{obj_type}}' object has no attribute '{{attr}}'"

    elif exc_type.__name__ == 'IndexError':
        # Enhance index errors
        if "list index out of range" in error_msg:
            tb_frames = list(traceback.extract_tb(exc_tb))
            if tb_frames:
                last_frame = tb_frames[-1]
                code_line = last_frame.line
                if code_line and '[' in code_line:
                    # Try to extract what list was being accessed
                    match = re.search(r"(\\w+)\\[", code_line)
                    if match:
                        list_name = match.group(1)
                        error_msg = f"IndexError: list index out of range when accessing {{list_name}}"

    elif exc_type.__name__ == 'TypeError':
        # Keep TypeError messages as they usually have good context
        pass

    return error_msg

# Execute user script
transform_fn = None
execution_start = time.time()

try:
    # Compile the user script
    compiled_code = compile({script_literal}, "<user_script>", "exec")

    # Execute in restricted environment
    exec(compiled_code, safe_globals, safe_locals)

    # Validate transform function
    if 'transform' not in safe_locals:
        raise NameError(
            "No 'transform' function defined. Your script must define a function "
            "named 'transform' that takes the input data as its parameter."
        )

    transform_fn = safe_locals['transform']

    if not callable(transform_fn):
        raise TypeError(
            f"'transform' must be a function, but got {{type(transform_fn).__name__}}. "
            "Make sure you define it as: def transform(data): ..."
        )

    # Check function signature (basic check)
    import inspect
    try:
        sig = inspect.signature(transform_fn)
        param_count = len(sig.parameters)
        if param_count != 1:
            raise TypeError(
                f"'transform' function must accept exactly 1 parameter, "
                f"but it accepts {{param_count}}. "
                f"Define it as: def transform(data): ..."
            )
    except Exception:
        # If signature inspection fails, continue anyway
        pass

except SyntaxError as e:
    # Enhanced syntax error reporting
    error_info = {{
        "error": f"SyntaxError: {{e.msg}}",
        "type": "SyntaxError",
        "stack": [],
        "traceback": [{{
            "file": "user_script.py",
            "line": e.lineno,
            "function": "<module>",
            "code": e.text.strip() if e.text else ""
        }}]
    }}
    original_stderr.write(json.dumps(error_info))
    sys.exit(1)

except Exception as e:
    # Extract detailed traceback information
    import traceback as tb_module
    tb_info = []

    # Get the traceback
    exc_type, exc_value, exc_tb = sys.exc_info()

    # Format traceback with more details
    for frame in tb_module.extract_tb(exc_tb):
        if '<user_script>' in frame.filename or '<string>' in frame.filename:
            tb_info.append({{
                "file": "user_script.py",
                "line": frame.lineno,
                "function": frame.name,
                "code": frame.line.strip() if frame.line else ""
            }})

    error_info = {{
        "error": str(e),
        "type": type(e).__name__,
        "traceback": tb_info,
        "stack": []
    }}

    original_stderr.write(json.dumps(error_info))
    sys.exit(1)

# Execute transform function
transform_start = time.time()
result = None

try:
    # Cancel alarm if set
    if HAS_RESOURCE_LIMITS:
        signal.alarm(0)

    # Call transform with input data
    result = transform_fn(input_data)

    # Validate result is JSON serializable
    try:
        json.dumps(result)
    except (TypeError, ValueError) as e:
        # Try to find what's not serializable
        def find_non_serializable(obj, path="result"):
            if isinstance(obj, (str, int, float, bool, type(None))):
                return None
            elif isinstance(obj, dict):
                for k, v in obj.items():
                    issue = find_non_serializable(v, f"{{path}}[{{repr(k)}}]")
                    if issue:
                        return issue
            elif isinstance(obj, (list, tuple)):
                for i, v in enumerate(obj):
                    issue = find_non_serializable(v, f"{{path}}[{{i}}]")
                    if issue:
                        return issue
            else:
                return f"{{path}} contains non-serializable type: {{type(obj).__name__}}"

        issue = find_non_serializable(result)
        raise TypeError(
            f"Transform result is not JSON serializable. {{issue or str(e)}}. "
            "Make sure your transform function returns only JSON-compatible types "
            "(dict, list, str, int, float, bool, None)."
        )

except Exception as e:
    # Get detailed error information
    import traceback as tb_module
    exc_type, exc_value, exc_tb = sys.exc_info()

    # Get the original exception if this is a wrapped exception
    original_error = str(e)
    error_context = ""

    # If this is a wrapped ValueError, try to get the original error
    if isinstance(e, ValueError) and "Failed to process:" in str(e):
        # Use exception chaining to get the cause
        if hasattr(e, '__context__') and e.__context__:
            # Get the original exception that was caught
            orig_exc = e.__context__
            orig_type = type(orig_exc).__name__

            # Format the original error with context
            enhanced_msg = format_error_context(type(orig_exc), orig_exc, orig_exc.__traceback__)
            original_error = f"Failed to process: {{enhanced_msg}}"

    # Build traceback info focusing on user code
    tb_info = []
    for frame in tb_module.extract_tb(exc_tb):
        if '<user_script>' in frame.filename or '<string>' in frame.filename or 'transform' in frame.name:
            tb_info.append({{
                "file": "user_script.py",
                "line": frame.lineno,
                "function": frame.name,
                "code": frame.line.strip() if frame.line else ""
            }})

    stack_trace = []

    # First line: Error type and message (like JS)
    stack_trace.append(f"{{type(e).__name__}}: {{original_error}}")

    # Add traceback entries
    for tb_entry in tb_info:
        stack_trace.append(f" at {{tb_entry['function']}} ({{tb_entry['file']}}:{{tb_entry['line']}})")

    error_info = {{
        "error": original_error,
        "type": type(e).__name__,
        "traceback": tb_info,
        "stack": stack_trace
    }}

    original_stderr.write(json.dumps(error_info))
    sys.exit(1)

before_write = {{
    "result": result,
    "logs": stdout_capture.logs + stderr_capture.logs
}}

stdout_capture.write(f"Writing output: {{json.dumps(before_write, indent=2)}}")

# Prepare final output
output = {{
    "result": result,
    "logs": stdout_capture.logs + stderr_capture.logs
}}

# Write successful result
original_stdout.write(json.dumps(output))
'''
|
|
692
|
+
|
|
693
|
+
def execute(self, program_file: str, input_data: str) -> Tuple[Union[Dict, str], Optional[str]]:
    """Run a user transform script in a child interpreter and collect its result.

    The script is read from ``program_file``, checked with
    ``check_for_dangerous_code``, wrapped via ``create_wrapper`` and run
    with ``sys.executable -I`` inside a temporary directory, with a
    10 second wall-clock limit.

    Args:
        program_file: Path of the user script (read as UTF-8).
        input_data: JSON text passed to the script's ``transform`` function.

    Returns:
        ``(result, None)`` on success, where ``result`` is the value under
        the ``"result"`` key of the child's JSON output. On any failure,
        ``(error_dict, message)`` where ``error_dict`` contains at least
        ``"lam.error"``. Exceptions raised here are caught and reported the
        same way rather than propagated.
    """
    logger.info("Executing Python script: %s", program_file)
    # NOTE(review): `stats` is never read in this method; the call is kept
    # in case constructing Stats has side effects - confirm and remove.
    stats = Stats()

    try:
        check_resource_limits(self.modules_dir)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_dir = Path(temp_dir)

            # Read user script. Python source is UTF-8 by default, so do
            # not depend on the locale's preferred encoding.
            with open(program_file, 'r', encoding='utf-8') as f:
                user_script = f.read()
            logger.debug("Loaded user Python script: %d characters", len(user_script))

            # Check for dangerous code
            safety_issue = self.check_for_dangerous_code(user_script)
            if safety_issue:
                logger.warning("Security violation detected in script: %s", safety_issue)
                return {
                    "lam.error": f"Security violation: {safety_issue}",
                    "type": "SecurityError"
                }, f"Security violation: {safety_issue}"

            # Create wrapper script with enhanced error handling
            wrapper = self.create_wrapper(input_data, user_script)
            script_path = temp_dir / "script.py"
            # The child interpreter decodes the file as UTF-8.
            with open(script_path, 'w', encoding='utf-8') as f:
                f.write(wrapper)
            logger.debug("Generated Python wrapper script: %s", script_path)

            # Execute with Python in isolated environment. The context
            # manager closes the pipes and waits for the child on exit.
            with subprocess.Popen(
                [
                    sys.executable,
                    "-I",  # Isolated mode
                    str(script_path)
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                cwd=temp_dir,
                env={"PATH": os.environ.get("PATH", "")}
            ) as process:
                logger.info("Started Python process PID %d", process.pid)

                try:
                    output, error = process.communicate(timeout=10)
                    logger.debug("Process completed with code %d", process.returncode)
                except subprocess.TimeoutExpired:
                    logger.warning("Process timeout after 10 seconds")
                    process.kill()
                    # Reap the killed child and drain its pipes so no
                    # zombie process or open descriptor is left behind.
                    process.communicate()
                    return {
                        "lam.error": "Script execution timed out",
                        "type": "TimeoutError"
                    }, "Execution timed out after 10 seconds"

            # Handle process errors with enhanced logging
            if process.returncode != 0:
                try:
                    # Try to parse structured error from stderr
                    error_data = json.loads(error.strip())
                    error_msg = error_data.get('error', 'Unknown error')
                    error_type = error_data.get('type', 'Error')

                    # Format error response similar to JS engine
                    error_details = {
                        "lam.error": error_msg,
                    }

                    # Use the pre-formatted stack trace if available
                    if 'stack' in error_data and error_data['stack']:
                        error_details["stack_trace"] = error_data['stack']
                    else:
                        # Fallback: format stack trace similar to JS
                        stack_lines = []

                        # Add main error line
                        stack_lines.append(f"{error_type}: {error_msg}")

                        # Add traceback entries in JS style
                        if 'traceback' in error_data:
                            for tb in error_data['traceback']:
                                file = tb.get('file', 'unknown')
                                line = tb.get('line', 0)
                                func = tb.get('function', '<module>')
                                stack_lines.append(f" at {func} ({file}:{line})")

                        error_details["stack_trace"] = stack_lines

                    # Log the error details
                    logger.error("Python execution error: %s (%s)", error_msg, error_type)
                    if 'traceback' in error_data:
                        for tb_entry in error_data['traceback']:
                            # %s rather than %d: the line can be None
                            # (e.g. a SyntaxError without a line number).
                            logger.error(" at %s:%s in %s()",
                                         tb_entry.get('file', 'unknown'),
                                         tb_entry.get('line', 0),
                                         tb_entry.get('function', 'unknown'))

                    return error_details, error_msg

                except json.JSONDecodeError:
                    # Fallback to raw error output
                    error_msg = error.strip() or "Unknown error"
                    logger.error("Failed to parse error JSON, raw error: %s", error_msg)
                    return {"lam.error": error_msg}, error_msg

            # Handle successful output
            try:
                output_data = json.loads(output)

                # Process Python logs
                if 'logs' in output_data:
                    for log_entry in output_data.get('logs', []):
                        log_type = log_entry.get('type', 'log')
                        message = log_entry.get('message', '')

                        # The wrapper announces its payload with
                        # "Writing output:"; "Writing result:" is accepted
                        # too. Both are routed to debug.
                        if message.startswith(('Writing result:', 'Writing output:')):
                            logger.debug("[Python] %s", message)
                        elif log_type == 'error':
                            logger.error("[Python] %s", message)
                        elif 'WARNING:' in message:
                            logger.warning("[Python] %s", message)
                        elif 'DEBUG:' in message:
                            logger.debug("[Python] %s", message)
                        else:
                            logger.info("[Python] %s", message)

                result = output_data.get('result', {})
                return result, None

            except json.JSONDecodeError as e:
                logger.error("Failed to parse output as JSON: %s", str(e))
                logger.error("Raw output: %s", output[:500])  # Log first 500 chars
                return {
                    "lam.error": "Invalid JSON output from transform",
                    "parse_error": str(e),
                    "raw_output": output.strip()[:1000]  # Include some of the output
                }, "Output format error"

    except Exception as e:
        logger.exception("Python engine execution failed")
        return {
            "lam.error": str(e),
            "type": e.__class__.__name__
        }, str(e)
|