lam-cli 0.1.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lam/lam.py CHANGED
@@ -2,874 +2,61 @@
2
2
 
3
3
  import json
4
4
  import logging
5
- import os
6
- import re
7
- import shutil
8
- import socket
9
- import subprocess
10
- import sys
11
- import tempfile
12
- from datetime import datetime, timezone
13
- from enum import Enum
14
- from pathlib import Path
15
- from typing import Any, Dict, Optional, Tuple, Union
16
5
 
17
6
  import click
18
- import psutil
19
- from logtail import LogtailHandler
20
- from posthog import Posthog
21
7
 
22
- # Initialize analytics and logging
23
- posthog = Posthog(project_api_key='phc_wfeHFG0p5yZIdBpjVYy00o5x1HbEpggdMzIuFYgNPSK',
24
- host='https://app.posthog.com')
8
+ from .core import ProcessingError, Stats, setup_logging
9
+ from .engines import get_engine
10
+ from .engines.javascript import BunEngine
11
+ from .engines.jq import JQEngine
12
+ from .engines.python import PythonEngine
13
+ from .utils import process_input
25
14
 
26
- # Configure logging with UTC timezone
27
- logging.Formatter.converter = lambda *args: datetime.now(timezone.utc).timetuple()
28
- logging.basicConfig(level=logging.DEBUG,
29
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
30
- logger = logging.getLogger(__name__)
31
- logging.getLogger('urllib3').setLevel(logging.WARNING) # Suppress urllib3 logs
15
+ # Set up logging once at module level
16
+ logger = setup_logging()
32
17
 
33
- handler = LogtailHandler(source_token="TYz3WrrvC8ehYjXdAEGGyiDp")
34
- logger.addHandler(handler)
35
18
 
36
- class LAMError(Exception):
37
- """Base exception for LAM errors"""
38
- pass
39
-
40
- class UserError(LAMError):
41
- """Errors caused by user input"""
42
- pass
43
-
44
- class SystemError(LAMError):
45
- """Errors caused by system issues"""
46
- pass
47
-
48
- class ResourceLimitError(LAMError):
49
- """Errors caused by resource limits"""
19
+ @click.group()
20
+ def lam():
21
+ """LAM - Laminar Data Transformation Tool"""
50
22
  pass
51
23
 
52
- def check_resource_limits(modules_dir: Optional[Path] = None) -> None:
53
- """Check system resource availability"""
54
- logger.debug("Checking system resource limits")
55
- disk = shutil.disk_usage(tempfile.gettempdir())
56
- if disk.free < 100 * 1024 * 1024: # 100MB minimum
57
- logger.critical("Insufficient disk space: %dMB free", disk.free // (1024*1024))
58
- raise ResourceLimitError("Insufficient disk space")
59
-
60
- if modules_dir and modules_dir.exists():
61
- modules_size = sum(
62
- os.path.getsize(os.path.join(dirpath, filename))
63
- for dirpath, _, filenames in os.walk(modules_dir)
64
- for filename in filenames
65
- )
66
- if modules_size > 500 * 1024 * 1024:
67
- logger.warning("Cleaning oversized modules directory (%dMB)", modules_size//(1024*1024))
68
- shutil.rmtree(modules_dir)
69
- modules_dir.mkdir(exist_ok=True)
70
24
 
71
- class Stats:
72
- """Track execution statistics"""
73
- def __init__(self):
74
- self.start_time = datetime.now()
75
- self.memory_start = self.get_memory_usage()
76
-
77
- def get_memory_usage(self):
78
- process = psutil.Process()
79
- return process.memory_info().rss
25
+ @lam.command()
26
+ def initialize():
27
+ """Initialize shared modules for supported engines."""
28
+ click.echo("Starting LAM initialization...")
80
29
 
81
- def finalize(self):
82
- return {
83
- 'duration_ms': (datetime.now() - self.start_time).total_seconds() * 1000,
84
- 'memory_used_mb': (self.get_memory_usage() - self.memory_start) / (1024 * 1024),
85
- 'timestamp': datetime.now().isoformat()
86
- }
87
-
88
- class EngineType(Enum):
89
- JQ = "jq"
90
- JAVASCRIPT = "js"
91
- PYTHON = "py"
92
-
93
- class ProcessingError(Exception):
94
- """Custom exception for processing errors"""
95
- pass
96
-
97
- class Engine:
98
- """Base class for execution engines"""
99
- def __init__(self, workspace_id: str, flow_id: str, execution_id: str):
100
- self.workspace_id = workspace_id
101
- self.flow_id = flow_id
102
- self.execution_id = execution_id
103
- self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
30
+ engine_classes = [BunEngine, PythonEngine, JQEngine]
104
31
 
105
- def get_log_file(self) -> str:
106
- return f"lam_run_{self.workspace_id}_{self.flow_id}_{self.execution_id}_{self.timestamp}.log"
32
+ # Define placeholder IDs for engine instantiation during initialization
33
+ init_workspace_id = "lam_init_workspace"
34
+ init_flow_id = "lam_init_flow"
35
+ init_execution_id = "lam_init_execution"
107
36
 
108
- def get_result_file(self) -> str:
109
- return f"lam_result_{self.workspace_id}_{self.flow_id}_{self.execution_id}_{self.timestamp}.json"
110
-
111
- def track_event(self, event_name: str, properties: Dict[str, Any]) -> None:
112
- """Track events with PostHog"""
113
- try:
114
- distinct_id = f"{os.getuid()}_{socket.gethostname()}_{self.workspace_id}_{self.flow_id}"
115
- properties |= {
116
- 'workspace_id': self.workspace_id,
117
- 'flow_id': self.flow_id,
118
- 'engine': self.__class__.__name__,
119
- }
120
- posthog.capture(distinct_id=distinct_id, event=event_name, properties=properties)
121
- except Exception as e:
122
- logger.error(f"Error tracking event: {e}")
123
-
124
- class JQEngine(Engine):
125
- """JQ execution engine"""
126
- def validate_environment(self) -> bool:
127
- logger.debug("Validating JQ environment")
128
- return shutil.which("jq") is not None
129
-
130
- def execute(self, program_file: str, input_data: str) -> Tuple[Union[Dict, str], Optional[str]]:
131
- logger.info(f"Executing JQ script: {program_file}")
132
-
37
+ for engine_class in engine_classes:
38
+ engine_name = engine_class.__name__
39
+ click.echo(f"Checking {engine_name} for shared module setup...")
133
40
  try:
134
- with open(program_file, 'r') as file:
135
- jq_script = ''.join(line for line in file if not line.strip().startswith('#'))
136
- logger.debug("Loaded JQ script: %d characters", len(jq_script))
137
-
138
- process = subprocess.Popen(
139
- ["jq", "-c", jq_script],
140
- stdin=subprocess.PIPE,
141
- stdout=subprocess.PIPE,
142
- stderr=subprocess.PIPE,
143
- text=True
41
+ # Instantiate engine to access instance methods like _setup_shared_modules
42
+ engine_instance = engine_class(
43
+ workspace_id=init_workspace_id,
44
+ flow_id=init_flow_id,
45
+ execution_id=init_execution_id
144
46
  )
145
47
 
146
- logger.debug("Starting JQ process PID %d", process.pid)
147
- output, error = process.communicate(input=input_data)
148
-
149
- if error:
150
- logger.error("JQ error output: %s", error.strip())
151
- raise ProcessingError(error)
152
-
153
- # Handle output
154
- try:
155
- output_lines = [line.strip() for line in output.splitlines() if line.strip()]
156
- logger.debug(f"Found {len(output_lines)} JSON objects in output")
157
-
158
- if len(output_lines) > 1:
159
- parsed = [json.loads(line) for line in output_lines]
160
- logger.info(f"Processed {len(parsed)} JSON objects")
161
- return {"lam.result": parsed}, None
162
- elif len(output_lines) == 1:
163
- result = json.loads(output_lines[0])
164
- logger.info("Processed single JSON object")
165
- return result, None
166
- else:
167
- logger.info("No JSON objects in output")
168
- return {"lam.error": "No JSON objects in output"}, "No JSON objects in output"
169
-
170
- except json.JSONDecodeError as e:
171
- return {"lam.result": output}, None
172
-
48
+ if hasattr(engine_instance, '_setup_shared_modules') and callable(getattr(engine_instance, '_setup_shared_modules')):
49
+ click.echo(f"Running _setup_shared_modules for {engine_name}...")
50
+ getattr(engine_instance, '_setup_shared_modules')()
51
+ click.echo(f"Successfully initialized shared modules for {engine_name}.")
52
+ else:
53
+ click.echo(f"{engine_name} does not have a _setup_shared_modules method or it's not callable.")
173
54
  except Exception as e:
174
- logger.exception("JQ execution failed")
175
- self.track_event('lam.jq.error', {'error': str(e)})
176
- return {"lam.error": str(e)}, str(e)
177
-
178
- class BunEngine(Engine):
179
- """Bun JavaScript execution engine with enhanced logging"""
180
- def __init__(self, *args, **kwargs):
181
- super().__init__(*args, **kwargs)
182
- self.modules_dir = Path(tempfile.gettempdir()) / "lam_modules"
183
- self.modules_dir.mkdir(exist_ok=True)
184
- self._setup_shared_modules()
185
-
186
- self.runtime_template = '''
187
- const logs = [];
188
- const originalLog = console.log;
189
- const originalError = console.error;
190
- const originalWarn = console.warn;
191
-
192
- console.log = (...args) => logs.push({ type: 'log', message: args.map(String).join(' ') });
193
- console.error = (...args) => {
194
- originalError(...args); // Keep error output for debugging
195
- logs.push({ type: 'error', message: args.map(String).join(' ') });
196
- };
197
- console.warn = (...args) => logs.push({ type: 'warn', message: args.map(String).join(' ') });
198
-
199
- // Keep original stdout for result output
200
- const writeResult = (obj) => {
201
- console.error("Writing result:", JSON.stringify(obj, null, 2));
202
- originalLog(JSON.stringify(obj));
203
- };
204
-
205
- const _ = require('lodash');
206
- const { format, parseISO } = require('date-fns');
207
-
208
- module.exports = {
209
- _,
210
- format,
211
- parseISO,
212
- logs,
213
- writeResult
214
- };
215
- '''
216
-
217
- def _setup_shared_modules(self):
218
- """Setup shared node_modules once"""
219
- if not (self.modules_dir / "node_modules").exists():
220
- logger.info("Initializing shared modules directory")
221
- package_json = {
222
- "dependencies": {
223
- "lodash": "^4.17.21",
224
- "date-fns": "^2.30.0"
225
- }
226
- }
227
- with open(self.modules_dir / "package.json", "w") as f:
228
- json.dump(package_json, f, indent=2)
229
-
230
- try:
231
- logger.debug("Installing shared dependencies")
232
- result = subprocess.run(
233
- [self.get_bun_path(), "install"],
234
- cwd=self.modules_dir,
235
- check=True,
236
- capture_output=True,
237
- text=True,
238
- timeout=30
239
- )
240
- logger.debug("Dependency install output: %s", result.stdout)
241
- except subprocess.CalledProcessError as e:
242
- logger.error("Dependency install failed: %s", e.stderr)
243
- raise ProcessingError(f"Environment setup failed: {e.stderr}") from e
55
+ click.echo(f"Error during initialization of {engine_name}: {e}", err=True)
56
+ logger.error(f"Initialization error for {engine_name}", exc_info=True)
244
57
 
245
- def create_wrapper(self, input_data: str, user_script: str) -> str:
246
- """Create the wrapper script with proper escaping"""
247
- return f'''
248
- const {{ _, format, parseISO, logs, writeResult }} = require('./runtime.js');
58
+ click.echo("LAM initialization complete.")
249
59
 
250
- // Utility function to handle circular references in JSON.stringify
251
- function safeStringify(obj) {{
252
- const seen = new WeakSet();
253
- return JSON.stringify(obj, (key, value) => {{
254
- if (typeof value === 'object' && value !== null) {{
255
- if (seen.has(value)) {{
256
- return '[Circular Reference]';
257
- }}
258
- seen.add(value);
259
- }}
260
- return value;
261
- }}, 2);
262
- }}
263
-
264
- // Validate transform function
265
- function validateTransform(fn) {{
266
- if (typeof fn !== 'function') {{
267
- throw new Error('Transform must be a function');
268
- }}
269
- if (fn.length !== 1) {{
270
- throw new Error('Transform function must accept exactly one argument (input)');
271
- }}
272
- }}
273
-
274
- // Execute transform immediately
275
- try {{
276
- // Parse input safely
277
- let input;
278
- try {{
279
- input = JSON.parse({json.dumps(input_data)});
280
- }} catch (e) {{
281
- throw new Error(`Failed to parse input data: ${{e.message}}`);
282
- }}
283
-
284
- // Get transform function
285
- let transform;
286
- try {{
287
- transform = {user_script};
288
- }} catch (e) {{
289
- throw new Error(`Failed to parse transform function: ${{e.message}}`);
290
- }}
291
-
292
- // Validate transform
293
- validateTransform(transform);
294
-
295
- // Execute transform
296
- const result = transform(input);
297
-
298
- // Output result after transform
299
- writeResult({{
300
- result,
301
- logs
302
- }});
303
- }} catch (error) {{
304
- console.error(JSON.stringify({{
305
- error: error.message,
306
- stack: error.stack?.split('\\n') || [],
307
- type: error.constructor.name
308
- }}));
309
- process.exit(1);
310
- }}
311
- '''
312
-
313
- def setup_environment(self, temp_dir: Path) -> None:
314
- """Set up the JavaScript environment with runtime"""
315
- # Write runtime file only
316
- runtime_path = temp_dir / "runtime.js"
317
- with open(runtime_path, "w") as f:
318
- f.write(self.runtime_template)
319
- logger.debug("Runtime file written to: %s", runtime_path)
320
-
321
- # Symlink node_modules from shared directory
322
- os.symlink(self.modules_dir / "node_modules", temp_dir / "node_modules")
323
- logger.debug("node_modules symlinked from: %s", self.modules_dir / "node_modules")
324
-
325
- def validate_environment(self) -> bool:
326
- # Check multiple locations for bun
327
- possible_locations = [
328
- "bun", # System PATH
329
- os.path.join(os.path.dirname(sys.executable), "bun"), # venv/bin
330
- os.path.join(os.path.dirname(os.path.dirname(sys.executable)), "bin", "bun") # venv/bin (alternative)
331
- ]
332
-
333
- return any(shutil.which(loc) is not None for loc in possible_locations)
334
-
335
- def get_bun_path(self) -> str:
336
- """Get the appropriate bun executable path"""
337
- possible_locations = [
338
- "bun",
339
- os.path.join(os.path.dirname(sys.executable), "bun"),
340
- os.path.join(os.path.dirname(os.path.dirname(sys.executable)), "bin", "bun")
341
- ]
342
-
343
- for loc in possible_locations:
344
- if shutil.which(loc):
345
- return shutil.which(loc)
346
-
347
- raise EnvironmentError("Bun not found in environment")
348
-
349
- def execute(self, program_file: str, input_data: str) -> Tuple[Union[Dict, str], Optional[str]]:
350
- logger.info(f"Executing Bun script: {program_file}")
351
- stats = Stats()
352
-
353
- try:
354
- check_resource_limits(self.modules_dir)
355
-
356
- with tempfile.TemporaryDirectory() as temp_dir:
357
- temp_dir = Path(temp_dir)
358
- self.setup_environment(temp_dir)
359
-
360
- # Read user script
361
- with open(program_file, 'r') as f:
362
- user_script = f.read()
363
- logger.debug("Loaded user script: %d characters", len(user_script))
364
-
365
- # Create wrapper script
366
- wrapper = self.create_wrapper(input_data, user_script)
367
- script_path = temp_dir / "script.js"
368
- with open(script_path, 'w') as f:
369
- f.write(wrapper)
370
- logger.debug("Generated wrapper script: %s", script_path)
371
-
372
- # Execute with Bun
373
- process = subprocess.Popen(
374
- [
375
- self.get_bun_path(),
376
- "run",
377
- "--no-fetch",
378
- "--smol",
379
- "--silent",
380
- str(script_path)
381
- ],
382
- stdout=subprocess.PIPE,
383
- stderr=subprocess.PIPE,
384
- text=True,
385
- cwd=temp_dir
386
- )
387
- logger.info("Started Bun process PID %d", process.pid)
388
-
389
- try:
390
- output, error = process.communicate(timeout=5)
391
- logger.debug("Process completed with code %d", process.returncode)
392
- except subprocess.TimeoutExpired as e:
393
- logger.warning("Process timeout after 5 seconds")
394
- process.kill()
395
- return {"lam.error": "Script execution timed out"}, "Execution timed out after 5 seconds"
396
-
397
- # Handle process errors
398
- if process.returncode != 0:
399
- try:
400
- # Try to parse structured error from stderr
401
- error_data = json.loads(error.strip())
402
- error_msg = error_data.get('error', 'Unknown error')
403
- stack = error_data.get('stack', [])
404
-
405
- # Format error message
406
- error_details = {
407
- "lam.error": error_msg,
408
- "stack_trace": stack
409
- }
410
- return error_details, error_msg
411
-
412
- except json.JSONDecodeError:
413
- # Fallback to raw error output
414
- error_msg = error.strip() or "Unknown error"
415
- return {"lam.error": error_msg}, error_msg
416
-
417
- # Handle successful output
418
- try:
419
- output_data = json.loads(output)
420
-
421
- # Process JavaScript logs (if any)
422
- if 'logs' in output_data:
423
- for log_entry in output_data.get('logs', []):
424
- if log_entry['type'] == 'error':
425
- logger.error("[JS] %s", log_entry['message'])
426
- else:
427
- logger.debug("[JS] %s", log_entry['message'])
428
-
429
- result = output_data.get('result', {})
430
- return result, None
431
-
432
- except json.JSONDecodeError as e:
433
- logger.error("Failed to parse output: %s", str(e))
434
- return {
435
- "lam.error": "Invalid JSON output",
436
- "raw_output": output.strip()
437
- }, "Output format error"
438
-
439
- except Exception as e:
440
- logger.exception("Execution failed")
441
- return {
442
- "lam.error": str(e),
443
- "type": e.__class__.__name__
444
- }, str(e)
445
-
446
- class PythonEngine(Engine):
447
- """Python execution engine with improved sandboxing for security"""
448
- def __init__(self, *args, **kwargs):
449
- super().__init__(*args, **kwargs)
450
- self.modules_dir = Path(tempfile.gettempdir()) / "lam_python_modules"
451
- self.modules_dir.mkdir(exist_ok=True)
452
- # Define allowed modules that can be safely imported
453
- self.allowed_modules = {
454
- "json", "datetime", "math", "statistics", "collections",
455
- "itertools", "functools", "re", "copy", "decimal",
456
- "csv", "io", "dataclasses", "typing", "enum"
457
- }
458
-
459
- def validate_environment(self) -> bool:
460
- logger.debug("Validating Python environment")
461
- return sys.executable is not None
462
-
463
- def create_safe_globals(self) -> dict:
464
- """Create a restricted globals dictionary for safer execution"""
465
- safe_globals = {
466
- "__builtins__": {
467
- # Safe builtins only
468
- "abs": abs, "all": all, "any": any, "bool": bool,
469
- "chr": chr, "dict": dict, "dir": dir, "divmod": divmod,
470
- "enumerate": enumerate, "filter": filter, "float": float,
471
- "format": format, "frozenset": frozenset, "hash": hash,
472
- "hex": hex, "int": int, "isinstance": isinstance,
473
- "issubclass": issubclass, "iter": iter, "len": len,
474
- "list": list, "map": map, "max": max, "min": min,
475
- "next": next, "oct": oct, "ord": ord, "pow": pow,
476
- "print": print, "range": range, "repr": repr,
477
- "reversed": reversed, "round": round, "set": set,
478
- "slice": slice, "sorted": sorted, "str": str, "sum": sum,
479
- "tuple": tuple, "type": type, "zip": zip,
480
- # Add Exception types for error handling
481
- "Exception": Exception, "ValueError": ValueError,
482
- "TypeError": TypeError, "KeyError": KeyError,
483
- "IndexError": IndexError
484
- },
485
- # Pre-import safe modules
486
- "json": json,
487
- "datetime": datetime,
488
- "math": __import__("math"),
489
- "statistics": __import__("statistics"),
490
- "collections": __import__("collections"),
491
- "itertools": __import__("itertools"),
492
- "functools": __import__("functools"),
493
- "re": __import__("re")
494
- }
495
- return safe_globals
496
-
497
- def check_for_dangerous_code(self, code: str) -> Optional[str]:
498
- """Check for potentially dangerous patterns in the code"""
499
- dangerous_patterns = [
500
- (r"__import__\s*\(", "Use of __import__ is not allowed"),
501
- (r"eval\s*\(", "Use of eval() is not allowed"),
502
- (r"exec\s*\(", "Use of exec() is not allowed"),
503
- (r"globals\s*\(", "Access to globals() is not allowed"),
504
- (r"locals\s*\(", "Access to locals() is not allowed"),
505
- (r"getattr\s*\(", "Use of getattr() is not allowed"),
506
- (r"setattr\s*\(", "Use of setattr() is not allowed"),
507
- (r"delattr\s*\(", "Use of delattr() is not allowed"),
508
- (r"compile\s*\(", "Use of compile() is not allowed"),
509
- (r"open\s*\(", "Use of open() is not allowed"),
510
- (r"__subclasses__", "Access to __subclasses__ is not allowed"),
511
- (r"subprocess", "Access to subprocess module is not allowed"),
512
- (r"sys\.", "Access to sys module is not allowed"),
513
- (r"os\.", "Access to os module is not allowed"),
514
- (r"shutil", "Access to shutil module is not allowed"),
515
- (r"pathlib", "Access to pathlib module is not allowed"),
516
- (r"importlib", "Access to importlib module is not allowed"),
517
- (r"builtins", "Access to builtins module is not allowed"),
518
- (r"_thread", "Access to _thread module is not allowed"),
519
- (r"ctypes", "Access to ctypes module is not allowed"),
520
- (r"socket", "Access to socket module is not allowed"),
521
- (r"pickle", "Access to pickle module is not allowed"),
522
- (r"multiprocessing", "Access to multiprocessing module is not allowed"),
523
- (r"__\w+__", "Access to dunder attributes may not be allowed")
524
- ]
525
-
526
- for pattern, message in dangerous_patterns:
527
- if re.search(pattern, code):
528
- return message
529
-
530
- # Check for imports outside of allowed modules
531
- import_pattern = r"import\s+(\w+)|from\s+(\w+)\s+import"
532
- for match in re.finditer(import_pattern, code):
533
- module = match.group(1) or match.group(2)
534
- if module and module not in self.allowed_modules:
535
- return f"Import of '{module}' is not allowed, only these modules are permitted: {', '.join(sorted(self.allowed_modules))}"
536
-
537
- return None
538
-
539
- def create_wrapper(self, input_data: str, user_script: str) -> str:
540
- """Create the wrapper script with proper escaping and sandboxing"""
541
- # Perform safety checks before creating wrapper
542
- safety_issue = self.check_for_dangerous_code(user_script)
543
- if safety_issue:
544
- # Return a wrapper that will immediately exit with the safety error
545
- return f'''
546
- import json
547
- import sys
548
-
549
- sys.stdout.write(json.dumps({{
550
- "error": "Security violation detected: {safety_issue}",
551
- "stack": []
552
- }}))
553
- sys.exit(1)
554
- '''
555
-
556
- return f'''
557
- import json
558
- import sys
559
- import traceback
560
- from datetime import datetime
561
- import re
562
- import math
563
- import statistics
564
- import collections
565
- import itertools
566
- import functools
567
-
568
- # Resource limiting
569
- import resource
570
- import signal
571
-
572
- # Set resource limits
573
- def set_resource_limits():
574
- # 5 seconds CPU time
575
- resource.setrlimit(resource.RLIMIT_CPU, (5, 5))
576
-
577
- # 100MB memory limit
578
- memory_limit = 100 * 1024 * 1024 # 100MB in bytes
579
- resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))
580
-
581
- # Set timeout handler
582
- def timeout_handler(signum, frame):
583
- sys.stderr.write(json.dumps({{
584
- "error": "Execution timed out (5 seconds)",
585
- "stack": []
586
- }}))
587
- sys.exit(1)
588
-
589
- signal.signal(signal.SIGALRM, timeout_handler)
590
- signal.alarm(5) # 5 second timeout
591
-
592
- try:
593
- set_resource_limits()
594
- except Exception as e:
595
- # Continue if resource limiting is not available (e.g., on Windows)
596
- pass
597
-
598
- # Setup basic logging
599
- logs = []
600
-
601
- class LogCapture:
602
- def __init__(self, log_type):
603
- self.log_type = log_type
604
-
605
- def write(self, message):
606
- if message.strip():
607
- logs.append({{"type": self.log_type, "message": message.strip()}})
608
- return len(message)
609
-
610
- def flush(self):
611
- pass
612
-
613
- # Custom safer importer
614
- class RestrictedImporter:
615
- def __init__(self, allowed_modules):
616
- self.allowed_modules = allowed_modules
617
-
618
- def __call__(self, name, *args, **kwargs):
619
- if name in self.allowed_modules:
620
- return __import__(name, *args, **kwargs)
621
- else:
622
- raise ImportError(f"Import of '{{name}}' is not allowed for security reasons. " +
623
- f"Allowed modules: {{', '.join(sorted(self.allowed_modules))}}")
624
-
625
- # Capture stdout and stderr
626
- original_stdout = sys.stdout
627
- original_stderr = sys.stderr
628
- sys.stdout = LogCapture("log")
629
- sys.stderr = LogCapture("error")
630
-
631
- # Parse input data
632
- try:
633
- input_data = json.loads(r"""{input_data}""")
634
- except json.JSONDecodeError as e:
635
- original_stderr.write(json.dumps({{"error": f"Failed to parse input data: {{e}}"}}))
636
- sys.exit(1)
637
-
638
- # Create safe environment
639
- safe_globals = {{
640
- "__builtins__": {{
641
- # Safe builtins only
642
- "abs": abs, "all": all, "any": any, "bool": bool,
643
- "chr": chr, "dict": dict, "divmod": divmod,
644
- "enumerate": enumerate, "filter": filter, "float": float,
645
- "format": format, "frozenset": frozenset, "hash": hash,
646
- "hex": hex, "int": int, "isinstance": isinstance,
647
- "issubclass": issubclass, "iter": iter, "len": len,
648
- "list": list, "map": map, "max": max, "min": min,
649
- "next": next, "oct": oct, "ord": ord, "pow": pow,
650
- "print": print, "range": range, "repr": repr,
651
- "reversed": reversed, "round": round, "set": set,
652
- "slice": slice, "sorted": sorted, "str": str, "sum": sum,
653
- "tuple": tuple, "type": type, "zip": zip,
654
- # Exception types for error handling
655
- "Exception": Exception, "ValueError": ValueError,
656
- "TypeError": TypeError, "KeyError": KeyError,
657
- "IndexError": IndexError,
658
- # Add a safe import function
659
- "__import__": RestrictedImporter({{
660
- "json", "datetime", "math", "statistics", "collections",
661
- "itertools", "functools", "re", "copy", "decimal",
662
- "csv", "io", "dataclasses", "typing", "enum"
663
- }})
664
- }},
665
- # Pre-import safe modules
666
- "json": json,
667
- "datetime": datetime,
668
- "math": math,
669
- "statistics": statistics,
670
- "collections": collections,
671
- "itertools": itertools,
672
- "functools": functools,
673
- "re": re
674
- }}
675
-
676
- safe_locals = {{"input_data": input_data}}
677
-
678
- # Define transform function from user script in a safe context
679
- try:
680
- compiled_code = compile(r"""{user_script}""", "<user_script>", "exec")
681
- exec(compiled_code, safe_globals, safe_locals)
682
-
683
- # Validate transform function exists and has correct signature
684
- if 'transform' not in safe_locals:
685
- original_stderr.write(json.dumps({{"error": "No transform function defined"}}))
686
- sys.exit(1)
687
-
688
- if not callable(safe_locals['transform']):
689
- original_stderr.write(json.dumps({{"error": "transform must be a function"}}))
690
- sys.exit(1)
691
-
692
- transform_fn = safe_locals['transform']
693
-
694
- except Exception as e:
695
- original_stderr.write(json.dumps({{
696
- "error": str(e),
697
- "stack": traceback.format_exc().split('\\n')
698
- }}))
699
- sys.exit(1)
700
-
701
- # Execute transform with input data
702
- try:
703
- # Cancel the alarm if we reach here (we have our own timeout)
704
- try:
705
- signal.alarm(0)
706
- except:
707
- pass
708
-
709
- result = transform_fn(input_data)
710
-
711
- # Basic validation of output (to prevent non-serializable data)
712
- try:
713
- json.dumps(result)
714
- except TypeError as e:
715
- raise TypeError(f"Transform result is not JSON serializable: {{e}}")
716
-
717
- # Write result to original stdout
718
- original_stdout.write(json.dumps({{"result": result, "logs": logs}}))
719
-
720
- except Exception as e:
721
- original_stderr.write(json.dumps({{
722
- "error": str(e),
723
- "stack": traceback.format_exc().split('\\n')
724
- }}))
725
- sys.exit(1)
726
- finally:
727
- # Restore stdout and stderr
728
- sys.stdout = original_stdout
729
- sys.stderr = original_stderr
730
- '''
731
-
732
- def execute(self, program_file: str, input_data: str) -> Tuple[Union[Dict, str], Optional[str]]:
733
- logger.info(f"Executing Python script: {program_file}")
734
- stats = Stats()
735
-
736
- try:
737
- check_resource_limits(self.modules_dir)
738
-
739
- with tempfile.TemporaryDirectory() as temp_dir:
740
- temp_dir = Path(temp_dir)
741
-
742
- # Read user script
743
- with open(program_file, 'r') as f:
744
- user_script = f.read()
745
- logger.debug("Loaded user Python script: %d characters", len(user_script))
746
-
747
- # Check for dangerous code
748
- safety_issue = self.check_for_dangerous_code(user_script)
749
- if safety_issue:
750
- logger.warning(f"Security violation detected in script: {safety_issue}")
751
- return {
752
- "lam.error": f"Security violation: {safety_issue}",
753
- "type": "SecurityError"
754
- }, f"Security violation: {safety_issue}"
755
-
756
- # Create wrapper script
757
- wrapper = self.create_wrapper(input_data, user_script)
758
- script_path = temp_dir / "script.py"
759
- with open(script_path, 'w') as f:
760
- f.write(wrapper)
761
- logger.debug("Generated Python wrapper script: %s", script_path)
762
-
763
- # Execute with Python in isolated environment
764
- process = subprocess.Popen(
765
- [
766
- sys.executable,
767
- "-I", # Isolated mode, ignores environment variables/site packages
768
- str(script_path)
769
- ],
770
- stdout=subprocess.PIPE,
771
- stderr=subprocess.PIPE,
772
- text=True,
773
- cwd=temp_dir,
774
- # Prevent access to system environment variables
775
- env={"PATH": os.environ.get("PATH", "")}
776
- )
777
- logger.info("Started Python process PID %d", process.pid)
778
-
779
- try:
780
- output, error = process.communicate(timeout=5)
781
- logger.debug("Process completed with code %d", process.returncode)
782
- except subprocess.TimeoutExpired as e:
783
- logger.warning("Process timeout after 5 seconds")
784
- process.kill()
785
- return {"lam.error": "Script execution timed out"}, "Execution timed out after 5 seconds"
786
-
787
- # Handle process errors
788
- if process.returncode != 0:
789
- try:
790
- # Try to parse structured error from stderr
791
- error_data = json.loads(error.strip())
792
- error_msg = error_data.get('error', 'Unknown error')
793
- stack = error_data.get('stack', [])
794
-
795
- # Format error message
796
- error_details = {
797
- "lam.error": error_msg,
798
- "stack_trace": stack
799
- }
800
- return error_details, error_msg
801
-
802
- except json.JSONDecodeError:
803
- # Fallback to raw error output
804
- error_msg = error.strip() or "Unknown error"
805
- return {"lam.error": error_msg}, error_msg
806
-
807
- # Handle successful output
808
- try:
809
- output_data = json.loads(output)
810
-
811
- # Process Python logs (if any)
812
- if 'logs' in output_data:
813
- for log_entry in output_data.get('logs', []):
814
- if log_entry['type'] == 'error':
815
- logger.error("[Python] %s", log_entry['message'])
816
- else:
817
- logger.debug("[Python] %s", log_entry['message'])
818
-
819
- result = output_data.get('result', {})
820
- return result, None
821
-
822
- except json.JSONDecodeError as e:
823
- logger.error("Failed to parse output: %s", str(e))
824
- return {
825
- "lam.error": "Invalid JSON output",
826
- "raw_output": output.strip()
827
- }, "Output format error"
828
-
829
- except Exception as e:
830
- logger.exception("Execution failed")
831
- return {
832
- "lam.error": str(e),
833
- "type": e.__class__.__name__
834
- }, str(e)
835
-
836
- def get_engine(engine_type: str, workspace_id: str, flow_id: str, execution_id: str) -> Engine:
837
- """Factory function to get the appropriate execution engine"""
838
- engines = {
839
- EngineType.JQ.value: JQEngine,
840
- EngineType.JAVASCRIPT.value: BunEngine,
841
- EngineType.PYTHON.value: PythonEngine
842
- }
843
-
844
- engine_class = engines.get(engine_type)
845
- if not engine_class:
846
- raise ValueError(f"Unsupported engine type: {engine_type}")
847
-
848
- engine = engine_class(workspace_id, flow_id, execution_id)
849
- if not engine.validate_environment():
850
- raise EnvironmentError(f"Required dependencies not found for {engine_type}")
851
-
852
- return engine
853
-
854
- def process_input(input: str) -> Tuple[str, Optional[str]]:
855
- """Process and validate input data"""
856
- if os.path.isfile(input):
857
- logger.debug("Loading input from file: %s", input)
858
- with open(input, 'r') as file:
859
- return file.read(), None
860
-
861
- try:
862
- json.loads(input)
863
- logger.debug("Validated inline JSON input")
864
- return input, None
865
- except json.JSONDecodeError as e:
866
- logger.error("Invalid JSON input: %s", str(e))
867
- return None, str(e)
868
-
869
- @click.group()
870
- def lam():
871
- """LAM - Laminar Data Transformation Tool"""
872
- pass
873
60
 
874
61
  @lam.command()
875
62
  @click.argument('program_file', type=click.Path(exists=True))
@@ -894,9 +81,11 @@ def run(program_file: str, input: str, language: str, workspace_id: str,
894
81
  log_file = engine.get_log_file()
895
82
  result_file = engine.get_result_file()
896
83
 
84
+ # Add file handler to root logger so ALL loggers write to the file
897
85
  file_handler = logging.FileHandler(log_file, 'w')
898
- file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
899
- logger.addHandler(file_handler)
86
+ file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
87
+ root_logger = logging.getLogger()
88
+ root_logger.addHandler(file_handler)
900
89
 
901
90
  logger.info("Starting LAM execution with %s engine", language)
902
91
  engine.track_event('lam.run.start', {
@@ -939,7 +128,8 @@ def run(program_file: str, input: str, language: str, workspace_id: str,
939
128
 
940
129
  finally:
941
130
  logger.info("Execution complete")
942
- logger.removeHandler(file_handler)
131
+ root_logger.removeHandler(file_handler)
132
+
943
133
 
944
134
  if __name__ == '__main__':
945
- lam()
135
+ lam()