kailash 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kailash/__init__.py +1 -1
  2. kailash/api/custom_nodes_secure.py +2 -2
  3. kailash/api/studio_secure.py +1 -1
  4. kailash/mcp/client_new.py +1 -1
  5. kailash/nodes/ai/a2a.py +1 -1
  6. kailash/nodes/api/__init__.py +26 -0
  7. kailash/nodes/api/monitoring.py +463 -0
  8. kailash/nodes/api/security.py +822 -0
  9. kailash/nodes/base.py +3 -3
  10. kailash/nodes/code/python.py +6 -0
  11. kailash/nodes/data/__init__.py +9 -0
  12. kailash/nodes/data/directory.py +278 -0
  13. kailash/nodes/data/event_generation.py +297 -0
  14. kailash/nodes/data/file_discovery.py +601 -0
  15. kailash/nodes/data/sql.py +2 -2
  16. kailash/nodes/transform/processors.py +32 -1
  17. kailash/runtime/async_local.py +1 -1
  18. kailash/runtime/docker.py +4 -4
  19. kailash/runtime/local.py +41 -4
  20. kailash/runtime/parallel.py +2 -2
  21. kailash/runtime/parallel_cyclic.py +2 -2
  22. kailash/runtime/testing.py +2 -2
  23. kailash/utils/templates.py +6 -6
  24. kailash/visualization/performance.py +16 -3
  25. kailash/visualization/reports.py +5 -1
  26. kailash/workflow/convergence.py +1 -1
  27. kailash/workflow/cycle_analyzer.py +8 -1
  28. kailash/workflow/cyclic_runner.py +1 -1
  29. kailash/workflow/graph.py +33 -6
  30. kailash/workflow/visualization.py +10 -2
  31. kailash-0.3.0.dist-info/METADATA +428 -0
  32. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/RECORD +36 -31
  33. kailash-0.2.1.dist-info/METADATA +0 -1617
  34. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
  35. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
  36. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
  37. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/nodes/data/file_discovery.py ADDED
@@ -0,0 +1,601 @@
+"""File discovery and analysis nodes for file system operations."""
+
+import hashlib
+import mimetypes
+import os
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class FileDiscoveryNode(Node):
+    """
+    Discovers and analyzes files and directories in the file system.
+
+    This node provides comprehensive file discovery capabilities, replacing
+    DataTransformer with embedded Python code for file processing tasks.
+    It can scan directories, analyze file properties, detect file types,
+    and generate detailed file system reports.
+
+    Design Philosophy:
+        File system operations require robust discovery and analysis capabilities.
+        This node eliminates the need for custom file processing code in
+        DataTransformer nodes by providing dedicated, configurable file
+        discovery with filtering, analysis, and reporting features.
+
+    Upstream Dependencies:
+        - Path configuration nodes
+        - Filter criteria nodes
+        - Authentication/permission nodes
+        - Schedule/trigger nodes
+
+    Downstream Consumers:
+        - File processing nodes
+        - Content analysis nodes
+        - Backup and archival nodes
+        - Security scanning nodes
+        - Compliance reporting nodes
+
+    Configuration:
+        - Search paths and patterns
+        - File type filters
+        - Size and date criteria
+        - Analysis depth and options
+        - Output format preferences
+
+    Implementation Details:
+        - Recursive directory traversal
+        - File metadata extraction
+        - Content type detection
+        - Permission and ownership analysis
+        - Hash calculation for integrity
+
+    Error Handling:
+        - Permission denied gracefully handled
+        - Broken symlinks detected
+        - Invalid paths reported
+        - Partial results on errors
+
+    Side Effects:
+        - File system access (read-only by default)
+        - Temporary file creation for analysis
+        - Metadata caching for performance
+        - Logging of discovery activities
+
+    Examples:
+        >>> # Discover all Python files in a project
+        >>> discovery = FileDiscoveryNode(
+        ...     search_paths=['/path/to/project'],
+        ...     file_patterns=['*.py'],
+        ...     include_metadata=True,
+        ...     max_depth=5
+        ... )
+        >>> result = discovery.execute()
+        >>> assert 'discovered_files' in result
+        >>> assert all(f['name'].endswith('.py') for f in result['discovered_files'])
+        >>>
+        >>> # Find large files for cleanup
+        >>> discovery = FileDiscoveryNode(
+        ...     search_paths=['/var/log', '/tmp'],
+        ...     min_size_mb=100,
+        ...     older_than_days=30,
+        ...     include_checksums=True
+        ... )
+        >>> result = discovery.execute()
+        >>> large_files = result['discovered_files']
+    """
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "search_paths": NodeParameter(
+                name="search_paths",
+                type=list,
+                required=True,
+                description="List of paths to search for files",
+            ),
+            "file_patterns": NodeParameter(
+                name="file_patterns",
+                type=list,
+                required=False,
+                default=["*"],
+                description="File name patterns to match (glob-style)",
+            ),
+            "exclude_patterns": NodeParameter(
+                name="exclude_patterns",
+                type=list,
+                required=False,
+                default=[],
+                description="File name patterns to exclude",
+            ),
+            "max_depth": NodeParameter(
+                name="max_depth",
+                type=int,
+                required=False,
+                default=10,
+                description="Maximum directory depth to search",
+            ),
+            "include_metadata": NodeParameter(
+                name="include_metadata",
+                type=bool,
+                required=False,
+                default=True,
+                description="Include detailed file metadata",
+            ),
+            "include_checksums": NodeParameter(
+                name="include_checksums",
+                type=bool,
+                required=False,
+                default=False,
+                description="Calculate file checksums (slower but more thorough)",
+            ),
+            "min_size_mb": NodeParameter(
+                name="min_size_mb",
+                type=float,
+                required=False,
+                description="Minimum file size in megabytes",
+            ),
+            "max_size_mb": NodeParameter(
+                name="max_size_mb",
+                type=float,
+                required=False,
+                description="Maximum file size in megabytes",
+            ),
+            "older_than_days": NodeParameter(
+                name="older_than_days",
+                type=int,
+                required=False,
+                description="Only include files older than N days",
+            ),
+            "newer_than_days": NodeParameter(
+                name="newer_than_days",
+                type=int,
+                required=False,
+                description="Only include files newer than N days",
+            ),
+            "follow_symlinks": NodeParameter(
+                name="follow_symlinks",
+                type=bool,
+                required=False,
+                default=False,
+                description="Follow symbolic links during traversal",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        search_paths = kwargs["search_paths"]
+        file_patterns = kwargs.get("file_patterns", ["*"])
+        exclude_patterns = kwargs.get("exclude_patterns", [])
+        max_depth = kwargs.get("max_depth", 10)
+        include_metadata = kwargs.get("include_metadata", True)
+        include_checksums = kwargs.get("include_checksums", False)
+        min_size_mb = kwargs.get("min_size_mb")
+        max_size_mb = kwargs.get("max_size_mb")
+        older_than_days = kwargs.get("older_than_days")
+        newer_than_days = kwargs.get("newer_than_days")
+        follow_symlinks = kwargs.get("follow_symlinks", False)
+
+        start_time = time.time()
+        discovered_files = []
+        discovery_stats = {
+            "total_directories_scanned": 0,
+            "total_files_found": 0,
+            "total_files_matching": 0,
+            "access_errors": 0,
+            "broken_symlinks": 0,
+        }
+
+        for search_path in search_paths:
+            try:
+                path_files, path_stats = self._discover_files_in_path(
+                    search_path=search_path,
+                    file_patterns=file_patterns,
+                    exclude_patterns=exclude_patterns,
+                    max_depth=max_depth,
+                    include_metadata=include_metadata,
+                    include_checksums=include_checksums,
+                    min_size_mb=min_size_mb,
+                    max_size_mb=max_size_mb,
+                    older_than_days=older_than_days,
+                    newer_than_days=newer_than_days,
+                    follow_symlinks=follow_symlinks,
+                )
+
+                discovered_files.extend(path_files)
+
+                # Aggregate stats
+                for key, value in path_stats.items():
+                    discovery_stats[key] += value
+
+            except Exception as e:
+                discovery_stats["access_errors"] += 1
+                # Add error entry to results
+                discovered_files.append(
+                    {
+                        "type": "discovery_error",
+                        "path": search_path,
+                        "error": str(e),
+                        "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
+                    }
+                )
+
+        execution_time = time.time() - start_time
+
+        # Generate summary
+        summary = self._generate_discovery_summary(
+            discovered_files, discovery_stats, execution_time
+        )
+
+        return {
+            "discovered_files": discovered_files,
+            "discovery_summary": summary,
+            "discovery_stats": discovery_stats,
+            "total_files": len(
+                [f for f in discovered_files if f.get("type") != "discovery_error"]
+            ),
+            "execution_time": execution_time,
+            "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
+        }
+
+    def _discover_files_in_path(
+        self,
+        search_path: str,
+        file_patterns: List[str],
+        exclude_patterns: List[str],
+        max_depth: int,
+        include_metadata: bool,
+        include_checksums: bool,
+        min_size_mb: Optional[float],
+        max_size_mb: Optional[float],
+        older_than_days: Optional[int],
+        newer_than_days: Optional[int],
+        follow_symlinks: bool,
+    ) -> Tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Discover files in a specific path."""
+
+        discovered_files = []
+        stats = {
+            "total_directories_scanned": 0,
+            "total_files_found": 0,
+            "total_files_matching": 0,
+            "access_errors": 0,
+            "broken_symlinks": 0,
+        }
+
+        try:
+            search_path_obj = Path(search_path)
+            if not search_path_obj.exists():
+                raise FileNotFoundError(f"Search path does not exist: {search_path}")
+
+            # Walk the directory tree
+            for root, dirs, files in os.walk(search_path, followlinks=follow_symlinks):
+                current_depth = len(Path(root).relative_to(search_path_obj).parts)
+
+                # Skip if max depth exceeded
+                if current_depth > max_depth:
+                    dirs[:] = []  # Don't descend further
+                    continue
+
+                stats["total_directories_scanned"] += 1
+
+                for file_name in files:
+                    file_path = os.path.join(root, file_name)
+                    stats["total_files_found"] += 1
+
+                    try:
+                        # Check if file matches patterns
+                        if not self._matches_patterns(
+                            file_name, file_patterns, exclude_patterns
+                        ):
+                            continue
+
+                        file_info = self._analyze_file(
+                            file_path=file_path,
+                            include_metadata=include_metadata,
+                            include_checksums=include_checksums,
+                        )
+
+                        # Apply size filters
+                        if min_size_mb is not None:
+                            if file_info.get("size_mb", 0) < min_size_mb:
+                                continue
+
+                        if max_size_mb is not None:
+                            if file_info.get("size_mb", 0) > max_size_mb:
+                                continue
+
+                        # Apply date filters
+                        if older_than_days is not None or newer_than_days is not None:
+                            if not self._matches_date_criteria(
+                                file_info, older_than_days, newer_than_days
+                            ):
+                                continue
+
+                        discovered_files.append(file_info)
+                        stats["total_files_matching"] += 1
+
+                    except (OSError, PermissionError) as e:
+                        stats["access_errors"] += 1
+                        # Add error info for this specific file
+                        discovered_files.append(
+                            {
+                                "type": "file_access_error",
+                                "path": file_path,
+                                "name": file_name,
+                                "error": str(e),
+                                "timestamp": datetime.now(timezone.utc).isoformat()
+                                + "Z",
+                            }
+                        )
+
+        except Exception:
+            stats["access_errors"] += 1
+            raise
+
+        return discovered_files, stats
+
+    def _matches_patterns(
+        self, file_name: str, include_patterns: List[str], exclude_patterns: List[str]
+    ) -> bool:
+        """Check if filename matches include patterns and doesn't match exclude patterns."""
+        import fnmatch
+
+        # Check exclude patterns first
+        for pattern in exclude_patterns:
+            if fnmatch.fnmatch(file_name, pattern):
+                return False
+
+        # Check include patterns
+        if not include_patterns or include_patterns == ["*"]:
+            return True
+
+        for pattern in include_patterns:
+            if fnmatch.fnmatch(file_name, pattern):
+                return True
+
+        return False
+
+    def _analyze_file(
+        self, file_path: str, include_metadata: bool, include_checksums: bool
+    ) -> Dict[str, Any]:
+        """Analyze a single file and return its information."""
+
+        file_path_obj = Path(file_path)
+        file_info = {
+            "type": "file",
+            "path": str(file_path),
+            "name": file_path_obj.name,
+            "directory": str(file_path_obj.parent),
+        }
+
+        try:
+            # Basic file stats
+            stat_info = file_path_obj.stat()
+
+            file_info.update(
+                {
+                    "size_bytes": stat_info.st_size,
+                    "size_mb": stat_info.st_size / (1024 * 1024),
+                    "created_timestamp": stat_info.st_ctime,
+                    "modified_timestamp": stat_info.st_mtime,
+                    "accessed_timestamp": stat_info.st_atime,
+                    "created_date": datetime.fromtimestamp(
+                        stat_info.st_ctime, timezone.utc
+                    ).isoformat()
+                    + "Z",
+                    "modified_date": datetime.fromtimestamp(
+                        stat_info.st_mtime, timezone.utc
+                    ).isoformat()
+                    + "Z",
+                    "accessed_date": datetime.fromtimestamp(
+                        stat_info.st_atime, timezone.utc
+                    ).isoformat()
+                    + "Z",
+                }
+            )
+
+            if include_metadata:
+                # File type detection
+                mime_type, encoding = mimetypes.guess_type(file_path)
+                file_info.update(
+                    {
+                        "mime_type": mime_type,
+                        "encoding": encoding,
+                        "extension": file_path_obj.suffix.lower(),
+                    }
+                )
+
+                # File permissions
+                file_info.update(
+                    {
+                        "permissions": oct(stat_info.st_mode)[-3:],
+                        "owner_uid": stat_info.st_uid,
+                        "group_gid": stat_info.st_gid,
+                        "is_readable": os.access(file_path, os.R_OK),
+                        "is_writable": os.access(file_path, os.W_OK),
+                        "is_executable": os.access(file_path, os.X_OK),
+                    }
+                )
+
+                # Symbolic link detection
+                if file_path_obj.is_symlink():
+                    try:
+                        link_target = os.readlink(file_path)
+                        file_info.update(
+                            {
+                                "is_symlink": True,
+                                "link_target": link_target,
+                                "link_target_exists": os.path.exists(link_target),
+                            }
+                        )
+                    except OSError:
+                        file_info.update(
+                            {
+                                "is_symlink": True,
+                                "link_target": None,
+                                "link_target_exists": False,
+                            }
+                        )
+                else:
+                    file_info["is_symlink"] = False
+
+                # Content analysis for text files
+                if mime_type and mime_type.startswith("text/"):
+                    try:
+                        with open(
+                            file_path, "r", encoding="utf-8", errors="ignore"
+                        ) as f:
+                            content_sample = f.read(1024)  # Read first 1KB
+                        file_info.update(
+                            {
+                                "line_count": len(content_sample.splitlines()),
+                                "character_count": len(content_sample),
+                                "content_sample": (
+                                    content_sample[:200] + "..."
+                                    if len(content_sample) > 200
+                                    else content_sample
+                                ),
+                            }
+                        )
+                    except (UnicodeDecodeError, PermissionError):
+                        pass
+
+            if include_checksums:
+                # Calculate file hashes
+                file_info.update(self._calculate_checksums(file_path))
+
+        except (OSError, PermissionError) as e:
+            file_info.update(
+                {
+                    "error": str(e),
+                    "accessible": False,
+                }
+            )
+
+        file_info["timestamp"] = datetime.now(timezone.utc).isoformat() + "Z"
+        return file_info
+
+    def _calculate_checksums(self, file_path: str) -> Dict[str, str]:
+        """Calculate MD5 and SHA256 checksums for a file."""
+        checksums = {}
+
+        try:
+            md5_hash = hashlib.md5()
+            sha256_hash = hashlib.sha256()
+
+            with open(file_path, "rb") as f:
+                # Read file in chunks to handle large files efficiently
+                for chunk in iter(lambda: f.read(4096), b""):
+                    md5_hash.update(chunk)
+                    sha256_hash.update(chunk)
+
+            checksums.update(
+                {
+                    "md5": md5_hash.hexdigest(),
+                    "sha256": sha256_hash.hexdigest(),
+                }
+            )
+        except (OSError, PermissionError) as e:
+            checksums.update(
+                {
+                    "checksum_error": str(e),
+                }
+            )
+
+        return checksums
+
+    def _matches_date_criteria(
+        self,
+        file_info: Dict[str, Any],
+        older_than_days: Optional[int],
+        newer_than_days: Optional[int],
+    ) -> bool:
+        """Check if file matches date criteria."""
+
+        modified_timestamp = file_info.get("modified_timestamp")
+        if modified_timestamp is None:
+            return True
+
+        now = time.time()
+        file_age_days = (now - modified_timestamp) / (24 * 3600)
+
+        if older_than_days is not None and file_age_days < older_than_days:
+            return False
+
+        if newer_than_days is not None and file_age_days > newer_than_days:
+            return False
+
+        return True
+
+    def _generate_discovery_summary(
+        self,
+        discovered_files: List[Dict],
+        discovery_stats: Dict[str, int],
+        execution_time: float,
+    ) -> Dict[str, Any]:
+        """Generate summary of file discovery results."""
+
+        # Count files by type/extension
+        extension_counts = {}
+        mime_type_counts = {}
+        size_distribution = {"small": 0, "medium": 0, "large": 0, "very_large": 0}
+
+        total_size_mb = 0
+        error_count = 0
+
+        for file_info in discovered_files:
+            if file_info.get("type") in ["discovery_error", "file_access_error"]:
+                error_count += 1
+                continue
+
+            # Extension analysis
+            extension = file_info.get("extension", "")
+            extension_counts[extension] = extension_counts.get(extension, 0) + 1
+
+            # MIME type analysis
+            mime_type = file_info.get("mime_type", "unknown")
+            mime_type_counts[mime_type] = mime_type_counts.get(mime_type, 0) + 1
+
+            # Size distribution
+            size_mb = file_info.get("size_mb", 0)
+            total_size_mb += size_mb
+
+            if size_mb < 1:
+                size_distribution["small"] += 1
+            elif size_mb < 50:
+                size_distribution["medium"] += 1
+            elif size_mb < 500:
+                size_distribution["large"] += 1
+            else:
+                size_distribution["very_large"] += 1
+
+        # Find largest files
+        file_sizes = [
+            (f.get("size_mb", 0), f.get("path", ""))
+            for f in discovered_files
+            if f.get("type") == "file"
+        ]
+        largest_files = sorted(file_sizes, reverse=True)[:10]
+
+        return {
+            "execution_time": execution_time,
+            "total_files_discovered": len(discovered_files) - error_count,
+            "total_errors": error_count,
+            "total_size_mb": total_size_mb,
+            "average_file_size_mb": total_size_mb
+            / max(1, len(discovered_files) - error_count),
+            "extension_distribution": dict(
+                sorted(extension_counts.items(), key=lambda x: x[1], reverse=True)
+            ),
+            "mime_type_distribution": dict(
+                sorted(mime_type_counts.items(), key=lambda x: x[1], reverse=True)
+            ),
+            "size_distribution": size_distribution,
+            "largest_files": [
+                {"size_mb": size, "path": path} for size, path in largest_files[:5]
+            ],
+            "discovery_stats": discovery_stats,
+        }
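
The new node is configured entirely through the parameters declared above and returns plain dictionaries, so it can be exercised on its own. A minimal usage sketch follows; the constructor arguments and result keys are taken from the code above, while the execute() entry point is assumed from the class docstring rather than shown elsewhere in this diff:

    from kailash.nodes.data.file_discovery import FileDiscoveryNode

    # Hash stale log files larger than 100 MB so they can be reviewed before cleanup.
    discovery = FileDiscoveryNode(
        search_paths=["/var/log"],
        file_patterns=["*.log"],
        min_size_mb=100,
        older_than_days=30,
        include_checksums=True,  # adds "md5"/"sha256" keys via _calculate_checksums()
    )

    result = discovery.execute()

    # run() returns: discovered_files, discovery_summary, discovery_stats,
    # total_files, execution_time, timestamp.
    for entry in result["discovered_files"]:
        if entry.get("type") == "file":
            print(entry["path"], f"{entry['size_mb']:.1f} MB", entry.get("sha256"))

    print(result["discovery_summary"]["size_distribution"])
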
kailash/nodes/data/sql.py CHANGED
@@ -166,10 +166,10 @@ class SQLDatabaseNode(Node):
     Example:
         >>> # Initialize with project configuration
         >>> SQLDatabaseNode.initialize('kailash_project.yaml')
-        >>>
+        >>>
         >>> # Create node with database connection configuration
         >>> sql_node = SQLDatabaseNode(connection='customer_db')
-        >>>
+        >>>
         >>> # Execute multiple queries with the same node
         >>> result1 = sql_node.run(
         ...     query='SELECT * FROM customers WHERE active = ?',
kailash/nodes/transform/processors.py CHANGED
@@ -311,7 +311,7 @@ class DataTransformer(Node):
         return {
             "data": NodeParameter(
                 name="data",
-                type=list,
+                type=Any,
                 required=False,
                 description="Primary input data to transform",
             ),
@@ -332,12 +332,35 @@ class DataTransformer(Node):
             },  # Support for up to 5 additional arguments
         }

+    def validate_inputs(self, **kwargs) -> Dict[str, Any]:
+        """Override validate_inputs to accept arbitrary parameters for transformations.
+
+        DataTransformer needs to accept any input parameters that might be mapped
+        from other nodes, not just the predefined parameters in get_parameters().
+        This enables flexible data flow in workflows.
+        """
+        # First, do the standard validation for defined parameters
+        validated = super().validate_inputs(**kwargs)
+
+        # Then, add any extra parameters that aren't in the schema
+        # These will be passed to the transformation context
+        defined_params = set(self.get_parameters().keys())
+        for key, value in kwargs.items():
+            if key not in defined_params:
+                validated[key] = value  # Accept arbitrary additional parameters
+
+        return validated
+
     def run(self, **kwargs) -> Dict[str, Any]:
         # Extract the transformation functions
         transformations = kwargs.get("transformations", [])
         if not transformations:
             return {"result": kwargs.get("data", [])}

+        # Debug: Check what kwargs we received
+        print(f"DATATRANSFORMER RUN DEBUG: kwargs keys = {list(kwargs.keys())}")
+        print(f"DATATRANSFORMER RUN DEBUG: kwargs = {kwargs}")
+
         # Get all input data
         input_data = {}
         for key, value in kwargs.items():
@@ -371,6 +394,14 @@ class DataTransformer(Node):
             local_vars = input_data.copy()
             local_vars["result"] = result

+            # Debug: Print available variables
+            print(
+                f"DataTransformer DEBUG - Available variables: {list(local_vars.keys())}"
+            )
+            print(
+                f"DataTransformer DEBUG - Input data keys: {list(input_data.keys())}"
+            )
+
             # Execute the code block
             exec(transform_str, safe_globals, local_vars)  # noqa: S102

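
The functional change in this file is the validate_inputs override: DataTransformer now keeps inputs whose names are not declared in get_parameters(), so upstream nodes can map arbitrarily named outputs into the transformation context. A rough sketch of that behaviour is shown below; the module path and the construct-then-execute() calling pattern are assumed from the file list and the other node docstrings in this diff, not spelled out here:

    # Assumed module path, based on the file list above (kailash/nodes/transform/processors.py).
    from kailash.nodes.transform.processors import DataTransformer

    node = DataTransformer(
        transformations=["result = [c['id'] for c in customer_records if c['active']]"]
    )

    # "customer_records" is not a declared parameter; the override passes it through
    # instead of rejecting it, so it becomes a variable inside the exec'd code.
    validated = node.validate_inputs(
        customer_records=[{"id": 1, "active": True}, {"id": 2, "active": False}]
    )
    assert "customer_records" in validated

    output = node.execute(
        customer_records=[{"id": 1, "active": True}, {"id": 2, "active": False}]
    )
    print(output["result"])  # expected [1], given the "result" key used by run()
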
kailash/runtime/async_local.py CHANGED
@@ -227,7 +227,7 @@ class AsyncLocalRuntime:
                 outputs = await node_instance.execute_async(**inputs)
             else:
                 # Fall back to synchronous execution
-                outputs = node_instance.execute(**inputs)
+                outputs = node_instance.run(**inputs)

             execution_time = (
                 datetime.now(timezone.utc) - start_time