kailash 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kailash/__init__.py +1 -1
  2. kailash/api/custom_nodes_secure.py +2 -2
  3. kailash/api/studio_secure.py +1 -1
  4. kailash/mcp/client_new.py +1 -1
  5. kailash/nodes/ai/a2a.py +1 -1
  6. kailash/nodes/api/__init__.py +26 -0
  7. kailash/nodes/api/monitoring.py +463 -0
  8. kailash/nodes/api/security.py +822 -0
  9. kailash/nodes/base.py +3 -3
  10. kailash/nodes/code/python.py +6 -0
  11. kailash/nodes/data/__init__.py +9 -0
  12. kailash/nodes/data/directory.py +278 -0
  13. kailash/nodes/data/event_generation.py +297 -0
  14. kailash/nodes/data/file_discovery.py +601 -0
  15. kailash/nodes/data/sql.py +2 -2
  16. kailash/nodes/transform/processors.py +32 -1
  17. kailash/runtime/async_local.py +1 -1
  18. kailash/runtime/docker.py +4 -4
  19. kailash/runtime/local.py +41 -4
  20. kailash/runtime/parallel.py +2 -2
  21. kailash/runtime/parallel_cyclic.py +2 -2
  22. kailash/runtime/testing.py +2 -2
  23. kailash/utils/templates.py +6 -6
  24. kailash/visualization/performance.py +16 -3
  25. kailash/visualization/reports.py +5 -1
  26. kailash/workflow/convergence.py +1 -1
  27. kailash/workflow/cycle_analyzer.py +8 -1
  28. kailash/workflow/cyclic_runner.py +1 -1
  29. kailash/workflow/graph.py +33 -6
  30. kailash/workflow/visualization.py +10 -2
  31. kailash-0.3.0.dist-info/METADATA +428 -0
  32. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/RECORD +36 -31
  33. kailash-0.2.1.dist-info/METADATA +0 -1617
  34. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
  35. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
  36. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
  37. {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/__init__.py CHANGED
@@ -15,7 +15,7 @@ from kailash.workflow.visualization import WorkflowVisualizer
15
15
  # For backward compatibility
16
16
  WorkflowGraph = Workflow
17
17
 
18
- __version__ = "0.2.0"
18
+ __version__ = "0.3.0"
19
19
 
20
20
  __all__ = [
21
21
  "Workflow",
@@ -350,7 +350,7 @@ async def _execute_api_node(
350
350
  ) -> Dict[str, Any]:
351
351
  """Execute an API-based custom node"""
352
352
 
353
- from kailash.nodes.api.http import HTTPClientNode
353
+ from kailash.nodes.api.http import HTTPRequestNode
354
354
  from kailash.security import TenantContext
355
355
 
356
356
  # Execute in tenant context
@@ -359,7 +359,7 @@ async def _execute_api_node(
359
359
  api_config = node.implementation.get("api", {})
360
360
 
361
361
  # Create HTTP client node
362
- http_node = HTTPClientNode(
362
+ http_node = HTTPRequestNode(
363
363
  url=api_config.get("url", ""),
364
364
  method=api_config.get("method", "GET"),
365
365
  headers=api_config.get("headers", {}),
@@ -369,7 +369,7 @@ class WorkflowStudioAPI:
369
369
  ),
370
370
  }
371
371
  )
372
- except:
372
+ except Exception:
373
373
  outputs.append({"name": "output", "type": "any"})
374
374
  else:
375
375
  # Default output for all nodes
kailash/mcp/client_new.py CHANGED
@@ -292,7 +292,7 @@ class MCPClient:
292
292
 
293
293
  # Convenience functions for LLM agents
294
294
  async def discover_and_prepare_tools(
295
- mcp_servers: List[Union[str, Dict[str, Any]]]
295
+ mcp_servers: List[Union[str, Dict[str, Any]]],
296
296
  ) -> List[Dict[str, Any]]:
297
297
  """
298
298
  Discover tools from multiple MCP servers and prepare them for LLM use.
kailash/nodes/ai/a2a.py CHANGED
@@ -832,7 +832,7 @@ Focus on actionable intelligence rather than just listing what each agent said."
832
832
  summary = result.get("response", {}).get("content", "")
833
833
  if summary:
834
834
  return f"Shared Context Summary:\n{summary}"
835
- except:
835
+ except Exception:
836
836
  pass
837
837
 
838
838
  # Fallback to simple summary
@@ -18,9 +18,12 @@ Design philosophy:
18
18
  - Enable both synchronous and asynchronous operation
19
19
  """
20
20
 
21
+ import warnings
22
+
21
23
  from .auth import APIKeyNode, BasicAuthNode, OAuth2Node
22
24
  from .graphql import AsyncGraphQLClientNode, GraphQLClientNode
23
25
  from .http import AsyncHTTPRequestNode, HTTPRequestNode
26
+ from .monitoring import HealthCheckNode
24
27
  from .rate_limiting import (
25
28
  AsyncRateLimitedAPINode,
26
29
  RateLimitConfig,
@@ -31,6 +34,24 @@ from .rate_limiting import (
31
34
  create_rate_limiter,
32
35
  )
33
36
  from .rest import AsyncRESTClientNode, RESTClientNode
37
+ from .security import SecurityScannerNode
38
+
39
+ # Backwards compatibility aliases
40
+ HTTPClientNode = HTTPRequestNode # Deprecated: Use HTTPRequestNode instead
41
+
42
+
43
def __getattr__(name):
    """Module-level fallback (PEP 562) that warns about deprecated aliases.

    Args:
        name: Attribute requested on this module.

    Returns:
        The replacement object for a recognized deprecated alias.

    Raises:
        AttributeError: If *name* is not a known deprecated alias.
    """
    if name == "HTTPClientNode":
        # NOTE(review): the module also binds HTTPClientNode directly above;
        # a name present in the module namespace shadows this __getattr__
        # hook, so this warning never fires unless that direct alias is
        # removed -- confirm intent.
        warnings.warn(
            # This diff *is* the 0.3.0 release, so the alias cannot be
            # removed in v0.3.0; removal must target a future version.
            "HTTPClientNode is deprecated and will be removed in v0.4.0. "
            "Use HTTPRequestNode instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return HTTPRequestNode
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
54
+
34
55
 
35
56
  __all__ = [
36
57
  "HTTPRequestNode",
@@ -49,4 +70,9 @@ __all__ = [
49
70
  "RateLimitedAPINode",
50
71
  "AsyncRateLimitedAPINode",
51
72
  "create_rate_limiter",
73
+ # Monitoring and Security
74
+ "HealthCheckNode",
75
+ "SecurityScannerNode",
76
+ # Backwards compatibility
77
+ "HTTPClientNode", # Deprecated alias
52
78
  ]
@@ -0,0 +1,463 @@
1
+ """Monitoring and health check nodes for system observability."""
2
+
3
+ import asyncio
4
+ import socket
5
+ import subprocess
6
+ import time
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict, List
9
+
10
+ import requests
11
+
12
+ from kailash.nodes.base import Node, NodeParameter, register_node
13
+
14
+
15
+ @register_node()
16
+ class HealthCheckNode(Node):
17
+ """
18
+ Performs health checks on various system components and services.
19
+
20
+ This node provides comprehensive health monitoring capabilities for
21
+ distributed systems, replacing DataTransformer with embedded Python code
22
+ for monitoring tasks. It supports HTTP endpoints, TCP ports, databases,
23
+ file systems, and custom health check commands.
24
+
25
+ Design Philosophy:
26
+ Modern distributed systems require robust health monitoring. This node
27
+ provides a declarative way to define health checks without writing
28
+ custom code in DataTransformer nodes. It standardizes health check
29
+ patterns and provides consistent output formats.
30
+
31
+ Upstream Dependencies:
32
+ - Configuration nodes with endpoint definitions
33
+ - Service discovery nodes
34
+ - Timer nodes for scheduled checks
35
+ - Alert threshold nodes
36
+
37
+ Downstream Consumers:
38
+ - Alert generation nodes
39
+ - Dashboard visualization nodes
40
+ - Logging and metrics nodes
41
+ - Auto-scaling decision nodes
42
+ - Incident response workflows
43
+
44
+ Configuration:
45
+ - Target endpoints and services
46
+ - Check types and parameters
47
+ - Timeout and retry settings
48
+ - Success/failure criteria
49
+ - Alert thresholds
50
+
51
+ Implementation Details:
52
+ - Parallel execution of multiple checks
53
+ - Proper timeout handling
54
+ - Retry logic with exponential backoff
55
+ - Structured output with metrics
56
+ - Support for various check types
57
+
58
+ Error Handling:
59
+ - Graceful handling of network failures
60
+ - Timeout management
61
+ - Invalid configuration detection
62
+ - Partial failure reporting
63
+
64
+ Side Effects:
65
+ - Network requests to target systems
66
+ - File system access for disk checks
67
+ - Process execution for custom commands
68
+ - Minimal impact design
69
+
70
+ Examples:
71
+ >>> # HTTP endpoint health checks
72
+ >>> health_check = HealthCheckNode(
73
+ ... targets=[
74
+ ... {'type': 'http', 'url': 'https://api.example.com/health'},
75
+ ... {'type': 'http', 'url': 'https://app.example.com/status'}
76
+ ... ],
77
+ ... timeout=30
78
+ ... )
79
+ >>> result = health_check.execute()
80
+ >>> assert 'health_results' in result
81
+ >>> assert result['summary']['total_checks'] == 2
82
+ >>>
83
+ >>> # Mixed health checks
84
+ >>> health_check = HealthCheckNode(
85
+ ... targets=[
86
+ ... {'type': 'tcp', 'host': 'database.example.com', 'port': 5432},
87
+ ... {'type': 'disk', 'path': '/var/log', 'threshold': 80},
88
+ ... {'type': 'command', 'command': 'systemctl is-active nginx'}
89
+ ... ]
90
+ ... )
91
+ >>> result = health_check.execute()
92
+ >>> assert 'health_results' in result
93
+ """
94
+
95
+ def get_parameters(self) -> Dict[str, NodeParameter]:
96
+ return {
97
+ "targets": NodeParameter(
98
+ name="targets",
99
+ type=list,
100
+ required=True,
101
+ description="List of health check targets with type and configuration",
102
+ ),
103
+ "timeout": NodeParameter(
104
+ name="timeout",
105
+ type=int,
106
+ required=False,
107
+ default=30,
108
+ description="Timeout in seconds for each health check",
109
+ ),
110
+ "retries": NodeParameter(
111
+ name="retries",
112
+ type=int,
113
+ required=False,
114
+ default=2,
115
+ description="Number of retry attempts for failed checks",
116
+ ),
117
+ "parallel": NodeParameter(
118
+ name="parallel",
119
+ type=bool,
120
+ required=False,
121
+ default=True,
122
+ description="Execute health checks in parallel",
123
+ ),
124
+ "include_metrics": NodeParameter(
125
+ name="include_metrics",
126
+ type=bool,
127
+ required=False,
128
+ default=True,
129
+ description="Include performance metrics in results",
130
+ ),
131
+ }
132
+
133
+ def run(self, **kwargs) -> Dict[str, Any]:
134
+ targets = kwargs["targets"]
135
+ timeout = kwargs.get("timeout", 30)
136
+ retries = kwargs.get("retries", 2)
137
+ parallel = kwargs.get("parallel", True)
138
+ include_metrics = kwargs.get("include_metrics", True)
139
+
140
+ start_time = time.time()
141
+
142
+ if parallel:
143
+ # Use asyncio for parallel execution
144
+ results = asyncio.run(
145
+ self._run_checks_parallel(targets, timeout, retries, include_metrics)
146
+ )
147
+ else:
148
+ # Sequential execution
149
+ results = self._run_checks_sequential(
150
+ targets, timeout, retries, include_metrics
151
+ )
152
+
153
+ execution_time = time.time() - start_time
154
+
155
+ # Generate summary
156
+ summary = self._generate_summary(results, execution_time)
157
+
158
+ return {
159
+ "health_results": results,
160
+ "summary": summary,
161
+ "check_count": len(results),
162
+ "healthy_count": len([r for r in results if r["status"] == "healthy"]),
163
+ "unhealthy_count": len([r for r in results if r["status"] == "unhealthy"]),
164
+ "execution_time": execution_time,
165
+ "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
166
+ }
167
+
168
+ async def _run_checks_parallel(
169
+ self, targets: List[Dict], timeout: int, retries: int, include_metrics: bool
170
+ ) -> List[Dict[str, Any]]:
171
+ """Run health checks in parallel using asyncio."""
172
+
173
+ async def run_single_check(target):
174
+ return await asyncio.get_event_loop().run_in_executor(
175
+ None,
176
+ self._perform_health_check,
177
+ target,
178
+ timeout,
179
+ retries,
180
+ include_metrics,
181
+ )
182
+
183
+ tasks = [run_single_check(target) for target in targets]
184
+ return await asyncio.gather(*tasks, return_exceptions=True)
185
+
186
+ def _run_checks_sequential(
187
+ self, targets: List[Dict], timeout: int, retries: int, include_metrics: bool
188
+ ) -> List[Dict[str, Any]]:
189
+ """Run health checks sequentially."""
190
+ return [
191
+ self._perform_health_check(target, timeout, retries, include_metrics)
192
+ for target in targets
193
+ ]
194
+
195
+ def _perform_health_check(
196
+ self, target: Dict, timeout: int, retries: int, include_metrics: bool
197
+ ) -> Dict[str, Any]:
198
+ """Perform a single health check with retry logic."""
199
+
200
+ check_type = target.get("type", "unknown")
201
+ check_id = target.get("id", f"{check_type}_{hash(str(target)) % 10000}")
202
+
203
+ for attempt in range(retries + 1):
204
+ try:
205
+ start_time = time.time()
206
+
207
+ if check_type == "http":
208
+ result = self._check_http(target, timeout)
209
+ elif check_type == "tcp":
210
+ result = self._check_tcp(target, timeout)
211
+ elif check_type == "disk":
212
+ result = self._check_disk(target)
213
+ elif check_type == "command":
214
+ result = self._check_command(target, timeout)
215
+ elif check_type == "database":
216
+ result = self._check_database(target, timeout)
217
+ else:
218
+ result = {
219
+ "status": "unhealthy",
220
+ "message": f"Unknown check type: {check_type}",
221
+ "details": {},
222
+ }
223
+
224
+ # Add timing information
225
+ response_time = time.time() - start_time
226
+ result["response_time"] = response_time
227
+ result["attempt"] = attempt + 1
228
+ result["check_id"] = check_id
229
+ result["check_type"] = check_type
230
+ result["target"] = target
231
+ result["timestamp"] = datetime.now(timezone.utc).isoformat() + "Z"
232
+
233
+ # If successful, return immediately
234
+ if result["status"] == "healthy":
235
+ return result
236
+
237
+ except Exception as e:
238
+ if attempt == retries: # Last attempt
239
+ return {
240
+ "check_id": check_id,
241
+ "check_type": check_type,
242
+ "target": target,
243
+ "status": "unhealthy",
244
+ "message": f"Health check failed after {retries + 1} attempts: {str(e)}",
245
+ "details": {"error": str(e), "error_type": type(e).__name__},
246
+ "response_time": time.time() - start_time,
247
+ "attempt": attempt + 1,
248
+ "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
249
+ }
250
+
251
+ # Wait before retry (exponential backoff)
252
+ time.sleep(min(2**attempt, 10))
253
+
254
+ return result
255
+
256
+ def _check_http(self, target: Dict, timeout: int) -> Dict[str, Any]:
257
+ """Perform HTTP health check."""
258
+ url = target["url"]
259
+ expected_status = target.get("expected_status", 200)
260
+ expected_content = target.get("expected_content")
261
+ headers = target.get("headers", {})
262
+
263
+ response = requests.get(url, timeout=timeout, headers=headers)
264
+
265
+ # Check status code
266
+ if response.status_code != expected_status:
267
+ return {
268
+ "status": "unhealthy",
269
+ "message": f"HTTP status {response.status_code}, expected {expected_status}",
270
+ "details": {
271
+ "status_code": response.status_code,
272
+ "response_size": len(response.content),
273
+ "url": url,
274
+ },
275
+ }
276
+
277
+ # Check content if specified
278
+ if expected_content and expected_content not in response.text:
279
+ return {
280
+ "status": "unhealthy",
281
+ "message": f"Expected content '{expected_content}' not found in response",
282
+ "details": {
283
+ "status_code": response.status_code,
284
+ "response_size": len(response.content),
285
+ "url": url,
286
+ },
287
+ }
288
+
289
+ return {
290
+ "status": "healthy",
291
+ "message": f"HTTP check successful: {response.status_code}",
292
+ "details": {
293
+ "status_code": response.status_code,
294
+ "response_size": len(response.content),
295
+ "url": url,
296
+ },
297
+ }
298
+
299
+ def _check_tcp(self, target: Dict, timeout: int) -> Dict[str, Any]:
300
+ """Perform TCP port connectivity check."""
301
+ host = target["host"]
302
+ port = target["port"]
303
+
304
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
305
+ sock.settimeout(timeout)
306
+
307
+ try:
308
+ result = sock.connect_ex((host, port))
309
+ if result == 0:
310
+ return {
311
+ "status": "healthy",
312
+ "message": f"TCP connection successful to {host}:{port}",
313
+ "details": {"host": host, "port": port},
314
+ }
315
+ else:
316
+ return {
317
+ "status": "unhealthy",
318
+ "message": f"TCP connection failed to {host}:{port}",
319
+ "details": {"host": host, "port": port, "error_code": result},
320
+ }
321
+ finally:
322
+ sock.close()
323
+
324
+ def _check_disk(self, target: Dict) -> Dict[str, Any]:
325
+ """Perform disk space check."""
326
+ import shutil
327
+
328
+ path = target["path"]
329
+ threshold = target.get("threshold", 90) # Default 90% threshold
330
+
331
+ try:
332
+ total, used, free = shutil.disk_usage(path)
333
+ usage_percent = (used / total) * 100
334
+
335
+ if usage_percent > threshold:
336
+ return {
337
+ "status": "unhealthy",
338
+ "message": f"Disk usage {usage_percent:.1f}% exceeds threshold {threshold}%",
339
+ "details": {
340
+ "path": path,
341
+ "usage_percent": usage_percent,
342
+ "threshold": threshold,
343
+ "total_gb": total / (1024**3),
344
+ "used_gb": used / (1024**3),
345
+ "free_gb": free / (1024**3),
346
+ },
347
+ }
348
+ else:
349
+ return {
350
+ "status": "healthy",
351
+ "message": f"Disk usage {usage_percent:.1f}% within threshold",
352
+ "details": {
353
+ "path": path,
354
+ "usage_percent": usage_percent,
355
+ "threshold": threshold,
356
+ "total_gb": total / (1024**3),
357
+ "used_gb": used / (1024**3),
358
+ "free_gb": free / (1024**3),
359
+ },
360
+ }
361
+ except Exception as e:
362
+ return {
363
+ "status": "unhealthy",
364
+ "message": f"Disk check failed: {str(e)}",
365
+ "details": {"path": path, "error": str(e)},
366
+ }
367
+
368
+ def _check_command(self, target: Dict, timeout: int) -> Dict[str, Any]:
369
+ """Perform custom command health check."""
370
+ command = target["command"]
371
+ expected_exit_code = target.get("expected_exit_code", 0)
372
+
373
+ try:
374
+ result = subprocess.run(
375
+ command,
376
+ shell=True,
377
+ timeout=timeout,
378
+ capture_output=True,
379
+ text=True,
380
+ )
381
+
382
+ if result.returncode == expected_exit_code:
383
+ return {
384
+ "status": "healthy",
385
+ "message": f"Command succeeded with exit code {result.returncode}",
386
+ "details": {
387
+ "command": command,
388
+ "exit_code": result.returncode,
389
+ "stdout": result.stdout.strip(),
390
+ "stderr": result.stderr.strip(),
391
+ },
392
+ }
393
+ else:
394
+ return {
395
+ "status": "unhealthy",
396
+ "message": f"Command failed with exit code {result.returncode}",
397
+ "details": {
398
+ "command": command,
399
+ "exit_code": result.returncode,
400
+ "expected_exit_code": expected_exit_code,
401
+ "stdout": result.stdout.strip(),
402
+ "stderr": result.stderr.strip(),
403
+ },
404
+ }
405
+ except subprocess.TimeoutExpired:
406
+ return {
407
+ "status": "unhealthy",
408
+ "message": f"Command timed out after {timeout} seconds",
409
+ "details": {"command": command, "timeout": timeout},
410
+ }
411
+
412
+ def _check_database(self, target: Dict, timeout: int) -> Dict[str, Any]:
413
+ """Perform database connectivity check."""
414
+ # This is a simplified example - in production, you'd use actual database drivers
415
+ db_type = target.get("db_type", "postgresql")
416
+ host = target["host"]
417
+ port = target.get("port", 5432 if db_type == "postgresql" else 3306)
418
+
419
+ # For now, just check TCP connectivity
420
+ # In a real implementation, you'd use database-specific health checks
421
+ return self._check_tcp({"host": host, "port": port}, timeout)
422
+
423
+ def _generate_summary(
424
+ self, results: List[Dict], execution_time: float
425
+ ) -> Dict[str, Any]:
426
+ """Generate summary statistics from health check results."""
427
+ total_checks = len(results)
428
+ healthy_checks = len([r for r in results if r.get("status") == "healthy"])
429
+ unhealthy_checks = total_checks - healthy_checks
430
+
431
+ # Calculate average response time
432
+ response_times = [
433
+ r.get("response_time", 0) for r in results if "response_time" in r
434
+ ]
435
+ avg_response_time = (
436
+ sum(response_times) / len(response_times) if response_times else 0
437
+ )
438
+
439
+ # Group by check type
440
+ check_types = {}
441
+ for result in results:
442
+ check_type = result.get("check_type", "unknown")
443
+ if check_type not in check_types:
444
+ check_types[check_type] = {"total": 0, "healthy": 0, "unhealthy": 0}
445
+
446
+ check_types[check_type]["total"] += 1
447
+ if result.get("status") == "healthy":
448
+ check_types[check_type]["healthy"] += 1
449
+ else:
450
+ check_types[check_type]["unhealthy"] += 1
451
+
452
+ return {
453
+ "total_checks": total_checks,
454
+ "healthy_checks": healthy_checks,
455
+ "unhealthy_checks": unhealthy_checks,
456
+ "health_percentage": (
457
+ (healthy_checks / total_checks * 100) if total_checks > 0 else 0
458
+ ),
459
+ "average_response_time": avg_response_time,
460
+ "execution_time": execution_time,
461
+ "check_types": check_types,
462
+ "overall_status": "healthy" if unhealthy_checks == 0 else "unhealthy",
463
+ }