kailash 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control/__init__.py +1 -1
  3. kailash/core/actors/adaptive_pool_controller.py +630 -0
  4. kailash/core/actors/connection_actor.py +3 -3
  5. kailash/core/ml/__init__.py +1 -0
  6. kailash/core/ml/query_patterns.py +544 -0
  7. kailash/core/monitoring/__init__.py +19 -0
  8. kailash/core/monitoring/connection_metrics.py +488 -0
  9. kailash/core/optimization/__init__.py +1 -0
  10. kailash/core/resilience/__init__.py +17 -0
  11. kailash/core/resilience/circuit_breaker.py +382 -0
  12. kailash/gateway/api.py +7 -5
  13. kailash/gateway/enhanced_gateway.py +1 -1
  14. kailash/middleware/auth/access_control.py +11 -11
  15. kailash/middleware/communication/ai_chat.py +7 -7
  16. kailash/middleware/communication/api_gateway.py +5 -15
  17. kailash/middleware/gateway/checkpoint_manager.py +45 -8
  18. kailash/middleware/gateway/event_store.py +66 -26
  19. kailash/middleware/mcp/enhanced_server.py +2 -2
  20. kailash/nodes/admin/permission_check.py +110 -30
  21. kailash/nodes/admin/schema.sql +387 -0
  22. kailash/nodes/admin/tenant_isolation.py +249 -0
  23. kailash/nodes/admin/transaction_utils.py +244 -0
  24. kailash/nodes/admin/user_management.py +37 -9
  25. kailash/nodes/ai/ai_providers.py +55 -3
  26. kailash/nodes/ai/llm_agent.py +115 -13
  27. kailash/nodes/data/query_pipeline.py +641 -0
  28. kailash/nodes/data/query_router.py +895 -0
  29. kailash/nodes/data/sql.py +24 -0
  30. kailash/nodes/data/workflow_connection_pool.py +451 -23
  31. kailash/nodes/monitoring/__init__.py +3 -5
  32. kailash/nodes/monitoring/connection_dashboard.py +822 -0
  33. kailash/nodes/rag/__init__.py +1 -3
  34. kailash/resources/registry.py +6 -0
  35. kailash/runtime/async_local.py +7 -0
  36. kailash/utils/export.py +152 -0
  37. kailash/workflow/builder.py +42 -0
  38. kailash/workflow/graph.py +86 -17
  39. kailash/workflow/templates.py +4 -9
  40. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/METADATA +14 -1
  41. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/RECORD +45 -31
  42. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/WHEEL +0 -0
  43. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/entry_points.txt +0 -0
  44. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/licenses/LICENSE +0 -0
  45. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/top_level.txt +0 -0
kailash/nodes/data/sql.py CHANGED
@@ -494,6 +494,30 @@ class SQLDatabaseNode(Node):
             "execution_time": execution_time,
         }

+    async def async_run(self, **kwargs) -> dict[str, Any]:
+        """
+        Async wrapper for the run method to maintain backward compatibility.
+
+        This method provides an async interface while maintaining the same
+        functionality as the synchronous run method. The underlying SQLAlchemy
+        operations are still synchronous but wrapped for async compatibility.
+
+        Args:
+            **kwargs: Same parameters as run()
+
+        Returns:
+            Same return format as run()
+
+        Note:
+            This is a compatibility method. The actual database operations
+            are still synchronous underneath.
+        """
+        import asyncio
+
+        # Run the synchronous method in a thread pool to avoid blocking
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, lambda: self.run(**kwargs))
+
     @classmethod
     def get_pool_status(cls) -> dict[str, Any]:
         """Get status of all shared connection pools."""
kailash/nodes/data/workflow_connection_pool.py CHANGED
@@ -19,6 +19,17 @@ from kailash.core.actors import (
     ConnectionState,
     SupervisionStrategy,
 )
+from kailash.core.actors.adaptive_pool_controller import AdaptivePoolController
+from kailash.core.ml.query_patterns import QueryPatternTracker
+from kailash.core.monitoring.connection_metrics import (
+    ConnectionMetricsCollector,
+    ErrorCategory,
+)
+from kailash.core.resilience.circuit_breaker import (
+    CircuitBreakerConfig,
+    CircuitBreakerError,
+    ConnectionCircuitBreaker,
+)
 from kailash.nodes.base import NodeParameter, register_node
 from kailash.nodes.base_async import AsyncNode
 from kailash.sdk_exceptions import NodeExecutionError
@@ -187,6 +198,8 @@ class WorkflowConnectionPool(AsyncNode):
         self.max_connections = config.get("max_connections", 10)
         self.health_threshold = config.get("health_threshold", 50)
         self.pre_warm_enabled = config.get("pre_warm", True)
+        self.adaptive_sizing_enabled = config.get("adaptive_sizing", False)
+        self.enable_query_routing = config.get("enable_query_routing", False)

         # Database configuration
         self.db_config = {
@@ -223,6 +236,41 @@ class WorkflowConnectionPool(AsyncNode):
         self._initialized = False
         self._closing = False

+        # Phase 2 components
+        self.query_pattern_tracker = None
+        self.adaptive_controller = None
+
+        if self.enable_query_routing:
+            self.query_pattern_tracker = QueryPatternTracker()
+
+        if self.adaptive_sizing_enabled:
+            self.adaptive_controller = AdaptivePoolController(
+                min_size=self.min_connections, max_size=self.max_connections
+            )
+
+        # Phase 3 components
+        # Circuit breaker for connection failures
+        self.circuit_breaker_config = CircuitBreakerConfig(
+            failure_threshold=config.get("circuit_breaker_failure_threshold", 5),
+            recovery_timeout=config.get("circuit_breaker_recovery_timeout", 60),
+            error_rate_threshold=config.get("circuit_breaker_error_rate", 0.5),
+        )
+        self.circuit_breaker = ConnectionCircuitBreaker(self.circuit_breaker_config)
+
+        # Comprehensive metrics collector
+        self.metrics_collector = ConnectionMetricsCollector(
+            pool_name=self.metadata.name,
+            retention_minutes=config.get("metrics_retention_minutes", 60),
+        )
+
+        # Enable query pipelining support
+        self.enable_pipelining = config.get("enable_pipelining", False)
+        self.pipeline_batch_size = config.get("pipeline_batch_size", 100)
+
+        # Monitoring dashboard integration
+        self.enable_monitoring = config.get("enable_monitoring", False)
+        self.monitoring_port = config.get("monitoring_port", 8080)
+
     def get_parameters(self) -> Dict[str, NodeParameter]:
         """Define node parameters."""
         params = [
@@ -287,6 +335,77 @@ class WorkflowConnectionPool(AsyncNode):
                 default=True,
                 description="Enable pattern-based pre-warming",
             ),
+            NodeParameter(
+                name="adaptive_sizing",
+                type=bool,
+                required=False,
+                default=False,
+                description="Enable adaptive pool sizing based on workload",
+            ),
+            NodeParameter(
+                name="enable_query_routing",
+                type=bool,
+                required=False,
+                default=False,
+                description="Enable query pattern tracking for routing optimization",
+            ),
+            # Phase 3 parameters
+            NodeParameter(
+                name="circuit_breaker_failure_threshold",
+                type=int,
+                required=False,
+                default=5,
+                description="Failures before circuit breaker opens",
+            ),
+            NodeParameter(
+                name="circuit_breaker_recovery_timeout",
+                type=int,
+                required=False,
+                default=60,
+                description="Seconds before circuit breaker tries recovery",
+            ),
+            NodeParameter(
+                name="circuit_breaker_error_rate",
+                type=float,
+                required=False,
+                default=0.5,
+                description="Error rate threshold to open circuit",
+            ),
+            NodeParameter(
+                name="metrics_retention_minutes",
+                type=int,
+                required=False,
+                default=60,
+                description="How long to retain detailed metrics",
+            ),
+            NodeParameter(
+                name="enable_pipelining",
+                type=bool,
+                required=False,
+                default=False,
+                description="Enable query pipelining for batch operations",
+            ),
+            NodeParameter(
+                name="pipeline_batch_size",
+                type=int,
+                required=False,
+                default=100,
+                description="Maximum queries per pipeline batch",
+            ),
+            NodeParameter(
+                name="enable_monitoring",
+                type=bool,
+                required=False,
+                default=False,
+                description="Enable monitoring dashboard",
+            ),
+            NodeParameter(
+                name="monitoring_port",
+                type=int,
+                required=False,
+                default=8080,
+                description="Port for monitoring dashboard",
+            ),
             # Operation parameters
             NodeParameter(
                 name="operation",
@@ -355,6 +474,20 @@ class WorkflowConnectionPool(AsyncNode):
             return await self._execute_query(inputs)
         elif operation == "stats":
             return await self._get_stats()
+        elif operation == "get_status":
+            return await self._get_pool_status()
+        elif operation == "adjust_pool_size":
+            return await self.adjust_pool_size(inputs.get("new_size"))
+        elif operation == "get_pool_statistics":
+            return await self.get_pool_statistics()
+        elif operation == "get_comprehensive_status":
+            return await self.get_comprehensive_status()
+        elif operation == "start_monitoring":
+            return await self._start_monitoring_dashboard()
+        elif operation == "stop_monitoring":
+            return await self._stop_monitoring_dashboard()
+        elif operation == "export_metrics":
+            return {"prometheus_metrics": self.metrics_collector.export_prometheus()}
         else:
             raise NodeExecutionError(f"Unknown operation: {operation}")
@@ -374,12 +507,20 @@ class WorkflowConnectionPool(AsyncNode):
             # Create minimum connections
             await self._ensure_min_connections()

+            # Start adaptive controller if enabled
+            if self.adaptive_controller:
+                await self.adaptive_controller.start(
+                    pool_ref=self, pattern_tracker=self.query_pattern_tracker
+                )
+
             self._initialized = True

             return {
                 "status": "initialized",
                 "min_connections": self.min_connections,
                 "max_connections": self.max_connections,
+                "adaptive_sizing": self.adaptive_sizing_enabled,
+                "query_routing": self.enable_query_routing,
             }

         except Exception as e:
@@ -394,28 +535,39 @@ class WorkflowConnectionPool(AsyncNode):
         start_time = time.time()

         try:
-            # Try to get available connection
-            connection = None
-
-            # Fast path: try to get immediately available connection
-            try:
-                connection = await asyncio.wait_for(
-                    self.available_connections.get(), timeout=0.1
-                )
-            except asyncio.TimeoutError:
-                # Need to create new connection or wait
-                if len(self.all_connections) < self.max_connections:
-                    # Create new connection
-                    connection = await self._create_connection()
-                    # Don't put it in available queue - we'll use it directly
-                else:
-                    # Wait for available connection
-                    connection = await self.available_connections.get()
+            # Use circuit breaker to protect connection acquisition
+            async def acquire_with_circuit_breaker():
+                # Try to get available connection
+                connection = None
+
+                # Fast path: try to get immediately available connection
+                try:
+                    connection = await asyncio.wait_for(
+                        self.available_connections.get(), timeout=0.1
+                    )
+                except asyncio.TimeoutError:
+                    # Need to create new connection or wait
+                    if len(self.all_connections) < self.max_connections:
+                        # Create new connection
+                        connection = await self._create_connection()
+                        # Don't put it in available queue - we'll use it directly
+                    else:
+                        # Wait for available connection
+                        connection = await self.available_connections.get()
+
+                return connection
+
+            # Execute with circuit breaker protection
+            connection = await self.circuit_breaker.call(acquire_with_circuit_breaker)

             # Record acquisition time
             wait_time = time.time() - start_time
             self.metrics.record_acquisition_time(wait_time)

+            # Track in comprehensive metrics
+            with self.metrics_collector.track_acquisition() as timer:
+                pass  # Already acquired, just recording time
+
             # Move to active
             self.active_connections[connection.id] = connection

@@ -431,8 +583,14 @@ class WorkflowConnectionPool(AsyncNode):
                 "acquisition_time_ms": wait_time * 1000,
             }

+        except CircuitBreakerError as e:
+            # Circuit is open - pool is experiencing failures
+            self.metrics_collector.track_pool_exhaustion()
+            logger.error(f"Circuit breaker open: {e}")
+            raise NodeExecutionError(f"Connection pool circuit breaker open: {e}")
         except Exception as e:
             logger.error(f"Failed to acquire connection: {e}")
+            self.metrics_collector.track_query_error("ACQUIRE", e)
             raise NodeExecutionError(f"Connection acquisition failed: {e}")

     async def _release_connection(self, connection_id: Optional[str]) -> Dict[str, Any]:
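
An open circuit now surfaces as a NodeExecutionError instead of letting callers pile up on a failing pool. A retry sketch, assuming the same pool.execute surface as above and an "acquire" operation name (that dispatch branch is outside this hunk):

    import asyncio

    from kailash.sdk_exceptions import NodeExecutionError

    async def acquire_with_backoff(pool, retries=3):
        for _ in range(retries):
            try:
                return await pool.execute(operation="acquire")
            except NodeExecutionError as e:
                if "circuit breaker open" not in str(e):
                    raise  # ordinary acquisition failure
                # Circuit is open: wait out a slice of the recovery timeout.
                await asyncio.sleep(
                    pool.circuit_breaker_config.recovery_timeout / retries
                )
        raise NodeExecutionError("pool unavailable after retries")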
@@ -462,18 +620,45 @@ class WorkflowConnectionPool(AsyncNode):

         connection = self.active_connections[connection_id]

+        # Determine query type for metrics
+        query = inputs.get("query", "").strip().upper()
+        query_type = "UNKNOWN"
+        if query.startswith("SELECT"):
+            query_type = "SELECT"
+        elif query.startswith("INSERT"):
+            query_type = "INSERT"
+        elif query.startswith("UPDATE"):
+            query_type = "UPDATE"
+        elif query.startswith("DELETE"):
+            query_type = "DELETE"
+
         try:
-            # Execute query
-            result = await connection.execute(
-                query=inputs.get("query"),
-                params=inputs.get("params"),
-                fetch_mode=inputs.get("fetch_mode", "all"),
-            )
+            # Execute query with comprehensive metrics tracking
+            with self.metrics_collector.track_query(query_type) as timer:
+                result = await connection.execute(
+                    query=inputs.get("query"),
+                    params=inputs.get("params"),
+                    fetch_mode=inputs.get("fetch_mode", "all"),
+                )

             # Update metrics
             self.metrics.queries_executed += 1
             if not result.success:
                 self.metrics.query_errors += 1
+                self.metrics_collector.track_query_error(
+                    query_type, Exception(result.error)
+                )
+
+            # Track query pattern if enabled
+            if self.query_pattern_tracker and inputs.get("query"):
+                self.query_pattern_tracker.record_execution(
+                    fingerprint=inputs.get("query_fingerprint", inputs.get("query")),
+                    execution_time_ms=result.execution_time * 1000,
+                    connection_id=connection_id,
+                    parameters=inputs.get("params", {}),
+                    success=result.success,
+                    result_size=len(result.data) if result.data else 0,
+                )

             return {
                 "success": result.success,
@@ -593,6 +778,10 @@ class WorkflowConnectionPool(AsyncNode):
         # Stop accepting new connections
         self._initialized = False

+        # Stop adaptive controller if running
+        if self.adaptive_controller:
+            await self.adaptive_controller.stop()
+
         # Stop all connection actors gracefully
         actors_to_stop = list(self.all_connections.values())
         for actor in actors_to_stop:
@@ -641,3 +830,242 @@ class WorkflowConnectionPool(AsyncNode):
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         """Context manager exit."""
         await self._cleanup()
+
+    async def _get_pool_status(self) -> Dict[str, Any]:
+        """Get pool status for query router."""
+        connections = {}
+
+        for conn_id, conn in self.all_connections.items():
+            connections[conn_id] = {
+                "health_score": conn.health_score,
+                "active_queries": 1 if conn_id in self.active_connections else 0,
+                "capabilities": [
+                    "read",
+                    "write",
+                ],  # TODO: Add actual capability detection
+                "avg_latency_ms": 0.0,  # TODO: Track actual latency
+                "last_used": datetime.now().isoformat(),
+            }
+
+        return {
+            "connections": connections,
+            "pool_size": len(self.all_connections),
+            "active_count": len(self.active_connections),
+            "available_count": self.available_connections.qsize(),
+        }
+
+    async def adjust_pool_size(self, new_size: int) -> Dict[str, Any]:
+        """Dynamically adjust pool size."""
+        if new_size < self.min_connections or new_size > self.max_connections:
+            return {
+                "success": False,
+                "reason": f"Size must be between {self.min_connections} and {self.max_connections}",
+            }
+
+        current_size = len(self.all_connections)
+
+        if new_size > current_size:
+            # Scale up
+            connections_to_add = new_size - current_size
+            for _ in range(connections_to_add):
+                try:
+                    await self._create_connection()
+                except Exception as e:
+                    logger.error(f"Failed to create connection during scale up: {e}")
+
+        elif new_size < current_size:
+            # Scale down - remove idle connections first
+            connections_to_remove = current_size - new_size
+            removed = 0
+
+            # Try to remove idle connections
+            while (
+                removed < connections_to_remove
+                and not self.available_connections.empty()
+            ):
+                try:
+                    conn = await asyncio.wait_for(
+                        self.available_connections.get(), timeout=0.1
+                    )
+                    await self._recycle_connection(conn)
+                    removed += 1
+                except asyncio.TimeoutError:
+                    break
+
+        return {
+            "success": True,
+            "previous_size": current_size,
+            "new_size": len(self.all_connections),
+        }
+
+    async def get_pool_statistics(self) -> Dict[str, Any]:
+        """Get detailed pool statistics for adaptive sizing."""
+        total_connections = len(self.all_connections)
+        active_connections = len(self.active_connections)
+        idle_connections = self.available_connections.qsize()
+
+        # Calculate metrics
+        utilization_rate = (
+            active_connections / total_connections if total_connections > 0 else 0
+        )
+
+        # Get average health score
+        health_scores = [conn.health_score for conn in self.all_connections.values()]
+        avg_health_score = (
+            sum(health_scores) / len(health_scores) if health_scores else 100
+        )
+
+        # Queue depth (approximate based on waiters)
+        queue_depth = 0  # TODO: Track actual queue depth
+
+        # Get timing metrics from pool metrics
+        stats = self.metrics.get_stats()
+
+        return {
+            "total_connections": total_connections,
+            "active_connections": active_connections,
+            "idle_connections": idle_connections,
+            "queue_depth": queue_depth,
+            "utilization_rate": utilization_rate,
+            "avg_health_score": avg_health_score,
+            "avg_acquisition_time_ms": stats["performance"]["avg_acquisition_time_ms"],
+            "avg_query_time_ms": 50.0,  # TODO: Track actual query time
+            "queries_per_second": (
+                stats["queries"]["executed"] / stats["uptime_seconds"]
+                if stats["uptime_seconds"] > 0
+                else 0
+            ),
+            # Phase 3 additions
+            "circuit_breaker_status": self.circuit_breaker.get_status(),
+            "comprehensive_metrics": self.metrics_collector.get_all_metrics(),
+            "error_rate": self.metrics_collector.get_error_summary()["error_rate"],
+            "health_score": avg_health_score,
+            "pool_name": self.metadata.name,
+        }
+
+    async def get_comprehensive_status(self) -> Dict[str, Any]:
+        """Get comprehensive status including all Phase 3 features."""
+        base_stats = await self.get_pool_statistics()
+
+        # Add circuit breaker details
+        cb_status = self.circuit_breaker.get_status()
+
+        # Add comprehensive metrics
+        metrics = self.metrics_collector.get_all_metrics()
+
+        # Add pattern learning insights if enabled
+        pattern_insights = {}
+        if self.query_pattern_tracker:
+            patterns = self.query_pattern_tracker.get_all_patterns()
+            pattern_insights = {
+                "detected_patterns": len(patterns),
+                "workload_forecast": self.query_pattern_tracker.get_workload_forecast(
+                    15
+                ),
+            }
+
+        # Add adaptive controller status if enabled
+        adaptive_status = {}
+        if self.adaptive_controller:
+            adaptive_status = {
+                "current_size": len(self.all_connections),
+                "recommended_size": self.adaptive_controller.get_recommended_size(),
+                "last_adjustment": self.adaptive_controller.get_last_adjustment(),
+            }
+
+        return {
+            **base_stats,
+            "circuit_breaker": {
+                "state": cb_status["state"],
+                "metrics": cb_status["metrics"],
+                "time_until_recovery": cb_status.get("time_until_recovery"),
+            },
+            "detailed_metrics": {
+                "counters": metrics["counters"],
+                "gauges": metrics["gauges"],
+                "histograms": metrics["histograms"],
+                "errors": metrics["errors"],
+                "query_summary": metrics["queries"],
+            },
+            "pattern_insights": pattern_insights,
+            "adaptive_control": adaptive_status,
+            "monitoring": {
+                "dashboard_enabled": self.enable_monitoring,
+                "dashboard_url": (
+                    f"http://localhost:{self.monitoring_port}"
+                    if self.enable_monitoring
+                    else None
+                ),
+            },
+        }
+
+    async def _start_monitoring_dashboard(self) -> Dict[str, Any]:
+        """Start the monitoring dashboard if enabled."""
+        if not self.enable_monitoring:
+            return {"error": "Monitoring not enabled in configuration"}
+
+        try:
+            # Register this pool with the global metrics aggregator
+            if hasattr(self.runtime, "metrics_aggregator"):
+                self.runtime.metrics_aggregator.register_collector(
+                    self.metrics_collector
+                )
+
+            # Start monitoring dashboard if not already running
+            if not hasattr(self.runtime, "monitoring_dashboard"):
+                from kailash.nodes.monitoring.connection_dashboard import (
+                    ConnectionDashboardNode,
+                )
+
+                dashboard = ConnectionDashboardNode(
+                    name="global_dashboard",
+                    port=self.monitoring_port,
+                    update_interval=1.0,
+                )
+
+                # Store dashboard in runtime for sharing
+                self.runtime.monitoring_dashboard = dashboard
+                await dashboard.start()
+
+                return {
+                    "status": "started",
+                    "dashboard_url": f"http://localhost:{self.monitoring_port}",
+                }
+            else:
+                return {
+                    "status": "already_running",
+                    "dashboard_url": f"http://localhost:{self.monitoring_port}",
+                }
+
+        except Exception as e:
+            logger.error(f"Failed to start monitoring dashboard: {e}")
+            return {"error": str(e)}
+
+    async def _stop_monitoring_dashboard(self) -> Dict[str, Any]:
+        """Stop the monitoring dashboard."""
+        try:
+            if hasattr(self.runtime, "monitoring_dashboard"):
+                await self.runtime.monitoring_dashboard.stop()
+                del self.runtime.monitoring_dashboard
+                return {"status": "stopped"}
+            else:
+                return {"status": "not_running"}
+        except Exception as e:
+            logger.error(f"Failed to stop monitoring dashboard: {e}")
+            return {"error": str(e)}
+
+    def _update_pool_metrics(self):
+        """Update pool metrics for monitoring."""
+        total = len(self.all_connections)
+        active = len(self.active_connections)
+        idle = self.available_connections.qsize()
+
+        # Update comprehensive metrics
+        self.metrics_collector.update_pool_stats(active, idle, total)
+
+        # Track health checks
+        for conn in self.all_connections.values():
+            self.metrics_collector.track_health_check(
+                success=conn.health_score > self.health_threshold,
+                duration_ms=5.0,  # Placeholder - real implementation would track actual time
+            )
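
The new public methods can also be called directly on the pool instance, bypassing operation dispatch. A sketch of a periodic right-sizing loop built only on methods defined above (the 0.8 threshold and 30 s cadence are illustrative):

    import asyncio

    async def resize_loop(pool):
        while True:
            stats = await pool.get_pool_statistics()
            if stats["utilization_rate"] > 0.8:
                result = await pool.adjust_pool_size(stats["total_connections"] + 2)
                if not result["success"]:
                    print(result["reason"])  # outside the min/max bounds
            await asyncio.sleep(30)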
kailash/nodes/monitoring/__init__.py CHANGED
@@ -1,7 +1,5 @@
-"""Monitoring and performance nodes for the Kailash SDK."""
+"""Monitoring nodes for connection and workflow visualization."""

-from .performance_benchmark import PerformanceBenchmarkNode
+from .connection_dashboard import ConnectionDashboardNode

-__all__ = [
-    "PerformanceBenchmarkNode",
-]
+__all__ = ["ConnectionDashboardNode"]
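
Note the breaking change for downstream imports: PerformanceBenchmarkNode is no longer re-exported from kailash.nodes.monitoring. A migration sketch:

    # 0.6.0
    # from kailash.nodes.monitoring import PerformanceBenchmarkNode

    # 0.6.2
    from kailash.nodes.monitoring import ConnectionDashboardNode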