kailash 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/access_control/__init__.py +1 -1
- kailash/core/actors/adaptive_pool_controller.py +630 -0
- kailash/core/actors/connection_actor.py +3 -3
- kailash/core/ml/__init__.py +1 -0
- kailash/core/ml/query_patterns.py +544 -0
- kailash/core/monitoring/__init__.py +19 -0
- kailash/core/monitoring/connection_metrics.py +488 -0
- kailash/core/optimization/__init__.py +1 -0
- kailash/core/resilience/__init__.py +17 -0
- kailash/core/resilience/circuit_breaker.py +382 -0
- kailash/gateway/api.py +7 -5
- kailash/gateway/enhanced_gateway.py +1 -1
- kailash/middleware/auth/access_control.py +11 -11
- kailash/middleware/communication/ai_chat.py +7 -7
- kailash/middleware/communication/api_gateway.py +5 -15
- kailash/middleware/gateway/checkpoint_manager.py +45 -8
- kailash/middleware/gateway/event_store.py +66 -26
- kailash/middleware/mcp/enhanced_server.py +2 -2
- kailash/nodes/admin/permission_check.py +110 -30
- kailash/nodes/admin/schema.sql +387 -0
- kailash/nodes/admin/tenant_isolation.py +249 -0
- kailash/nodes/admin/transaction_utils.py +244 -0
- kailash/nodes/admin/user_management.py +37 -9
- kailash/nodes/ai/ai_providers.py +55 -3
- kailash/nodes/ai/llm_agent.py +115 -13
- kailash/nodes/data/query_pipeline.py +641 -0
- kailash/nodes/data/query_router.py +895 -0
- kailash/nodes/data/sql.py +24 -0
- kailash/nodes/data/workflow_connection_pool.py +451 -23
- kailash/nodes/monitoring/__init__.py +3 -5
- kailash/nodes/monitoring/connection_dashboard.py +822 -0
- kailash/nodes/rag/__init__.py +1 -3
- kailash/resources/registry.py +6 -0
- kailash/runtime/async_local.py +7 -0
- kailash/utils/export.py +152 -0
- kailash/workflow/builder.py +42 -0
- kailash/workflow/graph.py +86 -17
- kailash/workflow/templates.py +4 -9
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/METADATA +14 -1
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/RECORD +45 -31
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/WHEEL +0 -0
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
"""Transaction utilities for admin nodes to handle timing and persistence issues.
|
2
|
+
|
3
|
+
This module provides utilities to handle common transaction and timing issues
|
4
|
+
encountered in admin node operations, particularly around user creation,
|
5
|
+
role assignment, and permission checks.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import time
|
10
|
+
from typing import Any, Callable, Dict, Optional, TypeVar
|
11
|
+
|
12
|
+
from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
T = TypeVar("T")
|
17
|
+
|
18
|
+
|
19
|
+
class TransactionHelper:
    """Helper class for handling database transaction timing and persistence issues.

    Wraps a database node and offers two building blocks: retrying an
    operation with exponential backoff, and polling a verification query
    until it returns at least one row. The ``*_with_verification`` methods
    combine both for user creation and role assignment.
    """

    def __init__(self, db_node, max_retries: int = 3, retry_delay: float = 0.1):
        """
        Initialize transaction helper.

        Args:
            db_node: Database node instance (SQLDatabaseNode)
            max_retries: Maximum number of attempts for transient failures
            retry_delay: Base delay between attempts in seconds; doubled
                after every failed attempt
        """
        self.db_node = db_node
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    def execute_with_retry(
        self, operation: "Callable[[], T]", operation_name: str
    ) -> "T":
        """
        Execute a database operation with retry logic.

        Args:
            operation: Zero-argument callable that performs the database operation
            operation_name: Description of the operation for logging

        Returns:
            Result of the first successful call to ``operation``

        Raises:
            NodeExecutionError: If operation fails after all retries (the last
                underlying exception is attached as ``__cause__``)
        """
        last_exception: Optional[Exception] = None

        for attempt in range(self.max_retries):
            try:
                result = operation()
            except Exception as e:
                last_exception = e
                if attempt < self.max_retries - 1:
                    logger.warning(
                        f"{operation_name} failed on attempt {attempt + 1}, retrying: {e}"
                    )
                    # Exponential backoff: delay, 2*delay, 4*delay, ...
                    time.sleep(self.retry_delay * (2**attempt))
                else:
                    logger.error(
                        f"{operation_name} failed after {self.max_retries} attempts: {e}"
                    )
            else:
                if attempt > 0:
                    logger.info(f"{operation_name} succeeded on attempt {attempt + 1}")
                return result

        # Chain the last failure so the original traceback is not lost.
        raise NodeExecutionError(
            f"{operation_name} failed after {self.max_retries} attempts: {last_exception}"
        ) from last_exception

    def verify_operation_success(
        self,
        verification_query: str,
        expected_result: Any,
        operation_name: str,
        timeout_seconds: float = 5.0,
        parameters: Optional[list] = None,
    ) -> bool:
        """
        Verify that a database operation was successful by polling a query.

        The query is re-run every 50ms until it returns at least one row or
        the timeout elapses. Errors raised by the query are logged at debug
        level and treated as "not yet visible".

        Args:
            verification_query: SQL query to verify the operation
            expected_result: Expected result from the verification query.
                NOTE(review): this value is accepted for interface
                compatibility but is not compared against the returned rows;
                any non-empty result counts as success.
            operation_name: Description of the operation for logging
            timeout_seconds: Maximum time to wait for verification
            parameters: Values bound to the placeholders in
                ``verification_query``. When ``None`` the query is run
                without bound parameters, as before.

        Returns:
            True if verification succeeds

        Raises:
            NodeValidationError: If verification fails after timeout
        """
        run_kwargs: Dict[str, Any] = {
            "query": verification_query,
            "result_format": "dict",
        }
        if parameters is not None:
            # Without bound values a query containing $1/$2 placeholders can
            # only error out, which made every verification time out.
            # assumes db_node.run accepts a `parameters` keyword -- TODO confirm
            run_kwargs["parameters"] = parameters

        # Monotonic clock: the elapsed-time check must not be affected by
        # wall-clock adjustments.
        deadline = time.monotonic() + timeout_seconds

        while time.monotonic() < deadline:
            try:
                result = self.db_node.run(**run_kwargs)
                data = result.get("data", [])

                if data:
                    # Operation was successful
                    logger.debug(f"{operation_name} verification succeeded")
                    return True

            except Exception as e:
                logger.debug(f"{operation_name} verification error: {e}")

            # Wait before retrying
            time.sleep(0.05)  # 50ms

        raise NodeValidationError(
            f"{operation_name} verification failed after {timeout_seconds}s"
        )

    def create_user_with_verification(
        self, user_data: Dict[str, Any], tenant_id: str
    ) -> Dict[str, Any]:
        """
        Create a user and verify the creation was successful.

        Args:
            user_data: User data dictionary
            tenant_id: Tenant ID

        Returns:
            User creation result

        Raises:
            NodeExecutionError: If creation fails after all retries
            NodeValidationError: If the created user cannot be read back
                within the verification timeout
        """
        user_id = user_data.get("user_id")

        def create_operation():
            # Imported lazily, inside the operation, as in the original code.
            from .user_management import UserManagementNode

            user_mgmt = UserManagementNode(database_url=self.db_node.connection_string)
            return user_mgmt.run(
                operation="create_user", user_data=user_data, tenant_id=tenant_id
            )

        # Execute creation with retry
        result = self.execute_with_retry(
            create_operation, f"User creation for {user_id}"
        )

        if user_id is None and isinstance(result, dict):
            # The caller did not supply an id; fall back to the one reported
            # by the creation result so the verification query has a value.
            # presumably the result is shaped {"result": {"user": {...}}} -- verify
            payload = result.get("result")
            created_user = payload.get("user") if isinstance(payload, dict) else None
            if isinstance(created_user, dict):
                user_id = created_user.get("user_id")

        # Verify user was created
        verification_query = """
        SELECT user_id FROM users
        WHERE user_id = $1 AND tenant_id = $2
        """

        self.verify_operation_success(
            verification_query,
            user_id,
            f"User {user_id} creation verification",
            timeout_seconds=2.0,
            parameters=[user_id, tenant_id],
        )

        return result

    def assign_role_with_verification(
        self, user_id: str, role_id: str, tenant_id: str
    ) -> Dict[str, Any]:
        """
        Assign a role to a user and verify the assignment was successful.

        Args:
            user_id: User ID
            role_id: Role ID
            tenant_id: Tenant ID

        Returns:
            Role assignment result

        Raises:
            NodeExecutionError: If the assignment fails after all retries
            NodeValidationError: If the assignment cannot be read back within
                the verification timeout
        """

        def assign_operation():
            # Imported lazily, inside the operation, as in the original code.
            from .role_management import RoleManagementNode

            role_mgmt = RoleManagementNode(database_url=self.db_node.connection_string)
            return role_mgmt.run(
                operation="assign_user",
                user_id=user_id,
                role_id=role_id,
                tenant_id=tenant_id,
            )

        # Execute assignment with retry
        result = self.execute_with_retry(
            assign_operation, f"Role assignment {role_id} to {user_id}"
        )

        # Verify role was assigned
        verification_query = """
        SELECT user_id, role_id FROM user_role_assignments
        WHERE user_id = $1 AND role_id = $2 AND tenant_id = $3 AND is_active = true
        """

        self.verify_operation_success(
            verification_query,
            {"user_id": user_id, "role_id": role_id},
            f"Role assignment {role_id} to {user_id} verification",
            timeout_seconds=2.0,
            parameters=[user_id, role_id, tenant_id],
        )

        return result
|
208
|
+
|
209
|
+
|
210
|
+
def with_transaction_retry(max_retries: int = 3, retry_delay: float = 0.1):
    """
    Decorator to add retry logic to admin node operations.

    The decorated function is called up to ``max_retries`` times. After each
    failed attempt except the last, the wrapper sleeps for
    ``retry_delay * 2**attempt`` seconds before trying again.

    Args:
        max_retries: Maximum number of attempts
        retry_delay: Initial delay between retries in seconds

    Returns:
        A decorator; the wrapped function returns whatever the original
        returns on its first successful attempt.

    Raises:
        NodeExecutionError: From the wrapped function when every attempt
            failed (the last underlying exception is attached as ``__cause__``)
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import functools

    def decorator(func):
        # Preserve the wrapped function's __name__, __doc__ and signature
        # metadata so logging and introspection still see the original.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None

            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    last_exception = e
                    if attempt < max_retries - 1:
                        logger.warning(
                            f"{func.__name__} failed on attempt {attempt + 1}, retrying: {e}"
                        )
                        # Exponential backoff between attempts
                        time.sleep(retry_delay * (2**attempt))
                    else:
                        logger.error(
                            f"{func.__name__} failed after {max_retries} attempts: {e}"
                        )

            # Chain the last failure so the original traceback is not lost.
            raise NodeExecutionError(
                f"{func.__name__} failed after {max_retries} attempts: {last_exception}"
            ) from last_exception

        return wrapper

    return decorator
|
@@ -25,6 +25,8 @@ from enum import Enum
|
|
25
25
|
from typing import Any, Dict, List, Optional, Set, Union
|
26
26
|
from uuid import uuid4
|
27
27
|
|
28
|
+
import bcrypt
|
29
|
+
|
28
30
|
from kailash.nodes.base import Node, NodeParameter, register_node
|
29
31
|
from kailash.nodes.data import SQLDatabaseNode
|
30
32
|
from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
|
@@ -32,6 +34,25 @@ from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
|
|
32
34
|
from .schema_manager import AdminSchemaManager
|
33
35
|
|
34
36
|
|
37
|
+
def hash_password(password: str) -> str:
    """Return the bcrypt hash of ``password`` as a UTF-8 string.

    A fresh salt is generated for every call. A falsy password (empty
    string or ``None``) is not hashed; an empty string is returned instead.
    """
    if password:
        digest = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt())
        return digest.decode("utf-8")
    return ""
|
44
|
+
|
45
|
+
|
46
|
+
def verify_password(password: str, hashed: str) -> bool:
    """Check ``password`` against a bcrypt hash.

    Returns ``False`` when either argument is falsy, and also when the
    comparison itself raises (for example on a malformed hash), so callers
    always get a plain boolean.
    """
    if password and hashed:
        try:
            candidate = password.encode("utf-8")
            reference = hashed.encode("utf-8")
            return bcrypt.checkpw(candidate, reference)
        except Exception:
            return False
    return False
|
54
|
+
|
55
|
+
|
35
56
|
def parse_datetime(value: Union[str, datetime, None]) -> Optional[datetime]:
|
36
57
|
"""Parse datetime from various formats."""
|
37
58
|
if value is None:
|
@@ -496,7 +517,7 @@ class UserManagementNode(Node):
|
|
496
517
|
user.user_id,
|
497
518
|
user.email,
|
498
519
|
user.username,
|
499
|
-
inputs.get("
|
520
|
+
hash_password(inputs.get("password", "")),
|
500
521
|
user.first_name,
|
501
522
|
user.last_name,
|
502
523
|
user.display_name,
|
@@ -509,12 +530,15 @@ class UserManagementNode(Node):
|
|
509
530
|
],
|
510
531
|
)
|
511
532
|
|
512
|
-
#
|
513
|
-
|
533
|
+
# Return the user data that was successfully inserted
|
534
|
+
# Add timestamps that would be set by the database
|
535
|
+
user_dict = user.to_dict()
|
536
|
+
user_dict["created_at"] = datetime.now(UTC).isoformat()
|
537
|
+
user_dict["updated_at"] = datetime.now(UTC).isoformat()
|
514
538
|
|
515
539
|
return {
|
516
540
|
"result": {
|
517
|
-
"user":
|
541
|
+
"user": user_dict,
|
518
542
|
"operation": "create_user",
|
519
543
|
"timestamp": datetime.now(UTC).isoformat(),
|
520
544
|
}
|
@@ -918,7 +942,8 @@ class UserManagementNode(Node):
|
|
918
942
|
"""Set user password hash."""
|
919
943
|
user_id = inputs["user_id"]
|
920
944
|
tenant_id = inputs["tenant_id"]
|
921
|
-
|
945
|
+
password = inputs.get("password", "")
|
946
|
+
password_hash = hash_password(password)
|
922
947
|
|
923
948
|
update_query = """
|
924
949
|
UPDATE users
|
@@ -964,9 +989,13 @@ class UserManagementNode(Node):
|
|
964
989
|
for i, user_data in enumerate(users_data):
|
965
990
|
try:
|
966
991
|
# Create each user individually for better error handling
|
992
|
+
# Extract password from user_data if present
|
993
|
+
user_data_copy = user_data.copy()
|
994
|
+
password = user_data_copy.pop("password", "")
|
967
995
|
create_inputs = {
|
968
996
|
"operation": "create_user",
|
969
|
-
"user_data":
|
997
|
+
"user_data": user_data_copy,
|
998
|
+
"password": password,
|
970
999
|
"tenant_id": tenant_id,
|
971
1000
|
"database_config": inputs["database_config"],
|
972
1001
|
}
|
@@ -1370,7 +1399,7 @@ class UserManagementNode(Node):
|
|
1370
1399
|
user_id = result["data"][0]["user_id"]
|
1371
1400
|
|
1372
1401
|
# Update password
|
1373
|
-
password_hash =
|
1402
|
+
password_hash = hash_password(new_password)
|
1374
1403
|
update_query = """
|
1375
1404
|
UPDATE users
|
1376
1405
|
SET password_hash = :password_hash,
|
@@ -1441,9 +1470,8 @@ class UserManagementNode(Node):
|
|
1441
1470
|
|
1442
1471
|
user_data = result["data"][0]
|
1443
1472
|
stored_hash = user_data["password_hash"]
|
1444
|
-
provided_hash = hashlib.sha256(password.encode()).hexdigest()
|
1445
1473
|
|
1446
|
-
if
|
1474
|
+
if not verify_password(password, stored_hash):
|
1447
1475
|
return {"authenticated": False, "message": "Invalid password"}
|
1448
1476
|
|
1449
1477
|
if user_data["status"] != "active":
|
kailash/nodes/ai/ai_providers.py
CHANGED
@@ -387,10 +387,16 @@ class OllamaProvider(UnifiedAIProvider):
|
|
387
387
|
return self._available
|
388
388
|
|
389
389
|
try:
|
390
|
+
import os
|
391
|
+
|
390
392
|
import ollama
|
391
393
|
|
394
|
+
# Check with environment-configured host if available
|
395
|
+
host = os.getenv("OLLAMA_BASE_URL") or os.getenv("OLLAMA_HOST")
|
396
|
+
client = ollama.Client(host=host) if host else ollama.Client()
|
397
|
+
|
392
398
|
# Check if Ollama is running
|
393
|
-
|
399
|
+
client.list()
|
394
400
|
self._available = True
|
395
401
|
except Exception:
|
396
402
|
self._available = False
|
@@ -409,6 +415,9 @@ class OllamaProvider(UnifiedAIProvider):
|
|
409
415
|
* temperature, max_tokens, top_p, top_k, repeat_penalty
|
410
416
|
* seed, stop, num_ctx, num_batch, num_thread
|
411
417
|
* tfs_z, typical_p, mirostat, mirostat_tau, mirostat_eta
|
418
|
+
backend_config (dict): Backend configuration including:
|
419
|
+
* host (str): Ollama host URL (default: from env or http://localhost:11434)
|
420
|
+
* port (int): Ollama port (if provided, will be appended to host)
|
412
421
|
|
413
422
|
Returns:
|
414
423
|
Dict containing the standardized response.
|
@@ -418,6 +427,28 @@ class OllamaProvider(UnifiedAIProvider):
|
|
418
427
|
|
419
428
|
model = kwargs.get("model", "llama3.1:8b-instruct-q8_0")
|
420
429
|
generation_config = kwargs.get("generation_config", {})
|
430
|
+
backend_config = kwargs.get("backend_config", {})
|
431
|
+
|
432
|
+
# Configure Ollama client with custom host if provided
|
433
|
+
if backend_config:
|
434
|
+
host = backend_config.get("host", "localhost")
|
435
|
+
port = backend_config.get("port")
|
436
|
+
if port:
|
437
|
+
# Construct full URL if port is provided
|
438
|
+
host = (
|
439
|
+
f"http://{host}:{port}"
|
440
|
+
if not host.startswith("http")
|
441
|
+
else f"{host}:{port}"
|
442
|
+
)
|
443
|
+
elif backend_config.get("base_url"):
|
444
|
+
host = backend_config["base_url"]
|
445
|
+
self._client = ollama.Client(host=host)
|
446
|
+
elif self._client is None:
|
447
|
+
# Use default client
|
448
|
+
import os
|
449
|
+
|
450
|
+
host = os.getenv("OLLAMA_BASE_URL") or os.getenv("OLLAMA_HOST")
|
451
|
+
self._client = ollama.Client(host=host) if host else ollama.Client()
|
421
452
|
|
422
453
|
# Map generation_config to Ollama options
|
423
454
|
options = {
|
@@ -482,7 +513,7 @@ class OllamaProvider(UnifiedAIProvider):
|
|
482
513
|
processed_messages.append(msg)
|
483
514
|
|
484
515
|
# Call Ollama
|
485
|
-
response =
|
516
|
+
response = self._client.chat(
|
486
517
|
model=model, messages=processed_messages, options=options
|
487
518
|
)
|
488
519
|
|
@@ -522,16 +553,37 @@ class OllamaProvider(UnifiedAIProvider):
|
|
522
553
|
Supported kwargs:
|
523
554
|
- model (str): Ollama model name (default: "snowflake-arctic-embed2")
|
524
555
|
- normalize (bool): Normalize embeddings to unit length
|
556
|
+
- backend_config (dict): Backend configuration (host, port, base_url)
|
525
557
|
"""
|
526
558
|
try:
|
527
559
|
import ollama
|
528
560
|
|
529
561
|
model = kwargs.get("model", "snowflake-arctic-embed2")
|
530
562
|
normalize = kwargs.get("normalize", False)
|
563
|
+
backend_config = kwargs.get("backend_config", {})
|
564
|
+
|
565
|
+
# Configure Ollama client if not already configured
|
566
|
+
if backend_config and not hasattr(self, "_client"):
|
567
|
+
host = backend_config.get("host", "localhost")
|
568
|
+
port = backend_config.get("port")
|
569
|
+
if port:
|
570
|
+
host = (
|
571
|
+
f"http://{host}:{port}"
|
572
|
+
if not host.startswith("http")
|
573
|
+
else f"{host}:{port}"
|
574
|
+
)
|
575
|
+
elif backend_config.get("base_url"):
|
576
|
+
host = backend_config["base_url"]
|
577
|
+
self._client = ollama.Client(host=host)
|
578
|
+
elif not hasattr(self, "_client") or self._client is None:
|
579
|
+
import os
|
580
|
+
|
581
|
+
host = os.getenv("OLLAMA_BASE_URL") or os.getenv("OLLAMA_HOST")
|
582
|
+
self._client = ollama.Client(host=host) if host else ollama.Client()
|
531
583
|
|
532
584
|
embeddings = []
|
533
585
|
for text in texts:
|
534
|
-
response =
|
586
|
+
response = self._client.embeddings(model=model, prompt=text)
|
535
587
|
embedding = response.get("embedding", [])
|
536
588
|
|
537
589
|
if normalize and embedding:
|
kailash/nodes/ai/llm_agent.py
CHANGED
@@ -853,6 +853,62 @@ class LLMAgentNode(Node):
|
|
853
853
|
"loaded_from": "mock_storage",
|
854
854
|
}
|
855
855
|
|
856
|
+
def _run_async_in_sync_context(self, coro):
|
857
|
+
"""
|
858
|
+
Run async coroutine in a synchronous context, handling existing event loops.
|
859
|
+
|
860
|
+
This helper method detects if an event loop is already running and handles
|
861
|
+
the execution appropriately to avoid "RuntimeError: This event loop is already running".
|
862
|
+
|
863
|
+
Args:
|
864
|
+
coro: The coroutine to execute
|
865
|
+
|
866
|
+
Returns:
|
867
|
+
The result of the coroutine execution
|
868
|
+
|
869
|
+
Raises:
|
870
|
+
TimeoutError: If the operation times out (30 seconds)
|
871
|
+
Exception: Any exception raised by the coroutine
|
872
|
+
"""
|
873
|
+
import asyncio
|
874
|
+
|
875
|
+
try:
|
876
|
+
# Check if there's already a running event loop
|
877
|
+
loop = asyncio.get_running_loop()
|
878
|
+
# If we're here, there's a running loop - create a new thread
|
879
|
+
import threading
|
880
|
+
|
881
|
+
result = None
|
882
|
+
exception = None
|
883
|
+
|
884
|
+
def run_in_thread():
|
885
|
+
nonlocal result, exception
|
886
|
+
try:
|
887
|
+
# Create new event loop in thread
|
888
|
+
new_loop = asyncio.new_event_loop()
|
889
|
+
asyncio.set_event_loop(new_loop)
|
890
|
+
try:
|
891
|
+
result = new_loop.run_until_complete(coro)
|
892
|
+
finally:
|
893
|
+
new_loop.close()
|
894
|
+
except Exception as e:
|
895
|
+
exception = e
|
896
|
+
|
897
|
+
thread = threading.Thread(target=run_in_thread)
|
898
|
+
thread.start()
|
899
|
+
thread.join(timeout=30) # 30 second timeout
|
900
|
+
|
901
|
+
if thread.is_alive():
|
902
|
+
raise TimeoutError("MCP operation timed out after 30 seconds")
|
903
|
+
|
904
|
+
if exception:
|
905
|
+
raise exception
|
906
|
+
return result
|
907
|
+
|
908
|
+
except RuntimeError:
|
909
|
+
# No running event loop, use asyncio.run()
|
910
|
+
return asyncio.run(coro)
|
911
|
+
|
856
912
|
def _retrieve_mcp_context(
|
857
913
|
self, mcp_servers: list[dict], mcp_context: list[str]
|
858
914
|
) -> list[dict[str, Any]]:
|
@@ -939,14 +995,14 @@ class LLMAgentNode(Node):
|
|
939
995
|
for server_config in mcp_servers:
|
940
996
|
try:
|
941
997
|
# List resources from server
|
942
|
-
resources =
|
998
|
+
resources = self._run_async_in_sync_context(
|
943
999
|
self._mcp_client.list_resources(server_config)
|
944
1000
|
)
|
945
1001
|
|
946
1002
|
# Read specific resources if requested
|
947
1003
|
for uri in mcp_context:
|
948
1004
|
try:
|
949
|
-
resource_data =
|
1005
|
+
resource_data = self._run_async_in_sync_context(
|
950
1006
|
self._mcp_client.read_resource(server_config, uri)
|
951
1007
|
)
|
952
1008
|
|
@@ -1014,17 +1070,48 @@ class LLMAgentNode(Node):
|
|
1014
1070
|
}
|
1015
1071
|
)
|
1016
1072
|
|
1073
|
+
except TimeoutError as e:
|
1074
|
+
self.logger.warning(
|
1075
|
+
f"MCP server '{server_config.get('name', 'unknown')}' timed out after 30 seconds: {e}"
|
1076
|
+
)
|
1077
|
+
# Fall back to mock for this server
|
1078
|
+
context_data.append(
|
1079
|
+
{
|
1080
|
+
"uri": f"mcp://{server_config.get('name', 'unknown')}/fallback",
|
1081
|
+
"content": "MCP server timed out - using fallback content. Check if the server is running and accessible.",
|
1082
|
+
"source": server_config.get("name", "unknown"),
|
1083
|
+
"retrieved_at": datetime.now().isoformat(),
|
1084
|
+
"relevance_score": 0.5,
|
1085
|
+
"metadata": {
|
1086
|
+
"error": "timeout",
|
1087
|
+
"error_message": str(e),
|
1088
|
+
},
|
1089
|
+
}
|
1090
|
+
)
|
1017
1091
|
except Exception as e:
|
1018
|
-
|
1092
|
+
error_type = type(e).__name__
|
1093
|
+
self.logger.error(
|
1094
|
+
f"MCP server '{server_config.get('name', 'unknown')}' connection failed ({error_type}): {e}"
|
1095
|
+
)
|
1096
|
+
|
1097
|
+
# Provide helpful error messages based on exception type
|
1098
|
+
if "coroutine" in str(e).lower() and "await" in str(e).lower():
|
1099
|
+
self.logger.error(
|
1100
|
+
"This appears to be an async/await issue. Please report this bug to the Kailash SDK team."
|
1101
|
+
)
|
1102
|
+
|
1019
1103
|
# Fall back to mock for this server
|
1020
1104
|
context_data.append(
|
1021
1105
|
{
|
1022
1106
|
"uri": f"mcp://{server_config.get('name', 'unknown')}/fallback",
|
1023
|
-
"content": "Connection failed
|
1107
|
+
"content": f"Connection failed ({error_type}) - using fallback content. Error: {str(e)}",
|
1024
1108
|
"source": server_config.get("name", "unknown"),
|
1025
1109
|
"retrieved_at": datetime.now().isoformat(),
|
1026
1110
|
"relevance_score": 0.5,
|
1027
|
-
"metadata": {
|
1111
|
+
"metadata": {
|
1112
|
+
"error": error_type,
|
1113
|
+
"error_message": str(e),
|
1114
|
+
},
|
1028
1115
|
}
|
1029
1116
|
)
|
1030
1117
|
|
@@ -1032,11 +1119,17 @@ class LLMAgentNode(Node):
|
|
1032
1119
|
if context_data:
|
1033
1120
|
return context_data
|
1034
1121
|
|
1035
|
-
except ImportError:
|
1122
|
+
except ImportError as e:
|
1036
1123
|
# MCPClient not available, fall back to mock
|
1124
|
+
self.logger.info(
|
1125
|
+
"MCP client not available. Install the MCP SDK with 'pip install mcp' to use real MCP servers."
|
1126
|
+
)
|
1037
1127
|
pass
|
1038
1128
|
except Exception as e:
|
1039
|
-
self.logger.
|
1129
|
+
self.logger.error(
|
1130
|
+
f"Unexpected error in MCP retrieval: {type(e).__name__}: {e}"
|
1131
|
+
)
|
1132
|
+
self.logger.info("Falling back to mock MCP implementation.")
|
1040
1133
|
|
1041
1134
|
# Fallback to mock implementation
|
1042
1135
|
for uri in mcp_context:
|
@@ -1089,8 +1182,6 @@ class LLMAgentNode(Node):
|
|
1089
1182
|
|
1090
1183
|
if use_real_mcp:
|
1091
1184
|
try:
|
1092
|
-
import asyncio
|
1093
|
-
|
1094
1185
|
from kailash.mcp import MCPClient
|
1095
1186
|
|
1096
1187
|
# Initialize MCP client if not already done
|
@@ -1101,7 +1192,7 @@ class LLMAgentNode(Node):
|
|
1101
1192
|
for server_config in mcp_servers:
|
1102
1193
|
try:
|
1103
1194
|
# Discover tools asynchronously
|
1104
|
-
tools =
|
1195
|
+
tools = self._run_async_in_sync_context(
|
1105
1196
|
self._mcp_client.discover_tools(server_config)
|
1106
1197
|
)
|
1107
1198
|
|
@@ -1131,16 +1222,27 @@ class LLMAgentNode(Node):
|
|
1131
1222
|
{"type": "function", "function": function_def}
|
1132
1223
|
)
|
1133
1224
|
|
1225
|
+
except TimeoutError as e:
|
1226
|
+
self.logger.warning(
|
1227
|
+
f"Tool discovery timed out for MCP server '{server_config.get('name', 'unknown')}': {e}"
|
1228
|
+
)
|
1134
1229
|
except Exception as e:
|
1135
|
-
|
1136
|
-
|
1230
|
+
error_type = type(e).__name__
|
1231
|
+
self.logger.error(
|
1232
|
+
f"Failed to discover tools from '{server_config.get('name', 'unknown')}' ({error_type}): {e}"
|
1137
1233
|
)
|
1138
1234
|
|
1139
1235
|
except ImportError:
|
1140
1236
|
# MCPClient not available, use mock tools
|
1237
|
+
self.logger.info(
|
1238
|
+
"MCP client not available for tool discovery. Install with 'pip install mcp' for real MCP tools."
|
1239
|
+
)
|
1141
1240
|
pass
|
1142
1241
|
except Exception as e:
|
1143
|
-
self.logger.
|
1242
|
+
self.logger.error(
|
1243
|
+
f"Unexpected error in MCP tool discovery: {type(e).__name__}: {e}"
|
1244
|
+
)
|
1245
|
+
self.logger.info("Using mock tools as fallback.")
|
1144
1246
|
|
1145
1247
|
# If no real tools discovered, provide minimal generic tools
|
1146
1248
|
if not discovered_tools:
|