alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/agent_loader.py +27 -6
- alita_sdk/cli/agents.py +10 -1
- alita_sdk/cli/tools/filesystem.py +95 -9
- alita_sdk/runtime/clients/client.py +40 -21
- alita_sdk/runtime/langchain/constants.py +3 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
- alita_sdk/runtime/langchain/langraph_agent.py +2 -1
- alita_sdk/runtime/toolkits/mcp.py +68 -62
- alita_sdk/runtime/toolkits/planning.py +3 -1
- alita_sdk/runtime/toolkits/tools.py +37 -18
- alita_sdk/runtime/tools/artifact.py +46 -17
- alita_sdk/runtime/tools/function.py +2 -1
- alita_sdk/runtime/tools/llm.py +135 -24
- alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
- alita_sdk/runtime/tools/vectorstore_base.py +3 -3
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/toolkit_utils.py +7 -13
- alita_sdk/tools/base_indexer_toolkit.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
- alita_sdk/tools/chunkers/universal_chunker.py +1 -0
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/confluence/api_wrapper.py +63 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +34 -32
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/toolkits/mcp.py
CHANGED

```diff
@@ -17,7 +17,7 @@ from ..tools.mcp_remote_tool import McpRemoteTool
 from ..tools.mcp_inspect_tool import McpInspectTool
 from ...tools.utils import TOOLKIT_SPLITTER, clean_string
 from ..models.mcp_models import McpConnectionConfig
-from ..utils.
+from ..utils.mcp_client import McpClient
 from ..utils.mcp_oauth import (
     McpAuthorizationRequired,
     canonical_resource,
```
```diff
@@ -426,11 +426,6 @@ class McpToolkit(BaseToolkit):
         except Exception as e:
             logger.error(f"Direct discovery failed for MCP toolkit '{toolkit_name}': {e}", exc_info=True)
             logger.error(f"Discovery error details - URL: {connection_config.url}, Timeout: {timeout}s")
-
-            # Check if the exception wraps McpAuthorizationRequired (can happen with asyncio)
-            if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
-                logger.info(f"Found wrapped McpAuthorizationRequired, re-raising")
-                raise e.__cause__
 
         # For new MCP toolkits (no client), don't silently return empty - surface the error
         # This helps users understand why tool discovery failed
```
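For context on the branches removed above: they relied on Python's exception chaining, where `raise X from Y` records `Y` as `X.__cause__`. A minimal standalone illustration of that mechanism (the exception class here is a stand-in, not the package's):

```python
# Sketch of __cause__ chaining; McpAuthorizationRequired is a local stand-in.
class McpAuthorizationRequired(Exception):
    pass

try:
    try:
        raise McpAuthorizationRequired("authorize first")
    except McpAuthorizationRequired as inner:
        # "from inner" stores the original exception on __cause__
        raise RuntimeError("discovery failed") from inner
except RuntimeError as e:
    assert isinstance(e.__cause__, McpAuthorizationRequired)
    print(type(e.__cause__).__name__)  # -> McpAuthorizationRequired
```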
```diff
@@ -464,37 +459,36 @@ class McpToolkit(BaseToolkit):
         toolkit_name: str,
         connection_config: McpConnectionConfig,
         timeout: int
-    ) -> List[Dict[str, Any]]:
+    ) -> tuple[List[Dict[str, Any]], Optional[str]]:
         """
         Discover tools and prompts from MCP server using SSE client.
-
-
+
+        Returns:
+            Tuple of (tool_list, server_session_id) - session_id may be server-provided
         """
-
+        initial_session_id = connection_config.session_id
 
-        if not
-        logger.warning(f"[MCP Session] No session_id provided for '{toolkit_name}' -
-        logger.warning(f"[MCP Session] Frontend should generate a UUID and include it with mcp_tokens")
+        if not initial_session_id:
+            logger.warning(f"[MCP Session] No session_id provided for '{toolkit_name}' - will generate one")
 
         # Run async discovery in sync context
         try:
-            all_tools = asyncio.run(
+            all_tools, server_session_id = asyncio.run(
                 cls._discover_tools_async(
                     toolkit_name=toolkit_name,
                     connection_config=connection_config,
                     timeout=timeout
                 )
             )
-
+            # Return tools and the session_id (server-provided or generated)
+            logger.info(f"[MCP Session] Final session_id for '{toolkit_name}': {server_session_id}")
+            return all_tools, server_session_id
         except McpAuthorizationRequired:
             # Re-raise auth required exceptions directly
             logger.info(f"[MCP SSE] Authorization required for '{toolkit_name}'")
             raise
         except Exception as e:
             logger.error(f"[MCP SSE] Discovery failed for '{toolkit_name}': {e}")
-            # Check if the exception wraps McpAuthorizationRequired
-            if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
-                raise e.__cause__
             raise
 
     @classmethod
```
```diff
@@ -503,9 +497,12 @@ class McpToolkit(BaseToolkit):
         toolkit_name: str,
         connection_config: McpConnectionConfig,
         timeout: int
-    ) -> List[Dict[str, Any]]:
+    ) -> tuple[List[Dict[str, Any]], Optional[str]]:
         """
         Async implementation of tool discovery using SSE client.
+
+        Returns:
+            Tuple of (tool_list, server_session_id)
         """
         all_tools = []
         session_id = connection_config.session_id
```
```diff
@@ -517,65 +514,74 @@ class McpToolkit(BaseToolkit):
             session_id = str(uuid.uuid4())
             logger.info(f"[MCP SSE] Generated temporary session_id for OAuth: {session_id}")
 
-        logger.info(f"[MCP
+        logger.info(f"[MCP] Discovering from {connection_config.url} with session {session_id}")
 
         # Prepare headers
         headers = {}
         if connection_config.headers:
             headers.update(connection_config.headers)
 
-        # Create SSE
-        client =
+        # Create unified MCP client (auto-detects SSE vs Streamable HTTP)
+        client = McpClient(
             url=connection_config.url,
             session_id=session_id,
             headers=headers,
             timeout=timeout
         )
 
-        … (removed lines 535-562 not rendered in the diff view)
+        server_session_id = None
+        async with client:
+            # Initialize MCP session
+            await client.initialize()
+            logger.info(f"[MCP] Session initialized for '{toolkit_name}' (transport={client.detected_transport})")
+
+            # Capture server-provided session_id (from mcp-session-id header)
+            server_session_id = client.server_session_id
+            if server_session_id:
+                logger.info(f"[MCP] Server provided session_id: {server_session_id}")
+
+            # Discover tools
+            tools = await client.list_tools()
+            all_tools.extend(tools)
+            logger.info(f"[MCP] Discovered {len(tools)} tools from '{toolkit_name}'")
+
+            # Discover prompts
+            try:
+                prompts = await client.list_prompts()
+                # Convert prompts to tool format
+                for prompt in prompts:
+                    prompt_tool = {
+                        "name": f"prompt_{prompt.get('name', 'unnamed')}",
+                        "description": prompt.get('description', f"Execute prompt: {prompt.get('name')}"),
+                        "inputSchema": {
+                            "type": "object",
+                            "properties": {
+                                "arguments": {
+                                    "type": "object",
+                                    "description": "Arguments for the prompt template",
+                                    "properties": {
+                                        arg.get("name"): {
+                                            "type": "string",
+                                            "description": arg.get("description", ""),
+                                            "required": arg.get("required", False)
+                                        }
+                                        for arg in prompt.get("arguments", [])
                                     }
-                                    for arg in prompt.get("arguments", [])
                                 }
                             }
-                        }
-        … (removed lines 568-574 not rendered in the diff view)
-            logger.warning(f"[MCP SSE] Failed to discover prompts: {e}")
+                        },
+                        "_mcp_type": "prompt",
+                        "_mcp_prompt_name": prompt.get('name')
+                    }
+                    all_tools.append(prompt_tool)
+                logger.info(f"[MCP] Discovered {len(prompts)} prompts from '{toolkit_name}'")
+            except Exception as e:
+                logger.warning(f"[MCP] Failed to discover prompts: {e}")
 
-        logger.info(f"[MCP
-
+        logger.info(f"[MCP] Total discovered {len(all_tools)} items from '{toolkit_name}'")
+        # Return tools and server-provided session_id (use server's if available, else the one we sent)
+        final_session_id = server_session_id or session_id
+        return all_tools, final_session_id
 
     @classmethod
     def _create_tool_from_dict(
```
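The added prompt-to-tool conversion is plain dictionary shaping and can be exercised outside the toolkit. A sketch with a made-up prompt record shaped like an MCP `prompts/list` entry:

```python
# Sample MCP prompt record (illustrative data, not from the package).
prompt = {
    "name": "summarize",
    "description": "Summarize a document",
    "arguments": [
        {"name": "text", "description": "Document body", "required": True},
    ],
}

# Same shaping as the added code: one tool per prompt, with a per-argument
# dict comprehension keyed by argument name.
prompt_tool = {
    "name": f"prompt_{prompt.get('name', 'unnamed')}",
    "description": prompt.get("description", f"Execute prompt: {prompt.get('name')}"),
    "inputSchema": {
        "type": "object",
        "properties": {
            "arguments": {
                "type": "object",
                "description": "Arguments for the prompt template",
                "properties": {
                    arg.get("name"): {
                        "type": "string",
                        "description": arg.get("description", ""),
                        "required": arg.get("required", False),
                    }
                    for arg in prompt.get("arguments", [])
                },
            }
        },
    },
    "_mcp_type": "prompt",
    "_mcp_prompt_name": prompt.get("name"),
}

print(prompt_tool["name"])  # -> prompt_summarize
print(list(prompt_tool["inputSchema"]["properties"]["arguments"]["properties"]))  # -> ['text']
```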
alita_sdk/runtime/toolkits/planning.py
CHANGED

```diff
@@ -94,7 +94,9 @@ class PlanningToolkit(BaseToolkit):
                 "label": "Planning",
                 "description": "Tools for managing multi-step execution plans with progress tracking. Uses PostgreSQL when configured, filesystem otherwise.",
                 "icon_url": None,
-                "max_length": PlanningToolkit._toolkit_max_length
+                "max_length": PlanningToolkit._toolkit_max_length,
+                "categories": ["planning", "internal_tool"],
+                "extra_categories": ["task management", "todo", "progress tracking"]
             }
         }
     )
```
alita_sdk/runtime/toolkits/tools.py
CHANGED

```diff
@@ -94,6 +94,11 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
         else:
             logger.warning("Image generation internal tool requested "
                            "but no image generation model configured")
+    elif tool['name'] == 'planner':
+        tools += PlanningToolkit.get_toolkit(
+            pgvector_configuration=tool.get('settings', {}).get('pgvector_configuration'),
+            conversation_id=conversation_id,
+        ).get_tools()
     elif tool['type'] == 'artifact':
         toolkit_tools = ArtifactToolkit.get_toolkit(
             client=alita_client,
```
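Based on the new branch, a `tools_list` entry of roughly the following shape would route through `PlanningToolkit` (the field names come from the diff; the values are illustrative only):

```python
# Illustrative entry only - not taken from the package's docs.
planner_entry = {
    "name": "planner",
    "settings": {
        "pgvector_configuration": {
            "connection_string": "postgresql://user:pass@host:5432/db",
        },
    },
}
```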
```diff
@@ -118,22 +123,42 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 **tool['settings']).get_tools())
     elif tool['type'] == 'planning':
         # Planning toolkit for multi-step task tracking
-        … (removed lines 121-122 not rendered in the diff view)
-        if
-        … (removed lines 124-129 not rendered in the diff view)
+        settings = tool.get('settings', {})
+
+        # Check if local mode is enabled (uses filesystem storage, ignores pgvector)
+        use_local = settings.get('local', False)
+
+        if use_local:
+            # Local mode - use filesystem storage
+            logger.info("Planning toolkit using local filesystem storage (local=true)")
+            pgvector_config = {}
+        else:
+            # Check if explicit connection_string is provided in pgvector_configuration
+            explicit_pgvector_config = settings.get('pgvector_configuration', {})
+            explicit_connstr = explicit_pgvector_config.get('connection_string') if explicit_pgvector_config else None
+
+            if explicit_connstr:
+                # Use explicitly provided connection string (overrides project secrets)
+                logger.info("Using explicit connection_string for planning toolkit")
+                pgvector_config = explicit_pgvector_config
+            else:
+                # Try to fetch pgvector_project_connstr from project secrets
+                pgvector_connstr = None
+                if alita_client:
+                    try:
+                        pgvector_connstr = alita_client.unsecret('pgvector_project_connstr')
+                        if pgvector_connstr:
+                            logger.info("Using pgvector_project_connstr for planning toolkit")
+                    except Exception as e:
+                        logger.debug(f"pgvector_project_connstr not available: {e}")
+
+                pgvector_config = {'connection_string': pgvector_connstr} if pgvector_connstr else {}
 
-        pgvector_config = {'connection_string': pgvector_connstr} if pgvector_connstr else {}
         tools.extend(PlanningToolkit.get_toolkit(
             toolkit_name=tool.get('toolkit_name', ''),
-            selected_tools=
+            selected_tools=settings.get('selected_tools', []),
             pgvector_configuration=pgvector_config,
-            conversation_id=conversation_id or
+            conversation_id=conversation_id or settings.get('conversation_id'),
         ).get_tools())
     elif tool['type'] == 'mcp':
         # remote mcp tool initialization with token injection
```
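The storage-resolution order introduced here (local flag, then explicit connection string, then the `pgvector_project_connstr` project secret) can be distilled into a small pure function. A sketch under the assumption that `fetch_secret` stands in for `alita_client.unsecret`:

```python
from typing import Callable, Optional

def resolve_pgvector_config(settings: dict,
                            fetch_secret: Optional[Callable[[str], Optional[str]]] = None) -> dict:
    """Mirror of the precedence above: local > explicit connstr > project secret > none."""
    if settings.get("local", False):
        return {}  # filesystem storage; pgvector ignored
    explicit = settings.get("pgvector_configuration") or {}
    if explicit.get("connection_string"):
        return explicit  # explicit config overrides project secrets
    connstr = None
    if fetch_secret is not None:
        try:
            connstr = fetch_secret("pgvector_project_connstr")
        except Exception:
            connstr = None  # secret not available; fall through to filesystem
    return {"connection_string": connstr} if connstr else {}

print(resolve_pgvector_config({"local": True}))                       # {}
print(resolve_pgvector_config({}, lambda key: "postgresql://x/db"))   # {'connection_string': 'postgresql://x/db'}
```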
```diff
@@ -183,12 +208,6 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
         # Re-raise auth required exceptions directly
         raise
     except Exception as e:
-        # Check for wrapped McpAuthorizationRequired
-        if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
-            raise e.__cause__
-        # Check exception class name as fallback
-        if e.__class__.__name__ == 'McpAuthorizationRequired':
-            raise
         logger.error(f"Error initializing toolkit for tool '{tool.get('name', 'unknown')}': {e}", exc_info=True)
         if debug_mode:
             logger.info("Skipping tool initialization error due to debug mode.")
```
alita_sdk/runtime/tools/artifact.py
CHANGED

```diff
@@ -34,28 +34,57 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
         return self.artifact.list(bucket_name, return_as_string)
 
     def create_file(self, filename: str, filedata: str, bucket_name = None):
-        … (removed line 37 not rendered in the diff view)
+        # Sanitize filename to prevent regex errors during indexing
+        sanitized_filename, was_modified = self._sanitize_filename(filename)
+        if was_modified:
+            logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
+
+        if sanitized_filename.endswith(".xlsx"):
             data = json.loads(filedata)
             filedata = self.create_xlsx_filedata(data)
 
-        result = self.artifact.create(
+        result = self.artifact.create(sanitized_filename, filedata, bucket_name)
 
         # Dispatch custom event for file creation
-        … (removed lines 44-50 not rendered in the diff view)
-        "
-        … (removed lines 52-53 not rendered in the diff view)
-        "bucket": bucket_name or self.bucket
-        }
-        }""")
+        dispatch_custom_event("file_modified", {
+            "message": f"File '{filename}' created successfully",
+            "filename": filename,
+            "tool_name": "createFile",
+            "toolkit": "artifact",
+            "operation_type": "create",
+            "meta": {
+                "bucket": bucket_name or self.bucket
+            }
+        })
 
         return result
+
+    @staticmethod
+    def _sanitize_filename(filename: str) -> tuple:
+        """Sanitize filename for safe storage and regex pattern matching."""
+        from pathlib import Path
+
+        if not filename or not filename.strip():
+            return "unnamed_file", True
+
+        original = filename
+        path_obj = Path(filename)
+        name = path_obj.stem
+        extension = path_obj.suffix
+
+        # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
+        sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
+        sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
+        sanitized_name = sanitized_name.strip('-').strip()
+
+        if not sanitized_name:
+            sanitized_name = "file"
+
+        if extension:
+            extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
+
+        sanitized = sanitized_name + extension
+        return sanitized, (sanitized != original)
 
     def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
         try:
```
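The new `_sanitize_filename` helper is self-contained, so its behavior is easy to check in isolation. A standalone copy with two worked inputs (expected outputs derived by hand from the regexes above):

```python
import re
from pathlib import Path

def sanitize_filename(filename: str) -> tuple:
    # Same steps as the added _sanitize_filename, copied here for a standalone demo.
    if not filename or not filename.strip():
        return "unnamed_file", True
    original = filename
    path_obj = Path(filename)
    name, extension = path_obj.stem, path_obj.suffix
    sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)   # drop metacharacters
    sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)            # collapse runs to '-'
    sanitized_name = sanitized_name.strip('-').strip()
    if not sanitized_name:
        sanitized_name = "file"
    if extension:
        extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
    sanitized = sanitized_name + extension
    return sanitized, (sanitized != original)

print(sanitize_filename("report (final).xlsx"))  # -> ('report-final.xlsx', True)
print(sanitize_filename("notes.txt"))            # -> ('notes.txt', False)
```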
```diff
@@ -173,13 +202,13 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
             file_name = file['name']
 
             # Check if file should be skipped based on skip_extensions
-            if any(re.match(pattern.replace('
+            if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                    for pattern in skip_extensions):
                 continue
 
             # Check if file should be included based on include_extensions
             # If include_extensions is empty, process all files (that weren't skipped)
-            if include_extensions and not (any(re.match(pattern.replace('
+            if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                    for pattern in include_extensions)):
                 continue
 
```
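The change in both checks is the same: escape the glob pattern before turning `*` into `.*`, and anchor with `$`. Without escaping, the `.` in a pattern like `*.log` is a regex wildcard, and metacharacters such as `(` in a pattern would raise `re.error`. A quick comparison:

```python
import re

pattern = "*.log"

# New approach from the diff: escape first, then turn the glob '*' into '.*',
# and anchor with '$' so 'app.log.bak' does not match.
regex = re.escape(pattern).replace(r"\*", ".*") + "$"
print(regex)                                                 # .*\.log$
print(bool(re.match(regex, "app.log", re.IGNORECASE)))       # True
print(bool(re.match(regex, "app.log.bak", re.IGNORECASE)))   # False

# A naive '*' -> '.*' replacement leaves '.' unescaped, so 'app_log' matches too.
naive = pattern.replace("*", ".*") + "$"                     # .*.log$
print(bool(re.match(naive, "app_log", re.IGNORECASE)))       # True (false positive)
```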
alita_sdk/runtime/tools/function.py
CHANGED

```diff
@@ -107,7 +107,8 @@ class FunctionTool(BaseTool):
 
         # special handler for PyodideSandboxTool
         if self._is_pyodide_tool():
-            … (removed line 110 not rendered in the diff view)
+            # replace new lines in strings in code block
+            code = func_args['code'].replace('\\n', '\\\\n')
             func_args['code'] = f"{self._prepare_pyodide_input(state)}\n{code}"
             try:
                 tool_result = self.tool.invoke(func_args, config, **kwargs)
```
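The added replacement doubles the backslash in literal `\n` escape sequences so they survive a further round of string processing before the code reaches the sandbox. What it does to a sample payload (the payload is made up):

```python
# 'code' holds source text containing a backslash-n escape inside a string literal.
code = 'print("line1\\nline2")'
escaped = code.replace('\\n', '\\\\n')  # same call as in the diff

print(code)     # print("line1\nline2")   - single backslash
print(escaped)  # print("line1\\nline2")  - backslash doubled, survives re-parsing
```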
alita_sdk/runtime/tools/llm.py
CHANGED

```diff
@@ -34,6 +34,7 @@ class LLMNode(BaseTool):
     available_tools: Optional[List[BaseTool]] = Field(default=None, description='Available tools for binding')
     tool_names: Optional[List[str]] = Field(default=None, description='Specific tool names to filter')
     steps_limit: Optional[int] = Field(default=25, description='Maximum steps for tool execution')
+    tool_execution_timeout: Optional[int] = Field(default=900, description='Timeout (seconds) for tool execution. Default is 15 minutes.')
 
     def get_filtered_tools(self) -> List[BaseTool]:
         """
```
```diff
@@ -129,7 +130,9 @@ class LLMNode(BaseTool):
         # or standalone LLM node for chat (with messages only)
         if 'system' in func_args.keys():
             # Flow for LLM node with prompt/task from pipeline
-            if
+            if func_args.get('system') is None or func_args.get('task') is None:
+                raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
+                                    f"Actual params: {func_args}")
             raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
                                 f"Actual params: {func_args}")
         # cast to str in case user passes variable different from str
```
```diff
@@ -243,40 +246,146 @@ class LLMNode(BaseTool):
 
         For MCP tools with persistent sessions, we reuse the same event loop
         that was used to create the MCP client and sessions (set by CLI).
+
+        When called from within a running event loop (e.g., nested LLM nodes),
+        we need to handle this carefully to avoid "event loop already running" errors.
+
+        This method handles three scenarios:
+        1. Called from async context (event loop running) - creates new thread with new loop
+        2. Called from sync context with persistent loop - reuses persistent loop
+        3. Called from sync context without loop - creates new persistent loop
         """
+        import threading
+
+        # Check if there's a running loop
         try:
-        … (removed lines 248-251 not rendered in the diff view)
+            running_loop = asyncio.get_running_loop()
+            loop_is_running = True
+            logger.debug(f"Detected running event loop (id: {id(running_loop)}), executing tool calls in separate thread")
+        except RuntimeError:
+            loop_is_running = False
+
+        # Scenario 1: Loop is currently running - MUST use thread
+        if loop_is_running:
             result_container = []
-        … (removed line 253 not rendered in the diff view)
+            exception_container = []
+
+            # Try to capture Streamlit context from current thread for propagation
+            streamlit_ctx = None
+            try:
+                from streamlit.runtime.scriptrunner import get_script_run_ctx, add_script_run_ctx
+                streamlit_ctx = get_script_run_ctx()
+                if streamlit_ctx:
+                    logger.debug("Captured Streamlit context for propagation to worker thread")
+            except (ImportError, Exception) as e:
+                logger.debug(f"Streamlit context not available or failed to capture: {e}")
+
             def run_in_thread():
+                """Run coroutine in a new thread with its own event loop."""
                 new_loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(new_loop)
                 try:
-        … (removed line 258 not rendered in the diff view)
+                    result = new_loop.run_until_complete(coro)
+                    result_container.append(result)
+                except Exception as e:
+                    logger.debug(f"Exception in async thread: {e}")
+                    exception_container.append(e)
                 finally:
                     new_loop.close()
-        … (removed lines 261-262 not rendered in the diff view)
+                    asyncio.set_event_loop(None)
+
+            thread = threading.Thread(target=run_in_thread, daemon=False)
+
+            # Propagate Streamlit context to the worker thread if available
+            if streamlit_ctx is not None:
+                try:
+                    add_script_run_ctx(thread, streamlit_ctx)
+                    logger.debug("Successfully propagated Streamlit context to worker thread")
+                except Exception as e:
+                    logger.warning(f"Failed to propagate Streamlit context to worker thread: {e}")
+
             thread.start()
-            thread.join()
+            thread.join(timeout=self.tool_execution_timeout)  # 15 minute timeout for safety
+
+            if thread.is_alive():
+                logger.error("Async operation timed out after 5 minutes")
+                raise TimeoutError("Async operation in thread timed out")
+
+            # Re-raise exception if one occurred
+            if exception_container:
+                raise exception_container[0]
+
             return result_container[0] if result_container else None
-        … (removed lines 266-268 not rendered in the diff view)
-        #
+
+        # Scenario 2 & 3: No loop running - use or create persistent loop
+        else:
+            # Get or create persistent loop
             if not hasattr(self.__class__, '_persistent_loop') or \
                self.__class__._persistent_loop is None or \
                self.__class__._persistent_loop.is_closed():
                 self.__class__._persistent_loop = asyncio.new_event_loop()
                 logger.debug("Created persistent event loop for async tools")
-        … (removed line 275 not rendered in the diff view)
+
             loop = self.__class__._persistent_loop
-        … (removed lines 277-279 not rendered in the diff view)
+
+            # Double-check the loop is not running (safety check)
+            if loop.is_running():
+                logger.debug("Persistent loop is unexpectedly running, using thread execution")
+
+                result_container = []
+                exception_container = []
+
+                # Try to capture Streamlit context from current thread for propagation
+                streamlit_ctx = None
+                try:
+                    from streamlit.runtime.scriptrunner import get_script_run_ctx, add_script_run_ctx
+                    streamlit_ctx = get_script_run_ctx()
+                    if streamlit_ctx:
+                        logger.debug("Captured Streamlit context for propagation to worker thread")
+                except (ImportError, Exception) as e:
+                    logger.debug(f"Streamlit context not available or failed to capture: {e}")
+
+                def run_in_thread():
+                    """Run coroutine in a new thread with its own event loop."""
+                    new_loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(new_loop)
+                    try:
+                        result = new_loop.run_until_complete(coro)
+                        result_container.append(result)
+                    except Exception as ex:
+                        logger.debug(f"Exception in async thread: {ex}")
+                        exception_container.append(ex)
+                    finally:
+                        new_loop.close()
+                        asyncio.set_event_loop(None)
+
+                thread = threading.Thread(target=run_in_thread, daemon=False)
+
+                # Propagate Streamlit context to the worker thread if available
+                if streamlit_ctx is not None:
+                    try:
+                        add_script_run_ctx(thread, streamlit_ctx)
+                        logger.debug("Successfully propagated Streamlit context to worker thread")
+                    except Exception as e:
+                        logger.warning(f"Failed to propagate Streamlit context to worker thread: {e}")
+
+                thread.start()
+                thread.join(timeout=self.tool_execution_timeout)
+
+                if thread.is_alive():
+                    logger.error("Async operation timed out after 15 minutes")
+                    raise TimeoutError("Async operation in thread timed out")
+
+                if exception_container:
+                    raise exception_container[0]
+
+                return result_container[0] if result_container else None
+            else:
+                # Loop exists but not running - safe to use run_until_complete
+                logger.debug(f"Using persistent loop (id: {id(loop)}) with run_until_complete")
+                asyncio.set_event_loop(loop)
+                return loop.run_until_complete(coro)
+
     async def _arun(self, *args, **kwargs):
         # Legacy async support
         return self.invoke(kwargs, **kwargs)
```
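Stripped of the Streamlit propagation and persistent-loop caching, the three scenarios in the docstring reduce to one reusable pattern: drive a loop directly when none is running, otherwise hand the coroutine to a worker thread with its own loop. A minimal sketch of that pattern (not the package's implementation):

```python
import asyncio
import threading

def run_coro_sync(coro, timeout: float = 900.0):
    """Run a coroutine from sync code, even if an event loop is already running."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: safe to drive one directly.
        return asyncio.run(coro)

    # A loop is running: run_until_complete would raise, so use a worker thread
    # with its own fresh loop (scenario 1 above).
    result, error = [], []

    def worker():
        try:
            result.append(asyncio.run(coro))
        except Exception as e:
            error.append(e)  # re-raised in the caller's thread below

    t = threading.Thread(target=worker)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError("coroutine did not finish in time")
    if error:
        raise error[0]
    return result[0] if result else None

async def sample():
    return 42

print(run_coro_sync(sample()))  # -> 42
```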
```diff
@@ -324,12 +433,14 @@ class LLMNode(BaseTool):
 
                 # Try async invoke first (for MCP tools), fallback to sync
                 tool_result = None
-                … (removed lines 327-332 not rendered in the diff view)
+                if hasattr(tool_to_execute, 'ainvoke'):
+                    try:
+                        tool_result = await tool_to_execute.ainvoke(tool_args, config=config)
+                    except (NotImplementedError, AttributeError):
+                        logger.debug(f"Tool '{tool_name}' ainvoke failed, falling back to sync invoke")
+                        tool_result = tool_to_execute.invoke(tool_args, config=config)
+                else:
+                    # Sync-only tool
                     tool_result = tool_to_execute.invoke(tool_args, config=config)
 
                 # Create tool message with result - preserve structured content
```
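The async-first dispatch with a sync fallback added here is a general duck-typing pattern. A self-contained sketch with stand-in tool classes (not LangChain's `BaseTool`):

```python
import asyncio

class SyncOnlyTool:
    def invoke(self, args):
        return f"sync:{args}"

class AsyncTool(SyncOnlyTool):
    async def ainvoke(self, args):
        return f"async:{args}"

async def call_tool(tool, args):
    # Prefer ainvoke when present; fall back to the sync path on
    # NotImplementedError/AttributeError, mirroring the branch in the diff.
    if hasattr(tool, "ainvoke"):
        try:
            return await tool.ainvoke(args)
        except (NotImplementedError, AttributeError):
            return tool.invoke(args)
    return tool.invoke(args)

print(asyncio.run(call_tool(AsyncTool(), {"x": 1})))     # async:{'x': 1}
print(asyncio.run(call_tool(SyncOnlyTool(), {"x": 1})))  # sync:{'x': 1}
```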
|