alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

Files changed (34)
  1. alita_sdk/cli/agent_loader.py +27 -6
  2. alita_sdk/cli/agents.py +10 -1
  3. alita_sdk/cli/tools/filesystem.py +95 -9
  4. alita_sdk/runtime/clients/client.py +40 -21
  5. alita_sdk/runtime/langchain/constants.py +3 -1
  6. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  7. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  8. alita_sdk/runtime/langchain/langraph_agent.py +2 -1
  9. alita_sdk/runtime/toolkits/mcp.py +68 -62
  10. alita_sdk/runtime/toolkits/planning.py +3 -1
  11. alita_sdk/runtime/toolkits/tools.py +37 -18
  12. alita_sdk/runtime/tools/artifact.py +46 -17
  13. alita_sdk/runtime/tools/function.py +2 -1
  14. alita_sdk/runtime/tools/llm.py +135 -24
  15. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  16. alita_sdk/runtime/tools/vectorstore_base.py +3 -3
  17. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  18. alita_sdk/runtime/utils/mcp_client.py +465 -0
  19. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  20. alita_sdk/runtime/utils/toolkit_utils.py +7 -13
  21. alita_sdk/tools/base_indexer_toolkit.py +1 -1
  22. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  23. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
  24. alita_sdk/tools/chunkers/universal_chunker.py +1 -0
  25. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  26. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  27. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  28. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
  29. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +1 -1
  30. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +34 -32
  31. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  32. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  33. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  34. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,465 @@
1
+ """
2
+ Unified MCP Client with auto-detection for SSE and Streamable HTTP transports.
3
+
4
+ This module provides a unified interface for MCP server communication that
5
+ automatically detects and uses the appropriate transport:
6
+ - SSE (Server-Sent Events): Traditional dual-connection model (GET for stream, POST for commands)
7
+ - Streamable HTTP: Newer POST-based model used by servers like GitHub Copilot MCP
8
+
9
+ Usage:
10
+ # Auto-detect transport (recommended)
11
+ client = McpClient(url=url, session_id=session_id, headers=headers)
12
+
13
+ # Force specific transport
14
+ client = McpClient(url=url, session_id=session_id, transport="streamable_http")
15
+
16
+ async with client:
17
+ await client.initialize()
18
+ tools = await client.list_tools()
19
+ result = await client.call_tool("tool_name", {"arg": "value"})
20
+ """
21
+
22
+ import asyncio
23
+ import json
24
+ import logging
25
+ import uuid
26
+ from typing import Any, Dict, List, Literal, Optional
27
+
28
+ import aiohttp
29
+
30
+ from .mcp_oauth import McpAuthorizationRequired
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # Transport types
35
+ TransportType = Literal["auto", "sse", "streamable_http"]
36
+
37
+
38
class McpClient:
    """
    Unified MCP client that supports both SSE and Streamable HTTP transports.

    With ``transport="auto"`` the client starts on Streamable HTTP and switches
    to SSE when the server answers the first POST with 404/405. URLs ending in
    ``/sse`` go straight to the SSE transport.
    """

    # Headers whose values are replaced entirely before being logged.
    _SECRET_HEADERS = frozenset({"cookie", "set-cookie", "x-api-key"})
    # Headers of the form "<scheme> <credential>"; only the scheme is logged.
    _CREDENTIAL_HEADERS = frozenset({"authorization", "proxy-authorization"})

    def __init__(
        self,
        url: str,
        session_id: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: int = 300,
        transport: "TransportType" = "auto"
    ):
        """
        Initialize the unified MCP client.

        Args:
            url: MCP server URL
            session_id: Session ID for stateful connections (auto-generated if not provided)
            headers: HTTP headers (e.g., Authorization)
            timeout: Request timeout in seconds
            transport: Transport type - "auto", "sse", or "streamable_http"
        """
        self.url = url
        self.session_id = session_id or str(uuid.uuid4())
        self.headers = headers or {}
        self.timeout = timeout
        self.transport = transport

        # Will be set during connection
        self._detected_transport: Optional[str] = None
        self._sse_client = None
        self._http_session: Optional["aiohttp.ClientSession"] = None
        self._mcp_session_id: Optional[str] = None  # Server-provided session ID
        self._initialized = False

        logger.info(f"[MCP Client] Created for {url} (transport={transport}, session={self.session_id})")

    @property
    def server_session_id(self) -> Optional[str]:
        """Get the server-provided session ID (from mcp-session-id header)."""
        return self._mcp_session_id

    @property
    def detected_transport(self) -> Optional[str]:
        """Get the detected/selected transport type (None before connecting)."""
        return self._detected_transport

    @classmethod
    def _redact_headers(cls, headers) -> Dict[str, str]:
        """Return a copy of *headers* that is safe to log (credentials masked)."""
        safe: Dict[str, str] = {}
        for name, value in headers.items():
            lowered = name.lower()
            if lowered in cls._CREDENTIAL_HEADERS:
                # Keep the auth scheme ("Bearer", "Basic") for debugging, drop the secret.
                scheme, separator, _ = str(value).partition(" ")
                safe[name] = f"{scheme} ***" if separator else "***"
            elif lowered in cls._SECRET_HEADERS:
                safe[name] = "***"
            else:
                safe[name] = value
        return safe

    @staticmethod
    def _indicates_sse_required(error: BaseException) -> bool:
        """Tell whether *error* looks like "Streamable HTTP is not served here" (404/405)."""
        # aiohttp.ClientResponseError exposes the HTTP status as an attribute.
        if getattr(error, "status", None) in (404, 405):
            return True
        error_str = str(error).lower()
        return "405" in error_str or "method not allowed" in error_str or "404" in error_str

    async def __aenter__(self):
        """Async context manager entry - detect and connect."""
        await self._connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup."""
        await self.close()

    async def _connect(self):
        """Detect transport and establish connection."""
        if self.transport == "sse":
            self._detected_transport = "sse"
            await self._connect_sse()
        elif self.transport == "streamable_http":
            self._detected_transport = "streamable_http"
            await self._connect_streamable_http()
        else:  # auto
            await self._auto_detect_and_connect()

    async def _auto_detect_and_connect(self):
        """Try Streamable HTTP first, fall back to SSE."""
        # If URL ends with /sse, use SSE transport directly
        if self.url.rstrip('/').endswith('/sse'):
            logger.debug("[MCP Client] URL ends with /sse, using SSE transport")
            await self._connect_sse()
            self._detected_transport = "sse"
            logger.info("[MCP Client] Using SSE transport")
            return

        try:
            logger.debug("[MCP Client] Auto-detecting transport, trying Streamable HTTP first...")
            # Only creates the HTTP session; the first network round trip (and
            # therefore the real transport check) happens in initialize().
            await self._connect_streamable_http()
            self._detected_transport = "streamable_http"
            logger.info("[MCP Client] Using Streamable HTTP transport")
        except Exception as e:
            # Check for 405, 404, or indicators that SSE is needed
            if not self._indicates_sse_required(e):
                # Re-raise other errors
                raise
            logger.debug(f"[MCP Client] Streamable HTTP not supported ({e}), trying SSE...")
            await self._connect_sse()
            self._detected_transport = "sse"
            logger.info("[MCP Client] Using SSE transport")

    async def _connect_streamable_http(self):
        """Connect using Streamable HTTP transport."""
        self._http_session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )

    async def _connect_sse(self):
        """Connect using SSE transport."""
        from .mcp_sse_client import McpSseClient

        self._sse_client = McpSseClient(
            url=self.url,
            session_id=self.session_id,
            headers=self.headers,
            timeout=self.timeout
        )

    async def _switch_to_sse(self):
        """Drop the Streamable HTTP session and continue on the SSE transport."""
        if self._http_session and not self._http_session.closed:
            await self._http_session.close()
        self._http_session = None
        self._mcp_session_id = None
        await self._connect_sse()
        self._detected_transport = "sse"
        logger.info("[MCP Client] Using SSE transport")

    async def initialize(self) -> Dict[str, Any]:
        """
        Initialize MCP protocol session.

        In "auto" mode a 404/405 answer to the Streamable HTTP initialize
        request triggers the fallback to SSE, because this is the first request
        that actually reaches the server.

        Returns:
            Server capabilities and info

        Raises:
            McpAuthorizationRequired: If the server answers 401.
            Exception: For other transport or protocol errors.
        """
        if self._detected_transport != "streamable_http":
            return await self._initialize_sse()

        try:
            return await self._initialize_streamable_http()
        except McpAuthorizationRequired:
            # Authorization problems are never a transport mismatch.
            raise
        except Exception as e:
            if self.transport != "auto" or not self._indicates_sse_required(e):
                raise
            logger.debug(f"[MCP Client] Streamable HTTP not supported ({e}), trying SSE...")
            # Performed inside the handler so a failing SSE attempt keeps the
            # original HTTP error as its exception context.
            await self._switch_to_sse()
            return await self._initialize_sse()

    async def _initialize_streamable_http(self, retry_without_session: bool = False) -> Dict[str, Any]:
        """
        Initialize via Streamable HTTP transport.

        Args:
            retry_without_session: True when this call is already the retry
                issued after an "invalid session" answer; prevents looping.

        Returns:
            The ``result`` member of the server's initialize response.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json, text/event-stream",
            **self.headers
        }

        # DON'T send session_id on initialization - per MCP spec, initialization requests
        # must not include a sessionId. The server will provide one in the response.
        # Session ID is only used for subsequent requests after initialization.
        # (The retry_without_session flag is kept for backwards compatibility but
        # is effectively always true for initialization now)

        # Debug: log headers (credentials masked)
        logger.debug(f"[MCP Client] Request headers: {self._redact_headers(headers)}")

        init_request = {
            "jsonrpc": "2.0",
            "id": str(uuid.uuid4()),
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {
                    "roots": {"listChanged": True},
                    "sampling": {}
                },
                "clientInfo": {
                    "name": "Alita MCP Client",
                    "version": "1.0.0"
                }
            }
        }

        logger.debug(f"[MCP Client] Sending initialize via Streamable HTTP to {self.url}")

        retry = False
        result: Dict[str, Any] = {}
        async with self._http_session.post(self.url, json=init_request, headers=headers) as response:
            if response.status == 401:
                await self._handle_401_response(response)

            if response.status == 405:
                raise Exception("HTTP 405 Method Not Allowed - server may require SSE transport")

            if response.status >= 400:
                # Reading the body is best effort; the status alone is enough to fail.
                try:
                    error_body = await response.text()
                except Exception:
                    logger.debug("[MCP Client] Could not read error response body", exc_info=True)
                    error_body = ""

                # Handle invalid session error - retry once
                retry = (
                    response.status == 400
                    and not retry_without_session
                    and bool(self.session_id)
                    and "invalid session" in error_body.lower()
                )
                if not retry:
                    logger.error(f"[MCP Client] HTTP {response.status} error response: {error_body[:1000]}")

            if not retry:
                response.raise_for_status()

                # Get session ID from response headers
                self._mcp_session_id = response.headers.get("mcp-session-id")
                if self._mcp_session_id:
                    logger.info(f"[MCP Client] Server provided session_id: {self._mcp_session_id}")
                else:
                    logger.debug(
                        f"[MCP Client] No session_id in response headers. "
                        f"Headers: {self._redact_headers(response.headers)}"
                    )

                # Parse response
                result = await self._parse_response(response, init_request["id"])

        if retry:
            # Issued outside any try/except so that errors raised by the retry
            # (including McpAuthorizationRequired) reach the caller.
            logger.warning("[MCP Client] Invalid session, retrying without session_id")
            return await self._initialize_streamable_http(retry_without_session=True)

        logger.debug(f"[MCP Client] Initialize response: {result}")

        # Send initialized notification
        await self._send_notification("notifications/initialized")

        self._initialized = True
        return result.get('result', {})

    async def _initialize_sse(self) -> Dict[str, Any]:
        """Initialize via SSE transport."""
        result = await self._sse_client.initialize()
        self._initialized = True
        return result

    async def send_request(
        self,
        method: str,
        params: Optional[Dict[str, Any]] = None,
        request_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Send a JSON-RPC request to the MCP server.

        Args:
            method: JSON-RPC method name (e.g., "tools/list", "tools/call")
            params: Method parameters
            request_id: Optional request ID (auto-generated if not provided)

        Returns:
            Parsed JSON-RPC response
        """
        if self._detected_transport == "streamable_http":
            return await self._send_request_streamable_http(method, params, request_id)
        return await self._sse_client.send_request(method, params, request_id)

    async def _send_request_streamable_http(
        self,
        method: str,
        params: Optional[Dict[str, Any]] = None,
        request_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Send request via Streamable HTTP.

        Raises:
            McpAuthorizationRequired: If the server answers 401.
            Exception: If the HTTP status is an error or the JSON-RPC response
                carries an ``error`` member.
        """
        if request_id is None:
            request_id = str(uuid.uuid4())

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json, text/event-stream",
            **self.headers
        }

        # Add MCP session ID if we have one
        if self._mcp_session_id:
            headers["mcp-session-id"] = self._mcp_session_id

        request = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
            "params": params or {}
        }

        logger.debug(f"[MCP Client] Sending request: {method} (id={request_id})")

        async with self._http_session.post(self.url, json=request, headers=headers) as response:
            if response.status == 401:
                await self._handle_401_response(response)

            response.raise_for_status()

            result = await self._parse_response(response, request_id)

        # Check for JSON-RPC error
        if 'error' in result:
            error = result['error']
            raise Exception(f"MCP Error: {error.get('message', str(error))}")

        return result

    async def _send_notification(self, method: str, params: Optional[Dict[str, Any]] = None):
        """Send a JSON-RPC notification (no response expected)."""
        if self._detected_transport != "streamable_http":
            return

        headers = {
            "Content-Type": "application/json",
            **self.headers
        }
        if self._mcp_session_id:
            headers["mcp-session-id"] = self._mcp_session_id

        notification = {
            "jsonrpc": "2.0",
            "method": method
        }
        if params:
            notification["params"] = params

        async with self._http_session.post(self.url, json=notification, headers=headers):
            pass  # Notifications don't expect a response

    async def _parse_response(
        self,
        response: "aiohttp.ClientResponse",
        request_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Parse response, handling both JSON and SSE formats.

        Args:
            response: The HTTP response to read.
            request_id: ID of the request being answered; used to pick the
                matching message out of an SSE body.
        """
        content_type = response.headers.get("content-type", "")
        text = await response.text()

        if "text/event-stream" in content_type:
            return self._parse_sse_text(text, request_id)
        return json.loads(text) if text else {}

    @staticmethod
    def _iter_sse_payloads(text: str):
        """Yield the joined ``data:`` payload of every event in an SSE body."""
        pending: List[str] = []
        for line in text.split('\n'):
            if line.startswith('data:'):
                pending.append(line[5:].strip())
            elif not line.strip() and pending:
                # A blank line terminates the current event.
                yield "\n".join(pending)
                pending = []
        if pending:
            # Body ended without the closing blank line.
            yield "\n".join(pending)

    def _parse_sse_text(self, text: str, request_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse SSE formatted response to extract JSON data.

        The body may carry notifications ahead of the actual reply, so the
        message is chosen in this order: the response whose ``id`` equals
        *request_id*, else the first message with ``result``/``error``, else the
        first decodable message.

        Returns:
            The selected JSON message, or ``{}`` when the body has no data.

        Raises:
            ValueError: If the body has data but none of it is valid JSON.
        """
        messages: List[Any] = []
        decode_error: Optional[ValueError] = None

        for payload in self._iter_sse_payloads(text):
            if not payload.strip():
                continue
            try:
                messages.append(json.loads(payload))
                continue
            except ValueError as error:
                failure = error
            # Tolerate servers that emit several events without a separating
            # blank line: each data line may be a complete JSON document.
            recovered = False
            for piece in payload.split("\n"):
                try:
                    messages.append(json.loads(piece))
                    recovered = True
                except ValueError:
                    continue
            if not recovered and decode_error is None:
                decode_error = failure

        if not messages:
            if decode_error is not None:
                raise decode_error
            return {}

        replies = [
            message for message in messages
            if isinstance(message, dict) and ('result' in message or 'error' in message)
        ]
        if request_id is not None:
            for message in replies:
                if message.get('id') == request_id:
                    return message
        return replies[0] if replies else messages[0]

    async def _handle_401_response(self, response: "aiohttp.ClientResponse"):
        """
        Handle 401 Unauthorized response with OAuth flow.

        Collects whatever OAuth metadata can be discovered and always raises.

        Raises:
            McpAuthorizationRequired: Always.
        """
        from .mcp_oauth import (
            canonical_resource,
            extract_resource_metadata_url,
            fetch_resource_metadata_async,
            infer_authorization_servers_from_realm,
            fetch_oauth_authorization_server_metadata
        )

        auth_header = response.headers.get('WWW-Authenticate', '')
        resource_metadata_url = extract_resource_metadata_url(auth_header, self.url)

        metadata = None
        if resource_metadata_url:
            metadata = await fetch_resource_metadata_async(
                resource_metadata_url,
                session=self._http_session,
                timeout=30
            )

        # Infer authorization servers if not in metadata
        if not metadata or not metadata.get('authorization_servers'):
            inferred_servers = infer_authorization_servers_from_realm(auth_header, self.url)
            # Everything below needs at least one inferred server; guarding here
            # keeps inferred_servers[0] from failing on an empty result.
            if inferred_servers:
                if not metadata:
                    metadata = {}
                metadata['authorization_servers'] = inferred_servers

                # Fetch OAuth metadata
                # NOTE(review): called without await, so presumably synchronous;
                # if it performs network I/O it blocks the event loop - confirm.
                auth_server_metadata = fetch_oauth_authorization_server_metadata(inferred_servers[0], timeout=30)
                if auth_server_metadata:
                    metadata['oauth_authorization_server'] = auth_server_metadata

        raise McpAuthorizationRequired(
            message=f"MCP server {self.url} requires OAuth authorization",
            server_url=canonical_resource(self.url),
            resource_metadata_url=resource_metadata_url,
            www_authenticate=auth_header,
            resource_metadata=metadata,
            status=401,
            tool_name=self.url,
        )

    async def list_tools(self) -> List[Dict[str, Any]]:
        """
        Get list of available tools from the MCP server.

        Returns:
            List of tool definitions
        """
        response = await self.send_request("tools/list")
        result = response.get('result', {})
        tools = result.get('tools', [])
        logger.info(f"[MCP Client] Discovered {len(tools)} tools")
        return tools

    async def list_prompts(self) -> List[Dict[str, Any]]:
        """
        Get list of available prompts from the MCP server.

        Returns:
            List of prompt definitions
        """
        response = await self.send_request("prompts/list")
        result = response.get('result', {})
        prompts = result.get('prompts', [])
        logger.debug(f"[MCP Client] Discovered {len(prompts)} prompts")
        return prompts

    async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
        """
        Execute a tool on the MCP server.

        Args:
            tool_name: Name of the tool to call
            arguments: Tool arguments

        Returns:
            Tool execution result
        """
        response = await self.send_request(
            "tools/call",
            params={
                "name": tool_name,
                "arguments": arguments
            }
        )
        return response.get('result', {})

    async def close(self):
        """Close the client and cleanup resources."""
        logger.info("[MCP Client] Closing connection...")

        if self._sse_client:
            await self._sse_client.close()
            self._sse_client = None

        if self._http_session and not self._http_session.closed:
            await self._http_session.close()
        self._http_session = None

        logger.info("[MCP Client] Connection closed")
@@ -0,0 +1,124 @@
1
+ """
2
+ MCP Tools Discovery Utility.
3
+ Provides a standalone function to discover tools from remote MCP servers.
4
+ Supports both SSE (Server-Sent Events) and Streamable HTTP transports with auto-detection.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from .mcp_oauth import McpAuthorizationRequired
12
+ from .mcp_client import McpClient
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def discover_mcp_tools(
    url: str,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 60,
    session_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Discover available tools from a remote MCP server.

    This function connects to a remote MCP server and retrieves the list of
    available tools using the MCP protocol. Automatically detects and uses
    the appropriate transport (SSE or Streamable HTTP).

    The call is synchronous. When the calling thread already runs an asyncio
    event loop, the discovery runs on a short-lived worker thread with its own
    loop, because ``asyncio.run`` cannot be nested; the caller still blocks
    until the result is available (async callers should prefer
    ``discover_mcp_tools_async``).

    Args:
        url: MCP server HTTP URL (http:// or https://)
        headers: Optional HTTP headers for authentication
        timeout: Request timeout in seconds (default: 60)
        session_id: Optional session ID for stateful connections

    Returns:
        List of tool definitions, each containing:
        - name: Tool name
        - description: Tool description
        - inputSchema: JSON schema for tool input parameters

    Raises:
        McpAuthorizationRequired: If the server requires OAuth authorization (401)
        Exception: For other connection or protocol errors

    Example:
        >>> tools = discover_mcp_tools(
        ...     url="https://mcp.example.com/sse",
        ...     headers={"Authorization": "Bearer token123"}
        ... )
        >>> print(f"Found {len(tools)} tools")
    """
    from concurrent.futures import ThreadPoolExecutor

    logger.info(f"[MCP Discovery] Starting tool discovery from {url}")

    def _run_in_fresh_loop() -> List[Dict[str, Any]]:
        # The coroutine is created here, on the thread that runs it, so it is
        # never left un-awaited when asyncio.run cannot start.
        return asyncio.run(
            _discover_tools_async(url, headers, timeout, session_id)
        )

    try:
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: run the async discovery in a new event loop
            tools_list = _run_in_fresh_loop()
        else:
            # A loop is already running here; asyncio.run would raise, so use
            # a separate thread that owns its own loop.
            with ThreadPoolExecutor(max_workers=1) as executor:
                tools_list = executor.submit(_run_in_fresh_loop).result()

        logger.info(f"[MCP Discovery] Successfully discovered {len(tools_list)} tools from {url}")
        return tools_list

    except McpAuthorizationRequired:
        # Re-raise auth exceptions directly
        logger.info(f"[MCP Discovery] Authorization required for {url}")
        raise

    except Exception as e:
        logger.error(f"[MCP Discovery] Failed to discover tools from {url}: {e}")
        raise
71
+
72
+
73
async def _discover_tools_async(
    url: str,
    headers: Optional[Dict[str, str]],
    timeout: int,
    session_id: Optional[str],
) -> List[Dict[str, Any]]:
    """
    Connect with the unified MCP client, list the server's tools and return
    them reduced to ``name`` / ``description`` / ``inputSchema``.
    """
    # Transport is left to the client's auto-detection.
    mcp = McpClient(
        url=url,
        session_id=session_id,
        headers=headers,
        timeout=timeout
    )

    async with mcp:
        # Protocol handshake must precede any other request.
        await mcp.initialize()
        logger.debug(f"[MCP Discovery] Session initialized (transport={mcp.detected_transport})")

        raw_tools = await mcp.list_tools()
        logger.debug(f"[MCP Discovery] Received {len(raw_tools)} tools")

        # Keep only the standard fields, defaulting the optional ones.
        normalized = [
            {
                'name': entry.get('name'),
                'description': entry.get('description', ''),
                'inputSchema': entry.get('inputSchema', {}),
            }
            for entry in raw_tools
        ]

    return normalized
111
+
112
+
113
async def discover_mcp_tools_async(
    url: str,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 60,
    session_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Awaitable counterpart of discover_mcp_tools.

    Takes the same arguments and returns the same list of tool definitions;
    refer to discover_mcp_tools for the detailed description.
    """
    discovered = await _discover_tools_async(
        url=url,
        headers=headers,
        timeout=timeout,
        session_id=session_id,
    )
    return discovered
@@ -13,7 +13,8 @@ logger = logging.getLogger(__name__)
13
13
  def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
14
14
  llm_client: Any,
15
15
  alita_client: Optional[Any] = None,
16
- mcp_tokens: Optional[Dict[str, Any]] = None) -> List[Any]:
16
+ mcp_tokens: Optional[Dict[str, Any]] = None,
17
+ use_prefix: bool = False) -> List[Any]:
17
18
  """
18
19
  Instantiate a toolkit with LLM client support.
19
20
 
@@ -25,6 +26,9 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
25
26
  llm_client: LLM client instance for tools that need LLM capabilities
26
27
  alita_client: Optional additional client instance
27
28
  mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
29
+ use_prefix: If True, tools get prefixed with toolkit_name to prevent collisions
30
+ (for agent use). If False, tools use base names only (for testing interface).
31
+ Default False for backward compatibility with testing.
28
32
 
29
33
  Returns:
30
34
  List of instantiated tools from the toolkit
@@ -54,11 +58,12 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
54
58
  toolkit_type = toolkit_config.get('type', toolkit_name.lower())
55
59
 
56
60
  # Create a tool configuration dict with required fields
61
+ # Note: MCP toolkit always requires toolkit_name, other toolkits respect use_prefix flag
57
62
  tool_config = {
58
63
  'id': toolkit_config.get('id', random.randint(1, 1000000)),
59
64
  'type': toolkit_config.get('type', toolkit_type),
60
65
  'settings': settings,
61
- 'toolkit_name': toolkit_name
66
+ 'toolkit_name': toolkit_name if (use_prefix or toolkit_type == 'mcp') else None
62
67
  }
63
68
 
64
69
  # Get tools using the toolkit configuration with clients
@@ -76,21 +81,10 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
76
81
  # Re-raise McpAuthorizationRequired without logging as error
77
82
  from ..utils.mcp_oauth import McpAuthorizationRequired
78
83
 
79
- # Check if it's McpAuthorizationRequired directly
80
84
  if isinstance(e, McpAuthorizationRequired):
81
85
  logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
82
86
  raise
83
87
 
84
- # Also check for wrapped exceptions
85
- if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
86
- logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (wrapped)")
87
- raise e.__cause__
88
-
89
- # Check exception class name as fallback
90
- if e.__class__.__name__ == 'McpAuthorizationRequired':
91
- logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (by name)")
92
- raise
93
-
94
88
  # Log and re-raise other errors
95
89
  logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
96
90
  raise
@@ -386,7 +386,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
386
386
 
387
387
  def remove_index(self, index_name: str = ""):
388
388
  """Cleans the indexed data in the collection."""
389
- super()._clean_collection(index_name=index_name)
389
+ super()._clean_collection(index_name=index_name, including_index_meta=True)
390
390
  return (f"Collection '{index_name}' has been removed from the vector store.\n"
391
391
  f"Available collections: {self.list_collections()}") if index_name \
392
392
  else "All collections have been removed from the vector store."
@@ -17,6 +17,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
17
17
  for chunk in chunks:
18
18
  metadata = doc.metadata.copy()
19
19
  metadata['chunk_id'] = chunk_id
20
+ metadata['method_name'] = 'json'
20
21
  chunk_id += 1
21
22
  yield Document(page_content=json.dumps(chunk), metadata=metadata)
22
23
  except Exception as e:
@@ -60,6 +60,7 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
60
60
  docmeta.update({"headers": "; ".join(headers_meta)})
61
61
  docmeta['chunk_id'] = chunk_id
62
62
  docmeta['chunk_type'] = "document"
63
+ docmeta['method_name'] = 'markdown'
63
64
  yield Document(
64
65
  page_content=subchunk,
65
66
  metadata=docmeta
@@ -71,6 +72,7 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
71
72
  docmeta.update({"headers": "; ".join(headers_meta)})
72
73
  docmeta['chunk_id'] = chunk_id
73
74
  docmeta['chunk_type'] = "document"
75
+ docmeta['method_name'] = 'text'
74
76
  yield Document(
75
77
  page_content=chunk.page_content,
76
78
  metadata=docmeta
@@ -86,6 +86,7 @@ def _default_text_chunker(
86
86
  for idx, chunk in enumerate(chunks, 1):
87
87
  chunk.metadata['chunk_id'] = idx
88
88
  chunk.metadata['chunk_type'] = 'text'
89
+ chunk.metadata['method_name'] = 'text'
89
90
  yield chunk
90
91
 
91
92