PyPI - alita-sdk - Versions diffs - 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl - Mend

alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (34) hide show

alita_sdk/cli/agent_loader.py +27 -6
alita_sdk/cli/agents.py +10 -1
alita_sdk/cli/tools/filesystem.py +95 -9
alita_sdk/runtime/clients/client.py +40 -21
alita_sdk/runtime/langchain/constants.py +3 -1
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
alita_sdk/runtime/langchain/langraph_agent.py +2 -1
alita_sdk/runtime/toolkits/mcp.py +68 -62
alita_sdk/runtime/toolkits/planning.py +3 -1
alita_sdk/runtime/toolkits/tools.py +37 -18
alita_sdk/runtime/tools/artifact.py +46 -17
alita_sdk/runtime/tools/function.py +2 -1
alita_sdk/runtime/tools/llm.py +135 -24
alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
alita_sdk/runtime/tools/vectorstore_base.py +3 -3
alita_sdk/runtime/utils/AlitaCallback.py +106 -20
alita_sdk/runtime/utils/mcp_client.py +465 -0
alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
alita_sdk/runtime/utils/toolkit_utils.py +7 -13
alita_sdk/tools/base_indexer_toolkit.py +1 -1
alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
alita_sdk/tools/chunkers/universal_chunker.py +1 -0
alita_sdk/tools/code/loaders/codesearcher.py +3 -2
alita_sdk/tools/confluence/api_wrapper.py +63 -14
alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +1 -1
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +34 -32
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
{alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0

alita_sdk/runtime/utils/mcp_client.py ADDED Viewed

@@ -0,0 +1,465 @@
+"""
+Unified MCP Client with auto-detection for SSE and Streamable HTTP transports.
+This module provides a unified interface for MCP server communication that
+automatically detects and uses the appropriate transport:
+- SSE (Server-Sent Events): Traditional dual-connection model (GET for stream, POST for commands)
+- Streamable HTTP: Newer POST-based model used by servers like GitHub Copilot MCP
+Usage:
+    # Auto-detect transport (recommended)
+    client = McpClient(url=url, session_id=session_id, headers=headers)
+    # Force specific transport
+    client = McpClient(url=url, session_id=session_id, transport="streamable_http")
+    async with client:
+        await client.initialize()
+        tools = await client.list_tools()
+        result = await client.call_tool("tool_name", {"arg": "value"})
+"""
+import asyncio
+import json
+import logging
+import uuid
+from typing import Any, Dict, List, Literal, Optional
+import aiohttp
+from .mcp_oauth import McpAuthorizationRequired
+logger = logging.getLogger(__name__)
+# Transport types
+TransportType = Literal["auto", "sse", "streamable_http"]
+class McpClient:
+    """
+    Unified MCP client that supports both SSE and Streamable HTTP transports.
+    In "auto" mode a URL ending in /sse selects SSE directly. Otherwise
+    Streamable HTTP is tried first and the client falls back to SSE when the
+    server answers the initialize request with 404 Not Found or
+    405 Method Not Allowed.
+    """
+    # Request headers whose values must never reach the logs verbatim.
+    _SENSITIVE_HEADERS = ("authorization", "proxy-authorization", "cookie")
+    def __init__(
+        self,
+        url: str,
+        session_id: Optional[str] = None,
+        headers: Optional[Dict[str, str]] = None,
+        timeout: int = 300,
+        transport: TransportType = "auto"
+    ):
+        """
+        Initialize the unified MCP client.
+        Args:
+            url: MCP server URL
+            session_id: Session ID for stateful connections (auto-generated if not provided)
+            headers: HTTP headers (e.g., Authorization)
+            timeout: Request timeout in seconds
+            transport: Transport type - "auto", "sse", or "streamable_http"
+        """
+        self.url = url
+        self.session_id = session_id or str(uuid.uuid4())
+        self.headers = headers or {}
+        self.timeout = timeout
+        self.transport = transport
+        # Will be set during connection
+        self._detected_transport: Optional[str] = None
+        self._sse_client = None
+        self._http_session: Optional[aiohttp.ClientSession] = None
+        self._mcp_session_id: Optional[str] = None  # Server-provided session ID
+        self._initialized = False
+        logger.info(f"[MCP Client] Created for {url} (transport={transport}, session={self.session_id})")
+    @property
+    def server_session_id(self) -> Optional[str]:
+        """Get the server-provided session ID (from mcp-session-id header)."""
+        return self._mcp_session_id
+    @property
+    def detected_transport(self) -> Optional[str]:
+        """Get the detected/selected transport type (None before connecting)."""
+        return self._detected_transport
+    async def __aenter__(self):
+        """Async context manager entry - detect and connect."""
+        await self._connect()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - cleanup."""
+        await self.close()
+    async def _connect(self):
+        """Select the transport and create the matching client/session object."""
+        if self.transport == "sse":
+            self._detected_transport = "sse"
+            await self._connect_sse()
+        elif self.transport == "streamable_http":
+            self._detected_transport = "streamable_http"
+            await self._connect_streamable_http()
+        else:  # auto
+            await self._auto_detect_and_connect()
+    async def _auto_detect_and_connect(self):
+        """
+        Pick the initial transport for "auto" mode.
+        Creating the Streamable HTTP session sends nothing to the server, so a
+        transport mismatch usually only shows up in initialize(); that method
+        performs the actual fallback to SSE.
+        """
+        # If URL ends with /sse, use SSE transport directly
+        if self.url.rstrip('/').endswith('/sse'):
+            logger.debug("[MCP Client] URL ends with /sse, using SSE transport")
+            await self._connect_sse()
+            self._detected_transport = "sse"
+            logger.info("[MCP Client] Using SSE transport")
+            return
+        try:
+            logger.debug("[MCP Client] Auto-detecting transport, trying Streamable HTTP first...")
+            await self._connect_streamable_http()
+            self._detected_transport = "streamable_http"
+            logger.info("[MCP Client] Using Streamable HTTP transport")
+        except Exception as e:
+            if not self._is_transport_mismatch(e):
+                # Re-raise other errors
+                raise
+            await self._fall_back_to_sse(e)
+    @staticmethod
+    def _is_transport_mismatch(error: Exception) -> bool:
+        """
+        Return True when *error* indicates the server rejected Streamable HTTP.
+        The HTTP status attribute (as set on aiohttp.ClientResponseError) is
+        checked first. Matching bare "404"/"405" substrings in the message
+        would also trigger on unrelated text such as a URL with port 4040.
+        """
+        if isinstance(error, McpAuthorizationRequired):
+            return False
+        if getattr(error, "status", None) in (404, 405):
+            return True
+        return "method not allowed" in str(error).lower()
+    async def _fall_back_to_sse(self, reason: Exception):
+        """Drop the Streamable HTTP session and switch this client to SSE."""
+        logger.debug(f"[MCP Client] Streamable HTTP not supported ({reason}), trying SSE...")
+        if self._http_session and not self._http_session.closed:
+            await self._http_session.close()
+        self._http_session = None
+        self._mcp_session_id = None
+        await self._connect_sse()
+        self._detected_transport = "sse"
+        logger.info("[MCP Client] Using SSE transport")
+    @classmethod
+    def _mask_header_value(cls, name: str, value: str) -> str:
+        """Return *value* in a form safe for logging (credentials are hidden)."""
+        if name.lower() not in cls._SENSITIVE_HEADERS:
+            return value
+        # Keep only the auth scheme ("Bearer", "Basic", ...); never any token characters.
+        scheme, separator, _ = value.partition(' ')
+        return f"{scheme} ***" if separator else "***"
+    async def _connect_streamable_http(self):
+        """Create the aiohttp session used for Streamable HTTP (no request is sent)."""
+        self._http_session = aiohttp.ClientSession(
+            timeout=aiohttp.ClientTimeout(total=self.timeout)
+        )
+    async def _connect_sse(self):
+        """Create the SSE client for this URL/session."""
+        from .mcp_sse_client import McpSseClient
+        self._sse_client = McpSseClient(
+            url=self.url,
+            session_id=self.session_id,
+            headers=self.headers,
+            timeout=self.timeout
+        )
+    async def initialize(self) -> Dict[str, Any]:
+        """
+        Initialize MCP protocol session.
+        In "auto" mode, a 404/405 answer to the Streamable HTTP initialize
+        request switches the client to SSE and retries the handshake there.
+        Returns:
+            Server capabilities and info
+        Raises:
+            McpAuthorizationRequired: If the server answers 401
+        """
+        if self._detected_transport != "streamable_http":
+            return await self._initialize_sse()
+        try:
+            return await self._initialize_streamable_http()
+        except Exception as e:
+            # Only auto mode may change transport; a forced transport reports the error.
+            if self.transport != "auto" or not self._is_transport_mismatch(e):
+                raise
+            await self._fall_back_to_sse(e)
+        return await self._initialize_sse()
+    async def _initialize_streamable_http(self, retry_without_session: bool = False) -> Dict[str, Any]:
+        """
+        Initialize via Streamable HTTP transport.
+        Args:
+            retry_without_session: Set on the single retry after an
+                "invalid session" 400 response; prevents further retries.
+        Returns:
+            The "result" member of the initialize response ({} if absent)
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json, text/event-stream",
+            **self.headers
+        }
+        # No session ID is sent on initialization: the server provides one in the
+        # response, and it is only attached to subsequent requests.
+        debug_headers = {k: self._mask_header_value(k, v) for k, v in headers.items()}
+        logger.debug(f"[MCP Client] Request headers: {debug_headers}")
+        init_request = {
+            "jsonrpc": "2.0",
+            "id": str(uuid.uuid4()),
+            "method": "initialize",
+            "params": {
+                "protocolVersion": "2024-11-05",
+                "capabilities": {
+                    "roots": {"listChanged": True},
+                    "sampling": {}
+                },
+                "clientInfo": {
+                    "name": "Alita MCP Client",
+                    "version": "1.0.0"
+                }
+            }
+        }
+        logger.debug(f"[MCP Client] Sending initialize via Streamable HTTP to {self.url}")
+        async with self._http_session.post(self.url, json=init_request, headers=headers) as response:
+            if response.status == 401:
+                await self._handle_401_response(response)
+            if response.status == 405:
+                raise Exception("HTTP 405 Method Not Allowed - server may require SSE transport")
+            if response.status >= 400:
+                # Reading the body is best-effort; only this read is guarded so that
+                # errors raised by the retry below are never swallowed.
+                try:
+                    error_body = await response.text()
+                except Exception:
+                    error_body = None
+                if (
+                    error_body is not None
+                    and response.status == 400
+                    and not retry_without_session
+                    and self.session_id
+                    and "invalid session" in error_body.lower()
+                ):
+                    logger.warning("[MCP Client] Invalid session, retrying without session_id")
+                    return await self._initialize_streamable_http(retry_without_session=True)
+                if error_body is not None:
+                    logger.error(f"[MCP Client] HTTP {response.status} error response: {error_body[:1000]}")
+            response.raise_for_status()
+            # Get session ID from response headers
+            self._mcp_session_id = response.headers.get("mcp-session-id")
+            if self._mcp_session_id:
+                logger.info(f"[MCP Client] Server provided session_id: {self._mcp_session_id}")
+            else:
+                logger.debug(f"[MCP Client] No session_id in response headers. Headers: {dict(response.headers)}")
+            # Parse response
+            result = await self._parse_response(response)
+            logger.debug(f"[MCP Client] Initialize response: {result}")
+        # Send initialized notification
+        await self._send_notification("notifications/initialized")
+        self._initialized = True
+        return result.get('result', {})
+    async def _initialize_sse(self) -> Dict[str, Any]:
+        """Initialize via SSE transport."""
+        result = await self._sse_client.initialize()
+        self._initialized = True
+        return result
+    async def send_request(
+        self,
+        method: str,
+        params: Optional[Dict[str, Any]] = None,
+        request_id: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Send a JSON-RPC request to the MCP server.
+        Args:
+            method: JSON-RPC method name (e.g., "tools/list", "tools/call")
+            params: Method parameters
+            request_id: Optional request ID (auto-generated if not provided)
+        Returns:
+            Parsed JSON-RPC response
+        """
+        if self._detected_transport == "streamable_http":
+            return await self._send_request_streamable_http(method, params, request_id)
+        else:
+            return await self._sse_client.send_request(method, params, request_id)
+    async def _send_request_streamable_http(
+        self,
+        method: str,
+        params: Optional[Dict[str, Any]] = None,
+        request_id: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Send request via Streamable HTTP.
+        Raises:
+            McpAuthorizationRequired: If the server answers 401
+            Exception: If the JSON-RPC response carries an "error" member
+        """
+        if request_id is None:
+            request_id = str(uuid.uuid4())
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json, text/event-stream",
+            **self.headers
+        }
+        # Add MCP session ID if we have one
+        if self._mcp_session_id:
+            headers["mcp-session-id"] = self._mcp_session_id
+        request = {
+            "jsonrpc": "2.0",
+            "id": request_id,
+            "method": method,
+            "params": params or {}
+        }
+        logger.debug(f"[MCP Client] Sending request: {method} (id={request_id})")
+        async with self._http_session.post(self.url, json=request, headers=headers) as response:
+            if response.status == 401:
+                await self._handle_401_response(response)
+            response.raise_for_status()
+            result = await self._parse_response(response)
+            # Check for JSON-RPC error
+            if 'error' in result:
+                error = result['error']
+                raise Exception(f"MCP Error: {error.get('message', str(error))}")
+            return result
+    async def _send_notification(self, method: str, params: Optional[Dict[str, Any]] = None):
+        """
+        Send a JSON-RPC notification (no response expected).
+        Only the Streamable HTTP transport sends anything here; for other
+        transports this method is a no-op.
+        """
+        if self._detected_transport == "streamable_http":
+            headers = {
+                "Content-Type": "application/json",
+                **self.headers
+            }
+            if self._mcp_session_id:
+                headers["mcp-session-id"] = self._mcp_session_id
+            notification = {
+                "jsonrpc": "2.0",
+                "method": method
+            }
+            if params:
+                notification["params"] = params
+            async with self._http_session.post(self.url, json=notification, headers=headers) as response:
+                # Notifications carry no response body, but a failed delivery is worth a trace.
+                if response.status >= 400:
+                    logger.warning(f"[MCP Client] Notification {method} returned HTTP {response.status}")
+    async def _parse_response(self, response: aiohttp.ClientResponse) -> Dict[str, Any]:
+        """Parse response, handling both JSON and SSE formats."""
+        content_type = response.headers.get("content-type", "")
+        text = await response.text()
+        if "text/event-stream" in content_type:
+            return self._parse_sse_text(text)
+        else:
+            return json.loads(text) if text else {}
+    def _parse_sse_text(self, text: str) -> Dict[str, Any]:
+        """
+        Extract the JSON-RPC message from an SSE formatted response body.
+        A stream may carry notifications or server requests ahead of the actual
+        response, so the first message containing "result" or "error" wins.
+        If there is none, the first data payload is returned; with no data at
+        all the result is {}.
+        """
+        first_payload = None
+        # Split on '\n' only: str.splitlines() would also break on characters such
+        # as U+2028 that may legally appear unescaped inside a JSON string.
+        for line in text.split('\n'):
+            if not line.startswith('data:'):
+                continue
+            data = line[5:].strip()
+            if not data:
+                continue
+            payload = json.loads(data)
+            if isinstance(payload, dict) and ('result' in payload or 'error' in payload):
+                return payload
+            if first_payload is None:
+                first_payload = payload
+        return first_payload if first_payload is not None else {}
+    async def _handle_401_response(self, response: aiohttp.ClientResponse):
+        """
+        Handle 401 Unauthorized response with OAuth flow.
+        Collects whatever OAuth metadata can be discovered and always raises.
+        Raises:
+            McpAuthorizationRequired: Always, carrying the discovered metadata
+        """
+        from .mcp_oauth import (
+            canonical_resource,
+            extract_resource_metadata_url,
+            fetch_resource_metadata_async,
+            infer_authorization_servers_from_realm,
+            fetch_oauth_authorization_server_metadata
+        )
+        auth_header = response.headers.get('WWW-Authenticate', '')
+        resource_metadata_url = extract_resource_metadata_url(auth_header, self.url)
+        metadata = None
+        if resource_metadata_url:
+            metadata = await fetch_resource_metadata_async(
+                resource_metadata_url,
+                session=self._http_session,
+                timeout=30
+            )
+        # Infer authorization servers if not in metadata
+        if not metadata or not metadata.get('authorization_servers'):
+            inferred_servers = infer_authorization_servers_from_realm(auth_header, self.url)
+            if inferred_servers:
+                if not metadata:
+                    metadata = {}
+                metadata['authorization_servers'] = inferred_servers
+                # Fetch OAuth metadata
+                # NOTE(review): called without await, so presumably a blocking call that
+                # stalls the event loop for up to 30s - confirm against mcp_oauth.
+                auth_server_metadata = fetch_oauth_authorization_server_metadata(inferred_servers[0], timeout=30)
+                if auth_server_metadata:
+                    metadata['oauth_authorization_server'] = auth_server_metadata
+        raise McpAuthorizationRequired(
+            message=f"MCP server {self.url} requires OAuth authorization",
+            server_url=canonical_resource(self.url),
+            resource_metadata_url=resource_metadata_url,
+            www_authenticate=auth_header,
+            resource_metadata=metadata,
+            status=401,
+            tool_name=self.url,
+        )
+    async def list_tools(self) -> List[Dict[str, Any]]:
+        """
+        Get list of available tools from the MCP server.
+        Returns:
+            List of tool definitions
+        """
+        response = await self.send_request("tools/list")
+        result = response.get('result', {})
+        tools = result.get('tools', [])
+        logger.info(f"[MCP Client] Discovered {len(tools)} tools")
+        return tools
+    async def list_prompts(self) -> List[Dict[str, Any]]:
+        """
+        Get list of available prompts from the MCP server.
+        Returns:
+            List of prompt definitions
+        """
+        response = await self.send_request("prompts/list")
+        result = response.get('result', {})
+        prompts = result.get('prompts', [])
+        logger.debug(f"[MCP Client] Discovered {len(prompts)} prompts")
+        return prompts
+    async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
+        """
+        Execute a tool on the MCP server.
+        Args:
+            tool_name: Name of the tool to call
+            arguments: Tool arguments
+        Returns:
+            Tool execution result
+        """
+        response = await self.send_request(
+            "tools/call",
+            params={
+                "name": tool_name,
+                "arguments": arguments
+            }
+        )
+        return response.get('result', {})
+    async def close(self):
+        """Close the client and cleanup resources."""
+        logger.info("[MCP Client] Closing connection...")
+        try:
+            if self._sse_client:
+                await self._sse_client.close()
+        finally:
+            # Release the HTTP session even if closing the SSE client failed.
+            self._sse_client = None
+            if self._http_session and not self._http_session.closed:
+                await self._http_session.close()
+            self._http_session = None
+        logger.info("[MCP Client] Connection closed")

alita_sdk/runtime/utils/mcp_tools_discovery.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+MCP Tools Discovery Utility.
+Provides a standalone function to discover tools from remote MCP servers.
+Supports both SSE (Server-Sent Events) and Streamable HTTP transports with auto-detection.
+"""
+import asyncio
+import logging
+from typing import Any, Dict, List, Optional
+from .mcp_oauth import McpAuthorizationRequired
+from .mcp_client import McpClient
+logger = logging.getLogger(__name__)
+def discover_mcp_tools(
+    url: str,
+    headers: Optional[Dict[str, str]] = None,
+    timeout: int = 60,
+    session_id: Optional[str] = None,
+) -> List[Dict[str, Any]]:
+    """
+    Discover available tools from a remote MCP server.
+    This function connects to a remote MCP server and retrieves the list of
+    available tools using the MCP protocol. Automatically detects and uses
+    the appropriate transport (SSE or Streamable HTTP).
+    It is a blocking call. When invoked from a thread that already runs an
+    event loop, the discovery coroutine is executed on a worker thread with
+    its own loop, because asyncio.run() cannot be nested inside a running loop.
+    Args:
+        url: MCP server HTTP URL (http:// or https://)
+        headers: Optional HTTP headers for authentication
+        timeout: Request timeout in seconds (default: 60)
+        session_id: Optional session ID for stateful connections
+    Returns:
+        List of tool definitions, each containing:
+        - name: Tool name
+        - description: Tool description
+        - inputSchema: JSON schema for tool input parameters
+    Raises:
+        McpAuthorizationRequired: If the server requires OAuth authorization (401)
+        Exception: For other connection or protocol errors
+    Example:
+        >>> tools = discover_mcp_tools(
+        ...     url="https://mcp.example.com/sse",
+        ...     headers={"Authorization": "Bearer token123"}
+        ... )
+        >>> print(f"Found {len(tools)} tools")
+    """
+    logger.info(f"[MCP Discovery] Starting tool discovery from {url}")
+    def _run_discovery() -> List[Dict[str, Any]]:
+        # The coroutine is created inside the thread that runs it.
+        return asyncio.run(_discover_tools_async(url, headers, timeout, session_id))
+    try:
+        asyncio.get_running_loop()
+        loop_is_running = True
+    except RuntimeError:
+        loop_is_running = False
+    try:
+        if loop_is_running:
+            # asyncio.run() would raise RuntimeError here; use a dedicated thread instead.
+            from concurrent.futures import ThreadPoolExecutor
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                tools_list = executor.submit(_run_discovery).result()
+        else:
+            # Run the async discovery in a new event loop
+            tools_list = _run_discovery()
+        logger.info(f"[MCP Discovery] Successfully discovered {len(tools_list)} tools from {url}")
+        return tools_list
+    except McpAuthorizationRequired:
+        # Re-raise auth exceptions directly
+        logger.info(f"[MCP Discovery] Authorization required for {url}")
+        raise
+    except Exception as e:
+        logger.error(f"[MCP Discovery] Failed to discover tools from {url}: {e}")
+        raise
+async def _discover_tools_async(
+    url: str,
+    headers: Optional[Dict[str, str]],
+    timeout: int,
+    session_id: Optional[str],
+) -> List[Dict[str, Any]]:
+    """
+    Connect with the unified MCP client, list the server's tools and return
+    them reduced to their name, description and inputSchema fields.
+    """
+    # Transport is left at the client's default, i.e. auto-detection.
+    mcp = McpClient(url=url, session_id=session_id, headers=headers, timeout=timeout)
+    async with mcp:
+        # Handshake first, then query the tool catalogue
+        await mcp.initialize()
+        logger.debug(f"[MCP Discovery] Session initialized (transport={mcp.detected_transport})")
+        discovered = await mcp.list_tools()
+        logger.debug(f"[MCP Discovery] Received {len(discovered)} tools")
+        # Keep only the standard fields, with defaults for the optional ones
+        normalized = [
+            {
+                'name': entry.get('name'),
+                'description': entry.get('description', ''),
+                'inputSchema': entry.get('inputSchema', {}),
+            }
+            for entry in discovered
+        ]
+    return normalized
+async def discover_mcp_tools_async(
+    url: str,
+    headers: Optional[Dict[str, str]] = None,
+    timeout: int = 60,
+    session_id: Optional[str] = None,
+) -> List[Dict[str, Any]]:
+    """
+    Awaitable counterpart of discover_mcp_tools for callers that already run
+    inside an event loop. Arguments and return value are the same as for
+    discover_mcp_tools.
+    """
+    tool_definitions = await _discover_tools_async(
+        url=url,
+        headers=headers,
+        timeout=timeout,
+        session_id=session_id,
+    )
+    return tool_definitions

alita_sdk/runtime/utils/toolkit_utils.py CHANGED Viewed

@@ -13,7 +13,8 @@ logger = logging.getLogger(__name__)
 def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
                                    llm_client: Any,
                                    alita_client: Optional[Any] = None,
-                                   mcp_tokens: Optional[Dict[str, Any]] = None) -> List[Any]:
+                                   mcp_tokens: Optional[Dict[str, Any]] = None,
+                                   use_prefix: bool = False) -> List[Any]:
     """
     Instantiate a toolkit with LLM client support.
@@ -25,6 +26,9 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
         llm_client: LLM client instance for tools that need LLM capabilities
         alita_client: Optional additional client instance
         mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
+        use_prefix: If True, tools get prefixed with toolkit_name to prevent collisions
+                   (for agent use). If False, tools use base names only (for testing interface).
+                   Default False for backward compatibility with testing.
     Returns:
         List of instantiated tools from the toolkit
@@ -54,11 +58,12 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
         toolkit_type = toolkit_config.get('type', toolkit_name.lower())
         # Create a tool configuration dict with required fields
+        # Note: MCP toolkit always requires toolkit_name, other toolkits respect use_prefix flag
         tool_config = {
             'id': toolkit_config.get('id', random.randint(1, 1000000)),
             'type': toolkit_config.get('type', toolkit_type),
             'settings': settings,
-            'toolkit_name': toolkit_name
+            'toolkit_name': toolkit_name if (use_prefix or toolkit_type == 'mcp') else None
         }
         # Get tools using the toolkit configuration with clients
@@ -76,21 +81,10 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
         # Re-raise McpAuthorizationRequired without logging as error
         from ..utils.mcp_oauth import McpAuthorizationRequired
-        # Check if it's McpAuthorizationRequired directly
         if isinstance(e, McpAuthorizationRequired):
             logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
             raise
-        # Also check for wrapped exceptions
-        if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
-            logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (wrapped)")
-            raise e.__cause__
-        # Check exception class name as fallback
-        if e.__class__.__name__ == 'McpAuthorizationRequired':
-            logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (by name)")
-            raise
         # Log and re-raise other errors
         logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
         raise

alita_sdk/tools/base_indexer_toolkit.py CHANGED Viewed

@@ -386,7 +386,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        super()._clean_collection(index_name=index_name)
+        super()._clean_collection(index_name=index_name, including_index_meta=True)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."

alita_sdk/tools/chunkers/sematic/json_chunker.py CHANGED Viewed

@@ -17,6 +17,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
             for chunk in chunks:
                 metadata = doc.metadata.copy()
                 metadata['chunk_id'] = chunk_id
+                metadata['method_name'] = 'json'
                 chunk_id += 1
                 yield Document(page_content=json.dumps(chunk), metadata=metadata)
         except Exception as e:

alita_sdk/tools/chunkers/sematic/markdown_chunker.py CHANGED Viewed

@@ -60,6 +60,7 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
                     docmeta.update({"headers": "; ".join(headers_meta)})
                     docmeta['chunk_id'] = chunk_id
                     docmeta['chunk_type'] = "document"
+                    docmeta['method_name'] = 'markdown'
                     yield Document(
                         page_content=subchunk,
                         metadata=docmeta
@@ -71,6 +72,7 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
                 docmeta.update({"headers": "; ".join(headers_meta)})
                 docmeta['chunk_id'] = chunk_id
                 docmeta['chunk_type'] = "document"
+                docmeta['method_name'] = 'text'
                 yield Document(
                     page_content=chunk.page_content,
                     metadata=docmeta

alita_sdk/tools/chunkers/universal_chunker.py CHANGED Viewed

@@ -86,6 +86,7 @@ def _default_text_chunker(
         for idx, chunk in enumerate(chunks, 1):
             chunk.metadata['chunk_id'] = idx
             chunk.metadata['chunk_type'] = 'text'
+            chunk.metadata['method_name'] = 'text'
             yield chunk

alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl