alita-sdk 0.3.374__py3-none-any.whl → 0.3.423__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +3 -2
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +56 -40
- alita_sdk/runtime/langchain/constants.py +4 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +92 -28
- alita_sdk/runtime/langchain/utils.py +24 -4
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/tools.py +80 -49
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +28 -23
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +146 -114
- alita_sdk/runtime/tools/sandbox.py +166 -63
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +16 -15
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +43 -31
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +102 -93
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/api_wrapper.py +30 -8
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/gitlab/api_wrapper.py +8 -9
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/qtest/api_wrapper.py +298 -51
- alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
- alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +38 -25
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/RECORD +51 -51
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/sandbox.py
CHANGED

@@ -2,21 +2,60 @@ import asyncio
 import logging
 import subprocess
 import os
-from typing import Any, Type, Optional, Dict
-from
-from
+from typing import Any, Type, Optional, Dict, List, Literal, Union
+from copy import deepcopy
+from pathlib import Path
+
+from langchain_core.tools import BaseTool, BaseToolkit
+from langchain_core.messages import ToolCall
+from pydantic import BaseModel, create_model, ConfigDict, Field
 from pydantic.fields import FieldInfo
 
 logger = logging.getLogger(__name__)
 
+name = "pyodide"
+
+
+def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
+    """
+    Get sandbox tools for the provided tool configurations.
+
+    Args:
+        tools_list: List of tool configurations
+        alita_client: Alita client instance for sandbox tools
+        llm: LLM client instance (unused for sandbox)
+        memory_store: Optional memory store instance (unused for sandbox)
+
+    Returns:
+        List of sandbox tools
+    """
+    all_tools = []
+
+    for tool in tools_list:
+        if tool.get('type') == 'sandbox' or tool.get('toolkit_name') == 'sandbox':
+            try:
+                toolkit_instance = SandboxToolkit.get_toolkit(
+                    stateful=tool['settings'].get('stateful', False),
+                    allow_net=tool['settings'].get('allow_net', True),
+                    alita_client=alita_client,
+                    toolkit_name=tool.get('toolkit_name', '')
+                )
+                all_tools.extend(toolkit_instance.get_tools())
+            except Exception as e:
+                logger.error(f"Error in sandbox toolkit get_tools: {e}")
+                logger.error(f"Tool config: {tool}")
+                raise
+
+    return all_tools
+
 
 def _is_deno_available() -> bool:
     """Check if Deno is available in the PATH"""
     try:
         result = subprocess.run(
-            ["deno", "--version"],
-            capture_output=True,
-            text=True,
+            ["deno", "--version"],
+            capture_output=True,
+            text=True,
             timeout=10
         )
         return result.returncode == 0

@@ -25,43 +64,17 @@ def _is_deno_available() -> bool:
 
 
 def _setup_pyodide_cache_env() -> None:
-    """Setup Pyodide caching environment variables for performance optimization"""
+    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
     try:
-
-
-        if os.path.exists(cache_env_file):
-            with open(cache_env_file, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith('export ') and '=' in line:
-                        # Parse export VAR=value format
-                        var_assignment = line[7:]  # Remove 'export '
-                        if '=' in var_assignment:
-                            key, value = var_assignment.split('=', 1)
-                            # Remove quotes if present
-                            value = value.strip('"').strip("'")
-                            os.environ[key] = value
-                            logger.debug(f"Set Pyodide cache env: {key}={value}")
-
-        # Set default caching environment variables if not already set
-        cache_defaults = {
-            'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
-            'DENO_DIR': os.path.expanduser('~/.cache/deno'),
-            'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
-        }
-
-        for key, default_value in cache_defaults.items():
-            if key not in os.environ:
-                os.environ[key] = default_value
-                logger.debug(f"Set default Pyodide env: {key}={default_value}")
-
+        for key in ["SANDBOX_BASE", "DENO_DIR"]:
+            logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
     except Exception as e:
         logger.warning(f"Could not setup Pyodide cache environment: {e}")
 
 
 # Create input schema for the sandbox tool
 sandbox_tool_input = create_model(
-    "SandboxToolInput",
+    "SandboxToolInput",
     code=(str, FieldInfo(description="Python code to execute in the sandbox environment"))
 )

@@ -72,7 +85,7 @@ class PyodideSandboxTool(BaseTool):
     This tool leverages langchain-sandbox to provide a safe environment for running untrusted Python code.
     Optimized for performance with caching and stateless execution by default.
     """
-
+
     name: str = "pyodide_sandbox"
     description: str = """Execute Python code in a secure sandbox environment using Pyodide.
     This tool allows safe execution of Python code without access to the host system.

@@ -81,7 +94,7 @@ class PyodideSandboxTool(BaseTool):
     - Perform calculations or data analysis
     - Test Python algorithms
     - Run code that requires isolation from the host system
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     Optimized for performance with local caching (stateless by default for faster execution).

@@ -91,14 +104,37 @@ class PyodideSandboxTool(BaseTool):
     allow_net: bool = True
     session_bytes: Optional[bytes] = None
     session_metadata: Optional[Dict] = None
-
+    alita_client: Optional[Any] = None
+
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._sandbox = None
         # Setup caching environment for optimal performance
         _setup_pyodide_cache_env()
         self._initialize_sandbox()
-
+
+    def _prepare_pyodide_input(self, code: str) -> str:
+        """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
+        pyodide_predata = ""
+
+        # Add alita_client if available
+        if self.alita_client:
+            try:
+                # Get the directory of the current file and construct the path to sandbox_client.py
+                current_dir = Path(__file__).parent
+                sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'
+
+                with open(sandbox_client_path, 'r') as f:
+                    sandbox_client_code = f.read()
+                pyodide_predata += f"{sandbox_client_code}\n"
+                pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
+                                    f"project_id={self.alita_client.project_id},"
+                                    f"auth_token='{self.alita_client.auth_token}')\n")
+            except FileNotFoundError:
+                logger.error(f"sandbox_client.py not found. Ensure the file exists.")
+
+        return f"#elitea simplified client\n{pyodide_predata}{code}"
+
     def _initialize_sandbox(self) -> None:
         """Initialize the PyodideSandbox instance with optimized settings"""
         try:

@@ -110,12 +146,22 @@ class PyodideSandboxTool(BaseTool):
             )
             logger.error(error_msg)
             raise RuntimeError(error_msg)
-
+
         from langchain_sandbox import PyodideSandbox
-
+
+        # Air-gapped settings
+        sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
+        sandbox_tmp = os.path.join(sandbox_base, "tmp")
+        deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
+
         # Configure sandbox with performance optimizations
         self._sandbox = PyodideSandbox(
             stateful=self.stateful,
+            #
+            allow_env=["SANDBOX_BASE"],
+            allow_read=[sandbox_base, sandbox_tmp, deno_cache],
+            allow_write=[sandbox_tmp, deno_cache],
+            #
             allow_net=self.allow_net,
             # Use auto node_modules_dir for better caching
             node_modules_dir="auto"

@@ -135,7 +181,7 @@ class PyodideSandboxTool(BaseTool):
         except Exception as e:
             logger.error(f"Failed to initialize PyodideSandbox: {e}")
             raise
-
+
     def _run(self, code: str) -> str:
         """
         Synchronous version - runs the async method in a new event loop

@@ -144,7 +190,10 @@ class PyodideSandboxTool(BaseTool):
         # Check if sandbox is initialized, if not try to initialize
         if self._sandbox is None:
             self._initialize_sandbox()
-
+
+        # Prepare code with state and client injection
+        prepared_code = self._prepare_pyodide_input(code)
+
         # Check if we're already in an async context
         try:
             loop = asyncio.get_running_loop()

@@ -152,11 +201,11 @@ class PyodideSandboxTool(BaseTool):
             # We'll need to use a different approach
             import concurrent.futures
             with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, self._arun(
+                future = executor.submit(asyncio.run, self._arun(prepared_code))
                 return future.result()
         except RuntimeError:
             # No running loop, safe to use asyncio.run
-            return asyncio.run(self._arun(
+            return asyncio.run(self._arun(prepared_code))
         except (ImportError, RuntimeError) as e:
             # Handle specific dependency errors gracefully
             error_msg = str(e)

@@ -169,7 +218,7 @@ class PyodideSandboxTool(BaseTool):
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
             return f"Error executing code: {str(e)}"
-
+
     async def _arun(self, code: str) -> str:
         """
         Execute Python code in the Pyodide sandbox

@@ -177,19 +226,19 @@ class PyodideSandboxTool(BaseTool):
         try:
             if self._sandbox is None:
                 self._initialize_sandbox()
-
+
             # Execute the code with session state if available
             result = await self._sandbox.execute(
                 code,
                 session_bytes=self.session_bytes,
                 session_metadata=self.session_metadata
             )
-
+
             # Update session state for stateful execution
             if self.stateful:
                 self.session_bytes = result.session_bytes
                 self.session_metadata = result.session_metadata
-
+
             result_dict = {}
 
             if result.result is not None:

@@ -212,10 +261,10 @@ class PyodideSandboxTool(BaseTool):
 
             result_dict["execution_info"] = execution_info
             return result_dict
-
+
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
-            return f"Error executing code: {str(e)}"
+            return {"error": f"Error executing code: {str(e)}"}
 
 
 class StatefulPyodideSandboxTool(PyodideSandboxTool):

@@ -223,7 +272,7 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
     A stateful version of the PyodideSandboxTool that maintains state between executions.
     This version preserves variables, imports, and function definitions across multiple tool calls.
     """
-
+
     name: str = "stateful_pyodide_sandbox"
     description: str = """Execute Python code in a stateful sandbox environment using Pyodide.
     This tool maintains state between executions, preserving variables, imports, and function definitions.

@@ -232,41 +281,95 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
     - Maintain variables across multiple calls
     - Develop complex programs step by step
     - Preserve imported libraries and defined functions
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     """
-
+
     def __init__(self, **kwargs: Any) -> None:
         kwargs['stateful'] = True  # Force stateful mode
         super().__init__(**kwargs)
 
 
 # Factory function for creating sandbox tools
-def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseTool:
+def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
     """
     Factory function to create sandbox tools with specified configuration.
-
+
     Note: This tool requires Deno to be installed and available in PATH.
     For installation and optimization, run the bootstrap.sh script.
-
+
     Args:
         stateful: Whether to maintain state between executions (default: False for better performance)
         allow_net: Whether to allow network access (for package installation)
-
+
     Returns:
         Configured sandbox tool instance
-
+
     Raises:
         ImportError: If langchain-sandbox is not installed
         RuntimeError: If Deno is not found in PATH
-
+
     Performance Notes:
     - Stateless mode (default) is faster and avoids session state overhead
    - Run bootstrap.sh script to enable local caching and reduce initialization time
    - Cached wheels reduce package download time from ~4.76s to near-instant
    """
    if stateful:
-        return StatefulPyodideSandboxTool(allow_net=allow_net)
+        return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
     else:
-        return PyodideSandboxTool(stateful=False, allow_net=allow_net)
+        return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
+
+
+class SandboxToolkit(BaseToolkit):
+    tools: List[BaseTool] = []
+
+    @staticmethod
+    def toolkit_config_schema() -> Type[BaseModel]:
+        # Create sample tools to get their schemas
+        sample_tools = [
+            PyodideSandboxTool(),
+            StatefulPyodideSandboxTool()
+        ]
+        selected_tools = {x.name: x.args_schema.model_json_schema() for x in sample_tools}
+
+        return create_model(
+            'sandbox',
+            stateful=(bool, Field(default=False, description="Whether to maintain state between executions")),
+            allow_net=(bool, Field(default=True, description="Whether to allow network access for package installation")),
+            selected_tools=(List[Literal[tuple(selected_tools)]],
+                            Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+
+            __config__=ConfigDict(json_schema_extra={
+                'metadata': {
+                    "label": "Python Sandbox",
+                    "icon_url": "sandbox.svg",
+                    "hidden": False,
+                    "categories": ["code", "execution", "internal_tool"],
+                    "extra_categories": ["python", "pyodide", "sandbox", "code execution"],
+                }
+            })
+        )
+
+    @classmethod
+    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
+        """
+        Get toolkit with sandbox tools.
+
+        Args:
+            stateful: Whether to maintain state between executions
+            allow_net: Whether to allow network access
+            alita_client: Alita client instance for sandbox tools
+            **kwargs: Additional arguments
+        """
+        tools = []
+
+        if stateful:
+            tools.append(StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client))
+        else:
+            tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client))
+
+        return cls(tools=tools)
+
+    def get_tools(self):
+        return self.tools
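Note: a minimal usage sketch of the toolkit surface added above, not part of the wheel itself. The import path follows the file location; the settings values are illustrative assumptions based on the get_tools helper and SandboxToolkit.get_toolkit signatures in this diff.

# Hypothetical sketch (not shipped in the package): exercising the new sandbox API.
from alita_sdk.runtime.tools.sandbox import SandboxToolkit, create_sandbox_tool

# Toolkit path, as the runtime's get_tools() helper builds it.
toolkit = SandboxToolkit.get_toolkit(stateful=False, allow_net=True, alita_client=None)
tools = toolkit.get_tools()  # [PyodideSandboxTool]

# Factory path; stateful=True preserves variables between calls.
tool = create_sandbox_tool(stateful=True)
print(tool._run("x = 40"))  # _run is the sync entry point shown above
print(tool._run("x + 2"))   # stateful session: x is still defined

Both paths require Deno on PATH and the langchain-sandbox package, per the checks in _is_deno_available and _initialize_sandbox.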
alita_sdk/runtime/tools/vectorstore.py
CHANGED

@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )
 
-    def _get_indexed_ids(self,
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self,
+        return self.vector_adapter.get_indexed_ids(self, index_name)
 
     def list_collections(self) -> Any:
         """List all collections in the vectorstore.

@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols
 
-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """

@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def _get_code_indexed_data(self,
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self,
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""

@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
         self,
         documents: Generator[Any, None, None],
-
+        index_name: str,
         get_indexed_data: Callable,
         key_fn: Callable,
         compare_fn: Callable,

@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")

@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #
+                    #         index_name
                     #     )
                     continue
             final_docs.append(document)

@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         return final_docs
 
-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None],
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (

@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.
 
         Args:

@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         from ..langchain.interfaces.llm_processor import add_documents
 
-        self._log_tool_event(message=f"Starting the indexing... Parameters: {
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",

@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 message="Filter for duplicates",
                 tool_name="index_documents")
             # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-            documents = self._reduce_code_duplicates(documents,
+            documents = self._reduce_code_duplicates(documents, index_name)
             self._log_tool_event(
                 message="All the duplicates were filtered out. Proceeding with indexing.",
                 tool_name="index_documents")

@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")
 
-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0

@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
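Note: a hedged sketch of the new calling convention, assuming an already-configured VectorStoreWrapper instance (named `wrapper` here for illustration); only the parameter names and the return shape come from the diff.

# Hypothetical sketch: index_name is now threaded through indexing, duplicate
# reduction, and collection cleanup. `wrapper` is an assumed, configured instance.
from langchain_core.documents import Document

def docs():
    yield Document(
        page_content="def add(a, b):\n    return a + b",
        metadata={"filename": "math_utils.py", "commit_hash": "abc123"},  # illustrative
    )

result = wrapper.index_documents(
    docs(),             # generator of Documents
    index_name="demo",  # stamped into metadata['collection'] of each document
    clean_index=True,   # drops the previous index for this name first
    is_code=True,       # routes through _reduce_code_duplicates
)
print(result)  # {"status": "ok", "message": "successfully indexed N documents"}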
alita_sdk/runtime/tools/vectorstore_base.py
CHANGED

@@ -216,13 +216,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections
 
-    def get_index_meta(self,
-        index_metas = self.vector_adapter.get_index_meta(self,
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
         if len(index_metas) > 1:
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None
 
-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """

@@ -230,13 +230,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.
 
         Args:

@@ -245,21 +245,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(
+            self._clean_index(index_name)
 
-        return self._save_index(list(documents),
+        return self._save_index(list(documents), index_name, progress_step)
 
-    def _clean_index(self,
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
 
-    def _save_index(self, documents: list[Document],
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:

@@ -268,13 +268,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
 
         logger.debug(f"Indexing documents: {documents}")
 
-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0

@@ -308,7 +308,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
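Note: the reworked get_index_meta contract, in the same sketch setup as above; the single-or-None return and the duplicate guard are taken from the hunk.

# Hypothetical continuation of the sketch above: get_index_meta now takes an
# index name, returns one metadata document or None, and raises on duplicates.
meta = wrapper.get_index_meta("demo")
print(meta if meta is not None else "no index_meta recorded for 'demo'")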
alita_sdk/runtime/utils/utils.py
CHANGED

@@ -14,6 +14,7 @@ class IndexerKeywords(Enum):
     INDEX_META_TYPE = 'index_meta'
     INDEX_META_IN_PROGRESS = 'in_progress'
     INDEX_META_COMPLETED = 'completed'
+    INDEX_META_FAILED = 'failed'
 
 # This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
 clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
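Note: a minimal hedged sketch of consuming the new enum member; the import path follows the file location, and the surrounding indexing flow is assumed.

# Hypothetical sketch: reading the new failure status on IndexerKeywords.
from alita_sdk.runtime.utils.utils import IndexerKeywords

meta = {"status": IndexerKeywords.INDEX_META_FAILED.value}  # illustrative record
if meta["status"] == IndexerKeywords.INDEX_META_FAILED.value:
    print("index build failed; consider re-indexing with clean_index=True")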