PyPI - alita-sdk - Versions diffs - 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl - Mend

alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (90)

alita_sdk/cli/agent/__init__.py +5 -0
alita_sdk/cli/agent/default.py +83 -1
alita_sdk/cli/agent_loader.py +6 -9
alita_sdk/cli/agent_ui.py +13 -3
alita_sdk/cli/agents.py +1866 -185
alita_sdk/cli/callbacks.py +96 -25
alita_sdk/cli/cli.py +10 -1
alita_sdk/cli/config.py +151 -9
alita_sdk/cli/context/__init__.py +30 -0
alita_sdk/cli/context/cleanup.py +198 -0
alita_sdk/cli/context/manager.py +731 -0
alita_sdk/cli/context/message.py +285 -0
alita_sdk/cli/context/strategies.py +289 -0
alita_sdk/cli/context/token_estimation.py +127 -0
alita_sdk/cli/input_handler.py +167 -4
alita_sdk/cli/inventory.py +1256 -0
alita_sdk/cli/toolkit.py +14 -17
alita_sdk/cli/toolkit_loader.py +35 -5
alita_sdk/cli/tools/__init__.py +8 -1
alita_sdk/cli/tools/filesystem.py +815 -55
alita_sdk/cli/tools/planning.py +143 -157
alita_sdk/cli/tools/terminal.py +154 -20
alita_sdk/community/__init__.py +64 -8
alita_sdk/community/inventory/__init__.py +224 -0
alita_sdk/community/inventory/config.py +257 -0
alita_sdk/community/inventory/enrichment.py +2137 -0
alita_sdk/community/inventory/extractors.py +1469 -0
alita_sdk/community/inventory/ingestion.py +3172 -0
alita_sdk/community/inventory/knowledge_graph.py +1457 -0
alita_sdk/community/inventory/parsers/__init__.py +218 -0
alita_sdk/community/inventory/parsers/base.py +295 -0
alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
alita_sdk/community/inventory/parsers/go_parser.py +851 -0
alita_sdk/community/inventory/parsers/html_parser.py +389 -0
alita_sdk/community/inventory/parsers/java_parser.py +593 -0
alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
alita_sdk/community/inventory/parsers/python_parser.py +604 -0
alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
alita_sdk/community/inventory/parsers/text_parser.py +322 -0
alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
alita_sdk/community/inventory/patterns/__init__.py +61 -0
alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
alita_sdk/community/inventory/patterns/loader.py +348 -0
alita_sdk/community/inventory/patterns/registry.py +198 -0
alita_sdk/community/inventory/presets.py +535 -0
alita_sdk/community/inventory/retrieval.py +1403 -0
alita_sdk/community/inventory/toolkit.py +169 -0
alita_sdk/community/inventory/visualize.py +1370 -0
alita_sdk/configurations/bitbucket.py +0 -3
alita_sdk/runtime/clients/client.py +84 -26
alita_sdk/runtime/langchain/assistant.py +4 -2
alita_sdk/runtime/langchain/langraph_agent.py +122 -31
alita_sdk/runtime/llms/preloaded.py +2 -6
alita_sdk/runtime/toolkits/__init__.py +2 -0
alita_sdk/runtime/toolkits/application.py +1 -1
alita_sdk/runtime/toolkits/mcp.py +46 -36
alita_sdk/runtime/toolkits/planning.py +171 -0
alita_sdk/runtime/toolkits/tools.py +39 -6
alita_sdk/runtime/tools/llm.py +185 -8
alita_sdk/runtime/tools/planning/__init__.py +36 -0
alita_sdk/runtime/tools/planning/models.py +246 -0
alita_sdk/runtime/tools/planning/wrapper.py +607 -0
alita_sdk/runtime/tools/vectorstore_base.py +41 -6
alita_sdk/runtime/utils/mcp_oauth.py +80 -0
alita_sdk/runtime/utils/streamlit.py +6 -10
alita_sdk/runtime/utils/toolkit_utils.py +19 -4
alita_sdk/tools/__init__.py +54 -27
alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
alita_sdk/tools/base_indexer_toolkit.py +98 -19
alita_sdk/tools/bitbucket/__init__.py +2 -2
alita_sdk/tools/chunkers/__init__.py +3 -1
alita_sdk/tools/chunkers/sematic/markdown_chunker.py +95 -6
alita_sdk/tools/chunkers/universal_chunker.py +269 -0
alita_sdk/tools/code_indexer_toolkit.py +55 -22
alita_sdk/tools/elitea_base.py +86 -21
alita_sdk/tools/jira/__init__.py +1 -1
alita_sdk/tools/jira/api_wrapper.py +91 -40
alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
alita_sdk/tools/qtest/__init__.py +1 -1
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +8 -2
alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/METADATA +2 -1
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/RECORD +90 -50
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/WHEEL +0 -0
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/entry_points.txt +0 -0
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/licenses/LICENSE +0 -0
{alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/top_level.txt +0 -0

alita_sdk/runtime/tools/vectorstore_base.py CHANGED Viewed

@@ -155,15 +155,45 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         if values.get('alita') and values.get('embedding_model'):
             values['embeddings'] = values.get('alita').get_embeddings(values.get('embedding_model'))
-        if values.get('vectorstore_type') and values.get('vectorstore_params') and values.get('embedding_model'):
-            values['vectorstore'] = get_vectorstore(values['vectorstore_type'], values['vectorstore_params'], embedding_func=values['embeddings'])
-            # Initialize the new vector adapter
-            values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
-            logger.debug(f"Vectorstore wrapper initialized: {values}")
+        # Lazy initialization: vectorstore and vector_adapter are initialized on-demand
+        # This prevents errors when using non-index tools with broken/missing vector DB
         return values
+    def _ensure_vectorstore_initialized(self):
+        """Lazily initialize vectorstore and vector_adapter when needed for index operations."""
+        if self.vectorstore is None:
+            if not self.vectorstore_type or not self.vectorstore_params:
+                raise ToolException(
+                    "Vector store is not configured. "
+                    "Please ensure embedding_model and pgvector_configuration are provided."
+                )
+            from ..langchain.interfaces.llm_processor import get_vectorstore
+            try:
+                self.vectorstore = get_vectorstore(
+                    self.vectorstore_type,
+                    self.vectorstore_params,
+                    embedding_func=self.embeddings
+                )
+                logger.debug(f"Vectorstore initialized: {self.vectorstore_type}")
+            except Exception as e:
+                raise ToolException(
+                    f"Failed to initialize vector store: {str(e)}. "
+                    "Check your vector database configuration and connection."
+                )
+        if self.vector_adapter is None:
+            try:
+                self.vector_adapter = VectorStoreAdapterFactory.create_adapter(self.vectorstore_type)
+                logger.debug(f"Vector adapter initialized: {self.vectorstore_type}")
+            except Exception as e:
+                raise ToolException(
+                    f"Failed to initialize vector adapter: {str(e)}"
+                )
     def _init_pg_helper(self, language='english'):
         """Initialize PGVector helper if needed and not already initialized"""
+        self._ensure_vectorstore_initialized()
         if self.pg_helper is None and hasattr(self.vectorstore, 'connection_string') and hasattr(self.vectorstore, 'collection_name'):
             try:
                 from .pgvector_search import PGVectorSearch
@@ -192,6 +222,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         Raises:
             ToolException: When DataException occurs or other search errors
         """
+        self._ensure_vectorstore_initialized()
         try:
             return self.vectorstore.similarity_search_with_score(
                 query, filter=filter, k=k
@@ -210,19 +241,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
     def list_collections(self) -> List[str]:
         """List all collections in the vectorstore."""
+        self._ensure_vectorstore_initialized()
         collections = self.vector_adapter.list_collections(self)
         if not collections:
             return "No indexed collections"
         return collections
     def get_index_meta(self, index_name: str):
+        self._ensure_vectorstore_initialized()
         index_metas = self.vector_adapter.get_index_meta(self, index_name)
         if len(index_metas) > 1:
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None
     def get_indexed_count(self, index_name: str) -> int:
+        self._ensure_vectorstore_initialized()
         from sqlalchemy.orm import Session
         from sqlalchemy import func, or_
@@ -241,6 +274,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
+        self._ensure_vectorstore_initialized()
         self._log_tool_event(
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
@@ -259,6 +293,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             progress_step (int): Step for progress reporting, default is 20.
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
+        self._ensure_vectorstore_initialized()
         if clean_index:
             self._clean_index(index_name)

alita_sdk/runtime/utils/mcp_oauth.py CHANGED Viewed

@@ -162,3 +162,83 @@ def canonical_resource(server_url: str) -> str:
     if resource.endswith("/") and parsed.path in ("", "/"):
         resource = resource[:-1]
     return resource
+def exchange_oauth_token(
+    token_endpoint: str,
+    code: str,
+    redirect_uri: str,
+    client_id: str,
+    client_secret: Optional[str] = None,
+    code_verifier: Optional[str] = None,
+    scope: Optional[str] = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    """
+    Exchange an OAuth authorization code for access tokens.
+    This function performs the OAuth token exchange on the server side,
+    avoiding CORS issues that would occur if done from a browser.
+    Args:
+        token_endpoint: OAuth token endpoint URL
+        code: Authorization code from OAuth provider
+        redirect_uri: Redirect URI used in authorization request
+        client_id: OAuth client ID
+        client_secret: OAuth client secret (optional for public clients)
+        code_verifier: PKCE code verifier (optional)
+        scope: OAuth scope (optional)
+        timeout: Request timeout in seconds
+    Returns:
+        Token response from OAuth provider containing access_token, etc.
+    Raises:
+        requests.RequestException: If the HTTP request fails
+        ValueError: If the token exchange fails
+    """
+    # Build the token request body
+    token_body = {
+        "grant_type": "authorization_code",
+        "code": code,
+        "redirect_uri": redirect_uri,
+        "client_id": client_id,
+    }
+    if client_secret:
+        token_body["client_secret"] = client_secret
+    if code_verifier:
+        token_body["code_verifier"] = code_verifier
+    if scope:
+        token_body["scope"] = scope
+    logger.info(f"MCP OAuth: exchanging code at {token_endpoint}")
+    # Make the token exchange request
+    response = requests.post(
+        token_endpoint,
+        data=token_body,
+        headers={
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Accept": "application/json",
+        },
+        timeout=timeout
+    )
+    # Try to parse as JSON
+    try:
+        token_data = response.json()
+    except Exception:
+        # Some providers return URL-encoded response
+        from urllib.parse import parse_qs
+        token_data = {k: v[0] if len(v) == 1 else v
+                     for k, v in parse_qs(response.text).items()}
+    if response.ok:
+        logger.info("MCP OAuth: token exchange successful")
+        return token_data
+    else:
+        error_msg = token_data.get("error_description") or token_data.get("error") or response.text
+        logger.error(f"MCP OAuth: token exchange failed - {response.status_code}: {error_msg}")
+        raise ValueError(f"Token exchange failed: {error_msg}")

alita_sdk/runtime/utils/streamlit.py CHANGED Viewed

@@ -287,7 +287,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
                     model_config={
                         "temperature": 0.1,
                         "max_tokens": 1000,
-                        "top_p": 1.0
                     }
                 )
             except Exception as e:
@@ -1256,7 +1255,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
                     model_config={
                         "temperature": 0.1,
                         "max_tokens": 1000,
-                        "top_p": 1.0
                     }
                 )
             except Exception as e:
@@ -1387,20 +1385,18 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
                                 help="Maximum number of tokens in the AI response"
                             )
-                            top_p = st.slider(
-                                "Top-p:",
-                                min_value=0.1,
-                                max_value=1.0,
-                                value=1.0,
-                                step=0.1,
-                                help="Controls diversity via nucleus sampling"
+                            reasoning_effort = st.selectbox(
+                                "Reasoning effort:",
+                                options=['null', 'low', 'medium', 'high'],
+                                index=0,
+                                help="Higher effort better reasoning, slower response"
                             )
                         # Create LLM config
                         llm_config = {
                             'max_tokens': max_tokens,
                             'temperature': temperature,
-                            'top_p': top_p
+                            'reasoning_effort': reasoning_effort
                         }
                         col1, col2 = st.columns([3, 1])

alita_sdk/runtime/utils/toolkit_utils.py CHANGED Viewed

@@ -12,7 +12,8 @@ logger = logging.getLogger(__name__)
 def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
                                    llm_client: Any,
-                                   alita_client: Optional[Any] = None) -> List[Any]:
+                                   alita_client: Optional[Any] = None,
+                                   mcp_tokens: Optional[Dict[str, Any]] = None) -> List[Any]:
     """
     Instantiate a toolkit with LLM client support.
@@ -22,7 +23,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
     Args:
         toolkit_config: Configuration dictionary for the toolkit
         llm_client: LLM client instance for tools that need LLM capabilities
-        client: Optional additional client instance
+        alita_client: Optional additional client instance
+        mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
     Returns:
         List of instantiated tools from the toolkit
@@ -60,8 +62,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
         }
         # Get tools using the toolkit configuration with clients
-        # Parameter order: get_tools(tools_list, alita_client, llm, memory_store)
-        tools = get_tools([tool_config], alita_client, llm_client)
+        # Parameter order: get_tools(tools_list, alita_client, llm, memory_store, debug_mode, mcp_tokens)
+        tools = get_tools([tool_config], alita_client, llm_client, mcp_tokens=mcp_tokens)
         if not tools:
             logger.warning(f"No tools returned for toolkit {toolkit_name}")
@@ -73,9 +75,22 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
     except Exception as e:
         # Re-raise McpAuthorizationRequired without logging as error
         from ..utils.mcp_oauth import McpAuthorizationRequired
+        # Check if it's McpAuthorizationRequired directly
         if isinstance(e, McpAuthorizationRequired):
             logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
             raise
+        # Also check for wrapped exceptions
+        if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
+            logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (wrapped)")
+            raise e.__cause__
+        # Check exception class name as fallback
+        if e.__class__.__name__ == 'McpAuthorizationRequired':
+            logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (by name)")
+            raise
         # Log and re-raise other errors
         logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
         raise

alita_sdk/tools/__init__.py CHANGED Viewed

@@ -13,6 +13,30 @@ AVAILABLE_TOOLS = {}
 AVAILABLE_TOOLKITS = {}
 FAILED_IMPORTS = {}
+def _inject_toolkit_id(tool_conf: dict, toolkit_tools) -> None:
+    """Inject `toolkit_id` into tools that expose `api_wrapper.toolkit_id`.
+    This reads 'id' from the tool configuration and, if it is an integer,
+    assigns it to the 'toolkit_id' attribute of the 'api_wrapper' for each
+    tool in 'toolkit_tools' that supports it.
+    Args:
+        tool_conf: Raw tool configuration item from 'tools_list'.
+        toolkit_tools: List of instantiated tools produced by a toolkit.
+    """
+    toolkit_id = tool_conf.get('id')
+    if isinstance(toolkit_id, int):
+        for t in toolkit_tools:
+            if hasattr(t, 'api_wrapper') and hasattr(t.api_wrapper, 'toolkit_id'):
+                t.api_wrapper.toolkit_id = toolkit_id
+    else:
+        logger.error(
+            f"Toolkit ID is missing or not an integer for tool "
+            f"`{tool_conf.get('type', '')}` with name `{tool_conf.get('name', '')}`"
+        )
 def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class_name=None):
     """Safely import a tool module and register available functions/classes."""
     try:
@@ -34,6 +58,7 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class
         FAILED_IMPORTS[tool_name] = str(e)
         logger.debug(f"Failed to import {tool_name}: {e}")
 # Safe imports for all tools
 _safe_import_tool('github', 'github', 'get_tools', 'AlitaGitHubToolkit')
 _safe_import_tool('openapi', 'openapi', 'get_tools')
@@ -90,11 +115,19 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
+# Import community module to trigger community toolkit registration
+try:
+    from alita_sdk import community  # noqa: F401
+    logger.debug("Community toolkits registered successfully")
+except ImportError as e:
+    logger.debug(f"Community module not available: {e}")
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
     for tool in tools_list:
+        toolkit_tools = []
         settings = tool.get('settings')
         # Skip tools without settings early
@@ -116,53 +149,47 @@ def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args,
         # Set pgvector collection schema if present
         if settings.get('pgvector_configuration'):
-            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
+            # Use tool id if available, otherwise use toolkit_name or type as fallback
+            collection_id = tool.get('id') or tool.get('toolkit_name') or tool_type
+            settings['pgvector_configuration']['collection_schema'] = str(collection_id)
         # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
-            tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
-            continue
-        # Handle ADO repos aliases
-        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
+            toolkit_tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+        elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
             try:
-                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
+                toolkit_tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
             except Exception as e:
                 logger.error(f"Error getting ADO repos tools: {e}")
-            continue
-        # Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
-        if tool_type == 'mcp':
+        elif tool_type == 'mcp':
             logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
-            continue
-        # Handle standard tools
-        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
+        elif tool_type == 'planning':
+            logger.debug(f"Skipping planning toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
+        elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
             try:
-                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
+                toolkit_tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
             except Exception as e:
                 logger.error(f"Error getting tools for {tool_type}: {e}")
                 raise ToolException(f"Error getting tools for {tool_type}: {e}")
-            continue
-        # Handle custom modules
-        if settings.get("module"):
+        elif settings.get("module"):
             try:
                 mod = import_module(settings.pop("module"))
                 tkitclass = getattr(mod, settings.pop("class"))
                 get_toolkit_params = settings.copy()
                 get_toolkit_params["name"] = tool.get("name")
                 toolkit = tkitclass.get_toolkit(**get_toolkit_params)
-                tools.extend(toolkit.get_tools())
+                toolkit_tools.extend(toolkit.get_tools())
             except Exception as e:
                 logger.error(f"Error in getting custom toolkit: {e}")
-            continue
-        # Tool not available
-        if tool_type in FAILED_IMPORTS:
-            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-            logger.warning(f"Unknown tool type: {tool_type}")
+            if tool_type in FAILED_IMPORTS:
+                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
+            else:
+                logger.warning(f"Unknown tool type: {tool_type}")
+        #
+        # Always inject toolkit_id to each tool
+        _inject_toolkit_id(tool, toolkit_tools)
+        tools.extend(toolkit_tools)
     return tools

alita_sdk/tools/ado/repos/repos_wrapper.py CHANGED Viewed

@@ -111,8 +111,7 @@ class ArgsSchema(Enum):
             Field(
                 description=(
                     "Branch to be used for read file operation."
-                ),
-                default=None
+                )
             ),
         )
     )

alita_sdk/tools/base_indexer_toolkit.py CHANGED Viewed

@@ -2,6 +2,7 @@ import copy
 import json
 import logging
 import time
+from enum import Enum
 from typing import Any, Optional, List, Dict, Generator
 from langchain_core.callbacks import dispatch_custom_event
@@ -16,7 +17,17 @@ from ..runtime.utils.utils import IndexerKeywords
 logger = logging.getLogger(__name__)
-DEFAULT_CUT_OFF = 0.2
+DEFAULT_CUT_OFF = 0.1
+INDEX_META_UPDATE_INTERVAL = 600.0
+class IndexTools(str, Enum):
+    """Enum for index-related tool names."""
+    INDEX_DATA = "index_data"
+    SEARCH_INDEX = "search_index"
+    STEPBACK_SEARCH_INDEX = "stepback_search_index"
+    STEPBACK_SUMMARY_INDEX = "stepback_summary_index"
+    REMOVE_INDEX = "remove_index"
+    LIST_COLLECTIONS = "list_collections"
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
@@ -157,6 +168,16 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        # Store the interval in a private dict to avoid Pydantic field errors
+        if not hasattr(self, "_index_meta_config"):
+            self._index_meta_config: Dict[str, Any] = {}
+        self._index_meta_config["update_interval"] = kwargs.get(
+            "meta_update_interval",
+            INDEX_META_UPDATE_INTERVAL,
+        )
         result = {"count": 0}
         #
         try:
@@ -164,6 +185,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 self._clean_index(index_name)
             #
             self.index_meta_init(index_name, kwargs)
+            self._emit_index_event(index_name)
             #
             self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
             self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -179,18 +201,26 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
             #
             results_count = result["count"]
-            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            # Final update should always be forced
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True)
             self._emit_index_event(index_name)
             #
             return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
             else "no new documents to index"}
         except Exception as e:
-            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
-            self._emit_index_event(index_name, error=str(e))
+            # Do maximum effort at least send custom event for supposed changed status
+            msg = str(e)
+            try:
+                # Error update should also be forced
+                self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True)
+            except Exception as ie:
+                logger.error(f"Failed to update index meta status to FAILED for index '{index_name}': {ie}")
+                msg = f"{msg}; additionally failed to update index meta status to FAILED: {ie}"
+            self._emit_index_event(index_name, error=msg)
             raise e
     def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
+        self._ensure_vectorstore_initialized()
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
@@ -243,6 +273,11 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             logger.debug(msg)
             self._log_tool_event(msg)
             result["count"] += dependent_docs_counter
+            # After each base document, try a non-forced meta update; throttling handled inside index_meta_update
+            try:
+                self.index_meta_update(index_name, IndexerKeywords.INDEX_META_IN_PROGRESS.value, result["count"], update_force=False)
+            except Exception as exc:  # best-effort, do not break indexing
+                logger.warning(f"Failed to update index meta during indexing process for index '{index_name}': {exc}")
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
@@ -308,6 +343,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             log_msg: str = "Verification of documents to index started"
     ) -> Generator[Document, None, None]:
         """Generic duplicate reduction logic for documents."""
+        self._ensure_vectorstore_initialized()
         self._log_tool_event(log_msg, tool_name="index_documents")
         indexed_data = self._get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
@@ -463,6 +499,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+        self._ensure_vectorstore_initialized()
         index_meta = super().get_index_meta(index_name)
         if not index_meta:
             self._log_tool_event(
@@ -482,12 +519,53 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "updated_on": created_on,
                 "task_id": None,
                 "conversation_id": None,
+                "toolkit_id": self.toolkit_id,
             }
             metadata["history"] = json.dumps([metadata])
             index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
             add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
-    def index_meta_update(self, index_name: str, state: str, result: int):
+    def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None):
+        """Update `index_meta` document with optional time-based throttling.
+        Args:
+            index_name: Index name to update meta for.
+            state: New state value for the `index_meta` record.
+            result: Number of processed documents to store in the `updated` field.
+            update_force: If `True`, perform the update unconditionally, ignoring throttling.
+                          If `False`, perform the update only when the effective time interval has passed.
+            interval: Optional custom interval (in seconds) for this call when `update_force` is `False`.
+                      If `None`, falls back to the value stored in `self._index_meta_config["update_interval"]`
+                      if present, otherwise uses `INDEX_META_UPDATE_INTERVAL`.
+        """
+        self._ensure_vectorstore_initialized()
+        if not hasattr(self, "_index_meta_last_update_time"):
+            self._index_meta_last_update_time: Dict[str, float] = {}
+        if not update_force:
+            # Resolve effective interval:
+            # 1\) explicit arg
+            # 2\) value from `_index_meta_config`
+            # 3\) default constant
+            cfg_interval = None
+            if hasattr(self, "_index_meta_config"):
+                cfg_interval = self._index_meta_config.get("update_interval")
+            eff_interval = (
+                interval
+                if interval is not None
+                else (cfg_interval if cfg_interval is not None else INDEX_META_UPDATE_INTERVAL)
+            )
+            last_time = self._index_meta_last_update_time.get(index_name)
+            now = time.time()
+            if last_time is not None and (now - last_time) < eff_interval:
+                return
+            self._index_meta_last_update_time[index_name] = now
+        else:
+            # For forced updates, always refresh last update time
+            self._index_meta_last_update_time[index_name] = time.time()
         index_meta_raw = super().get_index_meta(index_name)
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
@@ -545,11 +623,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         event_data = {
             "id": index_meta.get("id"),
             "index_name": index_name,
-            "state": metadata.get("state"),
+            "state": "failed" if error is not None else metadata.get("state"),
             "error": error,
             "reindex": is_reindex,
             "indexed": metadata.get("indexed", 0),
             "updated": metadata.get("updated", 0),
+            "toolkit_id": metadata.get("toolkit_id"),
         }
         # Emit the event
@@ -572,8 +651,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """
         return [
             {
-                "name": "index_data",
-                "mode": "index_data",
+                "name": IndexTools.INDEX_DATA.value,
+                "mode": IndexTools.INDEX_DATA.value,
                 "ref": self.index_data,
                 "description": "Loads data to index.",
                 "args_schema": create_model(
@@ -583,36 +662,36 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 )
             },
             {
-                "name": "search_index",
-                "mode": "search_index",
+                "name": IndexTools.SEARCH_INDEX.value,
+                "mode": IndexTools.SEARCH_INDEX.value,
                 "ref": self.search_index,
                 "description": self.search_index.__doc__,
                 "args_schema": BaseSearchParams
             },
             {
-                "name": "stepback_search_index",
-                "mode": "stepback_search_index",
+                "name": IndexTools.STEPBACK_SEARCH_INDEX.value,
+                "mode": IndexTools.STEPBACK_SEARCH_INDEX.value,
                 "ref": self.stepback_search_index,
                 "description": self.stepback_search_index.__doc__,
                 "args_schema": BaseStepbackSearchParams
             },
             {
-                "name": "stepback_summary_index",
-                "mode": "stepback_summary_index",
+                "name": IndexTools.STEPBACK_SUMMARY_INDEX.value,
+                "mode": IndexTools.STEPBACK_SUMMARY_INDEX.value,
                 "ref": self.stepback_summary_index,
                 "description": self.stepback_summary_index.__doc__,
                 "args_schema": BaseStepbackSearchParams
             },
             {
-                "name": "remove_index",
-                "mode": "remove_index",
+                "name": IndexTools.REMOVE_INDEX.value,
+                "mode": IndexTools.REMOVE_INDEX.value,
                 "ref": self.remove_index,
                 "description": self.remove_index.__doc__,
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "list_collections",
-                "mode": "list_collections",
+                "name": IndexTools.LIST_COLLECTIONS.value,
+                "mode": IndexTools.LIST_COLLECTIONS.value,
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
                 # No parameters

alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl