alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of alita-sdk has been flagged as a potentially problematic release.

Files changed (34)
  1. alita_sdk/cli/agent_loader.py +27 -6
  2. alita_sdk/cli/agents.py +10 -1
  3. alita_sdk/cli/tools/filesystem.py +95 -9
  4. alita_sdk/runtime/clients/client.py +40 -21
  5. alita_sdk/runtime/langchain/constants.py +3 -1
  6. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  7. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  8. alita_sdk/runtime/langchain/langraph_agent.py +2 -1
  9. alita_sdk/runtime/toolkits/mcp.py +68 -62
  10. alita_sdk/runtime/toolkits/planning.py +3 -1
  11. alita_sdk/runtime/toolkits/tools.py +37 -18
  12. alita_sdk/runtime/tools/artifact.py +46 -17
  13. alita_sdk/runtime/tools/function.py +2 -1
  14. alita_sdk/runtime/tools/llm.py +135 -24
  15. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  16. alita_sdk/runtime/tools/vectorstore_base.py +3 -3
  17. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  18. alita_sdk/runtime/utils/mcp_client.py +465 -0
  19. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  20. alita_sdk/runtime/utils/toolkit_utils.py +7 -13
  21. alita_sdk/tools/base_indexer_toolkit.py +1 -1
  22. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  23. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
  24. alita_sdk/tools/chunkers/universal_chunker.py +1 -0
  25. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  26. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  27. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  28. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
  29. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +1 -1
  30. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +34 -32
  31. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  32. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  33. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  34. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agent_loader.py CHANGED
@@ -8,6 +8,7 @@ import json
  import yaml
  from pathlib import Path
  from typing import Dict, Any
+ from pydantic import SecretStr
 
  from .config import substitute_env_vars
 
@@ -85,6 +86,25 @@ def load_agent_definition(file_path: str) -> Dict[str, Any]:
      raise ValueError(f"Unsupported file format: {path.suffix}")
 
 
+ def unwrap_secrets(obj: Any) -> Any:
+     """
+     Recursively unwrap pydantic SecretStr values into plain strings.
+
+     Handles nested dicts, lists, tuples, and sets while preserving structure.
+     """
+     if isinstance(obj, SecretStr):
+         return obj.get_secret_value()
+     if isinstance(obj, dict):
+         return {k: unwrap_secrets(v) for k, v in obj.items()}
+     if isinstance(obj, list):
+         return [unwrap_secrets(v) for v in obj]
+     if isinstance(obj, tuple):
+         return tuple(unwrap_secrets(v) for v in obj)
+     if isinstance(obj, set):
+         return {unwrap_secrets(v) for v in obj}
+     return obj
+
+
  def build_agent_data_structure(agent_def: Dict[str, Any], toolkit_configs: list,
                                 llm_model: str, llm_temperature: float, llm_max_tokens: int) -> Dict[str, Any]:
      """
@@ -128,12 +148,13 @@ def build_agent_data_structure(agent_def: Dict[str, Any], toolkit_configs: list,
                  if hasattr(toolkit_class, 'toolkit_config_schema'):
                      schema = toolkit_class.toolkit_config_schema()
                      validated_config = schema(**toolkit_config)
-                     # validated_dict = validated_config.model_dump()
-                     # validated_dict['type'] = toolkit_config.get('type')
-                     # validated_dict['toolkit_name'] = toolkit_config.get('toolkit_name')
-                     # validated_toolkit_configs.append(validated_dict)
-
-                     validated_toolkit_configs.append(toolkit_config)
+                     # Use python mode so SecretStr remains as objects, then unwrap recursively
+                     validated_dict = unwrap_secrets(validated_config.model_dump(mode="python"))
+                     validated_dict['type'] = toolkit_config.get('type')
+                     validated_dict['toolkit_name'] = toolkit_config.get('toolkit_name')
+                     validated_toolkit_configs.append(validated_dict)
+                 else:
+                     validated_toolkit_configs.append(toolkit_config)
              else:
                  validated_toolkit_configs.append(toolkit_config)
          except Exception:
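For context, a minimal sketch (not part of the package) of what the new unwrap_secrets helper from this diff does to a validated toolkit config; GithubConfig is an illustrative schema, not one shipped by the SDK:

from pydantic import BaseModel, SecretStr

class GithubConfig(BaseModel):  # illustrative schema, not the SDK's
    repository: str
    token: SecretStr

validated = GithubConfig(repository="org/repo", token="ghp_example")
# mode="python" keeps SecretStr objects intact; unwrap_secrets (defined above)
# then converts them back to plain strings for serialization.
plain = unwrap_secrets(validated.model_dump(mode="python"))
assert plain == {"repository": "org/repo", "token": "ghp_example"}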
alita_sdk/cli/agents.py CHANGED
@@ -1358,12 +1358,14 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
                help='Grant agent filesystem access to this directory')
  @click.option('--verbose', '-v', type=click.Choice(['quiet', 'default', 'debug']), default='default',
                help='Output verbosity level: quiet (final output only), default (tool calls + outputs), debug (all including LLM calls)')
+ @click.option('--recursion-limit', type=int, default=50,
+               help='Maximum number of tool execution steps per turn')
  @click.pass_context
  def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                 toolkit_config: tuple, inventory_path: Optional[str], thread_id: Optional[str],
                 model: Optional[str], temperature: Optional[float],
                 max_tokens: Optional[int], work_dir: Optional[str],
-                verbose: str):
+                verbose: str, recursion_limit: Optional[int]):
      """Start interactive chat with an agent.
 
      \b
@@ -2615,6 +2617,11 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
          invoke_config = RunnableConfig(
              configurable={"thread_id": current_session_id}
          )
+         # always proceed with continuation enabled
+         invoke_config["should_continue"] = True
+         # Set recursion limit for tool executions
+         logger.debug(f"Setting tool steps limit to {recursion_limit}")
+         invoke_config["recursion_limit"] = recursion_limit
          cli_callback = None
          if show_verbose:
              cli_callback = create_cli_callback(verbose=True, debug=debug_mode)
@@ -2718,6 +2725,8 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
          invoke_config = RunnableConfig(
              configurable={"thread_id": continuation_thread_id}
          )
+         invoke_config["should_continue"] = True
+         invoke_config["recursion_limit"] = recursion_limit
          if cli_callback:
              invoke_config["callbacks"] = [cli_callback]
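A minimal sketch of what the new --recursion-limit flag feeds into: the value lands on the standard recursion_limit key of the RunnableConfig passed to the compiled graph, while should_continue is an SDK-specific key read by the agent graph (the commented invoke call is a placeholder):

from langchain_core.runnables import RunnableConfig

recursion_limit = 50  # value parsed from --recursion-limit
invoke_config = RunnableConfig(configurable={"thread_id": "chat-1"})
invoke_config["should_continue"] = True             # SDK-specific continuation flag
invoke_config["recursion_limit"] = recursion_limit  # LangGraph's per-run step cap
# result = agent.invoke({"input": "..."}, config=invoke_config)  # `agent`: a compiled graph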
 
alita_sdk/cli/tools/filesystem.py CHANGED
@@ -135,6 +135,7 @@ class ListDirectoryInput(BaseModel):
      path: str = Field(default=".", description="Relative path to the directory to list")
      include_sizes: bool = Field(default=False, description="Include file sizes in the output")
      sort_by: str = Field(default="name", description="Sort by 'name' or 'size'")
+     max_results: Optional[int] = Field(default=200, description="Maximum number of entries to return. Default is 200 to prevent context overflow.")
 
 
  class DirectoryTreeInput(BaseModel):
@@ -181,6 +182,8 @@ class FileSystemTool(BaseTool):
      """Base class for filesystem tools with directory restriction."""
      base_directory: str  # Primary directory (for backward compatibility)
      allowed_directories: List[str] = []  # Additional allowed directories
+     _basename_collision_detected: bool = False  # Cache for collision detection
+     _basename_collision_checked: bool = False  # Whether we've checked for collisions
 
      def _get_all_allowed_directories(self) -> List[Path]:
          """Get all allowed directories as resolved Paths."""
@@ -191,6 +194,56 @@
              dirs.append(resolved)
          return dirs
 
+     def _check_basename_collision(self) -> bool:
+         """Check if multiple allowed directories have the same basename."""
+         if self._basename_collision_checked:
+             return self._basename_collision_detected
+
+         allowed_dirs = self._get_all_allowed_directories()
+         basenames = [d.name for d in allowed_dirs]
+         self._basename_collision_detected = len(basenames) != len(set(basenames))
+         self._basename_collision_checked = True
+         return self._basename_collision_detected
+
+     def _get_relative_path_from_allowed_dirs(self, absolute_path: Path) -> tuple:
+         """Get relative path and directory name for a file in allowed directories.
+
+         Args:
+             absolute_path: Absolute path to the file
+
+         Returns:
+             Tuple of (relative_path, directory_name)
+
+         Raises:
+             ValueError: If path is not within any allowed directory
+         """
+         allowed_dirs = self._get_all_allowed_directories()
+
+         # Find which allowed directory contains this path
+         for base in allowed_dirs:
+             try:
+                 rel_path = absolute_path.relative_to(base)
+
+                 # Determine directory name for prefix
+                 if self._check_basename_collision():
+                     # Use parent/basename format to disambiguate
+                     dir_name = f"{base.parent.name}/{base.name}"
+                 else:
+                     # Use just basename
+                     dir_name = base.name
+
+                 return (str(rel_path), dir_name)
+             except ValueError:
+                 continue
+
+         # Path not in any allowed directory
+         allowed_paths = [str(d) for d in allowed_dirs]
+         raise ValueError(
+             f"Path '{absolute_path}' is not within any allowed directory.\n"
+             f"Allowed directories: {allowed_paths}\n"
+             f"Attempted path: {absolute_path}"
+         )
+
      def _resolve_path(self, relative_path: str) -> Path:
          """
          Resolve and validate a path within any of the allowed directories.
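A self-contained sketch of the disambiguation rule above: when two allowed roots share a basename, listings are prefixed with parent/basename so they can be told apart (paths are illustrative, not from the SDK):

from pathlib import Path

allowed = [Path("/work/projA/docs"), Path("/work/projB/docs")]
collision = len({d.name for d in allowed}) != len(allowed)  # True: both end in "docs"

def display_prefix(base: Path) -> str:
    # Mirrors _get_relative_path_from_allowed_dirs: use the bare basename
    # only when it is unambiguous.
    return f"{base.parent.name}/{base.name}" if collision else base.name

print(display_prefix(allowed[0]))  # projA/docs
print(display_prefix(allowed[1]))  # projB/docs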
@@ -602,7 +655,7 @@ class ListDirectoryTool(FileSystemTool):
          "Consider using filesystem_directory_tree with max_depth=1 for hierarchical overview",
      ]
 
-     def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name") -> str:
+     def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name", max_results: Optional[int] = 200) -> str:
          """List directory contents."""
          try:
              target = self._resolve_path(path)
@@ -618,7 +671,8 @@
                  entry_info = {
                      'name': entry.name,
                      'is_dir': entry.is_dir(),
-                     'size': entry.stat().st_size if entry.is_file() else 0
+                     'size': entry.stat().st_size if entry.is_file() else 0,
+                     'path': entry
                  }
                  entries.append(entry_info)
 
@@ -628,6 +682,18 @@
              else:
                  entries.sort(key=lambda x: x['name'].lower())
 
+             # Apply limit
+             total_count = len(entries)
+             truncated = False
+             if max_results is not None and total_count > max_results:
+                 entries = entries[:max_results]
+                 truncated = True
+
+             # Get directory name for multi-directory configs
+             allowed_dirs = self._get_all_allowed_directories()
+             has_multiple_dirs = len(allowed_dirs) > 1
+             _, dir_name = self._get_relative_path_from_allowed_dirs(target) if has_multiple_dirs else ("", "")
+
              # Format output
              lines = []
              total_files = 0
@@ -636,7 +702,12 @@
              for entry in entries:
                  prefix = "[DIR] " if entry['is_dir'] else "[FILE]"
-                 name = entry['name']
+
+                 # Add directory prefix for multi-directory configs
+                 if has_multiple_dirs:
+                     name = f"{dir_name}/{entry['name']}"
+                 else:
+                     name = entry['name']
 
                  if include_sizes and not entry['is_dir']:
                      size_str = self._format_size(entry['size'])
@@ -665,6 +736,10 @@
                  summary += f"\nCombined size: {self._format_size(total_size)}"
              result += summary
 
+             if truncated:
+                 result += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {len(entries)} of {total_count} entries from '{dir_name if has_multiple_dirs else path}' (max_results={max_results})"
+                 result += "\n To see more: increase max_results or list a specific subdirectory"
+
              # Add note about how to access files
              result += "\n\nNote: Access files using paths shown above (e.g., 'agents/file.md' for items in agents/ directory)"
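The truncation logic boils down to a simple cap-and-report pattern; a standalone sketch with made-up sample data, not the tool's real entries:

entries = [f"file_{i:03}.txt" for i in range(500)]  # pretend directory listing
max_results = 200

total_count = len(entries)
shown = entries[:max_results] if max_results is not None else entries
listing = "\n".join(shown)
if len(shown) < total_count:
    # Tell the model how much was cut and how to see the rest.
    listing += (f"\n\n⚠️ OUTPUT TRUNCATED: Showing {len(shown)} of {total_count} entries "
                f"(max_results={max_results})")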
 
@@ -818,23 +893,34 @@ class SearchFilesTool(FileSystemTool):
              else:
                  matches = sorted(all_matches)
 
-             # Format results
-             base = Path(self.base_directory).resolve()
+             # Format results with directory prefixes for multi-directory configs
+             allowed_dirs = self._get_all_allowed_directories()
+             has_multiple_dirs = len(allowed_dirs) > 1
              results = []
+             search_dir_name = None
 
              for match in matches:
-                 rel_path = match.relative_to(base)
+                 if has_multiple_dirs:
+                     rel_path_str, dir_name = self._get_relative_path_from_allowed_dirs(match)
+                     display_path = f"{dir_name}/{rel_path_str}"
+                     if search_dir_name is None:
+                         search_dir_name = dir_name
+                 else:
+                     rel_path_str = str(match.relative_to(Path(self.base_directory).resolve()))
+                     display_path = rel_path_str
+
                  if match.is_dir():
-                     results.append(f"📁 {rel_path}/")
+                     results.append(f"📁 {display_path}/")
                  else:
                      size = self._format_size(match.stat().st_size)
-                     results.append(f"📄 {rel_path} ({size})")
+                     results.append(f"📄 {display_path} ({size})")
 
              header = f"Found {total_count} matches for '{pattern}':\n\n"
              output = header + "\n".join(results)
 
              if truncated:
-                 output += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {max_results} of {total_count} results (max_results={max_results})"
+                 location_str = f"from '{search_dir_name}' " if search_dir_name else ""
+                 output += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {max_results} of {total_count} results {location_str}(max_results={max_results})"
                  output += "\n To see more: increase max_results or use a more specific pattern"
 
              return output
alita_sdk/runtime/clients/client.py CHANGED
@@ -21,6 +21,7 @@ from .datasource import AlitaDataSource
  from .artifact import Artifact
  from ..langchain.chat_message_template import Jinja2TemplatedChatMessagesTemplate
  from ..utils.utils import TOOLKIT_SPLITTER
+ from ..utils.mcp_oauth import McpAuthorizationRequired
  from ...tools import get_available_toolkit_models
  from ...tools.base_indexer_toolkit import IndexTools
 
@@ -469,11 +470,44 @@ class AlitaClient:
          return self._process_requst(data)
 
      def create_artifact(self, bucket_name, artifact_name, artifact_data):
+         # Sanitize filename to prevent regex errors during indexing
+         sanitized_name, was_modified = self._sanitize_artifact_name(artifact_name)
+         if was_modified:
+             logger.warning(f"Artifact filename sanitized: '{artifact_name}' -> '{sanitized_name}'")
+
          url = f'{self.artifacts_url}/{bucket_name.lower()}'
          data = requests.post(url, headers=self.headers, files={
-             'file': (artifact_name, artifact_data)
+             'file': (sanitized_name, artifact_data)
          }, verify=False)
          return self._process_requst(data)
+
+     @staticmethod
+     def _sanitize_artifact_name(filename: str) -> tuple:
+         """Sanitize filename for safe storage and regex pattern matching."""
+         import re
+         from pathlib import Path
+
+         if not filename or not filename.strip():
+             return "unnamed_file", True
+
+         original = filename
+         path_obj = Path(filename)
+         name = path_obj.stem
+         extension = path_obj.suffix
+
+         # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
+         sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
+         sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
+         sanitized_name = sanitized_name.strip('-').strip()
+
+         if not sanitized_name:
+             sanitized_name = "file"
+
+         if extension:
+             extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
+
+         sanitized = sanitized_name + extension
+         return sanitized, (sanitized != original)
 
      def download_artifact(self, bucket_name, artifact_name):
          url = f'{self.artifact_url}/{bucket_name.lower()}/{artifact_name}'
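To illustrate the whitelist above, the same two regexes applied outside the class (the example filenames are made up):

import re
from pathlib import Path

def sanitize(filename: str) -> str:
    p = Path(filename)
    name = re.sub(r'[^\w\s-]', '', p.stem, flags=re.UNICODE)      # drop anything outside the whitelist
    name = re.sub(r'[-\s]+', '-', name).strip('-').strip() or "file"
    ext = re.sub(r'[^\w.-]', '', p.suffix, flags=re.UNICODE)
    return name + ext

print(sanitize("report (final)*.pdf"))  # report-final.pdf
print(sanitize("???"))                  # file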
@@ -814,26 +848,12 @@
 
          # Instantiate the toolkit with client and LLM support
          try:
-             tools = instantiate_toolkit_with_client(toolkit_config, llm, self, mcp_tokens=mcp_tokens)
-         except Exception as toolkit_error:
+             tools = instantiate_toolkit_with_client(toolkit_config, llm, self, mcp_tokens=mcp_tokens, use_prefix=False)
+         except McpAuthorizationRequired:
              # Re-raise McpAuthorizationRequired to allow proper handling upstream
-             from ..utils.mcp_oauth import McpAuthorizationRequired
-
-             # Check if it's McpAuthorizationRequired directly
-             if isinstance(toolkit_error, McpAuthorizationRequired):
-                 logger.info(f"McpAuthorizationRequired detected, re-raising")
-                 raise
-
-             # Also check for wrapped exceptions (e.g., from asyncio)
-             if hasattr(toolkit_error, '__cause__') and isinstance(toolkit_error.__cause__, McpAuthorizationRequired):
-                 logger.info(f"Wrapped McpAuthorizationRequired detected, re-raising cause")
-                 raise toolkit_error.__cause__
-
-             # Check exception class name as fallback (in case of module reload issues)
-             if toolkit_error.__class__.__name__ == 'McpAuthorizationRequired':
-                 logger.info(f"McpAuthorizationRequired detected by name, re-raising")
-                 raise
-
+             logger.info(f"McpAuthorizationRequired detected, re-raising")
+             raise
+         except Exception as toolkit_error:
              # For other errors, return error response
              return {
                  "success": False,
@@ -1068,7 +1088,6 @@
 
          except Exception as e:
              # Re-raise McpAuthorizationRequired to allow proper handling upstream
-             from ..utils.mcp_oauth import McpAuthorizationRequired
              if isinstance(e, McpAuthorizationRequired):
                  raise
              logger = logging.getLogger(__name__)
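The simplification in both hunks relies on except clauses being matched top to bottom; a minimal standalone sketch with stand-in classes, not the SDK's actual imports:

class McpAuthorizationRequired(Exception):  # stand-in for ..utils.mcp_oauth
    pass

def instantiate_toolkit(config):
    # Hypothetical loader that needs the user to authorize first.
    raise McpAuthorizationRequired("user must complete the OAuth flow")

def load_toolkit(config):
    try:
        return instantiate_toolkit(config)
    except McpAuthorizationRequired:
        raise                                # specific clause first: handled upstream by the OAuth flow
    except Exception as toolkit_error:
        return {"success": False, "error": str(toolkit_error)}

try:
    load_toolkit({})
except McpAuthorizationRequired as exc:
    print(f"authorization required: {exc}")  # caller redirects the user to OAuth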
alita_sdk/runtime/langchain/constants.py CHANGED
@@ -84,4 +84,6 @@ DEFAULT_MULTIMODAL_PROMPT = """
  ELITEA_RS = "elitea_response"
  PRINTER = "printer"
  PRINTER_NODE_RS = "printer_output"
- PRINTER_COMPLETED_STATE = "PRINTER_COMPLETED"
+ PRINTER_COMPLETED_STATE = "PRINTER_COMPLETED"
+
+ LOADER_MAX_TOKENS_DEFAULT = 512
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py CHANGED
@@ -21,14 +21,16 @@ from openpyxl import load_workbook
  from xlrd import open_workbook
  from langchain_core.documents import Document
  from .AlitaTableLoader import AlitaTableLoader
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
 
  cell_delimiter = " | "
 
  class AlitaExcelLoader(AlitaTableLoader):
-     excel_by_sheets: bool = False
      sheet_name: str = None
-     return_type: str = 'str'
      file_name: str = None
+     max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
+     add_header_to_chunks: bool = False
+     header_row_number: int = 1
 
      def __init__(self, **kwargs):
          if not kwargs.get('file_path'):
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
          else:
              self.file_name = kwargs.get('file_path')
          super().__init__(**kwargs)
-         self.excel_by_sheets = kwargs.get('excel_by_sheets')
-         self.return_type = kwargs.get('return_type')
          self.sheet_name = kwargs.get('sheet_name')
+         # Set and validate chunking parameters only once
+         self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
+         self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
+         header_row_number = kwargs.get('header_row_number', 1)
+         # Validate header_row_number
+         try:
+             header_row_number = int(header_row_number)
+             if header_row_number > 0:
+                 self.header_row_number = header_row_number
+             else:
+                 self.header_row_number = 1
+                 self.add_header_to_chunks = False
+         except (ValueError, TypeError):
+             self.header_row_number = 1
+             self.add_header_to_chunks = False
 
      def get_content(self):
          try:
@@ -64,59 +79,32 @@
          Reads .xlsx files using openpyxl.
          """
          workbook = load_workbook(self.file_path, data_only=True)  # `data_only=True` ensures we get cell values, not formulas
-
+         sheets = workbook.sheetnames
          if self.sheet_name:
-             # If a specific sheet name is provided, parse only that sheet
-             if self.sheet_name in workbook.sheetnames:
+             if self.sheet_name in sheets:
                  sheet_content = self.parse_sheet(workbook[self.sheet_name])
-                 return sheet_content
              else:
-                 raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
-         elif self.excel_by_sheets:
-             # Parse each sheet individually and return as a dictionary
-             result = {}
-             for sheet_name in workbook.sheetnames:
-                 sheet_content = self.parse_sheet(workbook[sheet_name])
-                 result[sheet_name] = sheet_content
-             return result
+                 sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
+             return {self.sheet_name: sheet_content}
          else:
-             # Combine all sheets into a single string result
-             result = []
-             for sheet_name in workbook.sheetnames:
-                 sheet_content = self.parse_sheet(workbook[sheet_name])
-                 result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-             return "\n\n".join(result)
+             # Dictionary comprehension for all sheets
+             return {name: self.parse_sheet(workbook[name]) for name in sheets}
 
      def _read_xls(self):
          """
          Reads .xls files using xlrd.
          """
          workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
-
+         sheets = workbook.sheet_names()
          if self.sheet_name:
-             # If a specific sheet name is provided, parse only that sheet
-             if self.sheet_name in workbook.sheet_names():
+             if self.sheet_name in sheets:
                  sheet = workbook.sheet_by_name(self.sheet_name)
-                 sheet_content = self.parse_sheet_xls(sheet)
-                 return sheet_content
+                 return {self.sheet_name: self.parse_sheet_xls(sheet)}
              else:
-                 raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
-         elif self.excel_by_sheets:
-             # Parse each sheet individually and return as a dictionary
-             result = {}
-             for sheet_name in workbook.sheet_names():
-                 sheet = workbook.sheet_by_name(sheet_name)
-                 sheet_content = self.parse_sheet_xls(sheet)
-                 result[sheet_name] = sheet_content
-             return result
+                 return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
          else:
-             # Combine all sheets into a single string result
-             result = []
-             for sheet_name in workbook.sheet_names():
-                 sheet = workbook.sheet_by_name(sheet_name)
-                 sheet_content = self.parse_sheet_xls(sheet)
-                 result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-             return "\n\n".join(result)
+             # Dictionary comprehension for all sheets
+             return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}
 
      def parse_sheet(self, sheet):
          """
@@ -170,34 +158,89 @@
          # Format the sheet content based on the return type
          return self._format_sheet_content(sheet_content)
 
-     def _format_sheet_content(self, sheet_content):
+     def _format_sheet_content(self, rows):
          """
-         Formats the sheet content based on the return type.
+         Specification:
+         Formats a list of sheet rows into a list of string chunks according to the following rules:
+         1. If max_tokens < 1, returns a single chunk (list of one string) with all rows joined by a newline ('\n').
+            - If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended as the first line.
+         2. If max_tokens >= 1:
+            a. Each chunk is a string containing one or more rows, separated by newlines ('\n'), such that the total token count (as measured by tiktoken) does not exceed max_tokens.
+            b. If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended once at the top of each chunk (not before every row).
+            c. If a single row exceeds max_tokens, it is placed in its own chunk without splitting, with the header prepended if applicable.
+         3. Returns: List[str], where each string is a chunk ready for further processing.
          """
-         if self.return_type == 'dict':
-             # Convert to a list of dictionaries (each row is a dictionary)
-             headers = sheet_content[0].split(cell_delimiter) if sheet_content else []
-             data_rows = sheet_content[1:] if len(sheet_content) > 1 else []
-             return [dict(zip(headers, row.split(cell_delimiter))) for row in data_rows]
-         elif self.return_type == 'csv':
-             # Return as CSV (newline-separated rows, comma-separated values)
-             return "\n".join([",".join(row.split(cell_delimiter)) for row in sheet_content])
-         else:
-             # Default: Return as plain text (newline-separated rows, pipe-separated values)
-             return "\n".join(sheet_content)
+         import tiktoken
+         encoding = tiktoken.get_encoding('cl100k_base')
+
+         # --- Inner functions ---
+         def count_tokens(text):
+             """Count tokens in text using tiktoken encoding."""
+             return len(encoding.encode(text))
+
+         def finalize_chunk(chunk_rows):
+             """Join rows for a chunk, prepending header if needed."""
+             if self.add_header_to_chunks and header:
+                 return '\n'.join([header] + chunk_rows)
+             else:
+                 return '\n'.join(chunk_rows)
+         # --- End inner functions ---
+
+         # If max_tokens < 1, return all rows as a single chunk
+         if self.max_tokens < 1:
+             return ['\n'.join(rows)]
+
+         # Extract header if needed
+         header = None
+         if self.add_header_to_chunks and rows:
+             header_idx = self.header_row_number - 1
+             header = rows.pop(header_idx)
+
+         chunks = []  # List to store final chunks
+         current_chunk = []  # Accumulate rows for the current chunk
+         current_tokens = 0  # Token count for the current chunk
+
+         for row in rows:
+             row_tokens = count_tokens(row)
+             # If row itself exceeds max_tokens, flush current chunk and add row as its own chunk (with header if needed)
+             if row_tokens > self.max_tokens:
+                 if current_chunk:
+                     chunks.append(finalize_chunk(current_chunk))
+                     current_chunk = []
+                     current_tokens = 0
+                 # Add the large row as its own chunk, with header if needed
+                 if self.add_header_to_chunks and header:
+                     chunks.append(finalize_chunk([row]))
+                 else:
+                     chunks.append(row)
+                 continue
+             # If adding row would exceed max_tokens, flush current chunk and start new
+             if current_tokens + row_tokens > self.max_tokens:
+                 if current_chunk:
+                     chunks.append(finalize_chunk(current_chunk))
+                 current_chunk = [row]
+                 current_tokens = row_tokens
+             else:
+                 current_chunk.append(row)
+                 current_tokens += row_tokens
+         # Add any remaining rows as the last chunk
+         if current_chunk:
+             chunks.append(finalize_chunk(current_chunk))
+         return chunks
 
      def load(self) -> list:
          docs = []
          content_per_sheet = self.get_content()
-         for sheet_name, content in content_per_sheet.items():
+         # content_per_sheet is a dict of sheet_name: list of chunk strings
+         for sheet_name, content_chunks in content_per_sheet.items():
              metadata = {
                  "source": f'{self.file_path}:{sheet_name}',
                  "sheet_name": sheet_name,
                  "file_type": "excel",
-                 "excel_by_sheets": self.excel_by_sheets,
-                 "return_type": self.return_type,
              }
-             docs.append(Document(page_content=f"Sheet: {sheet_name}\n {str(content)}", metadata=metadata))
+             # Each chunk is a separate Document
+             for chunk in content_chunks:
+                 docs.append(Document(page_content=chunk, metadata=metadata))
          return docs
 
      def read(self, lazy: bool = False):
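A standalone sketch of the chunking contract described in the _format_sheet_content docstring, simplified (it omits the oversized-row special case); token counts use tiktoken's cl100k_base as in the loader, and the sample rows are made up:

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def chunk_rows(rows, max_tokens, header=None):
    chunks, current, tokens = [], [], 0
    for row in rows:
        n = len(enc.encode(row))
        if current and tokens + n > max_tokens:
            # Flush the current chunk, prepending the header once per chunk.
            chunks.append("\n".join(([header] if header else []) + current))
            current, tokens = [], 0
        current.append(row)
        tokens += n
    if current:
        chunks.append("\n".join(([header] if header else []) + current))
    return chunks

rows = ["1 | alpha", "2 | beta", "3 | gamma", "4 | delta"]
print(chunk_rows(rows, max_tokens=8, header="id | name"))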
alita_sdk/runtime/langchain/document_loaders/constants.py CHANGED
@@ -27,6 +27,7 @@ from .AlitaTextLoader import AlitaTextLoader
  from .AlitaMarkdownLoader import AlitaMarkdownLoader
  from .AlitaPythonLoader import AlitaPythonLoader
  from enum import Enum
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
 
 
  class LoaderProperties(Enum):
@@ -34,7 +35,7 @@ class LoaderProperties(Enum):
      PROMPT_DEFAULT = 'use_default_prompt'
      PROMPT = 'prompt'
 
- DEFAULT_ALLOWED_BASE = {'max_tokens': 512}
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
 
  DEFAULT_ALLOWED_WITH_LLM = {
      **DEFAULT_ALLOWED_BASE,
@@ -43,6 +44,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
      LoaderProperties.PROMPT.value: "",
  }
 
+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
+
  # Image file loaders mapping - directly supported by LLM with image_url
  image_loaders_map = {
      '.png': {
@@ -162,11 +165,12 @@ document_loaders_map = {
                        'spreadsheetml.sheet'),
          'is_multimodal_processing': False,
          'kwargs': {
-             'excel_by_sheets': True,
-             'raw_content': True,
-             'cleanse': False
+             'add_header_to_chunks': False,
+             'header_row_number': 1,
+             'max_tokens': -1,
+             'sheet_name': ''
          },
-         'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
+         'allowed_to_override': DEFAULT_ALLOWED_EXCEL
      },
      '.xls': {
          'class': AlitaExcelLoader,
@@ -177,7 +181,7 @@
              'raw_content': True,
              'cleanse': False
          },
-         'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
+         'allowed_to_override': DEFAULT_ALLOWED_EXCEL
      },
      '.pdf': {
          'class': AlitaPDFLoader,
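A hypothetical sketch of how per-extension defaults like the excel entry's kwargs and a caller's overrides might combine; the names and the merge helper are illustrative, not the SDK's actual code path:

allowed_excel_defaults = {
    'max_tokens': -1,
    'add_header_to_chunks': False,
    'header_row_number': 1,
    'sheet_name': '',
}

def merge_loader_kwargs(defaults: dict, overrides: dict) -> dict:
    # Only keys present in the defaults are allowed to be overridden.
    return {**defaults, **{k: v for k, v in overrides.items() if k in defaults}}

kwargs = merge_loader_kwargs(allowed_excel_defaults,
                             {'max_tokens': 512, 'unsupported_option': True})
print(kwargs)  # max_tokens overridden; unsupported_option dropped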
alita_sdk/runtime/langchain/langraph_agent.py CHANGED
@@ -635,6 +635,7 @@ def create_graph(
          output_variables=output_vars,
          input_variables=node.get('input', ['messages']),
          structured_output=node.get('structured_output', False),
+         tool_execution_timeout=node.get('tool_execution_timeout', 900),
          available_tools=available_tools,
          tool_names=tool_names,
          steps_limit=kwargs.get('steps_limit', 25)
@@ -1010,7 +1011,7 @@ class LangGraphAgentRunnable(CompiledStateGraph):
          thread_id: str,
          current_recursion_limit: int,
      ) -> dict:
-         """Handle GraphRecursionError by returning a soft\-boundary response."""
+         """Handle GraphRecursionError by returning a soft-boundary response."""
          config_state = self.get_state(config)
          is_execution_finished = False
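Related to the soft-boundary handling above, a minimal sketch of how a caller can pair recursion_limit with LangGraph's GraphRecursionError; the agent argument and inputs are placeholders, not the SDK's runnable:

from langgraph.errors import GraphRecursionError

def invoke_with_limit(agent, user_input: str, thread_id: str, limit: int = 50) -> dict:
    """Run a compiled LangGraph agent with a per-turn step cap."""
    config = {"configurable": {"thread_id": thread_id}, "recursion_limit": limit}
    try:
        return agent.invoke({"messages": [("user", user_input)]}, config=config)
    except GraphRecursionError:
        # Report the limit instead of crashing, mirroring the soft-boundary response.
        return {"output": f"Stopped after {limit} steps; ask to continue."}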