autobyteus 1.1.9__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. autobyteus/agent/context/agent_runtime_state.py +4 -0
  2. autobyteus/agent/events/notifiers.py +5 -1
  3. autobyteus/agent/message/send_message_to.py +5 -4
  4. autobyteus/agent/streaming/agent_event_stream.py +5 -0
  5. autobyteus/agent/streaming/stream_event_payloads.py +25 -0
  6. autobyteus/agent/streaming/stream_events.py +13 -1
  7. autobyteus/agent_team/bootstrap_steps/task_notifier_initialization_step.py +4 -4
  8. autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +12 -12
  9. autobyteus/agent_team/context/agent_team_runtime_state.py +2 -2
  10. autobyteus/agent_team/streaming/agent_team_event_notifier.py +4 -4
  11. autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +3 -3
  12. autobyteus/agent_team/streaming/agent_team_stream_events.py +8 -8
  13. autobyteus/agent_team/task_notification/activation_policy.py +1 -1
  14. autobyteus/agent_team/task_notification/system_event_driven_agent_task_notifier.py +22 -22
  15. autobyteus/agent_team/task_notification/task_notification_mode.py +1 -1
  16. autobyteus/cli/agent_team_tui/app.py +4 -4
  17. autobyteus/cli/agent_team_tui/state.py +8 -8
  18. autobyteus/cli/agent_team_tui/widgets/focus_pane.py +3 -3
  19. autobyteus/cli/agent_team_tui/widgets/shared.py +1 -1
  20. autobyteus/cli/agent_team_tui/widgets/{task_board_panel.py → task_plan_panel.py} +5 -5
  21. autobyteus/clients/__init__.py +10 -0
  22. autobyteus/clients/autobyteus_client.py +318 -0
  23. autobyteus/clients/cert_utils.py +105 -0
  24. autobyteus/clients/certificates/cert.pem +34 -0
  25. autobyteus/events/event_types.py +4 -3
  26. autobyteus/llm/api/autobyteus_llm.py +1 -1
  27. autobyteus/llm/api/zhipu_llm.py +26 -0
  28. autobyteus/llm/autobyteus_provider.py +1 -1
  29. autobyteus/llm/llm_factory.py +23 -0
  30. autobyteus/llm/ollama_provider_resolver.py +1 -0
  31. autobyteus/llm/providers.py +1 -0
  32. autobyteus/llm/token_counter/token_counter_factory.py +3 -0
  33. autobyteus/llm/token_counter/zhipu_token_counter.py +24 -0
  34. autobyteus/multimedia/audio/api/__init__.py +3 -2
  35. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +1 -1
  36. autobyteus/multimedia/audio/api/openai_audio_client.py +112 -0
  37. autobyteus/multimedia/audio/audio_client_factory.py +37 -0
  38. autobyteus/multimedia/audio/autobyteus_audio_provider.py +1 -1
  39. autobyteus/multimedia/image/api/autobyteus_image_client.py +1 -1
  40. autobyteus/multimedia/image/autobyteus_image_provider.py +1 -1
  41. autobyteus/multimedia/image/image_client_factory.py +1 -1
  42. autobyteus/task_management/__init__.py +44 -20
  43. autobyteus/task_management/{base_task_board.py → base_task_plan.py} +16 -13
  44. autobyteus/task_management/converters/__init__.py +2 -2
  45. autobyteus/task_management/converters/{task_board_converter.py → task_plan_converter.py} +13 -13
  46. autobyteus/task_management/events.py +7 -7
  47. autobyteus/task_management/{in_memory_task_board.py → in_memory_task_plan.py} +34 -22
  48. autobyteus/task_management/schemas/__init__.py +3 -0
  49. autobyteus/task_management/schemas/task_definition.py +1 -1
  50. autobyteus/task_management/schemas/task_status_report.py +3 -3
  51. autobyteus/task_management/schemas/todo_definition.py +15 -0
  52. autobyteus/task_management/todo.py +29 -0
  53. autobyteus/task_management/todo_list.py +75 -0
  54. autobyteus/task_management/tools/__init__.py +25 -7
  55. autobyteus/task_management/tools/task_tools/__init__.py +19 -0
  56. autobyteus/task_management/tools/task_tools/assign_task_to.py +125 -0
  57. autobyteus/task_management/tools/{publish_task.py → task_tools/create_task.py} +16 -18
  58. autobyteus/task_management/tools/{publish_tasks.py → task_tools/create_tasks.py} +19 -19
  59. autobyteus/task_management/tools/{get_my_tasks.py → task_tools/get_my_tasks.py} +15 -15
  60. autobyteus/task_management/tools/{get_task_board_status.py → task_tools/get_task_plan_status.py} +16 -16
  61. autobyteus/task_management/tools/{update_task_status.py → task_tools/update_task_status.py} +16 -16
  62. autobyteus/task_management/tools/todo_tools/__init__.py +18 -0
  63. autobyteus/task_management/tools/todo_tools/add_todo.py +78 -0
  64. autobyteus/task_management/tools/todo_tools/create_todo_list.py +79 -0
  65. autobyteus/task_management/tools/todo_tools/get_todo_list.py +55 -0
  66. autobyteus/task_management/tools/todo_tools/update_todo_status.py +85 -0
  67. autobyteus/tools/__init__.py +61 -21
  68. autobyteus/tools/bash/bash_executor.py +3 -3
  69. autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +5 -5
  70. autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +4 -4
  71. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +3 -3
  72. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +3 -3
  73. autobyteus/tools/browser/standalone/navigate_to.py +13 -9
  74. autobyteus/tools/browser/standalone/web_page_pdf_generator.py +9 -5
  75. autobyteus/tools/browser/standalone/webpage_image_downloader.py +10 -6
  76. autobyteus/tools/browser/standalone/webpage_reader.py +13 -9
  77. autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +9 -5
  78. autobyteus/tools/file/__init__.py +13 -0
  79. autobyteus/tools/file/edit_file.py +200 -0
  80. autobyteus/tools/file/list_directory.py +168 -0
  81. autobyteus/tools/file/{file_reader.py → read_file.py} +3 -3
  82. autobyteus/tools/file/search_files.py +188 -0
  83. autobyteus/tools/file/{file_writer.py → write_file.py} +3 -3
  84. autobyteus/tools/functional_tool.py +10 -8
  85. autobyteus/tools/mcp/tool.py +3 -3
  86. autobyteus/tools/mcp/tool_registrar.py +5 -2
  87. autobyteus/tools/multimedia/__init__.py +2 -1
  88. autobyteus/tools/multimedia/audio_tools.py +2 -2
  89. autobyteus/tools/multimedia/download_media_tool.py +136 -0
  90. autobyteus/tools/multimedia/image_tools.py +4 -4
  91. autobyteus/tools/multimedia/media_reader_tool.py +1 -1
  92. autobyteus/tools/registry/tool_definition.py +66 -13
  93. autobyteus/tools/registry/tool_registry.py +29 -0
  94. autobyteus/tools/search/__init__.py +17 -0
  95. autobyteus/tools/search/base_strategy.py +35 -0
  96. autobyteus/tools/search/client.py +24 -0
  97. autobyteus/tools/search/factory.py +81 -0
  98. autobyteus/tools/search/google_cse_strategy.py +68 -0
  99. autobyteus/tools/search/providers.py +10 -0
  100. autobyteus/tools/search/serpapi_strategy.py +65 -0
  101. autobyteus/tools/search/serper_strategy.py +87 -0
  102. autobyteus/tools/search_tool.py +83 -0
  103. autobyteus/tools/timer.py +4 -0
  104. autobyteus/tools/tool_meta.py +4 -24
  105. autobyteus/tools/usage/parsers/_string_decoders.py +18 -0
  106. autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +9 -1
  107. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +15 -1
  108. autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +4 -1
  109. autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +4 -1
  110. autobyteus/workflow/bootstrap_steps/coordinator_prompt_preparation_step.py +1 -2
  111. {autobyteus-1.1.9.dist-info → autobyteus-1.2.1.dist-info}/METADATA +7 -6
  112. {autobyteus-1.1.9.dist-info → autobyteus-1.2.1.dist-info}/RECORD +117 -94
  113. examples/run_agentic_software_engineer.py +239 -0
  114. examples/run_poem_writer.py +3 -3
  115. autobyteus/person/__init__.py +0 -0
  116. autobyteus/person/examples/__init__.py +0 -0
  117. autobyteus/person/examples/sample_persons.py +0 -14
  118. autobyteus/person/examples/sample_roles.py +0 -14
  119. autobyteus/person/person.py +0 -29
  120. autobyteus/person/role.py +0 -14
  121. autobyteus/tools/google_search.py +0 -149
  122. autobyteus/tools/image_downloader.py +0 -99
  123. autobyteus/tools/pdf_downloader.py +0 -89
  124. {autobyteus-1.1.9.dist-info → autobyteus-1.2.1.dist-info}/WHEEL +0 -0
  125. {autobyteus-1.1.9.dist-info → autobyteus-1.2.1.dist-info}/licenses/LICENSE +0 -0
  126. {autobyteus-1.1.9.dist-info → autobyteus-1.2.1.dist-info}/top_level.txt +0 -0
autobyteus/tools/file/{file_reader.py → read_file.py}
@@ -10,8 +10,8 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-@tool(name="FileReader", category=ToolCategory.FILE_SYSTEM)
-async def file_reader(context: 'AgentContext', path: str) -> str:
+@tool(name="read_file", category=ToolCategory.FILE_SYSTEM)
+async def read_file(context: 'AgentContext', path: str) -> str:
     """
     Reads content from a specified file.
     'path' is the path to the file. If relative, it must be resolved against a configured agent workspace.
@@ -19,7 +19,7 @@ async def file_reader(context: 'AgentContext', path: str) -> str:
     Raises FileNotFoundError if the file does not exist.
     Raises IOError if file reading fails for other reasons.
     """
-    logger.debug(f"Functional FileReader tool for agent {context.agent_id}, initial path: {path}")
+    logger.debug(f"Functional read_file tool for agent {context.agent_id}, initial path: {path}")
 
     final_path: str
     if os.path.isabs(path):
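The file tools now register under snake_case names (FileReader becomes read_file here; FileWriter becomes write_file below). For orientation, a minimal sketch of a custom functional tool following the decorator pattern visible in these hunks; the tool name, body, and docstring are invented for illustration and are not part of the package:

```python
# Hypothetical example tool following the @tool pattern shown in this diff.
from typing import TYPE_CHECKING

from autobyteus.tools.functional_tool import tool
from autobyteus.tools.tool_category import ToolCategory

if TYPE_CHECKING:
    from autobyteus.agent.context import AgentContext


@tool(name="count_lines", category=ToolCategory.FILE_SYSTEM)
async def count_lines(context: 'AgentContext', path: str) -> str:
    """Counts the lines in a text file. Illustrative only; not shipped in 1.2.1."""
    with open(path, "r", encoding="utf-8") as f:
        return str(sum(1 for _ in f))
```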
autobyteus/tools/file/search_files.py
@@ -0,0 +1,188 @@
+# file: autobyteus/autobyteus/tools/file/search_files.py
+"""
+This module provides a high-performance fuzzy file search tool.
+It uses 'git ls-files' for speed in Git repositories and falls back
+to a filesystem walk for other directories, respecting .gitignore.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import subprocess
+from pathlib import Path
+from typing import List, Dict, Optional, Tuple, TYPE_CHECKING
+
+from rapidfuzz import process, fuzz
+from pathspec import PathSpec
+from pathspec.patterns import GitWildMatchPattern
+
+from autobyteus.tools.functional_tool import tool
+from autobyteus.tools.tool_category import ToolCategory
+
+if TYPE_CHECKING:
+    from autobyteus.agent.context import AgentContext
+
+logger = logging.getLogger(__name__)
+
+
+@tool(name="search_files", category=ToolCategory.FILE_SYSTEM)
+async def search_files(
+    context: 'AgentContext',
+    query: Optional[str] = None,
+    path: str = '.',
+    limit: int = 64,
+    exclude_patterns: Optional[List[str]] = None
+) -> str:
+    """
+    Performs a high-performance fuzzy search for files in a directory.
+
+    This tool intelligently discovers files. If the search directory is a Git repository,
+    it uses the highly efficient 'git ls-files' command. Otherwise, it performs a
+    standard filesystem walk. In both cases, it respects .gitignore rules and any
+    additional exclusion patterns provided. The search results are returned as a
+    JSON string, with each result including the file path and a relevance score.
+
+    Args:
+        query: The fuzzy search pattern. If omitted, the tool lists all discoverable files up to the limit.
+        path: The directory to search in. Relative paths are resolved against the agent's workspace. Defaults to the workspace root.
+        limit: The maximum number of results to return.
+        exclude_patterns: A list of glob patterns to exclude from the search, in addition to .gitignore rules.
+    """
+    final_path = _resolve_search_path(context, path)
+    if not final_path.is_dir():
+        raise FileNotFoundError(f"The specified search path does not exist or is not a directory: {final_path}")
+
+    exclude = exclude_patterns or []
+    files, discovery_method = await _discover_files(final_path, exclude)
+
+    if not query:
+        # If no query, just return the first 'limit' files found
+        matches = [{"path": f, "score": 100} for f in files[:limit]]
+        result_summary = {
+            "discovery_method": discovery_method,
+            "total_files_scanned": len(files),
+            "matches_found": len(matches),
+            "results": matches
+        }
+        return json.dumps(result_summary, indent=2)
+
+    # Use rapidfuzz to find the best matches
+    results = process.extract(
+        query,
+        files,
+        scorer=fuzz.WRatio,
+        limit=limit,
+        score_cutoff=50
+    )
+
+    file_matches = [{"path": path, "score": round(score)} for path, score, _ in results]
+
+    result_summary = {
+        "discovery_method": discovery_method,
+        "total_files_scanned": len(files),
+        "matches_found": len(file_matches),
+        "results": file_matches
+    }
+    return json.dumps(result_summary, indent=2)
+
+
+def _resolve_search_path(context: 'AgentContext', path: str) -> Path:
+    """Resolves the search path against the agent's workspace if relative."""
+    if os.path.isabs(path):
+        return Path(path)
+
+    if not context.workspace:
+        raise ValueError(f"Relative path '{path}' provided, but no workspace is configured for agent '{context.agent_id}'.")
+
+    base_path = context.workspace.get_base_path()
+    if not base_path:
+        raise ValueError(f"Agent '{context.agent_id}' has a workspace, but it provided an invalid base path.")
+
+    return Path(os.path.normpath(os.path.join(base_path, path)))
+
+
+async def _is_git_repository_async(path: Path) -> bool:
+    """Asynchronously checks if a given path is within a Git repository."""
+    process = await asyncio.create_subprocess_exec(
+        "git", "rev-parse", "--is-inside-work-tree",
+        cwd=str(path),
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, _ = await process.communicate()
+    return stdout.decode().strip() == "true"
+
+
+async def _get_files_from_git_async(path: Path) -> List[str]:
+    """Uses 'git ls-files' to get a list of all tracked and untracked files."""
+    try:
+        process = await asyncio.create_subprocess_exec(
+            "git", "ls-files", "-co", "--exclude-standard",
+            cwd=str(path),
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout_bytes, stderr_bytes = await process.communicate()
+        if process.returncode != 0:
+            stderr = stderr_bytes.decode().strip()
+            logger.error(f"Failed to run 'git ls-files' in '{path}': {stderr}")
+            return []
+
+        stdout = stdout_bytes.decode().strip()
+        return stdout.strip().split("\n") if stdout.strip() else []
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logger.error(f"Failed to run 'git ls-files': {e}")
+        return []
+
+
+def _get_files_with_walk_sync(path: Path, exclude_patterns: List[str]) -> List[str]:
+    """Synchronously walks the filesystem to find files, respecting ignore patterns."""
+    files: List[str] = []
+
+    all_exclude_patterns = exclude_patterns[:]
+    gitignore_path = path / ".gitignore"
+    if gitignore_path.is_file():
+        try:
+            with open(gitignore_path, "r", encoding='utf-8') as f:
+                all_exclude_patterns.extend(f.read().splitlines())
+        except Exception as e:
+            logger.warning(f"Could not read .gitignore file at '{gitignore_path}': {e}")
+
+    spec = PathSpec.from_lines(GitWildMatchPattern, all_exclude_patterns)
+
+    for root, _, filenames in os.walk(path, topdown=True):
+        root_path = Path(root)
+        for filename in filenames:
+            full_path = root_path / filename
+            try:
+                relative_path = full_path.relative_to(path)
+                if not spec.match_file(str(relative_path)):
+                    files.append(str(relative_path))
+            except (ValueError, IsADirectoryError):
+                # Handles cases like broken symlinks
+                continue
+    return files
+
+
+async def _get_files_with_walk_async(path: Path, exclude_patterns: List[str]) -> List[str]:
+    """Runs the synchronous walk in a thread pool."""
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(
+        None, _get_files_with_walk_sync, path, exclude_patterns
+    )
+
+
+async def _discover_files(cwd: Path, exclude: List[str]) -> Tuple[List[str], str]:
+    """Orchestrates the file discovery, choosing between Git and os.walk."""
+    if await _is_git_repository_async(cwd):
+        logger.info(f"Using 'git ls-files' for fast file discovery in '{cwd}'.")
+        files = await _get_files_from_git_async(cwd)
+        # Git ls-files already handles gitignore, but we may have extra excludes
+        if exclude:
+            spec = PathSpec.from_lines(GitWildMatchPattern, exclude)
+            files = [f for f in files if not spec.match_file(f)]
+        return files, "git"
+    else:
+        logger.info(f"Using 'os.walk' to scan directory '{cwd}'.")
+        return await _get_files_with_walk_async(cwd, exclude), "os_walk"
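The new search_files tool ranks candidate paths with rapidfuzz, using process.extract and the WRatio scorer with a score cutoff of 50, as in the hunk above. A standalone sketch of just that ranking step, with an invented candidate list:

```python
# Minimal sketch of the rapidfuzz ranking step used by search_files above.
# The candidate paths are invented for illustration.
from rapidfuzz import process, fuzz

candidates = [
    "autobyteus/tools/file/read_file.py",
    "autobyteus/tools/file/write_file.py",
    "autobyteus/tools/search/factory.py",
]

# process.extract returns (choice, score, index) tuples, best match first,
# dropping anything that scores below score_cutoff.
for path, score, _ in process.extract("read file", candidates, scorer=fuzz.WRatio, limit=5, score_cutoff=50):
    print(f"{round(score):>3}  {path}")
```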
autobyteus/tools/file/{file_writer.py → write_file.py}
@@ -10,8 +10,8 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-@tool(name="FileWriter", category=ToolCategory.FILE_SYSTEM)
-async def file_writer(context: 'AgentContext', path: str, content: str) -> str:
+@tool(name="write_file", category=ToolCategory.FILE_SYSTEM)
+async def write_file(context: 'AgentContext', path: str, content: str) -> str:
     """
     Creates or overwrites a file with specified content.
     'path' is the path where the file will be written. If relative, it must be resolved against a configured agent workspace.
@@ -20,7 +20,7 @@ async def file_writer(context: 'AgentContext', path: str, content: str) -> str:
     Raises ValueError if a relative path is given without a valid workspace.
     Raises IOError if file writing fails.
     """
-    logger.debug(f"Functional FileWriter tool for agent {context.agent_id}, initial path: {path}")
+    logger.debug(f"Functional write_file tool for agent {context.agent_id}, initial path: {path}")
 
     final_path: str
     if os.path.isabs(path):
autobyteus/tools/functional_tool.py
@@ -99,7 +99,7 @@ def _python_type_to_json_schema(py_type: Any) -> Optional[Dict[str, Any]]:
     if py_type is float: return {"type": "number"}
     if py_type is bool: return {"type": "boolean"}
     if py_type is dict: return {"type": "object"}
-    if py_type is list: return {"type": "array", "items": True}
+    if py_type is list: return {"type": "array", "items": {}} # Use empty dict for 'any'
 
     origin_type = get_origin(py_type)
     if origin_type is Union:
@@ -111,8 +111,8 @@ def _python_type_to_json_schema(py_type: Any) -> Optional[Dict[str, Any]]:
         list_args = get_args(py_type)
         if list_args and len(list_args) == 1:
             item_schema = _python_type_to_json_schema(list_args[0])
-            return {"type": "array", "items": item_schema if item_schema else True}
-        return {"type": "array", "items": True}
+            return {"type": "array", "items": item_schema if item_schema else {}}
+        return {"type": "array", "items": {}} # Use empty dict for 'any'
     if origin_type is Dict or origin_type is dict: return {"type": "object"}
     logger.debug(f"Could not map Python type {py_type} to a simple JSON schema for array items.")
     return None
@@ -141,13 +141,15 @@ def _get_parameter_type_from_hint(py_type: Any, param_name: str) -> Tuple[Parame
         list_args = get_args(actual_type)
         if list_args and len(list_args) == 1:
             array_item_js_schema = _python_type_to_json_schema(list_args[0])
-            if not array_item_js_schema:
-                array_item_js_schema = True
+            # FIX: For an untyped list, the item schema should be None, not True.
+            # An empty dict `{}` is a valid JSON schema for 'any'.
+            if array_item_js_schema is None:
+                array_item_js_schema = {}
             return param_type_enum, array_item_js_schema
 
     mapped_type = _TYPE_MAPPING.get(actual_type)
     if mapped_type:
-        item_schema_for_array = True if mapped_type == ParameterType.ARRAY else None
+        item_schema_for_array = {} if mapped_type == ParameterType.ARRAY else None
        return mapped_type, item_schema_for_array
 
    logger.warning(f"Unmapped type hint {py_type} (actual_type: {actual_type}) for param '{param_name}'. Defaulting to ParameterType.STRING.")
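The items change above swaps the boolean True for an empty object, which is the usual JSON Schema idiom for "any item type" and serializes as a plain object rather than a boolean sub-schema that some downstream schema consumers do not accept. A quick check using the jsonschema package (assumed here purely for illustration; it is not referenced anywhere in this diff):

```python
# Quick check that {"items": {}} accepts arrays with elements of any type.
# The jsonschema package is assumed for illustration only.
from jsonschema import validate

schema = {"type": "array", "items": {}}  # an empty schema matches any instance
validate(instance=[1, "two", {"nested": True}], schema=schema)  # raises nothing
print("ok")
```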
@@ -232,8 +234,8 @@ def tool(
        tool_def = ToolDefinition(
            name=tool_name,
            description=tool_desc,
-           argument_schema=final_arg_schema,
-           config_schema=config_schema,
+           argument_schema_provider=lambda: final_arg_schema,
+           config_schema_provider=lambda: config_schema,
            custom_factory=factory,
            tool_class=None,
            origin=ToolOrigin.LOCAL,
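This hunk and the MCP registrar hunk further down both move ToolDefinition from eager argument_schema/config_schema values to argument_schema_provider/config_schema_provider callables. The class itself is not included in this diff, so the following is only a rough sketch of what such a lazy-provider constructor could look like; every name inside LazyToolDefinition is assumed:

```python
# Hypothetical sketch of a provider-based definition; ToolDefinition's real
# implementation is not included in this diff.
from dataclasses import dataclass
from typing import Any, Callable, Optional


@dataclass
class LazyToolDefinition:
    name: str
    description: str
    argument_schema_provider: Callable[[], Optional[Any]]
    config_schema_provider: Callable[[], Optional[Any]] = lambda: None

    @property
    def argument_schema(self) -> Optional[Any]:
        # The schema is only built the moment somebody asks for it.
        return self.argument_schema_provider()


definition = LazyToolDefinition(
    name="example_tool",
    description="Demo of deferring schema construction until it is needed.",
    argument_schema_provider=lambda: {"type": "object", "properties": {}},
)
print(definition.argument_schema)
```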
autobyteus/tools/mcp/tool.py
@@ -41,7 +41,7 @@ class GenericMcpTool(BaseTool):
         self.get_description = self.get_instance_description
         self.get_argument_schema = self.get_instance_argument_schema
 
-        logger.info(f"GenericMcpTool instance created for remote tool '{remote_tool_name}' on server '{self._server_id}'. "
+        logger.info(f"call_remote_mcp_tool instance created for remote tool '{remote_tool_name}' on server '{self._server_id}'. "
                     f"Registered in AutoByteUs as '{self._instance_name}'.")
 
     # --- Getters for instance-specific data ---
@@ -51,7 +51,7 @@ class GenericMcpTool(BaseTool):
 
     # --- Base class methods (class-level, not instance-level) ---
     @classmethod
-    def get_name(cls) -> str: return "GenericMcpTool"
+    def get_name(cls) -> str: return "call_remote_mcp_tool"
     @classmethod
     def get_description(cls) -> str: return "A generic wrapper for executing remote MCP tools."
     @classmethod
@@ -65,7 +65,7 @@ class GenericMcpTool(BaseTool):
         agent_id = context.agent_id
         tool_name_for_log = self.get_instance_name()
 
-        logger.info(f"GenericMcpTool '{tool_name_for_log}': Creating proxy for agent '{agent_id}' and server '{self._server_id}'.")
+        logger.info(f"call_remote_mcp_tool '{tool_name_for_log}': Creating proxy for agent '{agent_id}' and server '{self._server_id}'.")
 
         try:
             # The proxy is created on-demand for each execution.
autobyteus/tools/mcp/tool_registrar.py
@@ -59,6 +59,8 @@ class McpToolRegistrar(metaclass=SingletonMeta):
        if server_config.tool_name_prefix:
            registered_name = f"{server_config.tool_name_prefix.rstrip('_')}_{remote_tool.name}"
 
+        # Note: McpToolFactory is now somewhat redundant as it holds static info,
+        # but we keep it for consistency. It creates a GenericMcpTool which needs this static info.
        tool_factory = McpToolFactory(
            server_id=server_config.server_id,
            remote_tool_name=remote_tool.name,
@@ -70,12 +72,13 @@
        return ToolDefinition(
            name=registered_name,
            description=actual_desc,
-           argument_schema=actual_arg_schema,
+           # Pass schema providers as lambdas to conform to the new constructor
+           argument_schema_provider=lambda: actual_arg_schema,
+           config_schema_provider=lambda: None,
            origin=ToolOrigin.MCP,
            category=server_config.server_id, # Use server_id as the category
            metadata={"mcp_server_id": server_config.server_id}, # Store origin in generic metadata
            custom_factory=tool_factory.create_tool,
-           config_schema=None,
            tool_class=None
        )
 
autobyteus/tools/multimedia/__init__.py
@@ -1,10 +1,11 @@
 from .image_tools import GenerateImageTool, EditImageTool
 from .audio_tools import GenerateSpeechTool
 from .media_reader_tool import ReadMediaFile
-
+from .download_media_tool import DownloadMediaTool
 __all__ = [
     "GenerateImageTool",
     "EditImageTool",
     "GenerateSpeechTool",
     "ReadMediaFile",
+    "DownloadMediaTool",
 ]
autobyteus/tools/multimedia/audio_tools.py
@@ -63,7 +63,7 @@ class GenerateSpeechTool(BaseTool):
 
     @classmethod
     def get_name(cls) -> str:
-        return "GenerateSpeech"
+        return "generate_speech"
 
     @classmethod
     def get_description(cls) -> str:
@@ -91,7 +91,7 @@ class GenerateSpeechTool(BaseTool):
 
     async def _execute(self, context, prompt: str, generation_config: Optional[dict] = None) -> List[str]:
         model_identifier = _get_configured_model_identifier(self.MODEL_ENV_VAR, self.DEFAULT_MODEL)
-        logger.info(f"GenerateSpeechTool executing with configured model '{model_identifier}'.")
+        logger.info(f"generate_speech executing with configured model '{model_identifier}'.")
         client = None
         try:
             client = audio_client_factory.create_audio_client(model_identifier=model_identifier)
autobyteus/tools/multimedia/download_media_tool.py
@@ -0,0 +1,136 @@
+import os
+import logging
+import mimetypes
+import aiohttp
+from typing import Optional, TYPE_CHECKING
+from urllib.parse import urlparse
+
+from autobyteus.tools.base_tool import BaseTool
+from autobyteus.tools.tool_category import ToolCategory
+from autobyteus.utils.file_utils import get_default_download_folder
+from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType
+
+if TYPE_CHECKING:
+    from autobyteus.agent.context import AgentContext
+
+logger = logging.getLogger(__name__)
+
+class DownloadMediaTool(BaseTool):
+    """
+    A unified tool to download any media file (e.g., image, PDF, audio) from a URL.
+    """
+    CATEGORY = ToolCategory.MULTIMEDIA
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "download_media"
+
+    @classmethod
+    def get_description(cls) -> str:
+        return (
+            "Downloads various media files (e.g., images like PNG/JPG, documents like PDF, audio like MP3/WAV) "
+            "from a direct URL and saves them locally. It intelligently determines the correct file extension "
+            "based on the content type. Returns the absolute path to the downloaded file."
+        )
+
+    @classmethod
+    def get_argument_schema(cls) -> ParameterSchema:
+        schema = ParameterSchema()
+        schema.add_parameter(ParameterDefinition(
+            name="url",
+            param_type=ParameterType.STRING,
+            description="The direct URL of the media file to download.",
+            required=True
+        ))
+        schema.add_parameter(ParameterDefinition(
+            name="filename",
+            param_type=ParameterType.STRING,
+            description="The desired base name for the downloaded file (e.g., 'vacation_photo', 'annual_report'). The tool will automatically add the correct file extension.",
+            required=True
+        ))
+        schema.add_parameter(ParameterDefinition(
+            name="folder",
+            param_type=ParameterType.STRING,
+            description="Optional. A custom directory path to save the file. If not provided, the system's default download folder will be used.",
+            required=False
+        ))
+        return schema
+
+    async def _execute(self, context: 'AgentContext', url: str, filename: str, folder: Optional[str] = None) -> str:
+        # 1. Determine download directory
+        try:
+            if folder:
+                # Security: prevent path traversal attacks.
+                if ".." in folder:
+                    raise ValueError("Security error: 'folder' path cannot contain '..'.")
+                destination_dir = os.path.abspath(folder)
+            else:
+                destination_dir = get_default_download_folder()
+
+            os.makedirs(destination_dir, exist_ok=True)
+        except Exception as e:
+            logger.error(f"Error preparing download directory '{folder or 'default'}': {e}", exc_info=True)
+            raise IOError(f"Failed to create or access download directory: {e}")
+
+        # 2. Sanitize filename provided by the LLM
+        if not filename or ".." in filename or os.path.isabs(filename) or "/" in filename or "\\" in filename:
+            raise ValueError("Invalid filename. It must be a simple name without any path characters ('..', '/', '\\').")
+
+        logger.info(f"Attempting to download from {url} to save as '{filename}' in '{destination_dir}'.")
+
+        # 3. Download and process file asynchronously
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url, timeout=60) as response:
+                    response.raise_for_status()
+
+                    # 4. Intelligently determine file extension from Content-Type header
+                    content_type = response.headers.get('Content-Type')
+                    correct_ext = ''
+                    if content_type:
+                        mime_type = content_type.split(';')[0].strip()
+                        guess = mimetypes.guess_extension(mime_type)
+                        if guess:
+                            correct_ext = guess
+                            logger.debug(f"Determined extension '{correct_ext}' from Content-Type: '{mime_type}'")
+
+                    # Fallback to URL extension if Content-Type is generic or missing
+                    if not correct_ext or correct_ext == '.bin':
+                        url_path = urlparse(url).path
+                        _, ext_from_url = os.path.splitext(os.path.basename(url_path))
+                        if ext_from_url and len(ext_from_url) > 1: # Ensure it's not just a dot
+                            logger.debug(f"Using fallback extension '{ext_from_url}' from URL.")
+                            correct_ext = ext_from_url
+
+                    if not correct_ext:
+                        logger.warning("Could not determine a file extension. The file will be saved without one.")
+
+                    # 5. Construct final filename and path
+                    base_filename, _ = os.path.splitext(filename)
+                    final_filename = f"{base_filename}{correct_ext}"
+                    save_path = os.path.join(destination_dir, final_filename)
+
+                    # Ensure filename is unique to avoid overwriting
+                    counter = 1
+                    while os.path.exists(save_path):
+                        final_filename = f"{base_filename}_{counter}{correct_ext}"
+                        save_path = os.path.join(destination_dir, final_filename)
+                        counter += 1
+
+                    # 6. Stream file content to disk
+                    with open(save_path, 'wb') as f:
+                        async for chunk in response.content.iter_chunked(8192):
+                            f.write(chunk)
+
+                    logger.info(f"Successfully downloaded and saved file to: {save_path}")
+                    return f"Successfully downloaded file to: {save_path}"
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error while downloading from {url}: {e}", exc_info=True)
+            raise ConnectionError(f"Failed to download from {url}: {e}")
+        except IOError as e:
+            logger.error(f"Failed to write downloaded file to {destination_dir}: {e}", exc_info=True)
+            raise
+        except Exception as e:
+            logger.error(f"An unexpected error occurred during download from {url}: {e}", exc_info=True)
+            raise RuntimeError(f"An unexpected error occurred: {e}")
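DownloadMediaTool picks the extension from mimetypes.guess_extension and falls back to the URL suffix when the guess is missing or the generic '.bin'. A small illustration of the stdlib behavior that fallback accounts for (exact values can vary with the platform's MIME tables):

```python
# Stdlib behavior the extension-detection step above relies on.
# Exact results depend on the platform's MIME tables.
import mimetypes

print(mimetypes.guess_extension("image/png"))                 # typically '.png'
print(mimetypes.guess_extension("application/pdf"))           # typically '.pdf'
print(mimetypes.guess_extension("application/octet-stream"))  # typically '.bin', which triggers the URL fallback
print(mimetypes.guess_extension("audio/x-unknown"))           # None, which also falls back to the URL suffix
```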
autobyteus/tools/multimedia/image_tools.py
@@ -62,7 +62,7 @@ class GenerateImageTool(BaseTool):
 
     @classmethod
     def get_name(cls) -> str:
-        return "GenerateImage"
+        return "generate_image"
 
     @classmethod
     def get_description(cls) -> str:
@@ -92,7 +92,7 @@ class GenerateImageTool(BaseTool):
 
     async def _execute(self, context, prompt: str, input_image_urls: Optional[str] = None, generation_config: Optional[dict] = None) -> List[str]:
         model_identifier = _get_configured_model_identifier(self.MODEL_ENV_VAR, self.DEFAULT_MODEL)
-        logger.info(f"GenerateImageTool executing with configured model '{model_identifier}'.")
+        logger.info(f"generate_image executing with configured model '{model_identifier}'.")
         client = None
         try:
             urls_list = None
@@ -125,7 +125,7 @@ class EditImageTool(BaseTool):
 
     @classmethod
     def get_name(cls) -> str:
-        return "EditImage"
+        return "edit_image"
 
     @classmethod
     def get_description(cls) -> str:
@@ -161,7 +161,7 @@ class EditImageTool(BaseTool):
 
     async def _execute(self, context, prompt: str, input_image_urls: str, generation_config: Optional[dict] = None, mask_image_url: Optional[str] = None) -> List[str]:
         model_identifier = _get_configured_model_identifier(self.MODEL_ENV_VAR, self.DEFAULT_MODEL)
-        logger.info(f"EditImageTool executing with configured model '{model_identifier}'.")
+        logger.info(f"edit_image executing with configured model '{model_identifier}'.")
         client = None
         try:
             urls_list = [url.strip() for url in input_image_urls.split(',') if url.strip()]
autobyteus/tools/multimedia/media_reader_tool.py
@@ -21,7 +21,7 @@ class ReadMediaFile(BaseTool):
     the file's content. The tool's result is a structured object that the system
     uses to construct a multimodal prompt, not plain text.
     """
-    TOOL_NAME = "ReadMediaFile"
+    TOOL_NAME = "read_media_file"
     CATEGORY = ToolCategory.MULTIMEDIA
 
     @classmethod