PyPI - aixtools - Versions diffs - 0.3.7__tar.gz → 0.3.9__tar.gz - Mend

aixtools 0.3.7 (tar.gz) → 0.3.9 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aixtools might be problematic. Click here for more details.

Files changed (104)

{aixtools-0.3.7 → aixtools-0.3.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aixtools
-Version: 0.3.7
+Version: 0.3.9
 Summary: Tools for AI exploration and debugging
 Requires-Python: >=3.11.2
 Description-Content-Type: text/markdown
@@ -26,7 +26,6 @@ Requires-Dist: rich>=14.0.0
 Requires-Dist: ruff>=0.11.6
 Requires-Dist: streamlit>=1.44.1
 Requires-Dist: watchdog>=6.0.0
-Requires-Dist: markitdown[docx,pdf,pptx,xls,xlsx]>=0.1.3
 Provides-Extra: test
 Requires-Dist: pyyaml; extra == "test"
 Provides-Extra: feature

{aixtools-0.3.7 → aixtools-0.3.9}/aixtools/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.3.7'
-__version_tuple__ = version_tuple = (0, 3, 7)
+__version__ = version = '0.3.9'
+__version_tuple__ = version_tuple = (0, 3, 9)
-__commit_id__ = commit_id = 'g889028f57'
+__commit_id__ = commit_id = 'gf2f460fba'

aixtools-0.3.9/aixtools/agents/prompt.py ADDED Viewed

@@ -0,0 +1,97 @@
+"""Prompt building utilities for Pydantic AI agent, including file handling and context management."""
+import mimetypes
+from pathlib import Path
+from pydantic_ai import BinaryContent
+from aixtools.context import SessionIdTuple
+from aixtools.server import container_to_host_path
+from aixtools.utils.files import is_text_content
+CLAUDE_MAX_FILE_SIZE_IN_CONTEXT = 4 * 1024 * 1024  # Claude limit 4.5 MB for PDF files
+CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT = (
+    5 * 1024 * 1024
+)  # Claude limit 5 MB for images, to avoid large image files in context
+def should_be_included_into_context(
+    file_content: BinaryContent | str | None,
+    file_size: int,
+    *,
+    max_img_size_bytes: int = CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT,
+    max_file_size_bytes: int = CLAUDE_MAX_FILE_SIZE_IN_CONTEXT,
+) -> bool:
+    """Decide whether a file content should be included into the model context based on its type and size."""
+    if not isinstance(file_content, BinaryContent):
+        return False
+    if file_content.media_type.startswith("text/"):
+        return False
+    # Exclude archive files as they're not supported by OpenAI models
+    archive_types = {
+        "application/zip",
+        "application/x-tar",
+        "application/gzip",
+        "application/x-gzip",
+        "application/x-rar-compressed",
+        "application/x-7z-compressed",
+    }
+    if file_content.media_type in archive_types:
+        return False
+    if file_content.is_image and file_size < max_img_size_bytes:
+        return True
+    return file_size < max_file_size_bytes
+def file_to_binary_content(file_path: str | Path, mime_type: str = "") -> str | BinaryContent:
+    """
+    Read a file and return its content as either a UTF-8 string (for text files)
+    or BinaryContent (for binary files).
+    """
+    with open(file_path, "rb") as f:
+        data = f.read()
+    if not mime_type:
+        mime_type, _ = mimetypes.guess_type(file_path)
+        mime_type = mime_type or "application/octet-stream"
+    if is_text_content(data, mime_type):
+        return data.decode("utf-8")
+    return BinaryContent(data=data, media_type=mime_type)
+def build_user_input(
+    session_tuple: SessionIdTuple,
+    user_text: str,
+    file_paths: list[Path],
+) -> str | list[str | BinaryContent]:
+    """Build user input for the Pydantic AI agent, including file attachments if provided."""
+    if not file_paths:
+        return user_text
+    attachment_info_lines = []
+    binary_attachments = []
+    for workspace_path in file_paths:
+        host_path = container_to_host_path(workspace_path, ctx=session_tuple)
+        file_size = host_path.stat().st_size
+        mime_type, _ = mimetypes.guess_type(host_path)
+        mime_type = mime_type or "application/octet-stream"
+        attachment_info = f"* {workspace_path.name} (file_size={file_size} bytes) (path in workspace: {workspace_path})"
+        binary_content = file_to_binary_content(host_path, mime_type)
+        if should_be_included_into_context(binary_content, file_size):
+            binary_attachments.append(binary_content)
+            attachment_info += f" -- provided to model context at index {len(binary_attachments) - 1}"
+        attachment_info_lines.append(attachment_info)
+    full_prompt = user_text + "\nAttachments:\n" + "\n".join(attachment_info_lines)
+    return [full_prompt] + binary_attachments

{aixtools-0.3.7 → aixtools-0.3.9}/aixtools/logfilters/context_filter.py RENAMED Viewed

@@ -53,8 +53,13 @@ class ContextFilter(logging.Filter):  # pylint: disable=too-few-public-methods
         except ImportError:
             pass
-        if not user_id and not session_id:
-            user_id, session_id = self._extract_from_mcp_context()
+        mcp_user_id = None
+        mcp_session_id = None
+        if not user_id or not session_id:
+            mcp_user_id, mcp_session_id = self._extract_from_mcp_context()
+        user_id = user_id or mcp_user_id
+        session_id = session_id or mcp_session_id
         context = ""
         if session_id and not str(session_id).startswith("default"):

{aixtools-0.3.7 → aixtools-0.3.9}/aixtools/utils/config.py RENAMED Viewed

@@ -146,26 +146,3 @@ APP_DEFAULT_SCOPE = get_variable_env("APP_DEFAULT_SCOPE", allow_empty=True)
 AUTH_TEST_TOKEN = get_variable_env("AUTH_TEST_TOKEN", allow_empty=True)
 MCP_TOOLS_MAX_RETRIES = int(get_variable_env("MCP_TOOLS_MAX_RETRIES", default=10))
-# File attachment limits and supported types for model context
-# Maximum extracted document text size (5MB default, planned for future use)
-MAX_EXTRACTED_TEXT_SIZE = int(get_variable_env("MAX_EXTRACTED_TEXT_SIZE", default=str(5 * 1024 * 1024)))
-# Maximum image attachment size (2MB default)
-MAX_IMAGE_ATTACHMENT_SIZE = int(get_variable_env("MAX_IMAGE_ATTACHMENT_SIZE", default=str(2 * 1024 * 1024)))
-# Image MIME types that can be attached to model context
-IMAGE_ATTACHMENT_TYPES = {
-    "image/png",
-    "image/jpeg",
-    "image/jpg",
-    "image/gif",
-    "image/webp",
-}
-# Document MIME types that can be extracted as text
-EXTRACTABLE_DOCUMENT_TYPES = {
-    "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
-    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # .docx
-    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
-    "application/vnd.ms-excel",  # .xls
-    "application/pdf",  # .pdf
-}

{aixtools-0.3.7 → aixtools-0.3.9}/pyproject.toml RENAMED Viewed

@@ -33,8 +33,7 @@ dependencies = [
     "rich>=14.0.0",
     "ruff>=0.11.6",
     "streamlit>=1.44.1",
-    "watchdog>=6.0.0",
-    "markitdown[docx,pdf,pptx,xls,xlsx]>=0.1.3",
+    "watchdog>=6.0.0"
 ]
 [project.scripts]

aixtools-0.3.7/aixtools/agents/prompt.py DELETED Viewed

@@ -1,175 +0,0 @@
-"""Prompt building utilities for Pydantic AI agent, including file handling and context management."""
-import mimetypes
-from dataclasses import dataclass
-from pathlib import Path, PurePosixPath
-from typing import Optional
-from markitdown import MarkItDown
-from pydantic_ai import BinaryContent
-from aixtools.context import SessionIdTuple
-from aixtools.logging.logging_config import get_logger
-from aixtools.server import container_to_host_path
-from aixtools.utils.config import (
-    EXTRACTABLE_DOCUMENT_TYPES,
-    IMAGE_ATTACHMENT_TYPES,
-    MAX_EXTRACTED_TEXT_SIZE,
-    MAX_IMAGE_ATTACHMENT_SIZE,
-)
-from aixtools.utils.files import is_text_content
-logger = get_logger(__name__)
-@dataclass
-class FileExtractionResult:
-    """Result of file content extraction.
-    Attributes:
-        content: Extracted file content (str for text/documents, BinaryContent for images, None on failure)
-        success: True if file was successfully read or extracted, False on any failure
-        error_message: Error description if extraction failed, None otherwise
-        was_extracted: True if document extraction via markitdown was used successfully
-    """
-    content: str | BinaryContent | None
-    success: bool
-    error_message: str | None = None
-    was_extracted: bool = False
-def should_be_included_into_context(
-    file_content: BinaryContent | str | None,
-    *,
-    max_image_size_bytes: int = MAX_IMAGE_ATTACHMENT_SIZE,
-    max_extracted_text_size_bytes: int = MAX_EXTRACTED_TEXT_SIZE,
-) -> bool:
-    """Check if file content should be included in model context based on type and size limits."""
-    if file_content is None:
-        return False
-    # Handle extracted text (strings)
-    if isinstance(file_content, str):
-        text_size = len(file_content.encode("utf-8"))
-        return text_size < max_extracted_text_size_bytes
-    # Handle binary content (images only)
-    if isinstance(file_content, BinaryContent):
-        if file_content.media_type not in IMAGE_ATTACHMENT_TYPES:
-            return False
-        image_size = len(file_content.data)
-        return image_size < max_image_size_bytes
-    return False
-def file_to_binary_content(file_path: str | Path, mime_type: Optional[str] = None) -> FileExtractionResult:
-    """Read file and extract text from documents (PDF, DOCX, XLSX, PPTX) using markitdown."""
-    if not mime_type:
-        mime_type, _ = mimetypes.guess_type(file_path)
-        mime_type = mime_type or "application/octet-stream"
-    # Extract text from supported document types using markitdown
-    if mime_type in EXTRACTABLE_DOCUMENT_TYPES:
-        try:
-            markitdown = MarkItDown()
-            result = markitdown.convert(str(file_path))
-            return FileExtractionResult(
-                content=result.text_content, success=True, error_message=None, was_extracted=True
-            )
-        except Exception as e:  # pylint: disable=broad-exception-caught
-            error_msg = f"Extraction failed: {type(e).__name__}: {str(e)}"
-            logger.error("Document extraction failed for %s: %s", file_path, error_msg)
-            return FileExtractionResult(content=None, success=False, error_message=error_msg)
-    # Read the file data for non-document types
-    try:
-        with open(file_path, "rb") as f:
-            data = f.read()
-        # Return as string if it's text content
-        if is_text_content(data, mime_type):
-            return FileExtractionResult(content=data.decode("utf-8"), success=True)
-        # Return as binary content for images and other binary files
-        return FileExtractionResult(content=BinaryContent(data=data, media_type=mime_type), success=True)
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        error_msg = f"Failed to read file: {type(e).__name__}: {str(e)}"
-        logger.error("File reading failed for %s: %s", file_path, error_msg)
-        return FileExtractionResult(content=None, success=False, error_message=error_msg)
-def truncate_extracted_text(text: str, max_bytes: int = MAX_EXTRACTED_TEXT_SIZE) -> str:
-    """Truncate text to max_bytes with warning prefix."""
-    truncated_bytes = text.encode("utf-8")[:max_bytes]
-    truncated_text = truncated_bytes.decode("utf-8", errors="ignore")
-    total_chars = len(text)
-    truncated_chars = len(truncated_text)
-    return f"[TRUNCATED - showing first {truncated_chars} of {total_chars} characters]\n\n{truncated_text}"
-def build_user_input(
-    session_tuple: SessionIdTuple,
-    user_text: str,
-    file_paths: list[Path],
-) -> str | list[str | BinaryContent]:
-    """Build user input for the Pydantic AI agent, including file attachments if provided."""
-    if not file_paths:
-        return user_text
-    attachment_info_lines = []
-    binary_attachments: list[str | BinaryContent] = []
-    for workspace_path in file_paths:
-        # Convert Path to PurePosixPath for container_to_host_path
-        workspace_posix_path = PurePosixPath(workspace_path)
-        host_path = container_to_host_path(workspace_posix_path, ctx=session_tuple)
-        # Handle None return from container_to_host_path
-        if host_path is None:
-            attachment_info = (
-                f"* {workspace_path.name} (path in workspace: {workspace_path}) -- conversion failed: invalid path"
-            )
-            attachment_info_lines.append(attachment_info)
-            continue
-        file_size = host_path.stat().st_size
-        mime_type, _ = mimetypes.guess_type(host_path)
-        mime_type = mime_type or "application/octet-stream"
-        attachment_info = f"* {workspace_path.name} (file_size={file_size} bytes) (path in workspace: {workspace_path})"
-        extraction_result = file_to_binary_content(host_path, mime_type)
-        # Handle extraction failure - exclude from attachments
-        if not extraction_result.success:
-            attachment_info += f" -- extraction failed: {extraction_result.error_message}"
-            attachment_info_lines.append(attachment_info)
-            continue
-        # Handle successful extraction
-        if extraction_result.was_extracted:
-            attachment_info += " -- extracted as text"
-        # Check if content should be included in context
-        if should_be_included_into_context(extraction_result.content) and extraction_result.content is not None:
-            binary_attachments.append(extraction_result.content)
-            attachment_info += f" -- provided to model context at index {len(binary_attachments) - 1}"
-        elif (
-            isinstance(extraction_result.content, str) and extraction_result.content and extraction_result.was_extracted
-        ):
-            # Truncate large extracted text and include with warning (only for extracted documents)
-            truncated_content = truncate_extracted_text(extraction_result.content)
-            binary_attachments.append(truncated_content)
-            attachment_info += f" -- truncated and provided to model context at index {len(binary_attachments) - 1}"
-        elif extraction_result.content is not None:
-            # Content exists but excluded from context (e.g., images too large, non-extracted text)
-            attachment_info += " -- too large for context"
-        attachment_info_lines.append(attachment_info)
-    full_prompt = user_text + "\nAttachments:\n" + "\n".join(attachment_info_lines)
-    return [full_prompt] + binary_attachments