PyPI - ai-pipeline-core - Versions diffs - 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

ai_pipeline_core/__init__.py +83 -119
ai_pipeline_core/deployment/__init__.py +34 -0
ai_pipeline_core/deployment/base.py +861 -0
ai_pipeline_core/deployment/contract.py +80 -0
ai_pipeline_core/deployment/deploy.py +561 -0
ai_pipeline_core/deployment/helpers.py +97 -0
ai_pipeline_core/deployment/progress.py +126 -0
ai_pipeline_core/deployment/remote.py +116 -0
ai_pipeline_core/docs_generator/__init__.py +54 -0
ai_pipeline_core/docs_generator/__main__.py +5 -0
ai_pipeline_core/docs_generator/cli.py +196 -0
ai_pipeline_core/docs_generator/extractor.py +324 -0
ai_pipeline_core/docs_generator/guide_builder.py +644 -0
ai_pipeline_core/docs_generator/trimmer.py +35 -0
ai_pipeline_core/docs_generator/validator.py +114 -0
ai_pipeline_core/document_store/__init__.py +13 -0
ai_pipeline_core/document_store/_summary.py +9 -0
ai_pipeline_core/document_store/_summary_worker.py +170 -0
ai_pipeline_core/document_store/clickhouse.py +492 -0
ai_pipeline_core/document_store/factory.py +38 -0
ai_pipeline_core/document_store/local.py +312 -0
ai_pipeline_core/document_store/memory.py +85 -0
ai_pipeline_core/document_store/protocol.py +68 -0
ai_pipeline_core/documents/__init__.py +14 -15
ai_pipeline_core/documents/_context_vars.py +85 -0
ai_pipeline_core/documents/_hashing.py +52 -0
ai_pipeline_core/documents/attachment.py +85 -0
ai_pipeline_core/documents/context.py +128 -0
ai_pipeline_core/documents/document.py +349 -1062
ai_pipeline_core/documents/mime_type.py +40 -85
ai_pipeline_core/documents/utils.py +62 -7
ai_pipeline_core/exceptions.py +10 -62
ai_pipeline_core/images/__init__.py +309 -0
ai_pipeline_core/images/_processing.py +151 -0
ai_pipeline_core/llm/__init__.py +5 -3
ai_pipeline_core/llm/ai_messages.py +284 -73
ai_pipeline_core/llm/client.py +462 -209
ai_pipeline_core/llm/model_options.py +86 -53
ai_pipeline_core/llm/model_response.py +187 -241
ai_pipeline_core/llm/model_types.py +34 -54
ai_pipeline_core/logging/__init__.py +2 -9
ai_pipeline_core/logging/logging.yml +1 -1
ai_pipeline_core/logging/logging_config.py +27 -43
ai_pipeline_core/logging/logging_mixin.py +17 -51
ai_pipeline_core/observability/__init__.py +32 -0
ai_pipeline_core/observability/_debug/__init__.py +30 -0
ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
ai_pipeline_core/observability/_debug/_config.py +95 -0
ai_pipeline_core/observability/_debug/_content.py +764 -0
ai_pipeline_core/observability/_debug/_processor.py +98 -0
ai_pipeline_core/observability/_debug/_summary.py +312 -0
ai_pipeline_core/observability/_debug/_types.py +75 -0
ai_pipeline_core/observability/_debug/_writer.py +843 -0
ai_pipeline_core/observability/_document_tracking.py +146 -0
ai_pipeline_core/observability/_initialization.py +194 -0
ai_pipeline_core/observability/_logging_bridge.py +57 -0
ai_pipeline_core/observability/_summary.py +81 -0
ai_pipeline_core/observability/_tracking/__init__.py +6 -0
ai_pipeline_core/observability/_tracking/_client.py +178 -0
ai_pipeline_core/observability/_tracking/_internal.py +28 -0
ai_pipeline_core/observability/_tracking/_models.py +138 -0
ai_pipeline_core/observability/_tracking/_processor.py +158 -0
ai_pipeline_core/observability/_tracking/_service.py +311 -0
ai_pipeline_core/observability/_tracking/_writer.py +229 -0
ai_pipeline_core/observability/tracing.py +640 -0
ai_pipeline_core/pipeline/__init__.py +10 -0
ai_pipeline_core/pipeline/decorators.py +915 -0
ai_pipeline_core/pipeline/options.py +16 -0
ai_pipeline_core/prompt_manager.py +26 -105
ai_pipeline_core/settings.py +41 -32
ai_pipeline_core/testing.py +9 -0
ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
{ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
ai_pipeline_core/documents/document_list.py +0 -240
ai_pipeline_core/documents/flow_document.py +0 -128
ai_pipeline_core/documents/task_document.py +0 -133
ai_pipeline_core/documents/temporary_document.py +0 -95
ai_pipeline_core/flow/__init__.py +0 -9
ai_pipeline_core/flow/config.py +0 -314
ai_pipeline_core/flow/options.py +0 -75
ai_pipeline_core/pipeline.py +0 -717
ai_pipeline_core/prefect.py +0 -54
ai_pipeline_core/simple_runner/__init__.py +0 -24
ai_pipeline_core/simple_runner/cli.py +0 -255
ai_pipeline_core/simple_runner/simple_runner.py +0 -385
ai_pipeline_core/tracing.py +0 -475
ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
{ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/documents/mime_type.py CHANGED Viewed

@@ -24,6 +24,8 @@ EXTENSION_MIME_MAP = {
     "gif": "image/gif",
     "bmp": "image/bmp",
     "webp": "image/webp",
+    "heic": "image/heic",
+    "heif": "image/heif",
     "json": "application/json",
     "yaml": "application/yaml",
     "yml": "application/yaml",
@@ -43,7 +45,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     r"""Detect MIME type from document content and filename.
     Uses a multi-stage detection strategy for maximum accuracy:
-    1. Returns 'application/x-empty' for empty content
+    1. Returns 'text/plain' for empty content
     2. Uses extension-based detection for known formats (most reliable)
     3. Falls back to python-magic content analysis
     4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +59,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
         Never returns None or empty string.
     Fallback behavior:
-        - Empty content: 'application/x-empty'
+        - Empty content: 'text/plain'
         - Unknown extension with binary content: 'application/octet-stream'
         - Magic library failure: Falls back to extension or 'application/octet-stream'
@@ -65,23 +67,12 @@ def detect_mime_type(content: bytes, name: str) -> str:
         Only the first 1024 bytes are analyzed for content detection.
         Extension-based detection is O(1) lookup.
-    Note:
-        Extension-based detection is preferred for text formats as
-        content analysis can sometimes misidentify structured text.
-    Example:
-        >>> detect_mime_type(b'{"key": "value"}', "data.json")
-        'application/json'
-        >>> detect_mime_type(b'Hello World', "text.txt")
-        'text/plain'
-        >>> detect_mime_type(b'', "empty.txt")
-        'application/x-empty'
-        >>> detect_mime_type(b'\\x89PNG', "image.xyz")
-        'image/png'  # Magic detects PNG despite wrong extension
+    Extension-based detection is preferred for text formats as
+    content analysis can sometimes misidentify structured text.
     """
     # Check for empty content
     if len(content) == 0:
-        return "application/x-empty"
+        return "text/plain"
     # Try extension-based detection first for known formats
     # This is more reliable for text formats that magic might misidentify
@@ -97,40 +88,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
             return mime
     except (AttributeError, OSError, magic.MagicException) as e:
         logger.warning(f"MIME detection failed for {name}: {e}")
-    except Exception as e:
-        logger.error(f"Unexpected error in MIME detection for {name}: {e}")
+    except Exception:
+        logger.exception(f"Unexpected error in MIME detection for {name}")
     # Final fallback based on extension or default
     return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
-def mime_type_from_extension(name: str) -> str:
-    """Get MIME type based solely on file extension.
-    Simple extension-based MIME type detection without content analysis.
-    This is a legacy function maintained for backward compatibility.
-    Args:
-        name: Filename with extension.
-    Returns:
-        MIME type based on extension, or 'application/octet-stream'
-        if extension is unknown.
-    Note:
-        Prefer detect_mime_type() for more accurate detection.
-        This function only checks the file extension.
-    Example:
-        >>> mime_type_from_extension("document.pdf")
-        'application/pdf'
-        >>> mime_type_from_extension("unknown.xyz")
-        'application/octet-stream'
-    """
-    ext = name.lower().split(".")[-1] if "." in name else ""
-    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
 def is_text_mime_type(mime_type: str) -> bool:
     """Check if MIME type represents text-based content.
@@ -151,13 +115,6 @@ def is_text_mime_type(mime_type: str) -> bool:
         - application/yaml
         - application/x-yaml
-    Example:
-        >>> is_text_mime_type('text/plain')
-        True
-        >>> is_text_mime_type('application/json')
-        True
-        >>> is_text_mime_type('image/png')
-        False
     """
     text_types = [
         "text/",
@@ -179,15 +136,8 @@ def is_json_mime_type(mime_type: str) -> bool:
     Returns:
         True if MIME type is 'application/json', False otherwise.
-    Note:
-        Only matches exact 'application/json', not variants like
-        'application/ld+json' or 'application/vnd.api+json'.
-    Example:
-        >>> is_json_mime_type('application/json')
-        True
-        >>> is_json_mime_type('text/json')  # Not standard JSON MIME
-        False
+    Only matches exact 'application/json', not variants like
+    'application/ld+json' or 'application/vnd.api+json'.
     """
     return mime_type == "application/json"
@@ -207,13 +157,8 @@ def is_yaml_mime_type(mime_type: str) -> bool:
         - application/yaml (standard)
         - application/x-yaml (legacy)
-    Example:
-        >>> is_yaml_mime_type('application/yaml')
-        True
-        >>> is_yaml_mime_type('application/x-yaml')
-        True
     """
-    return mime_type == "application/yaml" or mime_type == "application/x-yaml"
+    return mime_type in {"application/yaml", "application/x-yaml"}
 def is_pdf_mime_type(mime_type: str) -> bool:
@@ -225,15 +170,8 @@ def is_pdf_mime_type(mime_type: str) -> bool:
     Returns:
         True if MIME type is 'application/pdf', False otherwise.
-    Note:
-        PDF documents require special handling in the LLM module
-        and are supported by certain vision-capable models.
-    Example:
-        >>> is_pdf_mime_type('application/pdf')
-        True
-        >>> is_pdf_mime_type('text/plain')
-        False
+    PDF documents require special handling in the LLM module
+    and are supported by certain vision-capable models.
     """
     return mime_type == "application/pdf"
@@ -255,14 +193,31 @@ def is_image_mime_type(mime_type: str) -> bool:
         - image/webp
         - image/svg+xml
-    Note:
-        Image documents are automatically encoded for vision-capable
-        LLM models in the AIMessages.document_to_prompt() method.
-    Example:
-        >>> is_image_mime_type('image/png')
-        True
-        >>> is_image_mime_type('application/pdf')
-        False
+    Image documents are automatically encoded for vision-capable
+    LLM models in the AIMessages.document_to_prompt() method.
     """
     return mime_type.startswith("image/")
+LLM_SUPPORTED_IMAGE_MIME_TYPES: frozenset[str] = frozenset({
+    "image/png",
+    "image/jpeg",
+    "image/webp",
+    "image/heic",
+    "image/heif",
+})
+def is_llm_supported_image(mime_type: str) -> bool:
+    """Check if MIME type is an image format directly supported by LLMs.
+    Unsupported image formats (gif, bmp, tiff, svg, etc.) need conversion
+    to PNG before sending to the LLM.
+    Args:
+        mime_type: MIME type string to check.
+    Returns:
+        True if the image format is natively supported by LLMs.
+    """
+    return mime_type in LLM_SUPPORTED_IMAGE_MIME_TYPES

ai_pipeline_core/documents/utils.py CHANGED Viewed

@@ -1,19 +1,18 @@
 """Utility functions for document handling.
 Provides helper functions for URL sanitization, naming conventions,
-and canonical key generation used throughout the document system.
+canonical key generation, and hash validation used throughout the document system.
 """
 import re
-from typing import Any, Iterable, Type
+from collections.abc import Iterable
+from typing import Any
 from urllib.parse import urlparse
 def sanitize_url(url: str) -> str:
     """Sanitize URL or query string for use in filenames.
-    @public
     Removes or replaces characters that are invalid in filenames.
     Args:
@@ -63,15 +62,13 @@ def camel_to_snake(name: str) -> str:
 def canonical_name_key(
-    obj_or_name: Type[Any] | str,
+    obj_or_name: type[Any] | str,
     *,
     max_parent_suffixes: int = 3,
     extra_suffixes: Iterable[str] = (),
 ) -> str:
     """Produce a canonical snake_case key from a class or name.
-    @public
     Process:
       1) Starting with the class name (or given string),
       2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
@@ -115,3 +112,61 @@ def canonical_name_key(
                 break
     return camel_to_snake(name)
+def is_document_sha256(value: str) -> bool:
+    """Check if a string is a valid base32-encoded SHA256 hash with proper entropy.
+    This function validates that a string is not just formatted like a SHA256 hash,
+    but actually has the entropy characteristics of a real hash. It checks:
+    1. Correct length (52 characters without padding)
+    2. Valid base32 characters (A-Z, 2-7)
+    3. Sufficient entropy (at least 8 unique characters)
+    The entropy check prevents false positives like 'AAAAAAA...AAA' from being
+    identified as valid document hashes.
+    Args:
+        value: String to check if it's a document SHA256 hash.
+    Returns:
+        True if the string appears to be a real base32-encoded SHA256 hash,
+        False otherwise.
+    Examples:
+        >>> # Real SHA256 hash
+        >>> is_document_sha256("P3AEMA2PSYILKFYVBUALJLMIYWVZIS2QDI3S5VTMD2X7SOODF2YQ")
+        True
+        >>> # Too uniform - lacks entropy
+        >>> is_document_sha256("A" * 52)
+        False
+        >>> # Wrong length
+        >>> is_document_sha256("ABC123")
+        False
+        >>> # Invalid characters
+        >>> is_document_sha256("a" * 52)  # lowercase
+        False
+    """
+    # Check basic format: exactly 52 uppercase base32 characters
+    try:
+        if not value or len(value) != 52:
+            return False
+    except (TypeError, AttributeError):
+        return False
+    # Check if all characters are valid base32 (A-Z, 2-7)
+    try:
+        if not re.match(r"^[A-Z2-7]{52}$", value):
+            return False
+    except TypeError:
+        # re.match raises TypeError for non-string types like bytes
+        return False
+    # Check entropy: real SHA256 hashes have high entropy
+    # Require at least 8 unique characters (out of 32 possible in base32)
+    # This prevents patterns like "AAAAAAA..." from being identified as real hashes
+    unique_chars = len(set(value))
+    return unique_chars >= 8

ai_pipeline_core/exceptions.py CHANGED Viewed

@@ -1,97 +1,45 @@
 """Exception hierarchy for AI Pipeline Core.
-@public
 This module defines the exception hierarchy used throughout the AI Pipeline Core library.
 All exceptions inherit from PipelineCoreError, providing a consistent error handling interface.
 """
 class PipelineCoreError(Exception):
-    """Base exception for all AI Pipeline Core errors.
-    @public
-    """
-    pass
+    """Base exception for all AI Pipeline Core errors."""
 class DocumentError(PipelineCoreError):
-    """Base exception for document-related errors.
-    @public
-    """
-    pass
+    """Base exception for document-related errors."""
 class DocumentValidationError(DocumentError):
-    """Raised when document validation fails.
-    @public
-    """
-    pass
+    """Raised when document validation fails."""
 class DocumentSizeError(DocumentValidationError):
-    """Raised when document content exceeds MAX_CONTENT_SIZE limit.
-    @public
-    """
-    pass
+    """Raised when document content exceeds MAX_CONTENT_SIZE limit."""
 class DocumentNameError(DocumentValidationError):
-    """Raised when document name contains invalid characters or patterns.
-    @public
-    """
-    pass
+    """Raised when document name contains invalid characters or patterns."""
 class LLMError(PipelineCoreError):
-    """Raised when LLM generation fails after all retries.
-    @public
-    """
-    pass
+    """Raised when LLM generation fails after all retries."""
 class PromptError(PipelineCoreError):
-    """Base exception for prompt template errors.
-    @public
-    """
-    pass
+    """Base exception for prompt template errors."""
 class PromptRenderError(PromptError):
-    """Raised when Jinja2 template rendering fails.
-    @public
-    """
-    pass
+    """Raised when Jinja2 template rendering fails."""
 class PromptNotFoundError(PromptError):
-    """Raised when prompt template file is not found in search paths.
-    @public
-    """
-    pass
+    """Raised when prompt template file is not found in search paths."""
 class MimeTypeError(DocumentError):
-    """Raised when MIME type detection or validation fails.
-    @public
-    """
-    pass
+    """Raised when MIME type detection or validation fails."""

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl