ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,30 +1,46 @@
1
1
  """AI message handling for LLM interactions.
2
2
 
3
- @public
4
-
5
3
  Provides AIMessages container for managing conversations with mixed content types
6
4
  including text, documents, and model responses.
7
5
  """
8
6
 
9
7
  import base64
10
8
  import hashlib
9
+ import io
11
10
  import json
11
+ from collections.abc import Callable, Iterable
12
+ from copy import deepcopy
13
+ from typing import Any, SupportsIndex
12
14
 
13
15
  from openai.types.chat import (
14
16
  ChatCompletionContentPartParam,
15
17
  ChatCompletionMessageParam,
16
18
  )
17
- from prefect.logging import get_logger
19
+ from PIL import Image
18
20
 
19
21
  from ai_pipeline_core.documents import Document
22
+ from ai_pipeline_core.documents.document import get_tiktoken_encoding
23
+ from ai_pipeline_core.documents.mime_type import is_llm_supported_image
24
+ from ai_pipeline_core.logging import get_pipeline_logger
20
25
 
21
26
  from .model_response import ModelResponse
22
27
 
28
+ logger = get_pipeline_logger(__name__)
29
+
30
+
31
def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes, str]:
    """Convert unsupported image formats to PNG for LLM consumption.

    Args:
        content: Raw image bytes.
        mime_type: Declared MIME type of ``content``.

    Returns:
        ``(content, mime_type)`` unchanged when the format is already
        LLM-supported, otherwise the image re-encoded as PNG together
        with ``"image/png"``.

    Raises:
        PIL.UnidentifiedImageError: If ``content`` cannot be decoded as an image.
    """
    if is_llm_supported_image(mime_type):
        return content, mime_type
    img = Image.open(io.BytesIO(content))
    # PNG cannot encode every PIL mode (e.g. CMYK from a TIFF); normalize
    # exotic modes to RGBA so the save below cannot raise on mode alone.
    if img.mode not in ("1", "L", "LA", "I", "P", "RGB", "RGBA"):
        img = img.convert("RGBA")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return buf.getvalue(), "image/png"
39
+
40
+
23
41
  AIMessageType = str | Document | ModelResponse
24
42
  """Type for messages in AIMessages container.
25
43
 
26
- @public
27
-
28
44
  Represents the allowed types for conversation messages:
29
45
  - str: Plain text messages
30
46
  - Document: Structured document content
@@ -32,11 +48,9 @@ Represents the allowed types for conversation messages:
32
48
  """
33
49
 
34
50
 
35
- class AIMessages(list[AIMessageType]):
51
+ class AIMessages(list[AIMessageType]): # noqa: PLR0904
36
52
  """Container for AI conversation messages supporting mixed types.
37
53
 
38
- @public
39
-
40
54
  This class extends list to manage conversation messages between user
41
55
  and AI, supporting text, Document objects, and ModelResponse instances.
42
56
  Messages are converted to OpenAI-compatible format for LLM interactions.
@@ -44,28 +58,151 @@ class AIMessages(list[AIMessageType]):
44
58
  Conversion Rules:
45
59
  - str: Becomes {"role": "user", "content": text}
46
60
  - Document: Becomes {"role": "user", "content": document_content}
47
- (automatically handles text, images, PDFs based on MIME type)
61
+ (automatically handles text, images, PDFs based on MIME type; attachments
62
+ are rendered as <attachment> XML blocks)
48
63
  - ModelResponse: Becomes {"role": "assistant", "content": response.content}
49
64
 
50
65
  Note: Document conversion is automatic. Text content becomes user text messages.
51
- Images are sent to vision-capable models (non-vision models will raise ValueError).
52
- PDFs are attached when supported by the model, otherwise a text extraction
53
- fallback is used. LiteLLM proxy handles the specific encoding requirements
54
- for each provider.
66
+
67
+ VISION/PDF MODEL COMPATIBILITY WARNING:
68
+ Images require vision-capable models (e.g., gpt-5.1, gemini-3-flash, gemini-3-pro).
69
+ Non-vision models will raise ValueError when encountering image documents.
70
+ PDFs require models with document processing support - check your model's capabilities
71
+ before including PDF documents in messages. Unsupported models may fall back to
72
+ text extraction or raise errors depending on provider configuration.
73
+ LiteLLM proxy handles the specific encoding requirements for each provider.
55
74
 
56
75
  IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
57
76
  expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
58
77
  of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).
59
78
 
60
- Example:
61
- >>> from ai_pipeline_core import llm
62
- >>> messages = AIMessages()
63
- >>> messages.append("What is the capital of France?")
64
- >>> response = await llm.generate("gpt-5", messages=messages)
65
- >>> messages.append(response) # Add the actual response
66
- >>> prompt = messages.get_last_message_as_str() # Get the last message as a string
79
+ CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
80
+ `AIMessages(["text"])`) or empty constructor with append (e.g.,
81
+ `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
82
+ constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
83
+ accidental character iteration.
84
+
67
85
  """
68
86
 
87
+ def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
88
+ """Initialize AIMessages with optional iterable.
89
+
90
+ Args:
91
+ iterable: Optional iterable of messages (list, tuple, etc.).
92
+ Must not be a string.
93
+ frozen: If True, list is immutable from creation.
94
+
95
+ Raises:
96
+ TypeError: If a string is passed directly to the constructor.
97
+ """
98
+ if isinstance(iterable, str):
99
+ raise TypeError(
100
+ "AIMessages cannot be constructed from a string directly. "
101
+ "Use AIMessages(['text']) for a single message or "
102
+ "AIMessages() and then append('text')."
103
+ )
104
+ self._frozen = False # Initialize as unfrozen to allow initial population
105
+ if iterable is None:
106
+ super().__init__()
107
+ else:
108
+ super().__init__(iterable)
109
+ self._frozen = frozen # Set frozen state after initial population
110
+
111
+ def freeze(self) -> None:
112
+ """Permanently freeze the list, preventing modifications.
113
+
114
+ Once frozen, the list cannot be unfrozen.
115
+ """
116
+ self._frozen = True
117
+
118
+ def copy(self) -> "AIMessages":
119
+ """Create an unfrozen deep copy of the list.
120
+
121
+ Returns:
122
+ New unfrozen AIMessages with deep-copied messages.
123
+ """
124
+ copied_messages = deepcopy(list(self))
125
+ return AIMessages(copied_messages, frozen=False)
126
+
127
+ def _check_frozen(self) -> None:
128
+ """Check if list is frozen and raise if it is.
129
+
130
+ Raises:
131
+ RuntimeError: If the list is frozen.
132
+ """
133
+ if self._frozen:
134
+ raise RuntimeError("Cannot modify frozen AIMessages")
135
+
136
+ def append(self, message: AIMessageType) -> None:
137
+ """Add a message to the end of the list."""
138
+ self._check_frozen()
139
+ super().append(message)
140
+
141
+ def extend(self, messages: Iterable[AIMessageType]) -> None:
142
+ """Add multiple messages to the list."""
143
+ self._check_frozen()
144
+ super().extend(messages)
145
+
146
+ def insert(self, index: SupportsIndex, message: AIMessageType) -> None:
147
+ """Insert a message at the specified position."""
148
+ self._check_frozen()
149
+ super().insert(index, message)
150
+
151
+ def __setitem__(
152
+ self,
153
+ index: SupportsIndex | slice,
154
+ value: AIMessageType | Iterable[AIMessageType],
155
+ ) -> None:
156
+ """Set item or slice."""
157
+ self._check_frozen()
158
+ super().__setitem__(index, value) # type: ignore[arg-type]
159
+
160
+ def __iadd__(self, other: Iterable[AIMessageType]) -> "AIMessages":
161
+ """In-place addition (+=).
162
+
163
+ Returns:
164
+ This AIMessages instance after modification.
165
+ """
166
+ self._check_frozen()
167
+ return super().__iadd__(other)
168
+
169
+ def __delitem__(self, index: SupportsIndex | slice) -> None:
170
+ """Delete item or slice from list."""
171
+ self._check_frozen()
172
+ super().__delitem__(index)
173
+
174
+ def pop(self, index: SupportsIndex = -1) -> AIMessageType:
175
+ """Remove and return item at index.
176
+
177
+ Returns:
178
+ AIMessageType removed from the list.
179
+ """
180
+ self._check_frozen()
181
+ return super().pop(index)
182
+
183
+ def remove(self, message: AIMessageType) -> None:
184
+ """Remove first occurrence of message."""
185
+ self._check_frozen()
186
+ super().remove(message)
187
+
188
+ def clear(self) -> None:
189
+ """Remove all items from list."""
190
+ self._check_frozen()
191
+ super().clear()
192
+
193
+ def reverse(self) -> None:
194
+ """Reverse list in place."""
195
+ self._check_frozen()
196
+ super().reverse()
197
+
198
+ def sort(self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False) -> None:
199
+ """Sort list in place."""
200
+ self._check_frozen()
201
+ if key is None:
202
+ super().sort(reverse=reverse) # type: ignore[call-arg]
203
+ else:
204
+ super().sort(key=key, reverse=reverse)
205
+
69
206
  def get_last_message(self) -> AIMessageType:
70
207
  """Get the last message in the conversation.
71
208
 
@@ -78,8 +215,6 @@ class AIMessages(list[AIMessageType]):
78
215
  def get_last_message_as_str(self) -> str:
79
216
  """Get the last message as a string, raising if not a string.
80
217
 
81
- @public
82
-
83
218
  Returns:
84
219
  The last message as a string.
85
220
 
@@ -107,6 +242,8 @@ class AIMessages(list[AIMessageType]):
107
242
 
108
243
  Transforms the message list into the format expected by OpenAI API.
109
244
  Each message type is converted according to its role and content.
245
+ Documents are rendered as XML with any attachments included as nested
246
+ <attachment> blocks.
110
247
 
111
248
  Returns:
112
249
  List of ChatCompletionMessageParam dicts (from openai.types.chat)
@@ -116,26 +253,40 @@ class AIMessages(list[AIMessageType]):
116
253
  Raises:
117
254
  ValueError: If message type is not supported.
118
255
 
119
- Example:
120
- >>> messages = AIMessages(["Hello", response, "Follow up"])
121
- >>> prompt = messages.to_prompt()
122
- >>> # Result: [
123
- >>> # {"role": "user", "content": "Hello"},
124
- >>> # {"role": "assistant", "content": "..."},
125
- >>> # {"role": "user", "content": "Follow up"}
126
- >>> # ]
127
256
  """
128
257
  messages: list[ChatCompletionMessageParam] = []
129
258
 
130
259
  for message in self:
131
260
  if isinstance(message, str):
132
- messages.append({"role": "user", "content": message})
261
+ messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
133
262
  elif isinstance(message, Document):
134
263
  messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
135
264
  elif isinstance(message, ModelResponse): # type: ignore
136
- messages.append({"role": "assistant", "content": message.content})
265
+ # Build base assistant message
266
+ assistant_message: ChatCompletionMessageParam = {
267
+ "role": "assistant",
268
+ "content": [{"type": "text", "text": message.content}],
269
+ }
270
+
271
+ # Preserve reasoning_content (Gemini Flash 3+, O1, O3, GPT-5)
272
+ if reasoning_content := message.reasoning_content:
273
+ assistant_message["reasoning_content"] = reasoning_content # type: ignore[typeddict-item]
274
+
275
+ # Preserve thinking_blocks (structured thinking)
276
+ if hasattr(message.choices[0].message, "thinking_blocks"):
277
+ thinking_blocks = getattr(message.choices[0].message, "thinking_blocks", None)
278
+ if thinking_blocks:
279
+ assistant_message["thinking_blocks"] = thinking_blocks # type: ignore[typeddict-item]
280
+
281
+ # Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
282
+ if hasattr(message.choices[0].message, "provider_specific_fields"):
283
+ provider_fields = getattr(message.choices[0].message, "provider_specific_fields", None)
284
+ if provider_fields:
285
+ assistant_message["provider_specific_fields"] = provider_fields # type: ignore[typeddict-item]
286
+
287
+ messages.append(assistant_message)
137
288
  else:
138
- raise ValueError(f"Unsupported message type: {type(message)}")
289
+ raise TypeError(f"Unsupported message type: {type(message)}")
139
290
 
140
291
  return messages
141
292
 
@@ -149,8 +300,8 @@ class AIMessages(list[AIMessageType]):
149
300
  for message in self:
150
301
  if isinstance(message, Document):
151
302
  serialized_document = message.serialize_model()
152
- del serialized_document["content"]
153
- messages.append(json.dumps(serialized_document, indent=2))
303
+ filtered_doc = {k: v for k, v in serialized_document.items() if k != "content"}
304
+ messages.append(json.dumps(filtered_doc, indent=2))
154
305
  elif isinstance(message, ModelResponse):
155
306
  messages.append(message.content)
156
307
  else:
@@ -171,10 +322,40 @@ class AIMessages(list[AIMessageType]):
171
322
  system_prompt = ""
172
323
  return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()
173
324
 
325
+ @property
326
+ def approximate_tokens_count(self) -> int:
327
+ """Approximate tokens count for the messages.
328
+
329
+ Uses tiktoken with gpt-4 encoding to estimate total token count
330
+ across all messages in the conversation.
331
+
332
+ Returns:
333
+ Approximate tokens count for all messages.
334
+
335
+ Raises:
336
+ ValueError: If message contains unsupported type.
337
+
338
+ """
339
+ count = 0
340
+ enc = get_tiktoken_encoding()
341
+ for message in self:
342
+ if isinstance(message, str):
343
+ count += len(enc.encode(message))
344
+ elif isinstance(message, Document):
345
+ count += message.approximate_tokens_count
346
+ elif isinstance(message, ModelResponse): # type: ignore
347
+ count += len(enc.encode(message.content))
348
+ else:
349
+ raise TypeError(f"Unsupported message type: {type(message)}")
350
+ return count
351
+
174
352
  @staticmethod
175
- def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
353
+ def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
176
354
  """Convert a document to prompt format for LLM consumption.
177
355
 
356
+ Renders the document as XML with text/image/PDF content, followed by any
357
+ attachments as separate <attachment> XML blocks with name and description attributes.
358
+
178
359
  Args:
179
360
  document: The document to convert.
180
361
 
@@ -184,50 +365,80 @@ class AIMessages(list[AIMessageType]):
184
365
  prompt: list[ChatCompletionContentPartParam] = []
185
366
 
186
367
  # Build the text header
187
- description = (
188
- f"<description>{document.description}</description>\n" if document.description else ""
189
- )
190
- header_text = (
191
- f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
192
- )
368
+ description = f"<description>{document.description}</description>\n" if document.description else ""
369
+ header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
193
370
 
194
371
  # Handle text documents
195
372
  if document.is_text:
196
373
  text_content = document.content.decode("utf-8")
197
- content_text = f"{header_text}<content>\n{text_content}\n</content>\n</document>\n"
374
+ content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
198
375
  prompt.append({"type": "text", "text": content_text})
199
- return prompt
200
376
 
201
- # Handle non-text documents
202
- if not document.is_image and not document.is_pdf:
203
- get_logger(__name__).error(
204
- f"Document is not a text, image or PDF: {document.name} - {document.mime_type}"
205
- )
377
+ # Handle binary documents (image/PDF)
378
+ elif document.is_image or document.is_pdf:
379
+ prompt.append({"type": "text", "text": f"{header_text}<content>\n"})
380
+
381
+ if document.is_image:
382
+ content_bytes, mime_type = _ensure_llm_compatible_image(document.content, document.mime_type)
383
+ else:
384
+ content_bytes, mime_type = document.content, document.mime_type
385
+ base64_content = base64.b64encode(content_bytes).decode("utf-8")
386
+ data_uri = f"data:{mime_type};base64,{base64_content}"
387
+
388
+ if document.is_pdf:
389
+ prompt.append({
390
+ "type": "file",
391
+ "file": {"file_data": data_uri},
392
+ })
393
+ else:
394
+ prompt.append({
395
+ "type": "image_url",
396
+ "image_url": {"url": data_uri, "detail": "high"},
397
+ })
398
+
399
+ prompt.append({"type": "text", "text": "</content>\n"})
400
+
401
+ else:
402
+ logger.error(f"Document is not a text, image or PDF: {document.name} - {document.mime_type}")
206
403
  return []
207
404
 
208
- # Add header for binary content
209
- prompt.append({
210
- "type": "text",
211
- "text": f"{header_text}<content>\n",
212
- })
213
-
214
- # Encode binary content
215
- base64_content = base64.b64encode(document.content).decode("utf-8")
216
- data_uri = f"data:{document.mime_type};base64,{base64_content}"
217
-
218
- # Add appropriate content type
219
- if document.is_pdf:
220
- prompt.append({
221
- "type": "file",
222
- "file": {"file_data": data_uri},
223
- })
224
- else: # is_image
225
- prompt.append({
226
- "type": "image_url",
227
- "image_url": {"url": data_uri, "detail": "high"},
228
- })
229
-
230
- # Close the document tag
231
- prompt.append({"type": "text", "text": "</content>\n</document>\n"})
405
+ # Render attachments
406
+ for att in document.attachments:
407
+ desc_attr = f' description="{att.description}"' if att.description else ""
408
+ att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
409
+
410
+ if att.is_text:
411
+ prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
412
+ elif att.is_image or att.is_pdf:
413
+ prompt.append({"type": "text", "text": att_open})
414
+
415
+ if att.is_image:
416
+ att_bytes, att_mime = _ensure_llm_compatible_image(att.content, att.mime_type)
417
+ else:
418
+ att_bytes, att_mime = att.content, att.mime_type
419
+ att_b64 = base64.b64encode(att_bytes).decode("utf-8")
420
+ att_uri = f"data:{att_mime};base64,{att_b64}"
421
+
422
+ if att.is_pdf:
423
+ prompt.append({
424
+ "type": "file",
425
+ "file": {"file_data": att_uri},
426
+ })
427
+ else:
428
+ prompt.append({
429
+ "type": "image_url",
430
+ "image_url": {"url": att_uri, "detail": "high"},
431
+ })
432
+
433
+ prompt.append({"type": "text", "text": "</attachment>\n"})
434
+ else:
435
+ logger.warning(f"Skipping unsupported attachment type: {att.name} - {att.mime_type}")
436
+
437
+ # Close document — merge into last text part to preserve JSON structure (and cache key)
438
+ last = prompt[-1]
439
+ if last["type"] == "text":
440
+ prompt[-1] = {"type": "text", "text": last["text"] + "</document>\n"}
441
+ else:
442
+ prompt.append({"type": "text", "text": "</document>\n"})
232
443
 
233
444
  return prompt