PyPI - ai-pipeline-core - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

ai_pipeline_core/__init__.py +64 -158
ai_pipeline_core/deployment/__init__.py +6 -18
ai_pipeline_core/deployment/base.py +392 -212
ai_pipeline_core/deployment/contract.py +6 -10
ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
ai_pipeline_core/deployment/helpers.py +16 -17
ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
ai_pipeline_core/docs_generator/__init__.py +54 -0
ai_pipeline_core/docs_generator/__main__.py +5 -0
ai_pipeline_core/docs_generator/cli.py +196 -0
ai_pipeline_core/docs_generator/extractor.py +324 -0
ai_pipeline_core/docs_generator/guide_builder.py +644 -0
ai_pipeline_core/docs_generator/trimmer.py +35 -0
ai_pipeline_core/docs_generator/validator.py +114 -0
ai_pipeline_core/document_store/__init__.py +13 -0
ai_pipeline_core/document_store/_summary.py +9 -0
ai_pipeline_core/document_store/_summary_worker.py +170 -0
ai_pipeline_core/document_store/clickhouse.py +492 -0
ai_pipeline_core/document_store/factory.py +38 -0
ai_pipeline_core/document_store/local.py +312 -0
ai_pipeline_core/document_store/memory.py +85 -0
ai_pipeline_core/document_store/protocol.py +68 -0
ai_pipeline_core/documents/__init__.py +12 -14
ai_pipeline_core/documents/_context_vars.py +85 -0
ai_pipeline_core/documents/_hashing.py +52 -0
ai_pipeline_core/documents/attachment.py +85 -0
ai_pipeline_core/documents/context.py +128 -0
ai_pipeline_core/documents/document.py +318 -1434
ai_pipeline_core/documents/mime_type.py +11 -84
ai_pipeline_core/documents/utils.py +4 -12
ai_pipeline_core/exceptions.py +10 -62
ai_pipeline_core/images/__init__.py +32 -85
ai_pipeline_core/images/_processing.py +5 -11
ai_pipeline_core/llm/__init__.py +6 -4
ai_pipeline_core/llm/ai_messages.py +102 -90
ai_pipeline_core/llm/client.py +229 -183
ai_pipeline_core/llm/model_options.py +12 -84
ai_pipeline_core/llm/model_response.py +53 -99
ai_pipeline_core/llm/model_types.py +8 -23
ai_pipeline_core/logging/__init__.py +2 -7
ai_pipeline_core/logging/logging.yml +1 -1
ai_pipeline_core/logging/logging_config.py +27 -37
ai_pipeline_core/logging/logging_mixin.py +15 -41
ai_pipeline_core/observability/__init__.py +32 -0
ai_pipeline_core/observability/_debug/__init__.py +30 -0
ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
ai_pipeline_core/observability/_debug/_types.py +75 -0
ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
ai_pipeline_core/observability/_document_tracking.py +146 -0
ai_pipeline_core/observability/_initialization.py +194 -0
ai_pipeline_core/observability/_logging_bridge.py +57 -0
ai_pipeline_core/observability/_summary.py +81 -0
ai_pipeline_core/observability/_tracking/__init__.py +6 -0
ai_pipeline_core/observability/_tracking/_client.py +178 -0
ai_pipeline_core/observability/_tracking/_internal.py +28 -0
ai_pipeline_core/observability/_tracking/_models.py +138 -0
ai_pipeline_core/observability/_tracking/_processor.py +158 -0
ai_pipeline_core/observability/_tracking/_service.py +311 -0
ai_pipeline_core/observability/_tracking/_writer.py +229 -0
ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
ai_pipeline_core/pipeline/__init__.py +10 -0
ai_pipeline_core/pipeline/decorators.py +915 -0
ai_pipeline_core/pipeline/options.py +16 -0
ai_pipeline_core/prompt_manager.py +16 -102
ai_pipeline_core/settings.py +26 -31
ai_pipeline_core/testing.py +9 -0
ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
ai_pipeline_core/debug/__init__.py +0 -26
ai_pipeline_core/documents/document_list.py +0 -420
ai_pipeline_core/documents/flow_document.py +0 -112
ai_pipeline_core/documents/task_document.py +0 -117
ai_pipeline_core/documents/temporary_document.py +0 -74
ai_pipeline_core/flow/__init__.py +0 -9
ai_pipeline_core/flow/config.py +0 -494
ai_pipeline_core/flow/options.py +0 -75
ai_pipeline_core/pipeline.py +0 -718
ai_pipeline_core/prefect.py +0 -63
ai_pipeline_core/prompt_builder/__init__.py +0 -5
ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
ai_pipeline_core/prompt_builder/global_cache.py +0 -78
ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
ai_pipeline_core/storage/__init__.py +0 -8
ai_pipeline_core/storage/storage.py +0 -628
ai_pipeline_core/utils/__init__.py +0 -8
ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
{ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
{ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/llm/ai_messages.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """AI message handling for LLM interactions.
-@public
 Provides AIMessages container for managing conversations with mixed content types
 including text, documents, and model responses.
 """
@@ -10,27 +8,39 @@ import base64
 import hashlib
 import io
 import json
+from collections.abc import Callable, Iterable
 from copy import deepcopy
-from typing import Any, Callable, Iterable, SupportsIndex, Union
+from typing import Any, SupportsIndex
-import tiktoken
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
 )
 from PIL import Image
-from prefect.logging import get_logger
 from ai_pipeline_core.documents import Document
+from ai_pipeline_core.documents.document import get_tiktoken_encoding
 from ai_pipeline_core.documents.mime_type import is_llm_supported_image
+from ai_pipeline_core.logging import get_pipeline_logger
 from .model_response import ModelResponse
+logger = get_pipeline_logger(__name__)
+def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes, str]:
+    """Convert unsupported image formats to PNG for LLM consumption."""
+    if is_llm_supported_image(mime_type):
+        return content, mime_type
+    img = Image.open(io.BytesIO(content))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue(), "image/png"
 AIMessageType = str | Document | ModelResponse
 """Type for messages in AIMessages container.
-@public
 Represents the allowed types for conversation messages:
 - str: Plain text messages
 - Document: Structured document content
@@ -38,11 +48,9 @@ Represents the allowed types for conversation messages:
 """
-class AIMessages(list[AIMessageType]):
+class AIMessages(list[AIMessageType]):  # noqa: PLR0904
     """Container for AI conversation messages supporting mixed types.
-    @public
     This class extends list to manage conversation messages between user
     and AI, supporting text, Document objects, and ModelResponse instances.
     Messages are converted to OpenAI-compatible format for LLM interactions.
@@ -50,7 +58,8 @@ class AIMessages(list[AIMessageType]):
     Conversion Rules:
         - str: Becomes {"role": "user", "content": text}
         - Document: Becomes {"role": "user", "content": document_content}
-          (automatically handles text, images, PDFs based on MIME type)
+          (automatically handles text, images, PDFs based on MIME type; attachments
+          are rendered as <attachment> XML blocks)
         - ModelResponse: Becomes {"role": "assistant", "content": response.content}
     Note: Document conversion is automatic. Text content becomes user text messages.
@@ -73,12 +82,6 @@ class AIMessages(list[AIMessageType]):
     constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
     accidental character iteration.
-    Example:
-        >>> from ai_pipeline_core import llm
-        >>> messages = AIMessages()
-        >>> messages.append("What is the capital of France?")
-        >>> response = await llm.generate("gpt-5.1", messages=messages)
-        >>> messages.append(response)  # Add the actual response
     """
     def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
@@ -147,8 +150,8 @@ class AIMessages(list[AIMessageType]):
     def __setitem__(
         self,
-        index: Union[SupportsIndex, slice],
-        value: Union[AIMessageType, Iterable[AIMessageType]],
+        index: SupportsIndex | slice,
+        value: AIMessageType | Iterable[AIMessageType],
     ) -> None:
         """Set item or slice."""
         self._check_frozen()
@@ -163,7 +166,7 @@ class AIMessages(list[AIMessageType]):
         self._check_frozen()
         return super().__iadd__(other)
-    def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
+    def __delitem__(self, index: SupportsIndex | slice) -> None:
         """Delete item or slice from list."""
         self._check_frozen()
         super().__delitem__(index)
@@ -192,9 +195,7 @@ class AIMessages(list[AIMessageType]):
         self._check_frozen()
         super().reverse()
-    def sort(
-        self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
-    ) -> None:
+    def sort(self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False) -> None:
         """Sort list in place."""
         self._check_frozen()
         if key is None:
@@ -241,6 +242,8 @@ class AIMessages(list[AIMessageType]):
         Transforms the message list into the format expected by OpenAI API.
         Each message type is converted according to its role and content.
+        Documents are rendered as XML with any attachments included as nested
+        <attachment> blocks.
         Returns:
             List of ChatCompletionMessageParam dicts (from openai.types.chat)
@@ -250,14 +253,6 @@ class AIMessages(list[AIMessageType]):
         Raises:
             ValueError: If message type is not supported.
-        Example:
-            >>> messages = AIMessages(["Hello", response, "Follow up"])
-            >>> prompt = messages.to_prompt()
-            >>> # Result: [
-            >>> #   {"role": "user", "content": "Hello"},
-            >>> #   {"role": "assistant", "content": "..."},
-            >>> #   {"role": "user", "content": "Follow up"}
-            >>> # ]
         """
         messages: list[ChatCompletionMessageParam] = []
@@ -285,15 +280,13 @@ class AIMessages(list[AIMessageType]):
                 # Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
                 if hasattr(message.choices[0].message, "provider_specific_fields"):
-                    provider_fields = getattr(
-                        message.choices[0].message, "provider_specific_fields", None
-                    )
+                    provider_fields = getattr(message.choices[0].message, "provider_specific_fields", None)
                     if provider_fields:
                         assistant_message["provider_specific_fields"] = provider_fields  # type: ignore[typeddict-item]
                 messages.append(assistant_message)
             else:
-                raise ValueError(f"Unsupported message type: {type(message)}")
+                raise TypeError(f"Unsupported message type: {type(message)}")
         return messages
@@ -333,8 +326,6 @@ class AIMessages(list[AIMessageType]):
     def approximate_tokens_count(self) -> int:
         """Approximate tokens count for the messages.
-        @public
         Uses tiktoken with gpt-4 encoding to estimate total token count
         across all messages in the conversation.
@@ -344,26 +335,27 @@ class AIMessages(list[AIMessageType]):
         Raises:
             ValueError: If message contains unsupported type.
-        Example:
-            >>> messages = AIMessages(["Hello", "World"])
-            >>> messages.approximate_tokens_count  # ~2-3 tokens
         """
         count = 0
+        enc = get_tiktoken_encoding()
         for message in self:
             if isinstance(message, str):
-                count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+                count += len(enc.encode(message))
             elif isinstance(message, Document):
                 count += message.approximate_tokens_count
             elif isinstance(message, ModelResponse):  # type: ignore
-                count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+                count += len(enc.encode(message.content))
             else:
-                raise ValueError(f"Unsupported message type: {type(message)}")
+                raise TypeError(f"Unsupported message type: {type(message)}")
         return count
     @staticmethod
-    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
+    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:  # noqa: PLR0912, PLR0914
         """Convert a document to prompt format for LLM consumption.
+        Renders the document as XML with text/image/PDF content, followed by any
+        attachments as separate <attachment> XML blocks with name and description attributes.
         Args:
             document: The document to convert.
@@ -373,60 +365,80 @@ class AIMessages(list[AIMessageType]):
         prompt: list[ChatCompletionContentPartParam] = []
         # Build the text header
-        description = (
-            f"<description>{document.description}</description>\n" if document.description else ""
-        )
-        header_text = (
-            f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
-        )
+        description = f"<description>{document.description}</description>\n" if document.description else ""
+        header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
         # Handle text documents
         if document.is_text:
             text_content = document.content.decode("utf-8")
-            content_text = f"{header_text}<content>\n{text_content}\n</content>\n</document>\n"
+            content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
             prompt.append({"type": "text", "text": content_text})
-            return prompt
-        # Handle non-text documents
-        if not document.is_image and not document.is_pdf:
-            get_logger(__name__).error(
-                f"Document is not a text, image or PDF: {document.name} - {document.mime_type}"
-            )
+        # Handle binary documents (image/PDF)
+        elif document.is_image or document.is_pdf:
+            prompt.append({"type": "text", "text": f"{header_text}<content>\n"})
+            if document.is_image:
+                content_bytes, mime_type = _ensure_llm_compatible_image(document.content, document.mime_type)
+            else:
+                content_bytes, mime_type = document.content, document.mime_type
+            base64_content = base64.b64encode(content_bytes).decode("utf-8")
+            data_uri = f"data:{mime_type};base64,{base64_content}"
+            if document.is_pdf:
+                prompt.append({
+                    "type": "file",
+                    "file": {"file_data": data_uri},
+                })
+            else:
+                prompt.append({
+                    "type": "image_url",
+                    "image_url": {"url": data_uri, "detail": "high"},
+                })
+            prompt.append({"type": "text", "text": "</content>\n"})
+        else:
+            logger.error(f"Document is not a text, image or PDF: {document.name} - {document.mime_type}")
             return []
-        # Add header for binary content
-        prompt.append({
-            "type": "text",
-            "text": f"{header_text}<content>\n",
-        })
-        # Encode binary content, converting unsupported image formats to PNG
-        if document.is_image and not is_llm_supported_image(document.mime_type):
-            img = Image.open(io.BytesIO(document.content))
-            buf = io.BytesIO()
-            img.save(buf, format="PNG")
-            content_bytes = buf.getvalue()
-            mime_type = "image/png"
+        # Render attachments
+        for att in document.attachments:
+            desc_attr = f' description="{att.description}"' if att.description else ""
+            att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
+            if att.is_text:
+                prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
+            elif att.is_image or att.is_pdf:
+                prompt.append({"type": "text", "text": att_open})
+                if att.is_image:
+                    att_bytes, att_mime = _ensure_llm_compatible_image(att.content, att.mime_type)
+                else:
+                    att_bytes, att_mime = att.content, att.mime_type
+                att_b64 = base64.b64encode(att_bytes).decode("utf-8")
+                att_uri = f"data:{att_mime};base64,{att_b64}"
+                if att.is_pdf:
+                    prompt.append({
+                        "type": "file",
+                        "file": {"file_data": att_uri},
+                    })
+                else:
+                    prompt.append({
+                        "type": "image_url",
+                        "image_url": {"url": att_uri, "detail": "high"},
+                    })
+                prompt.append({"type": "text", "text": "</attachment>\n"})
+            else:
+                logger.warning(f"Skipping unsupported attachment type: {att.name} - {att.mime_type}")
+        # Close document — merge into last text part to preserve JSON structure (and cache key)
+        last = prompt[-1]
+        if last["type"] == "text":
+            prompt[-1] = {"type": "text", "text": last["text"] + "</document>\n"}
         else:
-            content_bytes = document.content
-            mime_type = document.mime_type
-        base64_content = base64.b64encode(content_bytes).decode("utf-8")
-        data_uri = f"data:{mime_type};base64,{base64_content}"
-        # Add appropriate content type
-        if document.is_pdf:
-            prompt.append({
-                "type": "file",
-                "file": {"file_data": data_uri},
-            })
-        else:  # is_image
-            prompt.append({
-                "type": "image_url",
-                "image_url": {"url": data_uri, "detail": "high"},
-            })
-        # Close the document tag
-        prompt.append({"type": "text", "text": "</content>\n</document>\n"})
+            prompt.append({"type": "text", "text": "</document>\n"})
         return prompt

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl