PyPI - ai-pipeline-core - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

ai_pipeline_core-0.1.2-py3-none-any.whl → ai_pipeline_core-0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

ai_pipeline_core/documents/document.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import base64
 import hashlib
 import json
+import re
 from abc import ABC, abstractmethod
 from base64 import b32encode
 from enum import StrEnum
@@ -26,6 +27,7 @@ class Document(BaseModel, ABC):
     MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024  # 10MB default
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
+    MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
     def __init__(self, **data: Any) -> None:
         """Prevent direct instantiation of abstract Document class."""
@@ -199,15 +201,34 @@ class Document(BaseModel, ABC):
     def as_yaml(self) -> Any:
         """Parse document as YAML"""
-        if not self.is_text:
-            raise ValueError(f"Document is not text: {self.name}")
-        return YAML().load(self.content.decode("utf-8"))  # type: ignore
+        return YAML().load(self.as_text())
     def as_json(self) -> Any:
         """Parse document as JSON"""
-        if not self.is_text:
-            raise ValueError(f"Document is not text: {self.name}")
-        return json.loads(self.content.decode("utf-8"))
+        return json.loads(self.as_text())
+    def as_markdown_list(self) -> list[str]:
+        """Parse document as a markdown list"""
+        return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
+    @classmethod
+    def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
+        """Create a document from a name, description, and content"""
+        if isinstance(content, str):
+            content = content.encode("utf-8")
+        return cls(name=name, description=description, content=content)
+    @classmethod
+    def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
+        """Create a document from a name, description, and list of strings"""
+        # remove other list separators (lines that are only the separator + whitespace)
+        separator = Document.MARKDOWN_LIST_SEPARATOR.strip()
+        pattern = re.compile(rf"^[ \t]*{re.escape(separator)}[ \t]*(?:\r?\n|$)", flags=re.MULTILINE)
+        # Normalize CRLF/CR to LF before cleaning to ensure consistent behavior
+        normalized_items = [re.sub(r"\r\n?", "\n", item) for item in items]
+        cleaned_items = [pattern.sub("", item) for item in normalized_items]
+        content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
+        return cls.create(name, description, content)
     def serialize_model(self) -> dict[str, Any]:
         """Serialize document to a dictionary with proper encoding."""

ai_pipeline_core/llm/ai_messages.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import json
 from openai.types.chat import (
@@ -59,6 +60,11 @@ class AIMessages(list[AIMessageType]):
                 messages.append(message)
         return messages
+    def get_prompt_cache_key(self, system_prompt: str | None = None) -> str:
+        if not system_prompt:
+            system_prompt = ""
+        return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()
     @staticmethod
     def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
         """

ai_pipeline_core/llm/client.py CHANGED Viewed

@@ -48,15 +48,13 @@ def _process_messages(
         # Use AIMessages.to_prompt() for context
         context_messages = context.to_prompt()
-        # Apply caching to context messages
-        for msg in context_messages:
-            if msg.get("role") == "user":
-                # Add cache control to user messages in context
-                msg["cache_control"] = {  # type: ignore
-                    "type": "ephemeral",
-                    "ttl": "120s",  # Cache for 2m
-                }
-            processed_messages.append(msg)
+        # Apply caching to last context message
+        context_messages[-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": "120s",  # Cache for 2m
+        }
+        processed_messages.extend(context_messages)
     # Process regular messages without caching
     if messages:
@@ -108,9 +106,14 @@ async def _generate_with_retry(
         **options.to_openai_completion_kwargs(),
     }
+    if context:
+        completion_kwargs["prompt_cache_key"] = context.get_prompt_cache_key(options.system_prompt)
     for attempt in range(options.retries):
         try:
-            with Laminar.start_as_current_span(model, span_type="LLM", input=messages) as span:
+            with Laminar.start_as_current_span(
+                model, span_type="LLM", input=processed_messages
+            ) as span:
                 response = await _generate(model, processed_messages, completion_kwargs)
                 span.set_attributes(response.get_laminar_metadata())
                 Laminar.set_span_output(response.content)
@@ -162,7 +165,7 @@ async def generate(
 T = TypeVar("T", bound=BaseModel)
-@trace
+@trace(ignore_inputs=["context"])
 async def generate_structured(
     model: ModelName,
     response_format: type[T],

{ai_pipeline_core-0.1.2.dist-info → ai_pipeline_core-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.2
+Version: 0.1.4
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core

{ai_pipeline_core-0.1.2.dist-info → ai_pipeline_core-0.1.4.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
 ai_pipeline_core/tracing.py,sha256=UcQ_z1F8KrBLq5ZJaXyoIZsiMdqjHzBhADV07pYXY-w,7663
 ai_pipeline_core/documents/__init__.py,sha256=rEnKj-sSlZ9WnFlZAmSGVi1P8vnsHmU9O9_YwtP40ms,242
-ai_pipeline_core/documents/document.py,sha256=Gj4WR57VW67hhRN1360oqHfVQg6Xxj4jx_XueK4cvl0,8941
+ai_pipeline_core/documents/document.py,sha256=LNKomZnkhgiJHuH2pIzZJUHwrl1rBMBypOiSKg_AcHY,10068
 ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
 ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
 ai_pipeline_core/documents/mime_type.py,sha256=tMWGH9PVmHe6a_IzdaJUqIHf4qnwQOwOCBhsgW2AyTE,2244
@@ -14,8 +14,8 @@ ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTH
 ai_pipeline_core/flow/__init__.py,sha256=_Sji2yY1ICkvVX6QiiGWKzqIXtg9UAiuvhjHSK_gdO8,57
 ai_pipeline_core/flow/config.py,sha256=crbe_OvNE6qulIKv1D8yKoe8xrEsIlvICyxjhqHHBxQ,2266
 ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
-ai_pipeline_core/llm/ai_messages.py,sha256=lzzp4t6xDU9ULhwbmORFTQbdl0BbsqxD5YLzlHstQwU,4333
-ai_pipeline_core/llm/client.py,sha256=FrSWmO2CiwinGLXo431YgtOY_ePAGuHgMRBjSwzyL0I,7663
+ai_pipeline_core/llm/ai_messages.py,sha256=DwJJe05BtYdnMZeHbBbyEbDCqrW63SRvprxptoJUCn4,4586
+ai_pipeline_core/llm/client.py,sha256=IOcyjwyAKQWlqnwC5p2Hl4FeRCzOJAHC5Yqr_oCBQ8s,7703
 ai_pipeline_core/llm/model_options.py,sha256=TvAAlDFZN-TP9-J-RZBuU_dpSocskf6paaQMw1XY9UE,1321
 ai_pipeline_core/llm/model_response.py,sha256=fIWueaemgo0cMruvToMZyKsRPzKwL6IlvUJN7DLG710,5558
 ai_pipeline_core/llm/model_types.py,sha256=rIwY6voT8-xdfsKPDC0Gkdl2iTp9Q2LuvWGSRU9Mp3k,342
@@ -23,7 +23,7 @@ ai_pipeline_core/logging/__init__.py,sha256=DOO6ckgnMVXl29Sy7q6jhO-iW96h54pCHQDz
 ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
 ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
 ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
-ai_pipeline_core-0.1.2.dist-info/METADATA,sha256=LO-DGxVRhZPcwRwb2_zibFzp8aRE59STOYp2BxDag8M,15869
-ai_pipeline_core-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ai_pipeline_core-0.1.2.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
-ai_pipeline_core-0.1.2.dist-info/RECORD,,
+ai_pipeline_core-0.1.4.dist-info/METADATA,sha256=oB5vtkmCTKTlJKiTetHT8Lt8PKgYEAihOGIlKsD8tSQ,15869
+ai_pipeline_core-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ai_pipeline_core-0.1.4.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.1.4.dist-info/RECORD,,

{ai_pipeline_core-0.1.2.dist-info → ai_pipeline_core-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_pipeline_core-0.1.2.dist-info → ai_pipeline_core-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

ai-pipeline-core 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

ai_pipeline_core-0.1.2-py3-none-any.whl → ai_pipeline_core-0.1.4-py3-none-any.whl