ai-pipeline-core 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import hashlib
3
3
  import json
4
+ import re
4
5
  from abc import ABC, abstractmethod
5
6
  from base64 import b32encode
6
7
  from enum import StrEnum
@@ -26,6 +27,7 @@ class Document(BaseModel, ABC):
26
27
 
27
28
  MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
28
29
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
30
+ MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
29
31
 
30
32
  def __init__(self, **data: Any) -> None:
31
33
  """Prevent direct instantiation of abstract Document class."""
@@ -199,15 +201,34 @@ class Document(BaseModel, ABC):
199
201
 
200
202
  def as_yaml(self) -> Any:
201
203
  """Parse document as YAML"""
202
- if not self.is_text:
203
- raise ValueError(f"Document is not text: {self.name}")
204
- return YAML().load(self.content.decode("utf-8")) # type: ignore
204
+ return YAML().load(self.as_text())
205
205
 
206
206
  def as_json(self) -> Any:
207
207
  """Parse document as JSON"""
208
- if not self.is_text:
209
- raise ValueError(f"Document is not text: {self.name}")
210
- return json.loads(self.content.decode("utf-8"))
208
+ return json.loads(self.as_text())
209
+
210
+ def as_markdown_list(self) -> list[str]:
211
+ """Parse document as a markdown list"""
212
+ return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
213
+
214
+ @classmethod
215
+ def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
216
+ """Create a document from a name, description, and content"""
217
+ if isinstance(content, str):
218
+ content = content.encode("utf-8")
219
+ return cls(name=name, description=description, content=content)
220
+
221
+ @classmethod
222
+ def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
223
+ """Create a document from a name, description, and list of strings"""
224
+ # remove other list separators (lines that are only the separator + whitespace)
225
+ separator = Document.MARKDOWN_LIST_SEPARATOR.strip()
226
+ pattern = re.compile(rf"^[ \t]*{re.escape(separator)}[ \t]*(?:\r?\n|$)", flags=re.MULTILINE)
227
+ # Normalize CRLF/CR to LF before cleaning to ensure consistent behavior
228
+ normalized_items = [re.sub(r"\r\n?", "\n", item) for item in items]
229
+ cleaned_items = [pattern.sub("", item) for item in normalized_items]
230
+ content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
231
+ return cls.create(name, description, content)
211
232
 
212
233
  def serialize_model(self) -> dict[str, Any]:
213
234
  """Serialize document to a dictionary with proper encoding."""
@@ -1,4 +1,5 @@
1
1
  import base64
2
+ import hashlib
2
3
  import json
3
4
 
4
5
  from openai.types.chat import (
@@ -59,6 +60,11 @@ class AIMessages(list[AIMessageType]):
59
60
  messages.append(message)
60
61
  return messages
61
62
 
63
+ def get_prompt_cache_key(self, system_prompt: str | None = None) -> str:
64
+ if not system_prompt:
65
+ system_prompt = ""
66
+ return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()
67
+
62
68
  @staticmethod
63
69
  def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
64
70
  """
@@ -48,15 +48,13 @@ def _process_messages(
48
48
  # Use AIMessages.to_prompt() for context
49
49
  context_messages = context.to_prompt()
50
50
 
51
- # Apply caching to context messages
52
- for msg in context_messages:
53
- if msg.get("role") == "user":
54
- # Add cache control to user messages in context
55
- msg["cache_control"] = { # type: ignore
56
- "type": "ephemeral",
57
- "ttl": "120s", # Cache for 2m
58
- }
59
- processed_messages.append(msg)
51
+ # Apply caching to last context message
52
+ context_messages[-1]["cache_control"] = { # type: ignore
53
+ "type": "ephemeral",
54
+ "ttl": "120s", # Cache for 2m
55
+ }
56
+
57
+ processed_messages.extend(context_messages)
60
58
 
61
59
  # Process regular messages without caching
62
60
  if messages:
@@ -108,9 +106,14 @@ async def _generate_with_retry(
108
106
  **options.to_openai_completion_kwargs(),
109
107
  }
110
108
 
109
+ if context:
110
+ completion_kwargs["prompt_cache_key"] = context.get_prompt_cache_key(options.system_prompt)
111
+
111
112
  for attempt in range(options.retries):
112
113
  try:
113
- with Laminar.start_as_current_span(model, span_type="LLM", input=messages) as span:
114
+ with Laminar.start_as_current_span(
115
+ model, span_type="LLM", input=processed_messages
116
+ ) as span:
114
117
  response = await _generate(model, processed_messages, completion_kwargs)
115
118
  span.set_attributes(response.get_laminar_metadata())
116
119
  Laminar.set_span_output(response.content)
@@ -162,7 +165,7 @@ async def generate(
162
165
  T = TypeVar("T", bound=BaseModel)
163
166
 
164
167
 
165
- @trace
168
+ @trace(ignore_inputs=["context"])
166
169
  async def generate_structured(
167
170
  model: ModelName,
168
171
  response_format: type[T],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -5,7 +5,7 @@ ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
6
6
  ai_pipeline_core/tracing.py,sha256=UcQ_z1F8KrBLq5ZJaXyoIZsiMdqjHzBhADV07pYXY-w,7663
7
7
  ai_pipeline_core/documents/__init__.py,sha256=rEnKj-sSlZ9WnFlZAmSGVi1P8vnsHmU9O9_YwtP40ms,242
8
- ai_pipeline_core/documents/document.py,sha256=Gj4WR57VW67hhRN1360oqHfVQg6Xxj4jx_XueK4cvl0,8941
8
+ ai_pipeline_core/documents/document.py,sha256=LNKomZnkhgiJHuH2pIzZJUHwrl1rBMBypOiSKg_AcHY,10068
9
9
  ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
10
10
  ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
11
11
  ai_pipeline_core/documents/mime_type.py,sha256=tMWGH9PVmHe6a_IzdaJUqIHf4qnwQOwOCBhsgW2AyTE,2244
@@ -14,8 +14,8 @@ ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTH
14
14
  ai_pipeline_core/flow/__init__.py,sha256=_Sji2yY1ICkvVX6QiiGWKzqIXtg9UAiuvhjHSK_gdO8,57
15
15
  ai_pipeline_core/flow/config.py,sha256=crbe_OvNE6qulIKv1D8yKoe8xrEsIlvICyxjhqHHBxQ,2266
16
16
  ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
17
- ai_pipeline_core/llm/ai_messages.py,sha256=lzzp4t6xDU9ULhwbmORFTQbdl0BbsqxD5YLzlHstQwU,4333
18
- ai_pipeline_core/llm/client.py,sha256=FrSWmO2CiwinGLXo431YgtOY_ePAGuHgMRBjSwzyL0I,7663
17
+ ai_pipeline_core/llm/ai_messages.py,sha256=DwJJe05BtYdnMZeHbBbyEbDCqrW63SRvprxptoJUCn4,4586
18
+ ai_pipeline_core/llm/client.py,sha256=IOcyjwyAKQWlqnwC5p2Hl4FeRCzOJAHC5Yqr_oCBQ8s,7703
19
19
  ai_pipeline_core/llm/model_options.py,sha256=TvAAlDFZN-TP9-J-RZBuU_dpSocskf6paaQMw1XY9UE,1321
20
20
  ai_pipeline_core/llm/model_response.py,sha256=fIWueaemgo0cMruvToMZyKsRPzKwL6IlvUJN7DLG710,5558
21
21
  ai_pipeline_core/llm/model_types.py,sha256=rIwY6voT8-xdfsKPDC0Gkdl2iTp9Q2LuvWGSRU9Mp3k,342
@@ -23,7 +23,7 @@ ai_pipeline_core/logging/__init__.py,sha256=DOO6ckgnMVXl29Sy7q6jhO-iW96h54pCHQDz
23
23
  ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
24
24
  ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
25
25
  ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
26
- ai_pipeline_core-0.1.2.dist-info/METADATA,sha256=LO-DGxVRhZPcwRwb2_zibFzp8aRE59STOYp2BxDag8M,15869
27
- ai_pipeline_core-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
- ai_pipeline_core-0.1.2.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
29
- ai_pipeline_core-0.1.2.dist-info/RECORD,,
26
+ ai_pipeline_core-0.1.4.dist-info/METADATA,sha256=oB5vtkmCTKTlJKiTetHT8Lt8PKgYEAihOGIlKsD8tSQ,15869
27
+ ai_pipeline_core-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
+ ai_pipeline_core-0.1.4.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
29
+ ai_pipeline_core-0.1.4.dist-info/RECORD,,