ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""AI message handling for LLM interactions.
|
|
2
2
|
|
|
3
|
-
@public
|
|
4
|
-
|
|
5
3
|
Provides AIMessages container for managing conversations with mixed content types
|
|
6
4
|
including text, documents, and model responses.
|
|
7
5
|
"""
|
|
@@ -10,27 +8,39 @@ import base64
|
|
|
10
8
|
import hashlib
|
|
11
9
|
import io
|
|
12
10
|
import json
|
|
11
|
+
from collections.abc import Callable, Iterable
|
|
13
12
|
from copy import deepcopy
|
|
14
|
-
from typing import Any,
|
|
13
|
+
from typing import Any, SupportsIndex
|
|
15
14
|
|
|
16
|
-
import tiktoken
|
|
17
15
|
from openai.types.chat import (
|
|
18
16
|
ChatCompletionContentPartParam,
|
|
19
17
|
ChatCompletionMessageParam,
|
|
20
18
|
)
|
|
21
19
|
from PIL import Image
|
|
22
|
-
from prefect.logging import get_logger
|
|
23
20
|
|
|
24
21
|
from ai_pipeline_core.documents import Document
|
|
22
|
+
from ai_pipeline_core.documents.document import get_tiktoken_encoding
|
|
25
23
|
from ai_pipeline_core.documents.mime_type import is_llm_supported_image
|
|
24
|
+
from ai_pipeline_core.logging import get_pipeline_logger
|
|
26
25
|
|
|
27
26
|
from .model_response import ModelResponse
|
|
28
27
|
|
|
28
|
+
logger = get_pipeline_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes, str]:
|
|
32
|
+
"""Convert unsupported image formats to PNG for LLM consumption."""
|
|
33
|
+
if is_llm_supported_image(mime_type):
|
|
34
|
+
return content, mime_type
|
|
35
|
+
img = Image.open(io.BytesIO(content))
|
|
36
|
+
buf = io.BytesIO()
|
|
37
|
+
img.save(buf, format="PNG")
|
|
38
|
+
return buf.getvalue(), "image/png"
|
|
39
|
+
|
|
40
|
+
|
|
29
41
|
AIMessageType = str | Document | ModelResponse
|
|
30
42
|
"""Type for messages in AIMessages container.
|
|
31
43
|
|
|
32
|
-
@public
|
|
33
|
-
|
|
34
44
|
Represents the allowed types for conversation messages:
|
|
35
45
|
- str: Plain text messages
|
|
36
46
|
- Document: Structured document content
|
|
@@ -38,11 +48,9 @@ Represents the allowed types for conversation messages:
|
|
|
38
48
|
"""
|
|
39
49
|
|
|
40
50
|
|
|
41
|
-
class AIMessages(list[AIMessageType]):
|
|
51
|
+
class AIMessages(list[AIMessageType]): # noqa: PLR0904
|
|
42
52
|
"""Container for AI conversation messages supporting mixed types.
|
|
43
53
|
|
|
44
|
-
@public
|
|
45
|
-
|
|
46
54
|
This class extends list to manage conversation messages between user
|
|
47
55
|
and AI, supporting text, Document objects, and ModelResponse instances.
|
|
48
56
|
Messages are converted to OpenAI-compatible format for LLM interactions.
|
|
@@ -50,7 +58,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
50
58
|
Conversion Rules:
|
|
51
59
|
- str: Becomes {"role": "user", "content": text}
|
|
52
60
|
- Document: Becomes {"role": "user", "content": document_content}
|
|
53
|
-
(automatically handles text, images, PDFs based on MIME type
|
|
61
|
+
(automatically handles text, images, PDFs based on MIME type; attachments
|
|
62
|
+
are rendered as <attachment> XML blocks)
|
|
54
63
|
- ModelResponse: Becomes {"role": "assistant", "content": response.content}
|
|
55
64
|
|
|
56
65
|
Note: Document conversion is automatic. Text content becomes user text messages.
|
|
@@ -73,12 +82,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
73
82
|
constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
|
|
74
83
|
accidental character iteration.
|
|
75
84
|
|
|
76
|
-
Example:
|
|
77
|
-
>>> from ai_pipeline_core import llm
|
|
78
|
-
>>> messages = AIMessages()
|
|
79
|
-
>>> messages.append("What is the capital of France?")
|
|
80
|
-
>>> response = await llm.generate("gpt-5.1", messages=messages)
|
|
81
|
-
>>> messages.append(response) # Add the actual response
|
|
82
85
|
"""
|
|
83
86
|
|
|
84
87
|
def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
|
|
@@ -147,8 +150,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
147
150
|
|
|
148
151
|
def __setitem__(
|
|
149
152
|
self,
|
|
150
|
-
index:
|
|
151
|
-
value:
|
|
153
|
+
index: SupportsIndex | slice,
|
|
154
|
+
value: AIMessageType | Iterable[AIMessageType],
|
|
152
155
|
) -> None:
|
|
153
156
|
"""Set item or slice."""
|
|
154
157
|
self._check_frozen()
|
|
@@ -163,7 +166,7 @@ class AIMessages(list[AIMessageType]):
|
|
|
163
166
|
self._check_frozen()
|
|
164
167
|
return super().__iadd__(other)
|
|
165
168
|
|
|
166
|
-
def __delitem__(self, index:
|
|
169
|
+
def __delitem__(self, index: SupportsIndex | slice) -> None:
|
|
167
170
|
"""Delete item or slice from list."""
|
|
168
171
|
self._check_frozen()
|
|
169
172
|
super().__delitem__(index)
|
|
@@ -192,9 +195,7 @@ class AIMessages(list[AIMessageType]):
|
|
|
192
195
|
self._check_frozen()
|
|
193
196
|
super().reverse()
|
|
194
197
|
|
|
195
|
-
def sort(
|
|
196
|
-
self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
|
|
197
|
-
) -> None:
|
|
198
|
+
def sort(self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False) -> None:
|
|
198
199
|
"""Sort list in place."""
|
|
199
200
|
self._check_frozen()
|
|
200
201
|
if key is None:
|
|
@@ -241,6 +242,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
241
242
|
|
|
242
243
|
Transforms the message list into the format expected by OpenAI API.
|
|
243
244
|
Each message type is converted according to its role and content.
|
|
245
|
+
Documents are rendered as XML with any attachments included as nested
|
|
246
|
+
<attachment> blocks.
|
|
244
247
|
|
|
245
248
|
Returns:
|
|
246
249
|
List of ChatCompletionMessageParam dicts (from openai.types.chat)
|
|
@@ -250,14 +253,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
250
253
|
Raises:
|
|
251
254
|
ValueError: If message type is not supported.
|
|
252
255
|
|
|
253
|
-
Example:
|
|
254
|
-
>>> messages = AIMessages(["Hello", response, "Follow up"])
|
|
255
|
-
>>> prompt = messages.to_prompt()
|
|
256
|
-
>>> # Result: [
|
|
257
|
-
>>> # {"role": "user", "content": "Hello"},
|
|
258
|
-
>>> # {"role": "assistant", "content": "..."},
|
|
259
|
-
>>> # {"role": "user", "content": "Follow up"}
|
|
260
|
-
>>> # ]
|
|
261
256
|
"""
|
|
262
257
|
messages: list[ChatCompletionMessageParam] = []
|
|
263
258
|
|
|
@@ -285,15 +280,13 @@ class AIMessages(list[AIMessageType]):
|
|
|
285
280
|
|
|
286
281
|
# Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
|
|
287
282
|
if hasattr(message.choices[0].message, "provider_specific_fields"):
|
|
288
|
-
provider_fields = getattr(
|
|
289
|
-
message.choices[0].message, "provider_specific_fields", None
|
|
290
|
-
)
|
|
283
|
+
provider_fields = getattr(message.choices[0].message, "provider_specific_fields", None)
|
|
291
284
|
if provider_fields:
|
|
292
285
|
assistant_message["provider_specific_fields"] = provider_fields # type: ignore[typeddict-item]
|
|
293
286
|
|
|
294
287
|
messages.append(assistant_message)
|
|
295
288
|
else:
|
|
296
|
-
raise
|
|
289
|
+
raise TypeError(f"Unsupported message type: {type(message)}")
|
|
297
290
|
|
|
298
291
|
return messages
|
|
299
292
|
|
|
@@ -333,8 +326,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
333
326
|
def approximate_tokens_count(self) -> int:
|
|
334
327
|
"""Approximate tokens count for the messages.
|
|
335
328
|
|
|
336
|
-
@public
|
|
337
|
-
|
|
338
329
|
Uses tiktoken with gpt-4 encoding to estimate total token count
|
|
339
330
|
across all messages in the conversation.
|
|
340
331
|
|
|
@@ -344,26 +335,27 @@ class AIMessages(list[AIMessageType]):
|
|
|
344
335
|
Raises:
|
|
345
336
|
ValueError: If message contains unsupported type.
|
|
346
337
|
|
|
347
|
-
Example:
|
|
348
|
-
>>> messages = AIMessages(["Hello", "World"])
|
|
349
|
-
>>> messages.approximate_tokens_count # ~2-3 tokens
|
|
350
338
|
"""
|
|
351
339
|
count = 0
|
|
340
|
+
enc = get_tiktoken_encoding()
|
|
352
341
|
for message in self:
|
|
353
342
|
if isinstance(message, str):
|
|
354
|
-
count += len(
|
|
343
|
+
count += len(enc.encode(message))
|
|
355
344
|
elif isinstance(message, Document):
|
|
356
345
|
count += message.approximate_tokens_count
|
|
357
346
|
elif isinstance(message, ModelResponse): # type: ignore
|
|
358
|
-
count += len(
|
|
347
|
+
count += len(enc.encode(message.content))
|
|
359
348
|
else:
|
|
360
|
-
raise
|
|
349
|
+
raise TypeError(f"Unsupported message type: {type(message)}")
|
|
361
350
|
return count
|
|
362
351
|
|
|
363
352
|
@staticmethod
|
|
364
|
-
def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
|
|
353
|
+
def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
|
|
365
354
|
"""Convert a document to prompt format for LLM consumption.
|
|
366
355
|
|
|
356
|
+
Renders the document as XML with text/image/PDF content, followed by any
|
|
357
|
+
attachments as separate <attachment> XML blocks with name and description attributes.
|
|
358
|
+
|
|
367
359
|
Args:
|
|
368
360
|
document: The document to convert.
|
|
369
361
|
|
|
@@ -373,60 +365,80 @@ class AIMessages(list[AIMessageType]):
|
|
|
373
365
|
prompt: list[ChatCompletionContentPartParam] = []
|
|
374
366
|
|
|
375
367
|
# Build the text header
|
|
376
|
-
description =
|
|
377
|
-
|
|
378
|
-
)
|
|
379
|
-
header_text = (
|
|
380
|
-
f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
|
|
381
|
-
)
|
|
368
|
+
description = f"<description>{document.description}</description>\n" if document.description else ""
|
|
369
|
+
header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
|
|
382
370
|
|
|
383
371
|
# Handle text documents
|
|
384
372
|
if document.is_text:
|
|
385
373
|
text_content = document.content.decode("utf-8")
|
|
386
|
-
content_text = f"{header_text}<content>\n{text_content}\n</content>\n
|
|
374
|
+
content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
|
|
387
375
|
prompt.append({"type": "text", "text": content_text})
|
|
388
|
-
return prompt
|
|
389
376
|
|
|
390
|
-
# Handle
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
377
|
+
# Handle binary documents (image/PDF)
|
|
378
|
+
elif document.is_image or document.is_pdf:
|
|
379
|
+
prompt.append({"type": "text", "text": f"{header_text}<content>\n"})
|
|
380
|
+
|
|
381
|
+
if document.is_image:
|
|
382
|
+
content_bytes, mime_type = _ensure_llm_compatible_image(document.content, document.mime_type)
|
|
383
|
+
else:
|
|
384
|
+
content_bytes, mime_type = document.content, document.mime_type
|
|
385
|
+
base64_content = base64.b64encode(content_bytes).decode("utf-8")
|
|
386
|
+
data_uri = f"data:{mime_type};base64,{base64_content}"
|
|
387
|
+
|
|
388
|
+
if document.is_pdf:
|
|
389
|
+
prompt.append({
|
|
390
|
+
"type": "file",
|
|
391
|
+
"file": {"file_data": data_uri},
|
|
392
|
+
})
|
|
393
|
+
else:
|
|
394
|
+
prompt.append({
|
|
395
|
+
"type": "image_url",
|
|
396
|
+
"image_url": {"url": data_uri, "detail": "high"},
|
|
397
|
+
})
|
|
398
|
+
|
|
399
|
+
prompt.append({"type": "text", "text": "</content>\n"})
|
|
400
|
+
|
|
401
|
+
else:
|
|
402
|
+
logger.error(f"Document is not a text, image or PDF: {document.name} - {document.mime_type}")
|
|
395
403
|
return []
|
|
396
404
|
|
|
397
|
-
#
|
|
398
|
-
|
|
399
|
-
"
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
405
|
+
# Render attachments
|
|
406
|
+
for att in document.attachments:
|
|
407
|
+
desc_attr = f' description="{att.description}"' if att.description else ""
|
|
408
|
+
att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
|
|
409
|
+
|
|
410
|
+
if att.is_text:
|
|
411
|
+
prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
|
|
412
|
+
elif att.is_image or att.is_pdf:
|
|
413
|
+
prompt.append({"type": "text", "text": att_open})
|
|
414
|
+
|
|
415
|
+
if att.is_image:
|
|
416
|
+
att_bytes, att_mime = _ensure_llm_compatible_image(att.content, att.mime_type)
|
|
417
|
+
else:
|
|
418
|
+
att_bytes, att_mime = att.content, att.mime_type
|
|
419
|
+
att_b64 = base64.b64encode(att_bytes).decode("utf-8")
|
|
420
|
+
att_uri = f"data:{att_mime};base64,{att_b64}"
|
|
421
|
+
|
|
422
|
+
if att.is_pdf:
|
|
423
|
+
prompt.append({
|
|
424
|
+
"type": "file",
|
|
425
|
+
"file": {"file_data": att_uri},
|
|
426
|
+
})
|
|
427
|
+
else:
|
|
428
|
+
prompt.append({
|
|
429
|
+
"type": "image_url",
|
|
430
|
+
"image_url": {"url": att_uri, "detail": "high"},
|
|
431
|
+
})
|
|
432
|
+
|
|
433
|
+
prompt.append({"type": "text", "text": "</attachment>\n"})
|
|
434
|
+
else:
|
|
435
|
+
logger.warning(f"Skipping unsupported attachment type: {att.name} - {att.mime_type}")
|
|
436
|
+
|
|
437
|
+
# Close document — merge into last text part to preserve JSON structure (and cache key)
|
|
438
|
+
last = prompt[-1]
|
|
439
|
+
if last["type"] == "text":
|
|
440
|
+
prompt[-1] = {"type": "text", "text": last["text"] + "</document>\n"}
|
|
410
441
|
else:
|
|
411
|
-
|
|
412
|
-
mime_type = document.mime_type
|
|
413
|
-
|
|
414
|
-
base64_content = base64.b64encode(content_bytes).decode("utf-8")
|
|
415
|
-
data_uri = f"data:{mime_type};base64,{base64_content}"
|
|
416
|
-
|
|
417
|
-
# Add appropriate content type
|
|
418
|
-
if document.is_pdf:
|
|
419
|
-
prompt.append({
|
|
420
|
-
"type": "file",
|
|
421
|
-
"file": {"file_data": data_uri},
|
|
422
|
-
})
|
|
423
|
-
else: # is_image
|
|
424
|
-
prompt.append({
|
|
425
|
-
"type": "image_url",
|
|
426
|
-
"image_url": {"url": data_uri, "detail": "high"},
|
|
427
|
-
})
|
|
428
|
-
|
|
429
|
-
# Close the document tag
|
|
430
|
-
prompt.append({"type": "text", "text": "</content>\n</document>\n"})
|
|
442
|
+
prompt.append({"type": "text", "text": "</document>\n"})
|
|
431
443
|
|
|
432
444
|
return prompt
|