ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,33 +1,46 @@
|
|
|
1
1
|
"""AI message handling for LLM interactions.
|
|
2
2
|
|
|
3
|
-
@public
|
|
4
|
-
|
|
5
3
|
Provides AIMessages container for managing conversations with mixed content types
|
|
6
4
|
including text, documents, and model responses.
|
|
7
5
|
"""
|
|
8
6
|
|
|
9
7
|
import base64
|
|
10
8
|
import hashlib
|
|
9
|
+
import io
|
|
11
10
|
import json
|
|
11
|
+
from collections.abc import Callable, Iterable
|
|
12
12
|
from copy import deepcopy
|
|
13
|
-
from typing import Any,
|
|
13
|
+
from typing import Any, SupportsIndex
|
|
14
14
|
|
|
15
|
-
import tiktoken
|
|
16
15
|
from openai.types.chat import (
|
|
17
16
|
ChatCompletionContentPartParam,
|
|
18
17
|
ChatCompletionMessageParam,
|
|
19
18
|
)
|
|
20
|
-
from
|
|
19
|
+
from PIL import Image
|
|
21
20
|
|
|
22
21
|
from ai_pipeline_core.documents import Document
|
|
22
|
+
from ai_pipeline_core.documents.document import get_tiktoken_encoding
|
|
23
|
+
from ai_pipeline_core.documents.mime_type import is_llm_supported_image
|
|
24
|
+
from ai_pipeline_core.logging import get_pipeline_logger
|
|
23
25
|
|
|
24
26
|
from .model_response import ModelResponse
|
|
25
27
|
|
|
28
|
+
logger = get_pipeline_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes, str]:
|
|
32
|
+
"""Convert unsupported image formats to PNG for LLM consumption."""
|
|
33
|
+
if is_llm_supported_image(mime_type):
|
|
34
|
+
return content, mime_type
|
|
35
|
+
img = Image.open(io.BytesIO(content))
|
|
36
|
+
buf = io.BytesIO()
|
|
37
|
+
img.save(buf, format="PNG")
|
|
38
|
+
return buf.getvalue(), "image/png"
|
|
39
|
+
|
|
40
|
+
|
|
26
41
|
AIMessageType = str | Document | ModelResponse
|
|
27
42
|
"""Type for messages in AIMessages container.
|
|
28
43
|
|
|
29
|
-
@public
|
|
30
|
-
|
|
31
44
|
Represents the allowed types for conversation messages:
|
|
32
45
|
- str: Plain text messages
|
|
33
46
|
- Document: Structured document content
|
|
@@ -35,11 +48,9 @@ Represents the allowed types for conversation messages:
|
|
|
35
48
|
"""
|
|
36
49
|
|
|
37
50
|
|
|
38
|
-
class AIMessages(list[AIMessageType]):
|
|
51
|
+
class AIMessages(list[AIMessageType]): # noqa: PLR0904
|
|
39
52
|
"""Container for AI conversation messages supporting mixed types.
|
|
40
53
|
|
|
41
|
-
@public
|
|
42
|
-
|
|
43
54
|
This class extends list to manage conversation messages between user
|
|
44
55
|
and AI, supporting text, Document objects, and ModelResponse instances.
|
|
45
56
|
Messages are converted to OpenAI-compatible format for LLM interactions.
|
|
@@ -47,7 +58,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
47
58
|
Conversion Rules:
|
|
48
59
|
- str: Becomes {"role": "user", "content": text}
|
|
49
60
|
- Document: Becomes {"role": "user", "content": document_content}
|
|
50
|
-
(automatically handles text, images, PDFs based on MIME type
|
|
61
|
+
(automatically handles text, images, PDFs based on MIME type; attachments
|
|
62
|
+
are rendered as <attachment> XML blocks)
|
|
51
63
|
- ModelResponse: Becomes {"role": "assistant", "content": response.content}
|
|
52
64
|
|
|
53
65
|
Note: Document conversion is automatic. Text content becomes user text messages.
|
|
@@ -70,12 +82,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
70
82
|
constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
|
|
71
83
|
accidental character iteration.
|
|
72
84
|
|
|
73
|
-
Example:
|
|
74
|
-
>>> from ai_pipeline_core import llm
|
|
75
|
-
>>> messages = AIMessages()
|
|
76
|
-
>>> messages.append("What is the capital of France?")
|
|
77
|
-
>>> response = await llm.generate("gpt-5.1", messages=messages)
|
|
78
|
-
>>> messages.append(response) # Add the actual response
|
|
79
85
|
"""
|
|
80
86
|
|
|
81
87
|
def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
|
|
@@ -144,8 +150,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
144
150
|
|
|
145
151
|
def __setitem__(
|
|
146
152
|
self,
|
|
147
|
-
index:
|
|
148
|
-
value:
|
|
153
|
+
index: SupportsIndex | slice,
|
|
154
|
+
value: AIMessageType | Iterable[AIMessageType],
|
|
149
155
|
) -> None:
|
|
150
156
|
"""Set item or slice."""
|
|
151
157
|
self._check_frozen()
|
|
@@ -160,7 +166,7 @@ class AIMessages(list[AIMessageType]):
|
|
|
160
166
|
self._check_frozen()
|
|
161
167
|
return super().__iadd__(other)
|
|
162
168
|
|
|
163
|
-
def __delitem__(self, index:
|
|
169
|
+
def __delitem__(self, index: SupportsIndex | slice) -> None:
|
|
164
170
|
"""Delete item or slice from list."""
|
|
165
171
|
self._check_frozen()
|
|
166
172
|
super().__delitem__(index)
|
|
@@ -189,9 +195,7 @@ class AIMessages(list[AIMessageType]):
|
|
|
189
195
|
self._check_frozen()
|
|
190
196
|
super().reverse()
|
|
191
197
|
|
|
192
|
-
def sort(
|
|
193
|
-
self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
|
|
194
|
-
) -> None:
|
|
198
|
+
def sort(self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False) -> None:
|
|
195
199
|
"""Sort list in place."""
|
|
196
200
|
self._check_frozen()
|
|
197
201
|
if key is None:
|
|
@@ -238,6 +242,8 @@ class AIMessages(list[AIMessageType]):
|
|
|
238
242
|
|
|
239
243
|
Transforms the message list into the format expected by OpenAI API.
|
|
240
244
|
Each message type is converted according to its role and content.
|
|
245
|
+
Documents are rendered as XML with any attachments included as nested
|
|
246
|
+
<attachment> blocks.
|
|
241
247
|
|
|
242
248
|
Returns:
|
|
243
249
|
List of ChatCompletionMessageParam dicts (from openai.types.chat)
|
|
@@ -247,14 +253,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
247
253
|
Raises:
|
|
248
254
|
ValueError: If message type is not supported.
|
|
249
255
|
|
|
250
|
-
Example:
|
|
251
|
-
>>> messages = AIMessages(["Hello", response, "Follow up"])
|
|
252
|
-
>>> prompt = messages.to_prompt()
|
|
253
|
-
>>> # Result: [
|
|
254
|
-
>>> # {"role": "user", "content": "Hello"},
|
|
255
|
-
>>> # {"role": "assistant", "content": "..."},
|
|
256
|
-
>>> # {"role": "user", "content": "Follow up"}
|
|
257
|
-
>>> # ]
|
|
258
256
|
"""
|
|
259
257
|
messages: list[ChatCompletionMessageParam] = []
|
|
260
258
|
|
|
@@ -282,15 +280,13 @@ class AIMessages(list[AIMessageType]):
|
|
|
282
280
|
|
|
283
281
|
# Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
|
|
284
282
|
if hasattr(message.choices[0].message, "provider_specific_fields"):
|
|
285
|
-
provider_fields = getattr(
|
|
286
|
-
message.choices[0].message, "provider_specific_fields", None
|
|
287
|
-
)
|
|
283
|
+
provider_fields = getattr(message.choices[0].message, "provider_specific_fields", None)
|
|
288
284
|
if provider_fields:
|
|
289
285
|
assistant_message["provider_specific_fields"] = provider_fields # type: ignore[typeddict-item]
|
|
290
286
|
|
|
291
287
|
messages.append(assistant_message)
|
|
292
288
|
else:
|
|
293
|
-
raise
|
|
289
|
+
raise TypeError(f"Unsupported message type: {type(message)}")
|
|
294
290
|
|
|
295
291
|
return messages
|
|
296
292
|
|
|
@@ -330,8 +326,6 @@ class AIMessages(list[AIMessageType]):
|
|
|
330
326
|
def approximate_tokens_count(self) -> int:
|
|
331
327
|
"""Approximate tokens count for the messages.
|
|
332
328
|
|
|
333
|
-
@public
|
|
334
|
-
|
|
335
329
|
Uses tiktoken with gpt-4 encoding to estimate total token count
|
|
336
330
|
across all messages in the conversation.
|
|
337
331
|
|
|
@@ -341,26 +335,27 @@ class AIMessages(list[AIMessageType]):
|
|
|
341
335
|
Raises:
|
|
342
336
|
ValueError: If message contains unsupported type.
|
|
343
337
|
|
|
344
|
-
Example:
|
|
345
|
-
>>> messages = AIMessages(["Hello", "World"])
|
|
346
|
-
>>> messages.approximate_tokens_count # ~2-3 tokens
|
|
347
338
|
"""
|
|
348
339
|
count = 0
|
|
340
|
+
enc = get_tiktoken_encoding()
|
|
349
341
|
for message in self:
|
|
350
342
|
if isinstance(message, str):
|
|
351
|
-
count += len(
|
|
343
|
+
count += len(enc.encode(message))
|
|
352
344
|
elif isinstance(message, Document):
|
|
353
345
|
count += message.approximate_tokens_count
|
|
354
346
|
elif isinstance(message, ModelResponse): # type: ignore
|
|
355
|
-
count += len(
|
|
347
|
+
count += len(enc.encode(message.content))
|
|
356
348
|
else:
|
|
357
|
-
raise
|
|
349
|
+
raise TypeError(f"Unsupported message type: {type(message)}")
|
|
358
350
|
return count
|
|
359
351
|
|
|
360
352
|
@staticmethod
|
|
361
|
-
def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
|
|
353
|
+
def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
|
|
362
354
|
"""Convert a document to prompt format for LLM consumption.
|
|
363
355
|
|
|
356
|
+
Renders the document as XML with text/image/PDF content, followed by any
|
|
357
|
+
attachments as separate <attachment> XML blocks with name and description attributes.
|
|
358
|
+
|
|
364
359
|
Args:
|
|
365
360
|
document: The document to convert.
|
|
366
361
|
|
|
@@ -370,50 +365,80 @@ class AIMessages(list[AIMessageType]):
|
|
|
370
365
|
prompt: list[ChatCompletionContentPartParam] = []
|
|
371
366
|
|
|
372
367
|
# Build the text header
|
|
373
|
-
description =
|
|
374
|
-
|
|
375
|
-
)
|
|
376
|
-
header_text = (
|
|
377
|
-
f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
|
|
378
|
-
)
|
|
368
|
+
description = f"<description>{document.description}</description>\n" if document.description else ""
|
|
369
|
+
header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
|
|
379
370
|
|
|
380
371
|
# Handle text documents
|
|
381
372
|
if document.is_text:
|
|
382
373
|
text_content = document.content.decode("utf-8")
|
|
383
|
-
content_text = f"{header_text}<content>\n{text_content}\n</content>\n
|
|
374
|
+
content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
|
|
384
375
|
prompt.append({"type": "text", "text": content_text})
|
|
385
|
-
return prompt
|
|
386
376
|
|
|
387
|
-
# Handle
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
377
|
+
# Handle binary documents (image/PDF)
|
|
378
|
+
elif document.is_image or document.is_pdf:
|
|
379
|
+
prompt.append({"type": "text", "text": f"{header_text}<content>\n"})
|
|
380
|
+
|
|
381
|
+
if document.is_image:
|
|
382
|
+
content_bytes, mime_type = _ensure_llm_compatible_image(document.content, document.mime_type)
|
|
383
|
+
else:
|
|
384
|
+
content_bytes, mime_type = document.content, document.mime_type
|
|
385
|
+
base64_content = base64.b64encode(content_bytes).decode("utf-8")
|
|
386
|
+
data_uri = f"data:{mime_type};base64,{base64_content}"
|
|
387
|
+
|
|
388
|
+
if document.is_pdf:
|
|
389
|
+
prompt.append({
|
|
390
|
+
"type": "file",
|
|
391
|
+
"file": {"file_data": data_uri},
|
|
392
|
+
})
|
|
393
|
+
else:
|
|
394
|
+
prompt.append({
|
|
395
|
+
"type": "image_url",
|
|
396
|
+
"image_url": {"url": data_uri, "detail": "high"},
|
|
397
|
+
})
|
|
398
|
+
|
|
399
|
+
prompt.append({"type": "text", "text": "</content>\n"})
|
|
400
|
+
|
|
401
|
+
else:
|
|
402
|
+
logger.error(f"Document is not a text, image or PDF: {document.name} - {document.mime_type}")
|
|
392
403
|
return []
|
|
393
404
|
|
|
394
|
-
#
|
|
395
|
-
|
|
396
|
-
"
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
405
|
+
# Render attachments
|
|
406
|
+
for att in document.attachments:
|
|
407
|
+
desc_attr = f' description="{att.description}"' if att.description else ""
|
|
408
|
+
att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
|
|
409
|
+
|
|
410
|
+
if att.is_text:
|
|
411
|
+
prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
|
|
412
|
+
elif att.is_image or att.is_pdf:
|
|
413
|
+
prompt.append({"type": "text", "text": att_open})
|
|
414
|
+
|
|
415
|
+
if att.is_image:
|
|
416
|
+
att_bytes, att_mime = _ensure_llm_compatible_image(att.content, att.mime_type)
|
|
417
|
+
else:
|
|
418
|
+
att_bytes, att_mime = att.content, att.mime_type
|
|
419
|
+
att_b64 = base64.b64encode(att_bytes).decode("utf-8")
|
|
420
|
+
att_uri = f"data:{att_mime};base64,{att_b64}"
|
|
421
|
+
|
|
422
|
+
if att.is_pdf:
|
|
423
|
+
prompt.append({
|
|
424
|
+
"type": "file",
|
|
425
|
+
"file": {"file_data": att_uri},
|
|
426
|
+
})
|
|
427
|
+
else:
|
|
428
|
+
prompt.append({
|
|
429
|
+
"type": "image_url",
|
|
430
|
+
"image_url": {"url": att_uri, "detail": "high"},
|
|
431
|
+
})
|
|
432
|
+
|
|
433
|
+
prompt.append({"type": "text", "text": "</attachment>\n"})
|
|
434
|
+
else:
|
|
435
|
+
logger.warning(f"Skipping unsupported attachment type: {att.name} - {att.mime_type}")
|
|
436
|
+
|
|
437
|
+
# Close document — merge into last text part to preserve JSON structure (and cache key)
|
|
438
|
+
last = prompt[-1]
|
|
439
|
+
if last["type"] == "text":
|
|
440
|
+
prompt[-1] = {"type": "text", "text": last["text"] + "</document>\n"}
|
|
441
|
+
else:
|
|
442
|
+
prompt.append({"type": "text", "text": "</document>\n"})
|
|
418
443
|
|
|
419
444
|
return prompt
|