ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,5 @@
1
1
  """AI message handling for LLM interactions.
2
2
 
3
- @public
4
-
5
3
  Provides AIMessages container for managing conversations with mixed content types
6
4
  including text, documents, and model responses.
7
5
  """
@@ -10,27 +8,39 @@ import base64
10
8
  import hashlib
11
9
  import io
12
10
  import json
11
+ from collections.abc import Callable, Iterable
13
12
  from copy import deepcopy
14
- from typing import Any, Callable, Iterable, SupportsIndex, Union
13
+ from typing import Any, SupportsIndex
15
14
 
16
- import tiktoken
17
15
  from openai.types.chat import (
18
16
  ChatCompletionContentPartParam,
19
17
  ChatCompletionMessageParam,
20
18
  )
21
19
  from PIL import Image
22
- from prefect.logging import get_logger
23
20
 
24
21
  from ai_pipeline_core.documents import Document
22
+ from ai_pipeline_core.documents.document import get_tiktoken_encoding
25
23
  from ai_pipeline_core.documents.mime_type import is_llm_supported_image
24
+ from ai_pipeline_core.logging import get_pipeline_logger
26
25
 
27
26
  from .model_response import ModelResponse
28
27
 
28
+ logger = get_pipeline_logger(__name__)
29
+
30
+
31
def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes, str]:
    """Return image bytes in a format the LLM accepts, re-encoding to PNG when needed.

    Args:
        content: Raw encoded image bytes.
        mime_type: MIME type describing ``content``.

    Returns:
        ``(content, mime_type)`` unchanged when ``is_llm_supported_image`` accepts
        the MIME type; otherwise the image re-encoded as PNG together with
        ``"image/png"``.
    """
    if is_llm_supported_image(mime_type):
        return content, mime_type

    # The context manager closes the image once encoding is done.
    with Image.open(io.BytesIO(content)) as img:
        buf = io.BytesIO()
        try:
            img.save(buf, format="PNG")
        except OSError:
            # Presumably the PNG writer rejected the pixel mode (e.g. CMYK, YCbCr);
            # retry once after converting. Any other failure is raised again by the retry.
            # A fresh buffer discards any partial output from the failed attempt.
            buf = io.BytesIO()
            fallback_mode = "RGBA" if img.mode == "PA" else "RGB"
            img.convert(fallback_mode).save(buf, format="PNG")
    return buf.getvalue(), "image/png"
39
+
40
+
29
41
  AIMessageType = str | Document | ModelResponse
30
42
  """Type for messages in AIMessages container.
31
43
 
32
- @public
33
-
34
44
  Represents the allowed types for conversation messages:
35
45
  - str: Plain text messages
36
46
  - Document: Structured document content
@@ -38,11 +48,9 @@ Represents the allowed types for conversation messages:
38
48
  """
39
49
 
40
50
 
41
- class AIMessages(list[AIMessageType]):
51
+ class AIMessages(list[AIMessageType]): # noqa: PLR0904
42
52
  """Container for AI conversation messages supporting mixed types.
43
53
 
44
- @public
45
-
46
54
  This class extends list to manage conversation messages between user
47
55
  and AI, supporting text, Document objects, and ModelResponse instances.
48
56
  Messages are converted to OpenAI-compatible format for LLM interactions.
@@ -50,7 +58,8 @@ class AIMessages(list[AIMessageType]):
50
58
  Conversion Rules:
51
59
  - str: Becomes {"role": "user", "content": text}
52
60
  - Document: Becomes {"role": "user", "content": document_content}
53
- (automatically handles text, images, PDFs based on MIME type)
61
+ (automatically handles text, images, PDFs based on MIME type; attachments
62
+ are rendered as <attachment> XML blocks)
54
63
  - ModelResponse: Becomes {"role": "assistant", "content": response.content}
55
64
 
56
65
  Note: Document conversion is automatic. Text content becomes user text messages.
@@ -73,12 +82,6 @@ class AIMessages(list[AIMessageType]):
73
82
  constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
74
83
  accidental character iteration.
75
84
 
76
- Example:
77
- >>> from ai_pipeline_core import llm
78
- >>> messages = AIMessages()
79
- >>> messages.append("What is the capital of France?")
80
- >>> response = await llm.generate("gpt-5.1", messages=messages)
81
- >>> messages.append(response) # Add the actual response
82
85
  """
83
86
 
84
87
  def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
@@ -147,8 +150,8 @@ class AIMessages(list[AIMessageType]):
147
150
 
148
151
  def __setitem__(
149
152
  self,
150
- index: Union[SupportsIndex, slice],
151
- value: Union[AIMessageType, Iterable[AIMessageType]],
153
+ index: SupportsIndex | slice,
154
+ value: AIMessageType | Iterable[AIMessageType],
152
155
  ) -> None:
153
156
  """Set item or slice."""
154
157
  self._check_frozen()
@@ -163,7 +166,7 @@ class AIMessages(list[AIMessageType]):
163
166
  self._check_frozen()
164
167
  return super().__iadd__(other)
165
168
 
166
- def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
169
+ def __delitem__(self, index: SupportsIndex | slice) -> None:
167
170
  """Delete item or slice from list."""
168
171
  self._check_frozen()
169
172
  super().__delitem__(index)
@@ -192,9 +195,7 @@ class AIMessages(list[AIMessageType]):
192
195
  self._check_frozen()
193
196
  super().reverse()
194
197
 
195
- def sort(
196
- self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
197
- ) -> None:
198
+ def sort(self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False) -> None:
198
199
  """Sort list in place."""
199
200
  self._check_frozen()
200
201
  if key is None:
@@ -241,6 +242,8 @@ class AIMessages(list[AIMessageType]):
241
242
 
242
243
  Transforms the message list into the format expected by OpenAI API.
243
244
  Each message type is converted according to its role and content.
245
+ Documents are rendered as XML with any attachments included as nested
246
+ <attachment> blocks.
244
247
 
245
248
  Returns:
246
249
  List of ChatCompletionMessageParam dicts (from openai.types.chat)
@@ -250,14 +253,6 @@ class AIMessages(list[AIMessageType]):
250
253
  Raises:
251
254
  TypeError: If message type is not supported.
252
255
 
253
- Example:
254
- >>> messages = AIMessages(["Hello", response, "Follow up"])
255
- >>> prompt = messages.to_prompt()
256
- >>> # Result: [
257
- >>> # {"role": "user", "content": "Hello"},
258
- >>> # {"role": "assistant", "content": "..."},
259
- >>> # {"role": "user", "content": "Follow up"}
260
- >>> # ]
261
256
  """
262
257
  messages: list[ChatCompletionMessageParam] = []
263
258
 
@@ -285,15 +280,13 @@ class AIMessages(list[AIMessageType]):
285
280
 
286
281
  # Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
287
282
  if hasattr(message.choices[0].message, "provider_specific_fields"):
288
- provider_fields = getattr(
289
- message.choices[0].message, "provider_specific_fields", None
290
- )
283
+ provider_fields = getattr(message.choices[0].message, "provider_specific_fields", None)
291
284
  if provider_fields:
292
285
  assistant_message["provider_specific_fields"] = provider_fields # type: ignore[typeddict-item]
293
286
 
294
287
  messages.append(assistant_message)
295
288
  else:
296
- raise ValueError(f"Unsupported message type: {type(message)}")
289
+ raise TypeError(f"Unsupported message type: {type(message)}")
297
290
 
298
291
  return messages
299
292
 
@@ -333,8 +326,6 @@ class AIMessages(list[AIMessageType]):
333
326
  def approximate_tokens_count(self) -> int:
334
327
  """Approximate tokens count for the messages.
335
328
 
336
- @public
337
-
338
329
  Uses tiktoken with gpt-4 encoding to estimate total token count
339
330
  across all messages in the conversation.
340
331
 
@@ -344,26 +335,27 @@ class AIMessages(list[AIMessageType]):
344
335
  Raises:
345
336
  TypeError: If message contains unsupported type.
346
337
 
347
- Example:
348
- >>> messages = AIMessages(["Hello", "World"])
349
- >>> messages.approximate_tokens_count # ~2-3 tokens
350
338
  """
351
339
  count = 0
340
+ enc = get_tiktoken_encoding()
352
341
  for message in self:
353
342
  if isinstance(message, str):
354
- count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
343
+ count += len(enc.encode(message))
355
344
  elif isinstance(message, Document):
356
345
  count += message.approximate_tokens_count
357
346
  elif isinstance(message, ModelResponse): # type: ignore
358
- count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
347
+ count += len(enc.encode(message.content))
359
348
  else:
360
- raise ValueError(f"Unsupported message type: {type(message)}")
349
+ raise TypeError(f"Unsupported message type: {type(message)}")
361
350
  return count
362
351
 
363
352
  @staticmethod
364
- def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
353
+ def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
365
354
  """Convert a document to prompt format for LLM consumption.
366
355
 
356
+ Renders the document as XML with text/image/PDF content, followed by any
357
+ attachments as separate <attachment> XML blocks with name and description attributes.
358
+
367
359
  Args:
368
360
  document: The document to convert.
369
361
 
@@ -373,60 +365,80 @@ class AIMessages(list[AIMessageType]):
373
365
  prompt: list[ChatCompletionContentPartParam] = []
374
366
 
375
367
  # Build the text header
376
- description = (
377
- f"<description>{document.description}</description>\n" if document.description else ""
378
- )
379
- header_text = (
380
- f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
381
- )
368
+ description = f"<description>{document.description}</description>\n" if document.description else ""
369
+ header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
382
370
 
383
371
  # Handle text documents
384
372
  if document.is_text:
385
373
  text_content = document.content.decode("utf-8")
386
- content_text = f"{header_text}<content>\n{text_content}\n</content>\n</document>\n"
374
+ content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
387
375
  prompt.append({"type": "text", "text": content_text})
388
- return prompt
389
376
 
390
- # Handle non-text documents
391
- if not document.is_image and not document.is_pdf:
392
- get_logger(__name__).error(
393
- f"Document is not a text, image or PDF: {document.name} - {document.mime_type}"
394
- )
377
+ # Handle binary documents (image/PDF)
378
+ elif document.is_image or document.is_pdf:
379
+ prompt.append({"type": "text", "text": f"{header_text}<content>\n"})
380
+
381
+ if document.is_image:
382
+ content_bytes, mime_type = _ensure_llm_compatible_image(document.content, document.mime_type)
383
+ else:
384
+ content_bytes, mime_type = document.content, document.mime_type
385
+ base64_content = base64.b64encode(content_bytes).decode("utf-8")
386
+ data_uri = f"data:{mime_type};base64,{base64_content}"
387
+
388
+ if document.is_pdf:
389
+ prompt.append({
390
+ "type": "file",
391
+ "file": {"file_data": data_uri},
392
+ })
393
+ else:
394
+ prompt.append({
395
+ "type": "image_url",
396
+ "image_url": {"url": data_uri, "detail": "high"},
397
+ })
398
+
399
+ prompt.append({"type": "text", "text": "</content>\n"})
400
+
401
+ else:
402
+ logger.error(f"Document is not a text, image or PDF: {document.name} - {document.mime_type}")
395
403
  return []
396
404
 
397
- # Add header for binary content
398
- prompt.append({
399
- "type": "text",
400
- "text": f"{header_text}<content>\n",
401
- })
402
-
403
- # Encode binary content, converting unsupported image formats to PNG
404
- if document.is_image and not is_llm_supported_image(document.mime_type):
405
- img = Image.open(io.BytesIO(document.content))
406
- buf = io.BytesIO()
407
- img.save(buf, format="PNG")
408
- content_bytes = buf.getvalue()
409
- mime_type = "image/png"
405
+ # Render attachments
406
+ for att in document.attachments:
407
+ desc_attr = f' description="{att.description}"' if att.description else ""
408
+ att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
409
+
410
+ if att.is_text:
411
+ prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
412
+ elif att.is_image or att.is_pdf:
413
+ prompt.append({"type": "text", "text": att_open})
414
+
415
+ if att.is_image:
416
+ att_bytes, att_mime = _ensure_llm_compatible_image(att.content, att.mime_type)
417
+ else:
418
+ att_bytes, att_mime = att.content, att.mime_type
419
+ att_b64 = base64.b64encode(att_bytes).decode("utf-8")
420
+ att_uri = f"data:{att_mime};base64,{att_b64}"
421
+
422
+ if att.is_pdf:
423
+ prompt.append({
424
+ "type": "file",
425
+ "file": {"file_data": att_uri},
426
+ })
427
+ else:
428
+ prompt.append({
429
+ "type": "image_url",
430
+ "image_url": {"url": att_uri, "detail": "high"},
431
+ })
432
+
433
+ prompt.append({"type": "text", "text": "</attachment>\n"})
434
+ else:
435
+ logger.warning(f"Skipping unsupported attachment type: {att.name} - {att.mime_type}")
436
+
437
+ # Close document — merge into last text part to preserve JSON structure (and cache key)
438
+ last = prompt[-1]
439
+ if last["type"] == "text":
440
+ prompt[-1] = {"type": "text", "text": last["text"] + "</document>\n"}
410
441
  else:
411
- content_bytes = document.content
412
- mime_type = document.mime_type
413
-
414
- base64_content = base64.b64encode(content_bytes).decode("utf-8")
415
- data_uri = f"data:{mime_type};base64,{base64_content}"
416
-
417
- # Add appropriate content type
418
- if document.is_pdf:
419
- prompt.append({
420
- "type": "file",
421
- "file": {"file_data": data_uri},
422
- })
423
- else: # is_image
424
- prompt.append({
425
- "type": "image_url",
426
- "image_url": {"url": data_uri, "detail": "high"},
427
- })
428
-
429
- # Close the document tag
430
- prompt.append({"type": "text", "text": "</content>\n</document>\n"})
442
+ prompt.append({"type": "text", "text": "</document>\n"})
431
443
 
432
444
  return prompt