ai-pipeline-core 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/__init__.py +21 -1
  3. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/content.py +1 -0
  4. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/mime_type.py +28 -0
  5. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/ai_messages.py +16 -3
  6. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/client.py +64 -1
  7. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/pyproject.toml +4 -2
  8. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/.gitignore +0 -0
  9. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/LICENSE +0 -0
  10. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/README.md +0 -0
  11. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/__init__.py +0 -0
  12. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/config.py +0 -0
  13. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/processor.py +0 -0
  14. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/summary.py +0 -0
  15. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/writer.py +0 -0
  16. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/__init__.py +0 -0
  17. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/base.py +0 -0
  18. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/contract.py +0 -0
  19. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/helpers.py +0 -0
  20. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/__init__.py +0 -0
  21. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/document.py +0 -0
  22. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/document_list.py +0 -0
  23. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/flow_document.py +0 -0
  24. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/task_document.py +0 -0
  25. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/temporary_document.py +0 -0
  26. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/utils.py +0 -0
  27. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/exceptions.py +0 -0
  28. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/__init__.py +0 -0
  29. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/config.py +0 -0
  30. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/options.py +0 -0
  31. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/images/__init__.py +0 -0
  32. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/images/_processing.py +0 -0
  33. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/__init__.py +0 -0
  34. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_options.py +0 -0
  35. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_response.py +0 -0
  36. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_types.py +0 -0
  37. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/__init__.py +0 -0
  38. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging.yml +0 -0
  39. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging_config.py +0 -0
  40. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  41. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/pipeline.py +0 -0
  42. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prefect.py +0 -0
  43. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/progress.py +0 -0
  44. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/__init__.py +0 -0
  45. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -0
  46. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/global_cache.py +0 -0
  47. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -0
  48. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/prompt_builder.py +0 -0
  49. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -0
  50. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_manager.py +0 -0
  51. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/py.typed +0 -0
  52. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/settings.py +0 -0
  53. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/storage/__init__.py +0 -0
  54. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/storage/storage.py +0 -0
  55. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/tracing.py +0 -0
  56. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/__init__.py +0 -0
  57. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/deploy.py +0 -0
  58. {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/remote_deployment.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -82,6 +82,26 @@ Optional Environment Variables:
82
82
  - LMNR_DEBUG: Set to "true" to enable debug-level traces
83
83
  """
84
84
 
85
+ import os
86
+ import sys
87
+
88
+ # Disable Prefect's built-in OpenTelemetry spans to prevent duplicates.
89
+ # All tracing is handled by our @trace decorator and Laminar SDK.
90
+ # Must be set before Prefect is imported by submodules below.
91
+ os.environ.setdefault("PREFECT_CLOUD_ENABLE_ORCHESTRATION_TELEMETRY", "false")
92
+
93
+ # If Prefect was already imported (user imported it before us), refresh its cached settings.
94
+ if "prefect" in sys.modules:
95
+ try:
96
+ from prefect.settings import get_current_settings # noqa: PLC0415
97
+
98
+ if get_current_settings().cloud.enable_orchestration_telemetry:
99
+ from prefect.context import refresh_global_settings_context # noqa: PLC0415
100
+
101
+ refresh_global_settings_context()
102
+ except (ImportError, AttributeError):
103
+ pass
104
+
85
105
  from . import llm, progress
86
106
  from .deployment import DeploymentContext, DeploymentResult, PipelineDeployment
87
107
  from .documents import (
@@ -130,7 +150,7 @@ from .settings import Settings
130
150
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
131
151
  from .utils.remote_deployment import remote_deployment
132
152
 
133
- __version__ = "0.3.3"
153
+ __version__ = "0.3.4"
134
154
 
135
155
  __all__ = [
136
156
  # Config/Settings
@@ -85,6 +85,7 @@ class ArtifactStore:
85
85
  "image/png": ".png",
86
86
  "image/jpeg": ".jpg",
87
87
  "image/gif": ".gif",
88
+ "image/webp": ".webp",
88
89
  "application/pdf": ".pdf",
89
90
  }
90
91
  ext = ext_map.get(mime_type, ".bin")
@@ -24,6 +24,8 @@ EXTENSION_MIME_MAP = {
24
24
  "gif": "image/gif",
25
25
  "bmp": "image/bmp",
26
26
  "webp": "image/webp",
27
+ "heic": "image/heic",
28
+ "heif": "image/heif",
27
29
  "json": "application/json",
28
30
  "yaml": "application/yaml",
29
31
  "yml": "application/yaml",
@@ -266,3 +268,29 @@ def is_image_mime_type(mime_type: str) -> bool:
266
268
  False
267
269
  """
268
270
  return mime_type.startswith("image/")
271
+
272
+
273
+ LLM_SUPPORTED_IMAGE_MIME_TYPES: frozenset[str] = frozenset({
274
+ "image/png",
275
+ "image/jpeg",
276
+ "image/webp",
277
+ "image/heic",
278
+ "image/heif",
279
+ })
280
+
281
+
282
+ def is_llm_supported_image(mime_type: str) -> bool:
283
+ """Check if MIME type is an image format directly supported by LLMs.
284
+
285
+ Unsupported image formats (gif, bmp, tiff, svg, etc.) need conversion
286
+ to PNG before sending to the LLM.
287
+
288
+ @public
289
+
290
+ Args:
291
+ mime_type: MIME type string to check.
292
+
293
+ Returns:
294
+ True if the image format is natively supported by LLMs.
295
+ """
296
+ return mime_type in LLM_SUPPORTED_IMAGE_MIME_TYPES
@@ -8,6 +8,7 @@ including text, documents, and model responses.
8
8
 
9
9
  import base64
10
10
  import hashlib
11
+ import io
11
12
  import json
12
13
  from copy import deepcopy
13
14
  from typing import Any, Callable, Iterable, SupportsIndex, Union
@@ -17,9 +18,11 @@ from openai.types.chat import (
17
18
  ChatCompletionContentPartParam,
18
19
  ChatCompletionMessageParam,
19
20
  )
21
+ from PIL import Image
20
22
  from prefect.logging import get_logger
21
23
 
22
24
  from ai_pipeline_core.documents import Document
25
+ from ai_pipeline_core.documents.mime_type import is_llm_supported_image
23
26
 
24
27
  from .model_response import ModelResponse
25
28
 
@@ -397,9 +400,19 @@ class AIMessages(list[AIMessageType]):
397
400
  "text": f"{header_text}<content>\n",
398
401
  })
399
402
 
400
- # Encode binary content
401
- base64_content = base64.b64encode(document.content).decode("utf-8")
402
- data_uri = f"data:{document.mime_type};base64,{base64_content}"
403
+ # Encode binary content, converting unsupported image formats to PNG
404
+ if document.is_image and not is_llm_supported_image(document.mime_type):
405
+ img = Image.open(io.BytesIO(document.content))
406
+ buf = io.BytesIO()
407
+ img.save(buf, format="PNG")
408
+ content_bytes = buf.getvalue()
409
+ mime_type = "image/png"
410
+ else:
411
+ content_bytes = document.content
412
+ mime_type = document.mime_type
413
+
414
+ base64_content = base64.b64encode(content_bytes).decode("utf-8")
415
+ data_uri = f"data:{mime_type};base64,{base64_content}"
403
416
 
404
417
  # Add appropriate content type
405
418
  if document.is_pdf:
@@ -13,6 +13,7 @@ Key functions:
13
13
 
14
14
  import asyncio
15
15
  import time
16
+ from io import BytesIO
16
17
  from typing import Any, TypeVar
17
18
 
18
19
  from lmnr import Laminar
@@ -21,19 +22,77 @@ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDone
21
22
  from openai.types.chat import (
22
23
  ChatCompletionMessageParam,
23
24
  )
25
+ from PIL import Image
24
26
  from prefect.logging import get_logger
25
27
  from pydantic import BaseModel, ValidationError
26
28
 
29
+ from ai_pipeline_core.documents import Document
27
30
  from ai_pipeline_core.exceptions import LLMError
31
+ from ai_pipeline_core.images import ImageProcessingConfig, process_image_to_documents
28
32
  from ai_pipeline_core.settings import settings
29
33
 
30
- from .ai_messages import AIMessages
34
+ from .ai_messages import AIMessages, AIMessageType
31
35
  from .model_options import ModelOptions
32
36
  from .model_response import ModelResponse, StructuredModelResponse
33
37
  from .model_types import ModelName
34
38
 
35
39
  logger = get_logger()
36
40
 
41
+ # Image splitting configs for automatic large-image handling at the LLM boundary.
42
+ # Gemini supports up to 3000x3000; all other models use a conservative 1000x1000 default.
43
+ _GEMINI_IMAGE_CONFIG = ImageProcessingConfig(
44
+ max_dimension=3000, max_pixels=9_000_000, jpeg_quality=75
45
+ )
46
+ _DEFAULT_IMAGE_CONFIG = ImageProcessingConfig(
47
+ max_dimension=1000, max_pixels=1_000_000, jpeg_quality=75
48
+ )
49
+
50
+
51
+ def _get_image_config(model: str) -> ImageProcessingConfig:
52
+ """Return the image splitting config for a model."""
53
+ if "gemini" in model.lower():
54
+ return _GEMINI_IMAGE_CONFIG
55
+ return _DEFAULT_IMAGE_CONFIG
56
+
57
+
58
+ def _prepare_images_for_model(messages: AIMessages, model: str) -> AIMessages:
59
+ """Split image documents that exceed model constraints.
60
+
61
+ Returns a new AIMessages with oversized images replaced by tiles.
62
+ Returns the original instance unchanged if no splitting is needed.
63
+ """
64
+ if not any(isinstance(m, Document) and m.is_image for m in messages):
65
+ return messages
66
+
67
+ config = _get_image_config(model)
68
+ result: list[AIMessageType] = []
69
+ changed = False
70
+
71
+ for msg in messages:
72
+ if not (isinstance(msg, Document) and msg.is_image):
73
+ result.append(msg)
74
+ continue
75
+
76
+ try:
77
+ with Image.open(BytesIO(msg.content)) as img:
78
+ w, h = img.size
79
+ except Exception:
80
+ result.append(msg)
81
+ continue
82
+
83
+ if w <= config.max_dimension and h <= config.max_dimension and w * h <= config.max_pixels:
84
+ result.append(msg)
85
+ continue
86
+
87
+ name_prefix = msg.name.rsplit(".", 1)[0] if "." in msg.name else msg.name
88
+ tiles = process_image_to_documents(msg, config=config, name_prefix=name_prefix)
89
+ result.extend(tiles)
90
+ changed = True
91
+
92
+ if not changed:
93
+ return messages
94
+ return AIMessages(result)
95
+
37
96
 
38
97
  def _process_messages(
39
98
  context: AIMessages,
@@ -271,6 +330,10 @@ async def _generate_with_retry(
271
330
  if not context and not messages:
272
331
  raise ValueError("Either context or messages must be provided")
273
332
 
333
+ # Auto-split large images based on model-specific constraints
334
+ context = _prepare_images_for_model(context, model)
335
+ messages = _prepare_images_for_model(messages, model)
336
+
274
337
  if "gemini" in model.lower() and context.approximate_tokens_count < 10000:
275
338
  # Bug fix for minimum explicit context size for Gemini models
276
339
  options.cache_ttl = None
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.3.3"
3
+ version = "0.3.4"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -177,9 +177,11 @@ reportIncompatibleMethodOverride = "error"
177
177
  reportIncompatibleVariableOverride = "error"
178
178
  # Report missing parameter types as warnings
179
179
  reportMissingParameterType = "warning"
180
+ # Allow tests to import private internals for unit testing
181
+ reportPrivateUsage = "warning"
180
182
 
181
183
  [tool.bumpversion]
182
- current_version = "0.3.2"
184
+ current_version = "0.3.4"
183
185
  commit = true
184
186
  tag = true
185
187
  tag_name = "v{new_version}"