ai-pipeline-core 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/PKG-INFO +1 -1
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/__init__.py +21 -1
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/content.py +1 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/mime_type.py +28 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/ai_messages.py +16 -3
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/client.py +64 -1
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/pyproject.toml +4 -2
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/.gitignore +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/LICENSE +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/README.md +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/config.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/processor.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/summary.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/debug/writer.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/base.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/contract.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/deployment/helpers.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/document.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/document_list.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/flow_document.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/task_document.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/temporary_document.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/utils.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/config.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/flow/options.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/images/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/images/_processing.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_options.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_response.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/model_types.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging_config.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/logging/logging_mixin.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/pipeline.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prefect.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/progress.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/global_cache.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/prompt_builder.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/prompt_manager.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/py.typed +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/settings.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/storage/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/storage/storage.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/tracing.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/__init__.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/deploy.py +0 -0
- {ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/utils/remote_deployment.py +0 -0
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.3.3
+Version: 0.3.4
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/__init__.py

@@ -82,6 +82,26 @@ Optional Environment Variables:
 - LMNR_DEBUG: Set to "true" to enable debug-level traces
 """
 
+import os
+import sys
+
+# Disable Prefect's built-in OpenTelemetry spans to prevent duplicates.
+# All tracing is handled by our @trace decorator and Laminar SDK.
+# Must be set before Prefect is imported by submodules below.
+os.environ.setdefault("PREFECT_CLOUD_ENABLE_ORCHESTRATION_TELEMETRY", "false")
+
+# If Prefect was already imported (user imported it before us), refresh its cached settings.
+if "prefect" in sys.modules:
+    try:
+        from prefect.settings import get_current_settings  # noqa: PLC0415
+
+        if get_current_settings().cloud.enable_orchestration_telemetry:
+            from prefect.context import refresh_global_settings_context  # noqa: PLC0415
+
+            refresh_global_settings_context()
+    except (ImportError, AttributeError):
+        pass
+
 from . import llm, progress
 from .deployment import DeploymentContext, DeploymentResult, PipelineDeployment
 from .documents import (

@@ -130,7 +150,7 @@ from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 from .utils.remote_deployment import remote_deployment
 
-__version__ = "0.3.3"
+__version__ = "0.3.4"
 
 __all__ = [
     # Config/Settings
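A note on the guard above: unlike a plain assignment, os.environ.setdefault leaves an explicitly-set value alone, so a user who opted in to Prefect's orchestration telemetry before importing the package keeps that choice. A stdlib-only sketch of that behavior (hypothetical standalone script):

    import os

    # Simulate a user who opted in before importing ai_pipeline_core.
    os.environ["PREFECT_CLOUD_ENABLE_ORCHESTRATION_TELEMETRY"] = "true"

    # setdefault writes the key only when it is absent, so the opt-in survives.
    os.environ.setdefault("PREFECT_CLOUD_ENABLE_ORCHESTRATION_TELEMETRY", "false")
    print(os.environ["PREFECT_CLOUD_ENABLE_ORCHESTRATION_TELEMETRY"])  # prints: true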
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/documents/mime_type.py

@@ -24,6 +24,8 @@ EXTENSION_MIME_MAP = {
     "gif": "image/gif",
     "bmp": "image/bmp",
     "webp": "image/webp",
+    "heic": "image/heic",
+    "heif": "image/heif",
     "json": "application/json",
     "yaml": "application/yaml",
     "yml": "application/yaml",

@@ -266,3 +268,29 @@ def is_image_mime_type(mime_type: str) -> bool:
         False
     """
     return mime_type.startswith("image/")
+
+
+LLM_SUPPORTED_IMAGE_MIME_TYPES: frozenset[str] = frozenset({
+    "image/png",
+    "image/jpeg",
+    "image/webp",
+    "image/heic",
+    "image/heif",
+})
+
+
+def is_llm_supported_image(mime_type: str) -> bool:
+    """Check if MIME type is an image format directly supported by LLMs.
+
+    Unsupported image formats (gif, bmp, tiff, svg, etc.) need conversion
+    to PNG before sending to the LLM.
+
+    @public
+
+    Args:
+        mime_type: MIME type string to check.
+
+    Returns:
+        True if the image format is natively supported by LLMs.
+    """
+    return mime_type in LLM_SUPPORTED_IMAGE_MIME_TYPES
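A quick usage sketch of the new helper; the expected values follow directly from LLM_SUPPORTED_IMAGE_MIME_TYPES above:

    from ai_pipeline_core.documents.mime_type import is_llm_supported_image

    assert is_llm_supported_image("image/png")
    assert is_llm_supported_image("image/heic")
    assert not is_llm_supported_image("image/bmp")  # converted to PNG before sending
    assert not is_llm_supported_image("image/gif")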
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/ai_messages.py

@@ -8,6 +8,7 @@ including text, documents, and model responses.
 
 import base64
 import hashlib
+import io
 import json
 from copy import deepcopy
 from typing import Any, Callable, Iterable, SupportsIndex, Union

@@ -17,9 +18,11 @@ from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
 )
+from PIL import Image
 from prefect.logging import get_logger
 
 from ai_pipeline_core.documents import Document
+from ai_pipeline_core.documents.mime_type import is_llm_supported_image
 
 from .model_response import ModelResponse
 

@@ -397,9 +400,19 @@ class AIMessages(list[AIMessageType]):
                 "text": f"{header_text}<content>\n",
             })
 
-            # Encode binary content
-            base64_content = base64.b64encode(document.content).decode("utf-8")
-            data_uri = f"data:{document.mime_type};base64,{base64_content}"
+            # Encode binary content, converting unsupported image formats to PNG
+            if document.is_image and not is_llm_supported_image(document.mime_type):
+                img = Image.open(io.BytesIO(document.content))
+                buf = io.BytesIO()
+                img.save(buf, format="PNG")
+                content_bytes = buf.getvalue()
+                mime_type = "image/png"
+            else:
+                content_bytes = document.content
+                mime_type = document.mime_type
+
+            base64_content = base64.b64encode(content_bytes).decode("utf-8")
+            data_uri = f"data:{mime_type};base64,{base64_content}"
 
             # Add appropriate content type
             if document.is_pdf:
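The conversion branch above decodes the raw bytes with Pillow and re-encodes them as PNG before the base64 data URI is built. A minimal standalone sketch of that path, assuming Pillow is installed (the in-memory BMP stands in for document.content):

    import base64
    import io

    from PIL import Image

    # Build a small BMP in memory to stand in for an unsupported document.content.
    src = io.BytesIO()
    Image.new("RGB", (4, 4), "red").save(src, format="BMP")
    bmp_bytes = src.getvalue()

    # Same steps as the diff: decode, re-encode as PNG, then base64 data URI.
    img = Image.open(io.BytesIO(bmp_bytes))
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    base64_content = base64.b64encode(buf.getvalue()).decode("utf-8")
    data_uri = f"data:image/png;base64,{base64_content}"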
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/ai_pipeline_core/llm/client.py

@@ -13,6 +13,7 @@ Key functions:
 
 import asyncio
 import time
+from io import BytesIO
 from typing import Any, TypeVar
 
 from lmnr import Laminar

@@ -21,19 +22,77 @@ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDoneEvent
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
+from PIL import Image
 from prefect.logging import get_logger
 from pydantic import BaseModel, ValidationError
 
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.exceptions import LLMError
+from ai_pipeline_core.images import ImageProcessingConfig, process_image_to_documents
 from ai_pipeline_core.settings import settings
 
-from .ai_messages import AIMessages
+from .ai_messages import AIMessages, AIMessageType
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
 from .model_types import ModelName
 
 logger = get_logger()
 
+# Image splitting configs for automatic large-image handling at the LLM boundary.
+# Gemini supports up to 3000x3000; all other models use a conservative 1000x1000 default.
+_GEMINI_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=3000, max_pixels=9_000_000, jpeg_quality=75
+)
+_DEFAULT_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=1000, max_pixels=1_000_000, jpeg_quality=75
+)
+
+
+def _get_image_config(model: str) -> ImageProcessingConfig:
+    """Return the image splitting config for a model."""
+    if "gemini" in model.lower():
+        return _GEMINI_IMAGE_CONFIG
+    return _DEFAULT_IMAGE_CONFIG
+
+
+def _prepare_images_for_model(messages: AIMessages, model: str) -> AIMessages:
+    """Split image documents that exceed model constraints.
+
+    Returns a new AIMessages with oversized images replaced by tiles.
+    Returns the original instance unchanged if no splitting is needed.
+    """
+    if not any(isinstance(m, Document) and m.is_image for m in messages):
+        return messages
+
+    config = _get_image_config(model)
+    result: list[AIMessageType] = []
+    changed = False
+
+    for msg in messages:
+        if not (isinstance(msg, Document) and msg.is_image):
+            result.append(msg)
+            continue
+
+        try:
+            with Image.open(BytesIO(msg.content)) as img:
+                w, h = img.size
+        except Exception:
+            result.append(msg)
+            continue
+
+        if w <= config.max_dimension and h <= config.max_dimension and w * h <= config.max_pixels:
+            result.append(msg)
+            continue
+
+        name_prefix = msg.name.rsplit(".", 1)[0] if "." in msg.name else msg.name
+        tiles = process_image_to_documents(msg, config=config, name_prefix=name_prefix)
+        result.extend(tiles)
+        changed = True
+
+    if not changed:
+        return messages
+    return AIMessages(result)
+
 
 def _process_messages(
     context: AIMessages,

@@ -271,6 +330,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
+    # Auto-split large images based on model-specific constraints
+    context = _prepare_images_for_model(context, model)
+    messages = _prepare_images_for_model(messages, model)
+
     if "gemini" in model.lower() and context.approximate_tokens_count < 10000:
         # Bug fix for minimum explicit context size for Gemini models
         options.cache_ttl = None
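Two details of _prepare_images_for_model are worth noting: the size gate keeps an image whole only when both axes and the total pixel count fit the selected config, and the tile name prefix strips just the final extension. A small self-checking sketch (constants mirror _DEFAULT_IMAGE_CONFIG; the needs_split helper is illustrative, not part of the library):

    # Illustrative re-statement of the gate in _prepare_images_for_model.
    MAX_DIMENSION, MAX_PIXELS = 1000, 1_000_000  # mirrors _DEFAULT_IMAGE_CONFIG

    def needs_split(w: int, h: int) -> bool:
        return w > MAX_DIMENSION or h > MAX_DIMENSION or w * h > MAX_PIXELS

    assert not needs_split(1000, 1000)  # exactly at the limits: passed through
    assert needs_split(2400, 300)       # one axis too large: replaced by tiles

    # Tile name prefix: only the final extension is stripped.
    for name, prefix in [("diagram.png", "diagram"), ("scan.v2.tiff", "scan.v2"), ("raw", "raw")]:
        assert (name.rsplit(".", 1)[0] if "." in name else name) == prefix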
{ai_pipeline_core-0.3.3 → ai_pipeline_core-0.3.4}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.3.3"
+version = "0.3.4"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}

@@ -177,9 +177,11 @@ reportIncompatibleMethodOverride = "error"
 reportIncompatibleVariableOverride = "error"
 # Report missing parameter types as warnings
 reportMissingParameterType = "warning"
+# Allow tests to import private internals for unit testing
+reportPrivateUsage = "warning"
 
 [tool.bumpversion]
-current_version = "0.3.3"
+current_version = "0.3.4"
 commit = true
 tag = true
 tag_name = "v{new_version}"
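The reportPrivateUsage downgrade pairs with the new private helpers in client.py: a test can import them directly and pyright emits a warning instead of an error. A hypothetical test sketch (the test name and model string are illustrative):

    # tests/test_client.py (hypothetical)
    from ai_pipeline_core.llm.ai_messages import AIMessages
    from ai_pipeline_core.llm.client import _prepare_images_for_model


    def test_passthrough_without_images() -> None:
        msgs = AIMessages(["hello"])
        # No image documents present, so the same instance is returned unchanged.
        assert _prepare_images_for_model(msgs, "gpt-5") is msgs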