ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +19 -2
- ai_pipeline_core/debug/__init__.py +26 -0
- ai_pipeline_core/debug/config.py +91 -0
- ai_pipeline_core/debug/content.py +705 -0
- ai_pipeline_core/debug/processor.py +99 -0
- ai_pipeline_core/debug/summary.py +236 -0
- ai_pipeline_core/debug/writer.py +913 -0
- ai_pipeline_core/flow/options.py +3 -3
- ai_pipeline_core/images/__init__.py +362 -0
- ai_pipeline_core/images/_processing.py +157 -0
- ai_pipeline_core/llm/ai_messages.py +25 -4
- ai_pipeline_core/llm/client.py +14 -16
- ai_pipeline_core/llm/model_response.py +5 -5
- ai_pipeline_core/llm/model_types.py +10 -12
- ai_pipeline_core/logging/logging_mixin.py +2 -2
- ai_pipeline_core/prompt_builder/prompt_builder.py +3 -3
- ai_pipeline_core/tracing.py +53 -1
- ai_pipeline_core/utils/deploy.py +214 -6
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +74 -8
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.3.dist-info}/RECORD +22 -14
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/flow/options.py
CHANGED
@@ -41,7 +41,7 @@ class FlowOptions(BaseSettings):

         >>> # Or create programmatically:
         >>> options = MyFlowOptions(
-        ...     core_model="gemini-
+        ...     core_model="gemini-3-pro",
         ...     temperature=0.9
         ... )

@@ -61,11 +61,11 @@ class FlowOptions(BaseSettings):
     """

     core_model: ModelName = Field(
-        default="gemini-
+        default="gemini-3-pro",
         description="Primary model for complex analysis and generation tasks.",
     )
     small_model: ModelName = Field(
-        default="grok-4-fast",
+        default="grok-4.1-fast",
         description="Fast, cost-effective model for simple tasks and orchestration.",
     )

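For reference, the new defaults are easy to exercise. The sketch below is illustrative only: the MyFlowOptions subclass and its temperature field mirror the docstring example above and are not part of the package API, and the asserts assume no environment overrides.

    from ai_pipeline_core.flow.options import FlowOptions

    class MyFlowOptions(FlowOptions):
        temperature: float = 0.7  # hypothetical extra field from the docstring example

    # Defaults resolve to the 0.3.3 values (assuming no environment overrides)
    opts = MyFlowOptions()
    assert opts.core_model == "gemini-3-pro"
    assert opts.small_model == "grok-4.1-fast"

    # Or override programmatically, as the docstring shows
    opts = MyFlowOptions(core_model="gemini-3-pro", temperature=0.9)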
ai_pipeline_core/images/__init__.py
ADDED
@@ -0,0 +1,362 @@
+"""Image processing utilities for LLM vision models.
+
+@public
+
+Splits large images, compresses to JPEG, and respects model-specific constraints.
+Designed for website screenshots, document pages, and other visual content
+sent to vision-capable LLMs.
+
+Quick Start:
+    >>> from ai_pipeline_core.images import process_image, ImagePreset
+    >>>
+    >>> result = process_image(screenshot_bytes)
+    >>> for part in result:
+    ...     send_to_llm(part.data, context=part.label)
+    >>>
+    >>> result = process_image(screenshot_bytes, preset=ImagePreset.GEMINI)
+"""
+
+from enum import StrEnum
+
+from pydantic import BaseModel, Field
+
+from ai_pipeline_core.documents import Document, TemporaryDocument
+
+from ._processing import execute_split, load_and_normalize, plan_split
+
+__all__ = [
+    "ImagePreset",
+    "ImageProcessingConfig",
+    "ImagePart",
+    "ProcessedImage",
+    "ImageProcessingError",
+    "process_image",
+    "process_image_to_documents",
+]
+
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+
+class ImagePreset(StrEnum):
+    """Presets for LLM vision model constraints.
+
+    @public
+    """
+
+    GEMINI = "gemini"
+    CLAUDE = "claude"
+    GPT4V = "gpt4v"
+
+
+class ImageProcessingConfig(BaseModel):
+    """Configuration for image processing.
+
+    @public
+
+    Use ``for_preset`` for standard configurations or construct directly for
+    custom constraints.
+
+    Example:
+        >>> config = ImageProcessingConfig.for_preset(ImagePreset.GEMINI)
+        >>> config = ImageProcessingConfig(max_dimension=2000, jpeg_quality=80)
+    """
+
+    model_config = {"frozen": True}
+
+    max_dimension: int = Field(
+        default=3000,
+        ge=100,
+        le=8192,
+        description="Maximum width AND height in pixels",
+    )
+    max_pixels: int = Field(
+        default=9_000_000,
+        ge=10_000,
+        description="Maximum total pixels per output image part",
+    )
+    overlap_fraction: float = Field(
+        default=0.20,
+        ge=0.0,
+        le=0.5,
+        description="Overlap between adjacent vertical parts (0.0-0.5)",
+    )
+    max_parts: int = Field(
+        default=20,
+        ge=1,
+        le=100,
+        description="Maximum number of output image parts",
+    )
+    jpeg_quality: int = Field(
+        default=60,
+        ge=10,
+        le=95,
+        description="JPEG compression quality (10-95)",
+    )
+
+    @classmethod
+    def for_preset(cls, preset: ImagePreset) -> "ImageProcessingConfig":
+        """Create configuration from a model preset.
+
+        @public
+        """
+        return _PRESETS[preset]
+
+
+_PRESETS: dict[ImagePreset, ImageProcessingConfig] = {
+    ImagePreset.GEMINI: ImageProcessingConfig(
+        max_dimension=3000,
+        max_pixels=9_000_000,
+        jpeg_quality=75,
+    ),
+    ImagePreset.CLAUDE: ImageProcessingConfig(
+        max_dimension=1568,
+        max_pixels=1_150_000,
+        jpeg_quality=60,
+    ),
+    ImagePreset.GPT4V: ImageProcessingConfig(
+        max_dimension=2048,
+        max_pixels=4_000_000,
+        jpeg_quality=70,
+    ),
+}
+
+
+# ---------------------------------------------------------------------------
+# Result models
+# ---------------------------------------------------------------------------
+
+
+class ImagePart(BaseModel):
+    """A single processed image part.
+
+    @public
+    """
+
+    model_config = {"frozen": True}
+
+    data: bytes = Field(repr=False)
+    width: int
+    height: int
+    index: int = Field(ge=0, description="0-indexed position")
+    total: int = Field(ge=1, description="Total number of parts")
+    source_y: int = Field(ge=0, description="Y offset in original image")
+    source_height: int = Field(ge=1, description="Height of region in original")
+
+    @property
+    def label(self) -> str:
+        """Human-readable label for LLM context, 1-indexed.
+
+        @public
+        """
+        if self.total == 1:
+            return "Full image"
+        return f"Part {self.index + 1}/{self.total}"
+
+
+class ProcessedImage(BaseModel):
+    """Result of image processing.
+
+    @public
+
+    Iterable: ``for part in result`` iterates over parts.
+    """
+
+    model_config = {"frozen": True}
+
+    parts: list[ImagePart]
+    original_width: int
+    original_height: int
+    original_bytes: int
+    output_bytes: int
+    was_trimmed: bool = Field(description="True if width was trimmed to fit")
+    warnings: list[str] = Field(default_factory=list)
+
+    @property
+    def compression_ratio(self) -> float:
+        """Output size / input size (lower means more compression).
+
+        @public
+        """
+        if self.original_bytes <= 0:
+            return 1.0
+        return self.output_bytes / self.original_bytes
+
+    def __len__(self) -> int:
+        return len(self.parts)
+
+    def __iter__(self):  # type: ignore[override]
+        return iter(self.parts)
+
+    def __getitem__(self, idx: int) -> ImagePart:
+        return self.parts[idx]
+
+
+# ---------------------------------------------------------------------------
+# Exceptions
+# ---------------------------------------------------------------------------
+
+
+class ImageProcessingError(Exception):
+    """Image processing failed.
+
+    @public
+    """
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def process_image(
+    image: bytes | Document,
+    preset: ImagePreset = ImagePreset.GEMINI,
+    config: ImageProcessingConfig | None = None,
+) -> ProcessedImage:
+    """Process an image for LLM vision models.
+
+    @public
+
+    Splits tall images vertically with overlap, trims width if needed, and
+    compresses to JPEG. The default preset is **GEMINI** (3000 px, 9M pixels).
+
+    Args:
+        image: Raw image bytes or a Document whose content is an image.
+        preset: Model preset (ignored when *config* is provided).
+        config: Custom configuration that overrides the preset.
+
+    Returns:
+        A ``ProcessedImage`` containing one or more ``ImagePart`` objects.
+
+    Raises:
+        ImageProcessingError: If the image cannot be decoded or processed.
+
+    Example:
+        >>> result = process_image(screenshot_bytes)
+        >>> for part in result:
+        ...     print(part.label, len(part.data))
+    """
+    effective = config if config is not None else ImageProcessingConfig.for_preset(preset)
+
+    # Resolve input bytes
+    raw: bytes
+    if isinstance(image, Document):
+        raw = image.content
+    elif isinstance(image, bytes):  # type: ignore[reportUnnecessaryIsInstance]
+        raw = image
+    else:
+        raise ImageProcessingError(f"Unsupported image input type: {type(image)}")
+
+    if not raw:
+        raise ImageProcessingError("Empty image data")
+
+    original_bytes = len(raw)
+
+    # Load & normalise
+    try:
+        img = load_and_normalize(raw)
+    except Exception as exc:
+        raise ImageProcessingError(f"Failed to decode image: {exc}") from exc
+
+    original_width, original_height = img.size
+
+    # Plan
+    plan = plan_split(
+        width=original_width,
+        height=original_height,
+        max_dimension=effective.max_dimension,
+        max_pixels=effective.max_pixels,
+        overlap_fraction=effective.overlap_fraction,
+        max_parts=effective.max_parts,
+    )
+
+    # Execute
+    raw_parts = execute_split(img, plan, effective.jpeg_quality)
+
+    # Build result
+    parts: list[ImagePart] = []
+    total = len(raw_parts)
+    total_output = 0
+
+    for idx, (data, w, h, sy, sh) in enumerate(raw_parts):
+        total_output += len(data)
+        parts.append(
+            ImagePart(
+                data=data,
+                width=w,
+                height=h,
+                index=idx,
+                total=total,
+                source_y=sy,
+                source_height=sh,
+            )
+        )
+
+    return ProcessedImage(
+        parts=parts,
+        original_width=original_width,
+        original_height=original_height,
+        original_bytes=original_bytes,
+        output_bytes=total_output,
+        was_trimmed=plan.trim_width is not None,
+        warnings=plan.warnings,
+    )
+
+
+def process_image_to_documents(
+    image: bytes | Document,
+    preset: ImagePreset = ImagePreset.GEMINI,
+    config: ImageProcessingConfig | None = None,
+    name_prefix: str = "image",
+    sources: list[str] | None = None,
+) -> list[TemporaryDocument]:
+    """Process an image and return parts as ``TemporaryDocument`` list.
+
+    @public
+
+    Convenience wrapper around ``process_image`` for direct integration
+    with ``AIMessages``.
+
+    Args:
+        image: Raw image bytes or a Document.
+        preset: Model preset (ignored when *config* is provided).
+        config: Custom configuration.
+        name_prefix: Prefix for generated document names.
+        sources: Optional provenance references attached to each document.
+
+    Returns:
+        List of ``TemporaryDocument`` instances with JPEG image data.
+
+    Example:
+        >>> docs = process_image_to_documents(screenshot_bytes)
+        >>> messages = AIMessages(docs)
+    """
+    result = process_image(image, preset=preset, config=config)
+
+    # Resolve sources
+    doc_sources: list[str] = list(sources or [])
+    if isinstance(image, Document):
+        doc_sources.append(image.sha256)
+
+    documents: list[TemporaryDocument] = []
+    for part in result.parts:
+        if len(result.parts) == 1:
+            name = f"{name_prefix}.jpg"
+            desc = None
+        else:
+            name = f"{name_prefix}_{part.index + 1:02d}_of_{part.total:02d}.jpg"
+            desc = part.label
+
+        documents.append(
+            TemporaryDocument.create(
+                name=name,
+                content=part.data,
+                description=desc,
+                sources=doc_sources or None,
+            )
+        )
+
+    return documents
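A usage sketch for the new images module, based on the docstrings above; screenshot_bytes stands in for real PNG/JPEG bytes and the file name is illustrative.

    from ai_pipeline_core.images import (
        ImagePreset,
        ImageProcessingConfig,
        process_image,
        process_image_to_documents,
    )

    with open("screenshot.png", "rb") as f:  # illustrative input file
        screenshot_bytes = f.read()

    # Preset-driven processing (GEMINI is the default preset)
    result = process_image(screenshot_bytes, preset=ImagePreset.CLAUDE)
    for part in result:
        print(part.label, part.width, part.height, len(part.data))
    print("ratio:", round(result.compression_ratio, 2), "trimmed:", result.was_trimmed)

    # Custom constraints override the preset entirely
    config = ImageProcessingConfig(max_dimension=2000, jpeg_quality=80)
    result = process_image(screenshot_bytes, config=config)

    # Convenience wrapper that yields TemporaryDocument parts for AIMessages
    docs = process_image_to_documents(screenshot_bytes, name_prefix="page")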
ai_pipeline_core/images/_processing.py
ADDED
@@ -0,0 +1,157 @@
+"""Internal image processing logic: planning, splitting, encoding."""
+
+from dataclasses import dataclass
+from io import BytesIO
+from math import ceil
+
+from PIL import Image, ImageOps
+
+PIL_MAX_PIXELS = 100_000_000  # 100MP security limit
+
+
+@dataclass(frozen=True)
+class SplitPlan:
+    """Describes how to split an image into parts."""
+
+    tile_width: int
+    tile_height: int
+    step_y: int
+    num_parts: int
+    trim_width: int | None  # None = no trim needed
+    warnings: list[str]
+
+
+def plan_split(
+    width: int,
+    height: int,
+    max_dimension: int,
+    max_pixels: int,
+    overlap_fraction: float,
+    max_parts: int,
+) -> SplitPlan:
+    """Calculate how to split an image. Pure function, no side effects.
+
+    Returns a SplitPlan describing tile size, step, and number of parts.
+    """
+    warnings: list[str] = []
+
+    # Effective tile size respecting both max_dimension and max_pixels
+    tile_size = max_dimension
+    while tile_size * tile_size > max_pixels and tile_size > 100:
+        tile_size -= 10
+
+    # Width: trim if needed (left-aligned, web content is left-aligned)
+    trim_width = tile_size if width > tile_size else None
+
+    effective_width = min(width, tile_size)
+
+    # If single-tile pixel budget is still exceeded by width * tile_height, reduce tile_height
+    tile_h = tile_size
+    while effective_width * tile_h > max_pixels and tile_h > 100:
+        tile_h -= 10
+
+    # No vertical split needed
+    if height <= tile_h:
+        return SplitPlan(
+            tile_width=effective_width,
+            tile_height=height,
+            step_y=0,
+            num_parts=1,
+            trim_width=trim_width,
+            warnings=warnings,
+        )
+
+    # Vertical split with overlap
+    overlap_px = int(tile_h * overlap_fraction)
+    step = tile_h - overlap_px
+    if step <= 0:
+        step = 1
+
+    num_parts = 1 + ceil((height - tile_h) / step)
+
+    # Auto-reduce if exceeds max_parts
+    if num_parts > max_parts:
+        warnings.append(
+            f"Image requires {num_parts} parts but max is {max_parts}. "
+            f"Reducing to {max_parts} parts with larger step."
+        )
+        num_parts = max_parts
+        if num_parts > 1:
+            step = (height - tile_h) // (num_parts - 1)
+        else:
+            step = 0
+
+    return SplitPlan(
+        tile_width=effective_width,
+        tile_height=tile_h,
+        step_y=step,
+        num_parts=num_parts,
+        trim_width=trim_width,
+        warnings=warnings,
+    )
+
+
+def load_and_normalize(data: bytes) -> Image.Image:
+    """Load image from bytes, apply EXIF orientation, validate size."""
+    img = Image.open(BytesIO(data))
+    img.load()
+
+    if img.width * img.height > PIL_MAX_PIXELS:
+        raise ValueError(
+            f"Image too large: {img.width}x{img.height} = {img.width * img.height:,} pixels "
+            f"(limit: {PIL_MAX_PIXELS:,})"
+        )
+
+    # Fix EXIF orientation (important for mobile photos)
+    img = ImageOps.exif_transpose(img)
+    return img
+
+
+def encode_jpeg(img: Image.Image, quality: int) -> bytes:
+    """Encode PIL Image as JPEG bytes."""
+    # Convert to RGB if needed (JPEG doesn't support alpha)
+    if img.mode not in ("RGB", "L"):
+        img = img.convert("RGB")
+
+    buf = BytesIO()
+    img.save(buf, format="JPEG", quality=quality, optimize=True)
+    return buf.getvalue()
+
+
+def execute_split(
+    img: Image.Image,
+    plan: SplitPlan,
+    jpeg_quality: int,
+) -> list[tuple[bytes, int, int, int, int]]:
+    """Execute a split plan on an image.
+
+    Returns list of (data, width, height, source_y, source_height) tuples.
+    """
+    width, height = img.size
+
+    # Trim width if needed (left-aligned crop)
+    if plan.trim_width is not None and width > plan.trim_width:
+        img = img.crop((0, 0, plan.trim_width, height))
+        width = plan.trim_width
+
+    # Convert to RGB once for JPEG
+    if img.mode not in ("RGB", "L"):
+        img = img.convert("RGB")
+
+    parts: list[tuple[bytes, int, int, int, int]] = []
+
+    for i in range(plan.num_parts):
+        if plan.num_parts == 1:
+            y = 0
+        else:
+            y = i * plan.step_y
+            # Clamp so last tile aligns to bottom
+            y = min(y, max(0, height - plan.tile_height))
+
+        h = min(plan.tile_height, height - y)
+        tile = img.crop((0, y, width, y + h))
+
+        data = encode_jpeg(tile, jpeg_quality)
+        parts.append((data, width, h, y, h))
+
+    return parts
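A worked example of the planning arithmetic above, using the GEMINI preset values (3000 px max dimension, 9M pixel budget, 20% overlap, 20 parts max); the input size is made up for illustration, and plan_split is an internal helper rather than public API.

    from ai_pipeline_core.images._processing import plan_split

    plan = plan_split(
        width=1280,
        height=20_000,  # a tall page screenshot (hypothetical)
        max_dimension=3000,
        max_pixels=9_000_000,
        overlap_fraction=0.20,
        max_parts=20,
    )
    # The tile stays 3000 px tall (1280 * 3000 = 3.84M pixels fits the 9M budget),
    # the 20% overlap is 600 px, so the step is 2400 px and
    # num_parts = 1 + ceil((20_000 - 3000) / 2400) = 9, well under max_parts.
    print(plan.tile_width, plan.tile_height, plan.step_y, plan.num_parts)  # 1280 3000 2400 9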
ai_pipeline_core/llm/ai_messages.py
CHANGED
@@ -53,7 +53,7 @@ class AIMessages(list[AIMessageType]):
     Note: Document conversion is automatic. Text content becomes user text messages.

     VISION/PDF MODEL COMPATIBILITY WARNING:
-    Images require vision-capable models (e.g., gpt-
+    Images require vision-capable models (e.g., gpt-5.1, gemini-3-flash, gemini-3-pro).
     Non-vision models will raise ValueError when encountering image documents.
     PDFs require models with document processing support - check your model's capabilities
     before including PDF documents in messages. Unsupported models may fall back to
@@ -74,7 +74,7 @@ class AIMessages(list[AIMessageType]):
         >>> from ai_pipeline_core import llm
         >>> messages = AIMessages()
         >>> messages.append("What is the capital of France?")
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>> messages.append(response)  # Add the actual response
     """

@@ -264,10 +264,31 @@ class AIMessages(list[AIMessageType]):
             elif isinstance(message, Document):
                 messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
             elif isinstance(message, ModelResponse):  # type: ignore
-
+                # Build base assistant message
+                assistant_message: ChatCompletionMessageParam = {
                     "role": "assistant",
                     "content": [{"type": "text", "text": message.content}],
-                }
+                }
+
+                # Preserve reasoning_content (Gemini Flash 3+, O1, O3, GPT-5)
+                if reasoning_content := message.reasoning_content:
+                    assistant_message["reasoning_content"] = reasoning_content  # type: ignore[typeddict-item]
+
+                # Preserve thinking_blocks (structured thinking)
+                if hasattr(message.choices[0].message, "thinking_blocks"):
+                    thinking_blocks = getattr(message.choices[0].message, "thinking_blocks", None)
+                    if thinking_blocks:
+                        assistant_message["thinking_blocks"] = thinking_blocks  # type: ignore[typeddict-item]
+
+                # Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
+                if hasattr(message.choices[0].message, "provider_specific_fields"):
+                    provider_fields = getattr(
+                        message.choices[0].message, "provider_specific_fields", None
+                    )
+                    if provider_fields:
+                        assistant_message["provider_specific_fields"] = provider_fields  # type: ignore[typeddict-item]
+
+                messages.append(assistant_message)
             else:
                 raise ValueError(f"Unsupported message type: {type(message)}")

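The preserved reasoning fields matter mainly for multi-turn use. A minimal sketch following the docstring above (gpt-5.1 comes from the updated examples; error handling is omitted):

    from ai_pipeline_core import llm
    from ai_pipeline_core.llm.ai_messages import AIMessages

    async def follow_up() -> str:
        messages = AIMessages()
        messages.append("What is the capital of France?")
        response = await llm.generate("gpt-5.1", messages=messages)
        # Appending the ModelResponse now carries reasoning_content, thinking_blocks
        # and provider_specific_fields into the assistant turn (new in 0.3.3)
        messages.append(response)
        messages.append("Can you give an example?")
        final = await llm.generate("gpt-5.1", messages=messages)
        return final.content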
ai_pipeline_core/llm/client.py
CHANGED
@@ -150,10 +150,8 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
     Returns:
         OpenRouter model name.
     """
-    if model == "
-        return "
-    if model == "gemini-2.5-flash-search":
-        return "google/gemini-2.5-flash:online"
+    if model == "gemini-3-flash-search":
+        return "google/gemini-3-flash:online"
     if model == "sonar-pro-search":
         return "perplexity/sonar-pro-search"
     if model.startswith("gemini"):
@@ -184,7 +182,7 @@ async def _generate(
     Handles both regular and structured output generation.

     Args:
-        model: Model identifier (e.g., "gpt-5", "gemini-
+        model: Model identifier (e.g., "gpt-5.1", "gemini-3-pro").
         messages: Formatted messages for the API.
         completion_kwargs: Additional parameters for the completion API.

@@ -339,7 +337,7 @@ async def generate(
     4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

     Args:
-        model: Model to use (e.g., "gpt-5", "gemini-
+        model: Model to use (e.g., "gpt-5.1", "gemini-3-pro", "grok-4.1-fast").
             Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
             Defaults to None (empty context). Cached for 5 minutes by default.
@@ -367,17 +365,17 @@ async def generate(
     Wrap Documents in AIMessages - DO NOT pass directly or convert to .text:

         # CORRECT - wrap Document in AIMessages
-        response = await llm.generate("gpt-5", messages=AIMessages([my_document]))
+        response = await llm.generate("gpt-5.1", messages=AIMessages([my_document]))

         # WRONG - don't pass Document directly
-        response = await llm.generate("gpt-5", messages=my_document)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document)  # NO!

         # WRONG - don't convert to string yourself
-        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document.text)  # NO!

     VISION/PDF MODEL COMPATIBILITY:
     When using Documents containing images or PDFs, ensure your model supports these formats:
-    - Images require vision-capable models (gpt-
+    - Images require vision-capable models (gpt-5.1, gemini-3-flash, gemini-3-pro)
     - PDFs require document processing support (varies by provider)
     - Non-compatible models will raise ValueError or fall back to text extraction
     - Check model capabilities before including visual/PDF content
@@ -395,7 +393,7 @@ async def generate(

     Example:
         >>> # CORRECT - No options parameter (this is the recommended pattern)
-        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
+        >>> response = await llm.generate("gpt-5.1", messages="Explain quantum computing")
         >>> print(response.content)  # In production, use get_pipeline_logger instead of print

         >>> # With context caching for efficiency
@@ -403,10 +401,10 @@ async def generate(
         >>> static_doc = AIMessages([large_document, "few-shot example: ..."])
         >>>
         >>> # First call: caches context
-        >>> r1 = await llm.generate("gpt-5", context=static_doc, messages="Summarize")
+        >>> r1 = await llm.generate("gpt-5.1", context=static_doc, messages="Summarize")
         >>>
         >>> # Second call: reuses cache, saves tokens!
-        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
+        >>> r2 = await llm.generate("gpt-5.1", context=static_doc, messages="Key points?")

         >>> # Multi-turn conversation
         >>> messages = AIMessages([
@@ -414,7 +412,7 @@ async def generate(
         ...     previous_response,
         ...     "Can you give an example?"
         ... ])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)

     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
@@ -509,7 +507,7 @@ async def generate_structured(

         >>> # Step 1: Research/analysis with generate() - no options parameter
         >>> research = await llm.generate(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     messages="Research and analyze this complex topic..."
         ... )
         >>>
@@ -566,7 +564,7 @@ async def generate_structured(
         >>>
         >>> # CORRECT - No options parameter
         >>> response = await llm.generate_structured(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     response_format=Analysis,
         ...     messages="Analyze this product review: ..."
         ... )