ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +32 -5
- ai_pipeline_core/debug/__init__.py +26 -0
- ai_pipeline_core/debug/config.py +91 -0
- ai_pipeline_core/debug/content.py +705 -0
- ai_pipeline_core/debug/processor.py +99 -0
- ai_pipeline_core/debug/summary.py +236 -0
- ai_pipeline_core/debug/writer.py +913 -0
- ai_pipeline_core/deployment/__init__.py +46 -0
- ai_pipeline_core/deployment/base.py +681 -0
- ai_pipeline_core/deployment/contract.py +84 -0
- ai_pipeline_core/deployment/helpers.py +98 -0
- ai_pipeline_core/documents/flow_document.py +1 -1
- ai_pipeline_core/documents/task_document.py +1 -1
- ai_pipeline_core/documents/temporary_document.py +1 -1
- ai_pipeline_core/flow/config.py +13 -2
- ai_pipeline_core/flow/options.py +4 -4
- ai_pipeline_core/images/__init__.py +362 -0
- ai_pipeline_core/images/_processing.py +157 -0
- ai_pipeline_core/llm/ai_messages.py +25 -4
- ai_pipeline_core/llm/client.py +15 -19
- ai_pipeline_core/llm/model_response.py +5 -5
- ai_pipeline_core/llm/model_types.py +10 -13
- ai_pipeline_core/logging/logging_mixin.py +2 -2
- ai_pipeline_core/pipeline.py +1 -1
- ai_pipeline_core/progress.py +127 -0
- ai_pipeline_core/prompt_builder/__init__.py +5 -0
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
- ai_pipeline_core/prompt_builder/global_cache.py +78 -0
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
- ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
- ai_pipeline_core/tracing.py +54 -2
- ai_pipeline_core/utils/deploy.py +214 -6
- ai_pipeline_core/utils/remote_deployment.py +37 -187
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +96 -27
- ai_pipeline_core-0.3.3.dist-info/RECORD +57 -0
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +1 -1
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/images/_processing.py
ADDED
@@ -0,0 +1,157 @@
+"""Internal image processing logic: planning, splitting, encoding."""
+
+from dataclasses import dataclass
+from io import BytesIO
+from math import ceil
+
+from PIL import Image, ImageOps
+
+PIL_MAX_PIXELS = 100_000_000  # 100MP security limit
+
+
+@dataclass(frozen=True)
+class SplitPlan:
+    """Describes how to split an image into parts."""
+
+    tile_width: int
+    tile_height: int
+    step_y: int
+    num_parts: int
+    trim_width: int | None  # None = no trim needed
+    warnings: list[str]
+
+
+def plan_split(
+    width: int,
+    height: int,
+    max_dimension: int,
+    max_pixels: int,
+    overlap_fraction: float,
+    max_parts: int,
+) -> SplitPlan:
+    """Calculate how to split an image. Pure function, no side effects.
+
+    Returns a SplitPlan describing tile size, step, and number of parts.
+    """
+    warnings: list[str] = []
+
+    # Effective tile size respecting both max_dimension and max_pixels
+    tile_size = max_dimension
+    while tile_size * tile_size > max_pixels and tile_size > 100:
+        tile_size -= 10
+
+    # Width: trim if needed (left-aligned, web content is left-aligned)
+    trim_width = tile_size if width > tile_size else None
+
+    effective_width = min(width, tile_size)
+
+    # If single-tile pixel budget is still exceeded by width * tile_height, reduce tile_height
+    tile_h = tile_size
+    while effective_width * tile_h > max_pixels and tile_h > 100:
+        tile_h -= 10
+
+    # No vertical split needed
+    if height <= tile_h:
+        return SplitPlan(
+            tile_width=effective_width,
+            tile_height=height,
+            step_y=0,
+            num_parts=1,
+            trim_width=trim_width,
+            warnings=warnings,
+        )
+
+    # Vertical split with overlap
+    overlap_px = int(tile_h * overlap_fraction)
+    step = tile_h - overlap_px
+    if step <= 0:
+        step = 1
+
+    num_parts = 1 + ceil((height - tile_h) / step)
+
+    # Auto-reduce if exceeds max_parts
+    if num_parts > max_parts:
+        warnings.append(
+            f"Image requires {num_parts} parts but max is {max_parts}. "
+            f"Reducing to {max_parts} parts with larger step."
+        )
+        num_parts = max_parts
+        if num_parts > 1:
+            step = (height - tile_h) // (num_parts - 1)
+        else:
+            step = 0
+
+    return SplitPlan(
+        tile_width=effective_width,
+        tile_height=tile_h,
+        step_y=step,
+        num_parts=num_parts,
+        trim_width=trim_width,
+        warnings=warnings,
+    )
+
+
+def load_and_normalize(data: bytes) -> Image.Image:
+    """Load image from bytes, apply EXIF orientation, validate size."""
+    img = Image.open(BytesIO(data))
+    img.load()
+
+    if img.width * img.height > PIL_MAX_PIXELS:
+        raise ValueError(
+            f"Image too large: {img.width}x{img.height} = {img.width * img.height:,} pixels "
+            f"(limit: {PIL_MAX_PIXELS:,})"
+        )
+
+    # Fix EXIF orientation (important for mobile photos)
+    img = ImageOps.exif_transpose(img)
+    return img
+
+
+def encode_jpeg(img: Image.Image, quality: int) -> bytes:
+    """Encode PIL Image as JPEG bytes."""
+    # Convert to RGB if needed (JPEG doesn't support alpha)
+    if img.mode not in ("RGB", "L"):
+        img = img.convert("RGB")
+
+    buf = BytesIO()
+    img.save(buf, format="JPEG", quality=quality, optimize=True)
+    return buf.getvalue()
+
+
+def execute_split(
+    img: Image.Image,
+    plan: SplitPlan,
+    jpeg_quality: int,
+) -> list[tuple[bytes, int, int, int, int]]:
+    """Execute a split plan on an image.
+
+    Returns list of (data, width, height, source_y, source_height) tuples.
+    """
+    width, height = img.size
+
+    # Trim width if needed (left-aligned crop)
+    if plan.trim_width is not None and width > plan.trim_width:
+        img = img.crop((0, 0, plan.trim_width, height))
+        width = plan.trim_width
+
+    # Convert to RGB once for JPEG
+    if img.mode not in ("RGB", "L"):
+        img = img.convert("RGB")
+
+    parts: list[tuple[bytes, int, int, int, int]] = []
+
+    for i in range(plan.num_parts):
+        if plan.num_parts == 1:
+            y = 0
+        else:
+            y = i * plan.step_y
+            # Clamp so last tile aligns to bottom
+            y = min(y, max(0, height - plan.tile_height))
+
+        h = min(plan.tile_height, height - y)
+        tile = img.crop((0, y, width, y + h))
+
+        data = encode_jpeg(tile, jpeg_quality)
+        parts.append((data, width, h, y, h))
+
+    return parts
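
For orientation, here is a minimal sketch of how these three helpers could be combined. The parameter values and the direct import of the private _processing module are illustrative assumptions; the public wrapper in ai_pipeline_core/images/__init__.py is not shown in this diff.

# Illustrative sketch only: limits below are assumed values, not package defaults.
from ai_pipeline_core.images._processing import execute_split, load_and_normalize, plan_split

with open("screenshot.png", "rb") as f:
    img = load_and_normalize(f.read())

plan = plan_split(
    width=img.width,
    height=img.height,
    max_dimension=2000,      # assumed max tile edge
    max_pixels=4_000_000,    # assumed per-tile pixel budget
    overlap_fraction=0.1,    # assumed 10% vertical overlap between tiles
    max_parts=8,             # assumed cap on the number of tiles
)
for warning in plan.warnings:
    print(warning)

tiles = execute_split(img, plan, jpeg_quality=85)
for data, width, height, source_y, source_height in tiles:
    print(f"tile {width}x{height} at y={source_y}: {len(data)} bytes")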
ai_pipeline_core/llm/ai_messages.py
CHANGED
@@ -53,7 +53,7 @@ class AIMessages(list[AIMessageType]):
    Note: Document conversion is automatic. Text content becomes user text messages.

    VISION/PDF MODEL COMPATIBILITY WARNING:
-   Images require vision-capable models (e.g., gpt-
+   Images require vision-capable models (e.g., gpt-5.1, gemini-3-flash, gemini-3-pro).
    Non-vision models will raise ValueError when encountering image documents.
    PDFs require models with document processing support - check your model's capabilities
    before including PDF documents in messages. Unsupported models may fall back to
@@ -74,7 +74,7 @@ class AIMessages(list[AIMessageType]):
        >>> from ai_pipeline_core import llm
        >>> messages = AIMessages()
        >>> messages.append("What is the capital of France?")
-       >>> response = await llm.generate("gpt-5", messages=messages)
+       >>> response = await llm.generate("gpt-5.1", messages=messages)
        >>> messages.append(response)  # Add the actual response
    """

@@ -264,10 +264,31 @@ class AIMessages(list[AIMessageType]):
        elif isinstance(message, Document):
            messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
        elif isinstance(message, ModelResponse):  # type: ignore
-
+            # Build base assistant message
+            assistant_message: ChatCompletionMessageParam = {
                "role": "assistant",
                "content": [{"type": "text", "text": message.content}],
-           }
+            }
+
+            # Preserve reasoning_content (Gemini Flash 3+, O1, O3, GPT-5)
+            if reasoning_content := message.reasoning_content:
+                assistant_message["reasoning_content"] = reasoning_content  # type: ignore[typeddict-item]
+
+            # Preserve thinking_blocks (structured thinking)
+            if hasattr(message.choices[0].message, "thinking_blocks"):
+                thinking_blocks = getattr(message.choices[0].message, "thinking_blocks", None)
+                if thinking_blocks:
+                    assistant_message["thinking_blocks"] = thinking_blocks  # type: ignore[typeddict-item]
+
+            # Preserve provider_specific_fields (thought_signatures for Gemini multi-turn)
+            if hasattr(message.choices[0].message, "provider_specific_fields"):
+                provider_fields = getattr(
+                    message.choices[0].message, "provider_specific_fields", None
+                )
+                if provider_fields:
+                    assistant_message["provider_specific_fields"] = provider_fields  # type: ignore[typeddict-item]
+
+            messages.append(assistant_message)
        else:
            raise ValueError(f"Unsupported message type: {type(message)}")
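
A short multi-turn sketch of the pattern these docstrings describe; the model name and prompts are illustrative, and the reasoning fields are only forwarded when the provider actually returns them.

# Sketch of the multi-turn pattern from the docstring above; prompts are illustrative.
from ai_pipeline_core import AIMessages, llm

async def follow_up() -> str:
    messages = AIMessages(["Explain context caching in one paragraph."])
    first = await llm.generate("gpt-5.1", messages=messages)
    # Appending the ModelResponse now also carries reasoning_content,
    # thinking_blocks, and provider_specific_fields into the next turn.
    messages.append(first)
    messages.append("Now give a concrete example.")
    second = await llm.generate("gpt-5.1", messages=messages)
    return second.content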
ai_pipeline_core/llm/client.py
CHANGED
@@ -150,12 +150,8 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
    Returns:
        OpenRouter model name.
    """
-   if model == "
-       return "
-   if model == "gemini-2.5-flash-search":
-       return "google/gemini-2.5-flash:online"
-   if model == "grok-4-fast-search":
-       return "x-ai/grok-4-fast:online"
+   if model == "gemini-3-flash-search":
+       return "google/gemini-3-flash:online"
    if model == "sonar-pro-search":
        return "perplexity/sonar-pro-search"
    if model.startswith("gemini"):
@@ -186,7 +182,7 @@ async def _generate(
    Handles both regular and structured output generation.

    Args:
-       model: Model identifier (e.g., "gpt-5", "gemini-
+       model: Model identifier (e.g., "gpt-5.1", "gemini-3-pro").
        messages: Formatted messages for the API.
        completion_kwargs: Additional parameters for the completion API.

@@ -295,7 +291,7 @@ async def _generate_with_retry(
        model, span_type="LLM", input=processed_messages
    ) as span:
        response = await _generate(model, processed_messages, completion_kwargs)
-       span.set_attributes(response.get_laminar_metadata())
+       span.set_attributes(response.get_laminar_metadata())  # pyright: ignore[reportArgumentType]
        Laminar.set_span_output([
            r for r in (response.reasoning_content, response.content) if r
        ])
@@ -341,7 +337,7 @@ async def generate(
    4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

    Args:
-       model: Model to use (e.g., "gpt-5", "gemini-
+       model: Model to use (e.g., "gpt-5.1", "gemini-3-pro", "grok-4.1-fast").
            Accepts predefined models or any string for custom models.
        context: Static context to cache (documents, examples, instructions).
            Defaults to None (empty context). Cached for 5 minutes by default.
@@ -369,17 +365,17 @@ async def generate(
    Wrap Documents in AIMessages - DO NOT pass directly or convert to .text:

        # CORRECT - wrap Document in AIMessages
-       response = await llm.generate("gpt-5", messages=AIMessages([my_document]))
+       response = await llm.generate("gpt-5.1", messages=AIMessages([my_document]))

        # WRONG - don't pass Document directly
-       response = await llm.generate("gpt-5", messages=my_document)  # NO!
+       response = await llm.generate("gpt-5.1", messages=my_document)  # NO!

        # WRONG - don't convert to string yourself
-       response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
+       response = await llm.generate("gpt-5.1", messages=my_document.text)  # NO!

    VISION/PDF MODEL COMPATIBILITY:
    When using Documents containing images or PDFs, ensure your model supports these formats:
-   - Images require vision-capable models (gpt-
+   - Images require vision-capable models (gpt-5.1, gemini-3-flash, gemini-3-pro)
    - PDFs require document processing support (varies by provider)
    - Non-compatible models will raise ValueError or fall back to text extraction
    - Check model capabilities before including visual/PDF content
@@ -397,7 +393,7 @@ async def generate(

    Example:
        >>> # CORRECT - No options parameter (this is the recommended pattern)
-       >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
+       >>> response = await llm.generate("gpt-5.1", messages="Explain quantum computing")
        >>> print(response.content)  # In production, use get_pipeline_logger instead of print

        >>> # With context caching for efficiency
@@ -405,10 +401,10 @@ async def generate(
        >>> static_doc = AIMessages([large_document, "few-shot example: ..."])
        >>>
        >>> # First call: caches context
-       >>> r1 = await llm.generate("gpt-5", context=static_doc, messages="Summarize")
+       >>> r1 = await llm.generate("gpt-5.1", context=static_doc, messages="Summarize")
        >>>
        >>> # Second call: reuses cache, saves tokens!
-       >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
+       >>> r2 = await llm.generate("gpt-5.1", context=static_doc, messages="Key points?")

        >>> # Multi-turn conversation
        >>> messages = AIMessages([
@@ -416,7 +412,7 @@ async def generate(
        ... previous_response,
        ... "Can you give an example?"
        ... ])
-       >>> response = await llm.generate("gpt-5", messages=messages)
+       >>> response = await llm.generate("gpt-5.1", messages=messages)

    Performance:
        - Context caching saves ~50-90% tokens on repeated calls
@@ -511,7 +507,7 @@ async def generate_structured(

        >>> # Step 1: Research/analysis with generate() - no options parameter
        >>> research = await llm.generate(
-       ... "gpt-5",
+       ... "gpt-5.1",
        ... messages="Research and analyze this complex topic..."
        ... )
        >>>
@@ -568,7 +564,7 @@ async def generate_structured(
        >>>
        >>> # CORRECT - No options parameter
        >>> response = await llm.generate_structured(
-       ... "gpt-5",
+       ... "gpt-5.1",
        ... response_format=Analysis,
        ... messages="Analyze this product review: ..."
        ... )
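
The context-caching example from the generate() docstring, spelled out as a small sketch; the document object and prompts are illustrative.

# Sketch of the context-caching pattern documented above; inputs are illustrative.
from ai_pipeline_core import AIMessages, llm

async def summarize_and_probe(large_document) -> tuple[str, str]:
    static_context = AIMessages([large_document, "few-shot example: ..."])
    # First call caches the static context (about 5 minutes by default, per the docstring)
    r1 = await llm.generate("gpt-5.1", context=static_context, messages="Summarize")
    # Second call reuses the cached context and saves tokens
    r2 = await llm.generate("gpt-5.1", context=static_context, messages="Key points?")
    return r1.content, r2.content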
ai_pipeline_core/llm/model_response.py
CHANGED
@@ -28,7 +28,7 @@ class ModelResponse(ChatCompletion):

    Primary usage is adding to AIMessages for multi-turn conversations:

-       >>> response = await llm.generate("gpt-5", messages=messages)
+       >>> response = await llm.generate("gpt-5.1", messages=messages)
        >>> messages.append(response)  # Add assistant response to conversation
        >>> print(response.content)  # Access generated text

@@ -43,7 +43,7 @@ class ModelResponse(ChatCompletion):
        >>> from ai_pipeline_core import llm, AIMessages
        >>>
        >>> messages = AIMessages(["Explain quantum computing"])
-       >>> response = await llm.generate("gpt-5", messages=messages)
+       >>> response = await llm.generate("gpt-5.1", messages=messages)
        >>>
        >>> # Primary usage: add to conversation
        >>> messages.append(response)
@@ -81,7 +81,7 @@ class ModelResponse(ChatCompletion):
        >>> # Usually created internally by generate()
        >>> response = ModelResponse(
        ... chat_completion=completion,
-       ... model_options={"temperature": 0.7, "model": "gpt-
+       ... model_options={"temperature": 0.7, "model": "gpt-5.1"},
        ... metadata={"time_taken": 1.5, "first_token_time": 0.3}
        ... )
    """
@@ -116,7 +116,7 @@ class ModelResponse(ChatCompletion):
        Generated text from the model, or empty string if none.

        Example:
-           >>> response = await generate("gpt-5", messages="Hello")
+           >>> response = await generate("gpt-5.1", messages="Hello")
            >>> text = response.content  # The generated response
            >>>
            >>> # Common pattern: add to messages then use content
@@ -185,7 +185,7 @@ class ModelResponse(ChatCompletion):

        Example:
            >>> response = await llm.generate(
-           ... "gpt-5",
+           ... "gpt-5.1",
            ... context=large_doc,
            ... messages="Summarize this"
            ... )
ai_pipeline_core/llm/model_types.py
CHANGED
@@ -15,18 +15,15 @@ from typing import Literal, TypeAlias
ModelName: TypeAlias = (
    Literal[
        # Core models
-       "gemini-
-       "gpt-5",
-       "grok-4",
+       "gemini-3-pro",
+       "gpt-5.1",
        # Small models
-       "gemini-
+       "gemini-3-flash",
        "gpt-5-mini",
-       "grok-4-fast",
+       "grok-4.1-fast",
        # Search models
-       "gemini-
+       "gemini-3-flash-search",
        "sonar-pro-search",
-       "gpt-4o-search",
-       "grok-4-fast-search",
    ]
    | str
)
@@ -39,15 +36,15 @@ string for custom models. The type is a union of predefined literals
and str, giving you the best of both worlds: suggestions for known
models and flexibility for custom ones.

-Note: These are example common model names as of
+Note: These are example common model names as of Q1 2026. Actual availability
depends on your LiteLLM proxy configuration and provider access.

Model categories:
-   Core models (gemini-
+   Core models (gemini-3-pro, gpt-5.1):
        High-capability models for complex tasks requiring deep reasoning,
        nuanced understanding, or creative generation.

-   Small models (gemini-
+   Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
        Efficient models optimized for speed and cost, suitable for
        simpler tasks or high-volume processing.

@@ -65,7 +62,7 @@ Example:
    >>> from ai_pipeline_core import llm, ModelName
    >>>
    >>> # Predefined model with IDE autocomplete
-   >>> model: ModelName = "gpt-5"  # IDE suggests common models
+   >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
    >>> response = await llm.generate(model, messages="Hello")
    >>>
    >>> # Custom model works directly
@@ -73,7 +70,7 @@ Example:
    >>> response = await llm.generate(model, messages="Hello")
    >>>
    >>> # Both types work seamlessly
-   >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-
+   >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]

Note:
    The ModelName type includes both predefined literals and str,
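
A small sketch of the Literal-or-str union in practice; the helper function itself is illustrative and not part of the package.

# Illustrative helper showing that ModelName accepts both predefined and custom names.
from ai_pipeline_core import ModelName, llm

async def ask(model: ModelName, question: str) -> str:
    response = await llm.generate(model, messages=question)
    return response.content

# Both calls type-check: a predefined literal and an arbitrary custom model string.
# await ask("gpt-5.1", "Hello")
# await ask("my-org/custom-llm", "Hello")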
ai_pipeline_core/logging/logging_mixin.py
CHANGED
@@ -117,7 +117,7 @@ class StructuredLoggerMixin(LoggerMixin):

        Example:
            self.log_metric("processing_time", 1.23, "seconds",
-                           document_type="pdf", model="gpt-
+                           document_type="pdf", model="gpt-5.1")
        """
        self.logger.info(
            f"Metric: {metric_name}",
@@ -140,7 +140,7 @@ class StructuredLoggerMixin(LoggerMixin):

        Example:
            self.log_span("llm_generation", 1234.5,
-                         model="gpt-
+                         model="gpt-5.1", tokens=500)
        """
        self.logger.info(
            f"Span: {operation}",
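
The two docstring examples above combined into one illustrative class; it assumes LoggerMixin supplies a ready-to-use self.logger without further setup.

# Illustrative only; assumes LoggerMixin provides self.logger out of the box.
from ai_pipeline_core.logging.logging_mixin import StructuredLoggerMixin

class DocumentProcessor(StructuredLoggerMixin):
    def process_one(self) -> None:
        self.log_metric("processing_time", 1.23, "seconds",
                        document_type="pdf", model="gpt-5.1")
        self.log_span("llm_generation", 1234.5,
                      model="gpt-5.1", tokens=500)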
ai_pipeline_core/pipeline.py
CHANGED
@@ -605,7 +605,7 @@ def pipeline_flow(
        - pipeline_task: For task-level decoration
        - FlowConfig: Type-safe flow configuration
        - FlowOptions: Base class for flow options
-       -
+       - PipelineDeployment: Execute flows locally or remotely
    """
    flow_decorator: Callable[..., Any] = _prefect_flow
ai_pipeline_core/progress.py
ADDED
@@ -0,0 +1,127 @@
+"""@public Intra-flow progress tracking with order-preserving webhook delivery."""
+
+import asyncio
+from collections.abc import Generator
+from contextlib import contextmanager
+from contextvars import ContextVar
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from uuid import UUID
+
+from ai_pipeline_core.deployment.contract import ProgressRun
+from ai_pipeline_core.logging import get_pipeline_logger
+
+logger = get_pipeline_logger(__name__)
+
+
+@dataclass(frozen=True, slots=True)
+class ProgressContext:
+    """Internal context holding state for progress calculation and webhook delivery."""
+
+    webhook_url: str
+    project_name: str
+    run_id: str
+    flow_run_id: str
+    flow_name: str
+    step: int
+    total_steps: int
+    weights: tuple[float, ...]
+    completed_weight: float
+    current_flow_weight: float
+    queue: asyncio.Queue[ProgressRun | None]
+
+
+_context: ContextVar[ProgressContext | None] = ContextVar("progress_context", default=None)
+
+
+async def update(fraction: float, message: str = "") -> None:
+    """@public Report intra-flow progress (0.0-1.0). No-op without context."""
+    ctx = _context.get()
+    if ctx is None or not ctx.webhook_url:
+        return
+
+    fraction = max(0.0, min(1.0, fraction))
+
+    total_weight = sum(ctx.weights)
+    if total_weight > 0:
+        overall = (ctx.completed_weight + ctx.current_flow_weight * fraction) / total_weight
+    else:
+        overall = fraction
+    overall = round(max(0.0, min(1.0, overall)), 4)
+
+    payload = ProgressRun(
+        flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
+        project_name=ctx.project_name,
+        state="RUNNING",
+        timestamp=datetime.now(timezone.utc),
+        step=ctx.step,
+        total_steps=ctx.total_steps,
+        flow_name=ctx.flow_name,
+        status="progress",
+        progress=overall,
+        step_progress=round(fraction, 4),
+        message=message,
+    )
+
+    ctx.queue.put_nowait(payload)
+
+
+async def webhook_worker(
+    queue: asyncio.Queue[ProgressRun | None],
+    webhook_url: str,
+    max_retries: int = 3,
+    retry_delay: float = 10.0,
+) -> None:
+    """Process webhooks sequentially with retries, preserving order."""
+    from ai_pipeline_core.deployment.helpers import send_webhook  # noqa: PLC0415
+
+    while True:
+        payload = await queue.get()
+        if payload is None:
+            queue.task_done()
+            break
+
+        try:
+            await send_webhook(webhook_url, payload, max_retries, retry_delay)
+        except Exception:
+            pass  # Already logged in send_webhook
+
+        queue.task_done()
+
+
+@contextmanager
+def flow_context(
+    webhook_url: str,
+    project_name: str,
+    run_id: str,
+    flow_run_id: str,
+    flow_name: str,
+    step: int,
+    total_steps: int,
+    weights: tuple[float, ...],
+    completed_weight: float,
+    queue: asyncio.Queue[ProgressRun | None],
+) -> Generator[None, None, None]:
+    """Set up progress context for a flow. Framework internal use."""
+    current_flow_weight = weights[step - 1] if step <= len(weights) else 1.0
+    ctx = ProgressContext(
+        webhook_url=webhook_url,
+        project_name=project_name,
+        run_id=run_id,
+        flow_run_id=flow_run_id,
+        flow_name=flow_name,
+        step=step,
+        total_steps=total_steps,
+        weights=weights,
+        completed_weight=completed_weight,
+        current_flow_weight=current_flow_weight,
+        queue=queue,
+    )
+    token = _context.set(ctx)
+    try:
+        yield
+    finally:
+        _context.reset(token)
+
+
+__all__ = ["update", "webhook_worker", "flow_context", "ProgressContext"]
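
A sketch of how the pieces above fit together. In real pipelines the queue, webhook_worker, and flow_context are wired up by the deployment layer, so the identifiers, webhook URL, and weights here are illustrative.

# Illustrative wiring of progress.update, flow_context, and webhook_worker.
import asyncio

from ai_pipeline_core import progress
from ai_pipeline_core.deployment.contract import ProgressRun

async def long_running_flow() -> None:
    total = 10
    for i in range(1, total + 1):
        await asyncio.sleep(0.1)  # stand-in for real work
        await progress.update(i / total, message=f"processed {i}/{total}")

async def main() -> None:
    queue: asyncio.Queue[ProgressRun | None] = asyncio.Queue()
    worker = asyncio.create_task(progress.webhook_worker(queue, "https://example.com/hook"))
    with progress.flow_context(
        webhook_url="https://example.com/hook",
        project_name="demo-project",
        run_id="run-1",
        flow_run_id="00000000-0000-0000-0000-000000000001",
        flow_name="long_running_flow",
        step=1,
        total_steps=1,
        weights=(1.0,),
        completed_weight=0.0,
        queue=queue,
    ):
        await long_running_flow()
    queue.put_nowait(None)  # sentinel: let the worker drain the queue and exit
    await worker

asyncio.run(main())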
ai_pipeline_core/prompt_builder/documents_prompt.jinja2
ADDED
@@ -0,0 +1,23 @@
+You were provided with the following documents:
+- **core documents** - these are already a reviewed documents which are part of official project documentation.
+- **source documents** (called also **sources**) - these are not part of official project documentation and they will be deleted after your task is completed.
+
+{% if core_documents %}
+There are the following **core documents** available during this session:
+{% for document in core_documents %}
+- {{ document.id }} - {{ document.name }}
+{% endfor %}
+{% else %}
+There are no **core documents** available during this session.
+{% endif %}
+
+{% if new_documents %}
+There are the following **source documents** (called also **sources**) available during this session:
+{% for document in new_documents %}
+- {{ document.id }} - {{ document.name }}
+{% endfor %}
+{% else %}
+There are no **source documents** (called also **sources**) available during this session.
+{% endif %}
+
+There won't be more **core documents** and **source documents** provided during this conversation, however **new core documents** may be provided.