ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ Key functions:
 
 import asyncio
 import time
+from io import BytesIO
 from typing import Any, TypeVar
 
 from lmnr import Laminar
@@ -21,19 +22,77 @@ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDone
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
+from PIL import Image
 from prefect.logging import get_logger
 from pydantic import BaseModel, ValidationError
 
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.exceptions import LLMError
+from ai_pipeline_core.images import ImageProcessingConfig, process_image_to_documents
 from ai_pipeline_core.settings import settings
 
-from .ai_messages import AIMessages
+from .ai_messages import AIMessages, AIMessageType
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
 from .model_types import ModelName
 
 logger = get_logger()
 
+# Image splitting configs for automatic large-image handling at the LLM boundary.
+# Gemini supports up to 3000x3000; all other models use a conservative 1000x1000 default.
+_GEMINI_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=3000, max_pixels=9_000_000, jpeg_quality=75
+)
+_DEFAULT_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=1000, max_pixels=1_000_000, jpeg_quality=75
+)
+
+
+def _get_image_config(model: str) -> ImageProcessingConfig:
+    """Return the image splitting config for a model."""
+    if "gemini" in model.lower():
+        return _GEMINI_IMAGE_CONFIG
+    return _DEFAULT_IMAGE_CONFIG
+
+
+def _prepare_images_for_model(messages: AIMessages, model: str) -> AIMessages:
+    """Split image documents that exceed model constraints.
+
+    Returns a new AIMessages with oversized images replaced by tiles.
+    Returns the original instance unchanged if no splitting is needed.
+    """
+    if not any(isinstance(m, Document) and m.is_image for m in messages):
+        return messages
+
+    config = _get_image_config(model)
+    result: list[AIMessageType] = []
+    changed = False
+
+    for msg in messages:
+        if not (isinstance(msg, Document) and msg.is_image):
+            result.append(msg)
+            continue
+
+        try:
+            with Image.open(BytesIO(msg.content)) as img:
+                w, h = img.size
+        except Exception:
+            result.append(msg)
+            continue
+
+        if w <= config.max_dimension and h <= config.max_dimension and w * h <= config.max_pixels:
+            result.append(msg)
+            continue
+
+        name_prefix = msg.name.rsplit(".", 1)[0] if "." in msg.name else msg.name
+        tiles = process_image_to_documents(msg, config=config, name_prefix=name_prefix)
+        result.extend(tiles)
+        changed = True
+
+    if not changed:
+        return messages
+    return AIMessages(result)
+
 
 def _process_messages(
     context: AIMessages,
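For orientation, a minimal sketch of how the new image-splitting path behaves. The function and config names come from the hunk above; the AIMessages contents and the image document are hypothetical:

    # some_image_document is a hypothetical image Document from the pipeline.
    msgs = AIMessages(["Describe this screenshot", some_image_document])
    prepared = _prepare_images_for_model(msgs, "gemini-3-pro")  # 3000x3000 / 9 MP limits
    prepared = _prepare_images_for_model(msgs, "gpt-5.1")       # conservative 1000x1000 / 1 MP default
    # If the image fits the model's limits, the original AIMessages instance is
    # returned unchanged; otherwise the oversized image is replaced by the tiles
    # produced by process_image_to_documents().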
@@ -150,10 +209,8 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
     Returns:
         OpenRouter model name.
     """
-    if model == "gpt-4o-search":
-        return "openai/gpt-4o-search-preview"
-    if model == "gemini-2.5-flash-search":
-        return "google/gemini-2.5-flash:online"
+    if model == "gemini-3-flash-search":
+        return "google/gemini-3-flash:online"
     if model == "sonar-pro-search":
         return "perplexity/sonar-pro-search"
     if model.startswith("gemini"):
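As a quick illustration of the remapped search aliases, a sketch that simply restates the branches visible in this hunk:

    assert _model_name_to_openrouter_model("gemini-3-flash-search") == "google/gemini-3-flash:online"
    assert _model_name_to_openrouter_model("sonar-pro-search") == "perplexity/sonar-pro-search"
    # "gpt-4o-search" and "gemini-2.5-flash-search" are no longer special-cased in 0.3.4.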
@@ -184,7 +241,7 @@ async def _generate(
     Handles both regular and structured output generation.
 
     Args:
-        model: Model identifier (e.g., "gpt-5", "gemini-2.5-pro").
+        model: Model identifier (e.g., "gpt-5.1", "gemini-3-pro").
         messages: Formatted messages for the API.
         completion_kwargs: Additional parameters for the completion API.
 
@@ -273,6 +330,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
+    # Auto-split large images based on model-specific constraints
+    context = _prepare_images_for_model(context, model)
+    messages = _prepare_images_for_model(messages, model)
+
     if "gemini" in model.lower() and context.approximate_tokens_count < 10000:
         # Bug fix for minimum explicit context size for Gemini models
         options.cache_ttl = None
@@ -339,7 +400,7 @@ async def generate(
     4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
 
     Args:
-        model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
+        model: Model to use (e.g., "gpt-5.1", "gemini-3-pro", "grok-4.1-fast").
             Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
             Defaults to None (empty context). Cached for 5 minutes by default.
@@ -367,17 +428,17 @@ async def generate(
     Wrap Documents in AIMessages - DO NOT pass directly or convert to .text:
 
         # CORRECT - wrap Document in AIMessages
-        response = await llm.generate("gpt-5", messages=AIMessages([my_document]))
+        response = await llm.generate("gpt-5.1", messages=AIMessages([my_document]))
 
         # WRONG - don't pass Document directly
-        response = await llm.generate("gpt-5", messages=my_document)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document)  # NO!
 
         # WRONG - don't convert to string yourself
-        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document.text)  # NO!
 
     VISION/PDF MODEL COMPATIBILITY:
     When using Documents containing images or PDFs, ensure your model supports these formats:
-    - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+    - Images require vision-capable models (gpt-5.1, gemini-3-flash, gemini-3-pro)
     - PDFs require document processing support (varies by provider)
     - Non-compatible models will raise ValueError or fall back to text extraction
     - Check model capabilities before including visual/PDF content
@@ -395,7 +456,7 @@ async def generate(
 
     Example:
         >>> # CORRECT - No options parameter (this is the recommended pattern)
-        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
+        >>> response = await llm.generate("gpt-5.1", messages="Explain quantum computing")
         >>> print(response.content)  # In production, use get_pipeline_logger instead of print
 
         >>> # With context caching for efficiency
@@ -403,10 +464,10 @@ async def generate(
         >>> static_doc = AIMessages([large_document, "few-shot example: ..."])
         >>>
         >>> # First call: caches context
-        >>> r1 = await llm.generate("gpt-5", context=static_doc, messages="Summarize")
+        >>> r1 = await llm.generate("gpt-5.1", context=static_doc, messages="Summarize")
         >>>
         >>> # Second call: reuses cache, saves tokens!
-        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
+        >>> r2 = await llm.generate("gpt-5.1", context=static_doc, messages="Key points?")
 
         >>> # Multi-turn conversation
         >>> messages = AIMessages([
@@ -414,7 +475,7 @@ async def generate(
         ...     previous_response,
         ...     "Can you give an example?"
         ... ])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
 
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
@@ -509,7 +570,7 @@ async def generate_structured(
 
         >>> # Step 1: Research/analysis with generate() - no options parameter
         >>> research = await llm.generate(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     messages="Research and analyze this complex topic..."
         ... )
         >>>
@@ -566,7 +627,7 @@ async def generate_structured(
         >>>
         >>> # CORRECT - No options parameter
         >>> response = await llm.generate_structured(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     response_format=Analysis,
         ...     messages="Analyze this product review: ..."
         ... )
@@ -28,7 +28,7 @@ class ModelResponse(ChatCompletion):
 
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>> messages.append(response)  # Add assistant response to conversation
         >>> print(response.content)  # Access generated text
 
@@ -43,7 +43,7 @@ class ModelResponse(ChatCompletion):
         >>> from ai_pipeline_core import llm, AIMessages
         >>>
         >>> messages = AIMessages(["Explain quantum computing"])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
         >>> messages.append(response)
@@ -81,7 +81,7 @@ class ModelResponse(ChatCompletion):
         >>> # Usually created internally by generate()
         >>> response = ModelResponse(
         ...     chat_completion=completion,
-        ...     model_options={"temperature": 0.7, "model": "gpt-4"},
+        ...     model_options={"temperature": 0.7, "model": "gpt-5.1"},
         ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
         ... )
     """
@@ -116,7 +116,7 @@ class ModelResponse(ChatCompletion):
             Generated text from the model, or empty string if none.
 
         Example:
-            >>> response = await generate("gpt-5", messages="Hello")
+            >>> response = await generate("gpt-5.1", messages="Hello")
             >>> text = response.content  # The generated response
             >>>
             >>> # Common pattern: add to messages then use content
@@ -185,7 +185,7 @@ class ModelResponse(ChatCompletion):
 
         Example:
             >>> response = await llm.generate(
-            ...     "gpt-5",
+            ...     "gpt-5.1",
             ...     context=large_doc,
             ...     messages="Summarize this"
             ... )
@@ -15,17 +15,15 @@ from typing import Literal, TypeAlias
 ModelName: TypeAlias = (
     Literal[
         # Core models
-        "gemini-2.5-pro",
-        "gpt-5",
-        "grok-4",
+        "gemini-3-pro",
+        "gpt-5.1",
         # Small models
-        "gemini-2.5-flash",
+        "gemini-3-flash",
         "gpt-5-mini",
-        "grok-4-fast",
+        "grok-4.1-fast",
         # Search models
-        "gemini-2.5-flash-search",
+        "gemini-3-flash-search",
         "sonar-pro-search",
-        "gpt-4o-search",
     ]
     | str
 )
@@ -38,15 +36,15 @@ string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.
 
-Note: These are example common model names as of Q3 2025. Actual availability
+Note: These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-2.5-pro, gpt-5, grok-4):
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -64,7 +62,7 @@ Example:
     >>> from ai_pipeline_core import llm, ModelName
     >>>
     >>> # Predefined model with IDE autocomplete
-    >>> model: ModelName = "gpt-5"  # IDE suggests common models
+    >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Custom model works directly
@@ -72,7 +70,7 @@ Example:
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Both types work seamlessly
-    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
+    >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
 
 Note:
     The ModelName type includes both predefined literals and str,
@@ -117,7 +117,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_metric("processing_time", 1.23, "seconds",
-                            document_type="pdf", model="gpt-4")
+                            document_type="pdf", model="gpt-5.1")
         """
         self.logger.info(
             f"Metric: {metric_name}",
@@ -140,7 +140,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_span("llm_generation", 1234.5,
-                          model="gpt-4", tokens=500)
+                          model="gpt-5.1", tokens=500)
         """
         self.logger.info(
             f"Span: {operation}",
@@ -144,7 +144,7 @@ class PromptBuilder(BaseModel):
         options.service_tier = None
         options.cache_ttl = None
         cache_lock = False
-        if "grok-4-fast" in model:
+        if "grok-4.1-fast" in model:
             options.max_completion_tokens = 30000
 
         if self.mode == "test":
@@ -154,7 +154,7 @@ class PromptBuilder(BaseModel):
             options.reasoning_effort = "medium"
             options.verbosity = None
 
-        if model.startswith("gpt-5"):
+        if model.startswith("gpt-5.1"):
            options.service_tier = "flex"
 
        return options, cache_lock
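The tightened model checks are easy to trip over, so here is a small sketch of how a few model strings are classified under the new conditions (plain Python, restating only the predicates in these hunks):

    for m in ("gpt-5.1", "gpt-5-mini", "grok-4.1-fast"):
        flex = m.startswith("gpt-5.1")     # only gpt-5.1 variants get service_tier="flex"
        capped = "grok-4.1-fast" in m      # only grok-4.1-fast gets max_completion_tokens=30000
        print(m, flex, capped)
    # gpt-5.1        True  False
    # gpt-5-mini     False False
    # grok-4.1-fast  False True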
@@ -224,7 +224,7 @@ class PromptBuilder(BaseModel):
         self, model: ModelName, prompt: str | AIMessages, options: ModelOptions | None = None
     ) -> str:
         options, _ = self._get_options(model, options)
-        if "gpt-5" not in model and "grok-4" not in model and "openrouter/" not in model:
+        if "gpt-5.1" not in model and "grok-4.1-fast" not in model and "openrouter/" not in model:
             options.stop = "</document>"
 
         response = await self.call(model, prompt, options)
@@ -276,6 +276,9 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+_debug_processor_initialized = False
+
+
 def _initialise_laminar() -> None:
     """Initialize Laminar SDK with project configuration.
 
@@ -287,17 +290,66 @@ def _initialise_laminar() -> None:
     - Uses settings.lmnr_project_api_key for authentication
     - Disables OPENAI instrument to prevent double-tracing
     - Called automatically by trace decorator on first use
+    - Optionally adds local debug processor if TRACE_DEBUG_PATH is set
 
     Note:
         This is an internal function called once per process.
         Multiple calls are safe (Laminar handles idempotency).
     """
+    global _debug_processor_initialized
+
     if settings.lmnr_project_api_key:
         Laminar.initialize(
             project_api_key=settings.lmnr_project_api_key,
             disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
         )
 
+    # Add local debug processor if configured (only once)
+    if not _debug_processor_initialized:
+        _debug_processor_initialized = True
+        debug_path = os.environ.get("TRACE_DEBUG_PATH")
+        if debug_path:
+            _setup_debug_processor(debug_path)
+
+
+def _setup_debug_processor(debug_path: str) -> None:
+    """Set up local debug trace processor."""
+    try:
+        from pathlib import Path  # noqa: PLC0415
+
+        from opentelemetry import trace  # noqa: PLC0415
+
+        from ai_pipeline_core.debug import (  # noqa: PLC0415
+            LocalDebugSpanProcessor,
+            LocalTraceWriter,
+            TraceDebugConfig,
+        )
+
+        config = TraceDebugConfig(
+            path=Path(debug_path),
+            max_element_bytes=int(os.environ.get("TRACE_DEBUG_MAX_INLINE", 10000)),
+            max_traces=int(os.environ.get("TRACE_DEBUG_MAX_TRACES", 20)) or None,
+        )
+
+        writer = LocalTraceWriter(config)
+        processor = LocalDebugSpanProcessor(writer)
+
+        # Add to tracer provider
+        provider = trace.get_tracer_provider()
+        add_processor = getattr(provider, "add_span_processor", None)
+        if add_processor is not None:
+            add_processor(processor)
+
+        # Register shutdown
+        import atexit  # noqa: PLC0415
+
+        atexit.register(processor.shutdown)
+
+    except Exception as e:
+        import logging  # noqa: PLC0415
+
+        logging.getLogger(__name__).warning(f"Failed to setup debug trace processor: {e}")
+
 
 # Overload for calls like @trace(name="...", level="debug")
 @overload
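To exercise the new local debug processor, the environment variables referenced in the hunk above can be set before the first traced call. A minimal sketch (the path value is illustrative; the variable names and defaults come from the diff):

    import os
    os.environ["TRACE_DEBUG_PATH"] = "/tmp/ai-pipeline-traces"  # enable local trace dumps
    os.environ["TRACE_DEBUG_MAX_INLINE"] = "10000"              # max inline bytes per element
    os.environ["TRACE_DEBUG_MAX_TRACES"] = "20"                 # cap on retained traces
    # The trace decorator calls _initialise_laminar() on first use, which then
    # attaches a LocalDebugSpanProcessor to the active tracer provider.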
@@ -728,7 +780,7 @@ def set_trace_cost(cost: float | str) -> None:
         >>> @pipeline_task
         >>> async def enriched_generation(prompt: str) -> str:
         ...     # LLM cost tracked automatically via ModelResponse
-        ...     response = await llm.generate("gpt-5", messages=prompt)
+        ...     response = await llm.generate("gpt-5.1", messages=prompt)
         ...
         ...     # Add cost for post-processing
         ...     processing_cost = 0.02  # Fixed cost for enrichment