ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +39 -2
- ai_pipeline_core/debug/__init__.py +26 -0
- ai_pipeline_core/debug/config.py +91 -0
- ai_pipeline_core/debug/content.py +706 -0
- ai_pipeline_core/debug/processor.py +99 -0
- ai_pipeline_core/debug/summary.py +236 -0
- ai_pipeline_core/debug/writer.py +913 -0
- ai_pipeline_core/documents/mime_type.py +28 -0
- ai_pipeline_core/flow/options.py +3 -3
- ai_pipeline_core/images/__init__.py +362 -0
- ai_pipeline_core/images/_processing.py +157 -0
- ai_pipeline_core/llm/ai_messages.py +41 -7
- ai_pipeline_core/llm/client.py +78 -17
- ai_pipeline_core/llm/model_response.py +5 -5
- ai_pipeline_core/llm/model_types.py +10 -12
- ai_pipeline_core/logging/logging_mixin.py +2 -2
- ai_pipeline_core/prompt_builder/prompt_builder.py +3 -3
- ai_pipeline_core/tracing.py +53 -1
- ai_pipeline_core/utils/deploy.py +214 -6
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.4.dist-info}/METADATA +74 -8
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.4.dist-info}/RECORD +23 -15
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.4.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.0.dist-info → ai_pipeline_core-0.3.4.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/client.py
CHANGED
@@ -13,6 +13,7 @@ Key functions:
 
 import asyncio
 import time
+from io import BytesIO
 from typing import Any, TypeVar
 
 from lmnr import Laminar
@@ -21,19 +22,77 @@ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDone
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
+from PIL import Image
 from prefect.logging import get_logger
 from pydantic import BaseModel, ValidationError
 
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.exceptions import LLMError
+from ai_pipeline_core.images import ImageProcessingConfig, process_image_to_documents
 from ai_pipeline_core.settings import settings
 
-from .ai_messages import AIMessages
+from .ai_messages import AIMessages, AIMessageType
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
 from .model_types import ModelName
 
 logger = get_logger()
 
+# Image splitting configs for automatic large-image handling at the LLM boundary.
+# Gemini supports up to 3000x3000; all other models use a conservative 1000x1000 default.
+_GEMINI_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=3000, max_pixels=9_000_000, jpeg_quality=75
+)
+_DEFAULT_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=1000, max_pixels=1_000_000, jpeg_quality=75
+)
+
+
+def _get_image_config(model: str) -> ImageProcessingConfig:
+    """Return the image splitting config for a model."""
+    if "gemini" in model.lower():
+        return _GEMINI_IMAGE_CONFIG
+    return _DEFAULT_IMAGE_CONFIG
+
+
+def _prepare_images_for_model(messages: AIMessages, model: str) -> AIMessages:
+    """Split image documents that exceed model constraints.
+
+    Returns a new AIMessages with oversized images replaced by tiles.
+    Returns the original instance unchanged if no splitting is needed.
+    """
+    if not any(isinstance(m, Document) and m.is_image for m in messages):
+        return messages
+
+    config = _get_image_config(model)
+    result: list[AIMessageType] = []
+    changed = False
+
+    for msg in messages:
+        if not (isinstance(msg, Document) and msg.is_image):
+            result.append(msg)
+            continue
+
+        try:
+            with Image.open(BytesIO(msg.content)) as img:
+                w, h = img.size
+        except Exception:
+            result.append(msg)
+            continue
+
+        if w <= config.max_dimension and h <= config.max_dimension and w * h <= config.max_pixels:
+            result.append(msg)
+            continue
+
+        name_prefix = msg.name.rsplit(".", 1)[0] if "." in msg.name else msg.name
+        tiles = process_image_to_documents(msg, config=config, name_prefix=name_prefix)
+        result.extend(tiles)
+        changed = True
+
+    if not changed:
+        return messages
+    return AIMessages(result)
+
 
 def _process_messages(
     context: AIMessages,
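A minimal sketch of how these new helpers compose (illustrative only; my_image_doc stands in for an image-bearing Document, which this diff never constructs):

    # Gemini models get the larger budget; everything else uses the 1000x1000 default.
    config = _get_image_config("gemini-3-pro")  # -> _GEMINI_IMAGE_CONFIG (3000 px / 9 MP)
    config = _get_image_config("gpt-5.1")       # -> _DEFAULT_IMAGE_CONFIG (1000 px / 1 MP)

    # Oversized images are re-emitted as tile Documents via process_image_to_documents;
    # anything within budget passes through and the original AIMessages is returned as-is.
    prepared = _prepare_images_for_model(AIMessages([my_image_doc]), "gemini-3-pro")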
@@ -150,10 +209,8 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
     Returns:
         OpenRouter model name.
     """
-    if model == "
-        return "
-    if model == "gemini-2.5-flash-search":
-        return "google/gemini-2.5-flash:online"
+    if model == "gemini-3-flash-search":
+        return "google/gemini-3-flash:online"
     if model == "sonar-pro-search":
         return "perplexity/sonar-pro-search"
     if model.startswith("gemini"):
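Sketched mapping behavior after this change (the first removed branch is truncated in the diff and is not reproduced here):

    _model_name_to_openrouter_model("gemini-3-flash-search")  # -> "google/gemini-3-flash:online"
    _model_name_to_openrouter_model("sonar-pro-search")       # -> "perplexity/sonar-pro-search"
    # Other "gemini*" names are handled by the model.startswith("gemini") branch above.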
@@ -184,7 +241,7 @@ async def _generate(
     Handles both regular and structured output generation.
 
     Args:
-        model: Model identifier (e.g., "gpt-5", "gemini-
+        model: Model identifier (e.g., "gpt-5.1", "gemini-3-pro").
         messages: Formatted messages for the API.
         completion_kwargs: Additional parameters for the completion API.
 
@@ -273,6 +330,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
+    # Auto-split large images based on model-specific constraints
+    context = _prepare_images_for_model(context, model)
+    messages = _prepare_images_for_model(messages, model)
+
     if "gemini" in model.lower() and context.approximate_tokens_count < 10000:
         # Bug fix for minimum explicit context size for Gemini models
         options.cache_ttl = None
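Since the split now happens inside _generate_with_retry, callers can pass oversized images as-is; a sketch (big_scan is a hypothetical image Document larger than the model budget):

    response = await llm.generate(
        "gemini-3-pro",
        context=AIMessages([big_scan]),  # auto-tiled against the 3000x3000 / 9 MP Gemini config
        messages="Describe the contents of this scan.",
    )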
@@ -339,7 +400,7 @@ async def generate(
     4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
 
     Args:
-        model: Model to use (e.g., "gpt-5", "gemini-
+        model: Model to use (e.g., "gpt-5.1", "gemini-3-pro", "grok-4.1-fast").
             Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
             Defaults to None (empty context). Cached for 5 minutes by default.
@@ -367,17 +428,17 @@ async def generate(
     Wrap Documents in AIMessages - DO NOT pass directly or convert to .text:
 
         # CORRECT - wrap Document in AIMessages
-        response = await llm.generate("gpt-5", messages=AIMessages([my_document]))
+        response = await llm.generate("gpt-5.1", messages=AIMessages([my_document]))
 
         # WRONG - don't pass Document directly
-        response = await llm.generate("gpt-5", messages=my_document)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document)  # NO!
 
         # WRONG - don't convert to string yourself
-        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document.text)  # NO!
 
     VISION/PDF MODEL COMPATIBILITY:
     When using Documents containing images or PDFs, ensure your model supports these formats:
-    - Images require vision-capable models (gpt-
+    - Images require vision-capable models (gpt-5.1, gemini-3-flash, gemini-3-pro)
     - PDFs require document processing support (varies by provider)
     - Non-compatible models will raise ValueError or fall back to text extraction
     - Check model capabilities before including visual/PDF content
@@ -395,7 +456,7 @@ async def generate(
 
     Example:
         >>> # CORRECT - No options parameter (this is the recommended pattern)
-        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
+        >>> response = await llm.generate("gpt-5.1", messages="Explain quantum computing")
         >>> print(response.content)  # In production, use get_pipeline_logger instead of print
 
         >>> # With context caching for efficiency
@@ -403,10 +464,10 @@ async def generate(
         >>> static_doc = AIMessages([large_document, "few-shot example: ..."])
         >>>
         >>> # First call: caches context
-        >>> r1 = await llm.generate("gpt-5", context=static_doc, messages="Summarize")
+        >>> r1 = await llm.generate("gpt-5.1", context=static_doc, messages="Summarize")
         >>>
         >>> # Second call: reuses cache, saves tokens!
-        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
+        >>> r2 = await llm.generate("gpt-5.1", context=static_doc, messages="Key points?")
 
         >>> # Multi-turn conversation
        >>> messages = AIMessages([
@@ -414,7 +475,7 @@ async def generate(
         ...     previous_response,
         ...     "Can you give an example?"
         ... ])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
 
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
@@ -509,7 +570,7 @@ async def generate_structured(
 
         >>> # Step 1: Research/analysis with generate() - no options parameter
         >>> research = await llm.generate(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     messages="Research and analyze this complex topic..."
        ... )
         >>>
@@ -566,7 +627,7 @@ async def generate_structured(
         >>>
         >>> # CORRECT - No options parameter
         >>> response = await llm.generate_structured(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     response_format=Analysis,
         ...     messages="Analyze this product review: ..."
         ... )
ai_pipeline_core/llm/model_response.py
CHANGED
@@ -28,7 +28,7 @@ class ModelResponse(ChatCompletion):
 
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>> messages.append(response)  # Add assistant response to conversation
         >>> print(response.content)  # Access generated text
 
@@ -43,7 +43,7 @@ class ModelResponse(ChatCompletion):
         >>> from ai_pipeline_core import llm, AIMessages
         >>>
         >>> messages = AIMessages(["Explain quantum computing"])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
         >>> messages.append(response)
@@ -81,7 +81,7 @@ class ModelResponse(ChatCompletion):
         >>> # Usually created internally by generate()
         >>> response = ModelResponse(
         ...     chat_completion=completion,
-        ...     model_options={"temperature": 0.7, "model": "gpt-
+        ...     model_options={"temperature": 0.7, "model": "gpt-5.1"},
         ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
         ... )
     """
@@ -116,7 +116,7 @@ class ModelResponse(ChatCompletion):
             Generated text from the model, or empty string if none.
 
         Example:
-            >>> response = await generate("gpt-5", messages="Hello")
+            >>> response = await generate("gpt-5.1", messages="Hello")
             >>> text = response.content  # The generated response
             >>>
             >>> # Common pattern: add to messages then use content
@@ -185,7 +185,7 @@ class ModelResponse(ChatCompletion):
 
         Example:
             >>> response = await llm.generate(
-            ...     "gpt-5",
+            ...     "gpt-5.1",
             ...     context=large_doc,
             ...     messages="Summarize this"
             ... )
ai_pipeline_core/llm/model_types.py
CHANGED
@@ -15,17 +15,15 @@ from typing import Literal, TypeAlias
 ModelName: TypeAlias = (
     Literal[
         # Core models
-        "gemini-
-        "gpt-5",
-        "grok-4",
+        "gemini-3-pro",
+        "gpt-5.1",
         # Small models
-        "gemini-
+        "gemini-3-flash",
         "gpt-5-mini",
-        "grok-4-fast",
+        "grok-4.1-fast",
         # Search models
-        "gemini-
+        "gemini-3-flash-search",
         "sonar-pro-search",
-        "gpt-4o-search",
     ]
     | str
 )
@@ -38,15 +36,15 @@ string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.
 
-Note: These are example common model names as of
+Note: These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -64,7 +62,7 @@ Example:
     >>> from ai_pipeline_core import llm, ModelName
     >>>
     >>> # Predefined model with IDE autocomplete
-    >>> model: ModelName = "gpt-5"  # IDE suggests common models
+    >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Custom model works directly
@@ -72,7 +70,7 @@ Example:
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Both types work seamlessly
-    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-
+    >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
 
 Note:
     The ModelName type includes both predefined literals and str,
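Because ModelName is a union of the literals with str, names dropped from the list in 0.3.4 still type-check; they only lose autocomplete. A sketch:

    old_model: ModelName = "grok-4"         # removed literal, still valid via the str arm
    new_model: ModelName = "grok-4.1-fast"  # suggested literal in 0.3.4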
ai_pipeline_core/logging/logging_mixin.py
CHANGED
@@ -117,7 +117,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_metric("processing_time", 1.23, "seconds",
-                           document_type="pdf", model="gpt-
+                           document_type="pdf", model="gpt-5.1")
         """
         self.logger.info(
             f"Metric: {metric_name}",
@@ -140,7 +140,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_span("llm_generation", 1234.5,
-                          model="gpt-
+                          model="gpt-5.1", tokens=500)
         """
         self.logger.info(
             f"Span: {operation}",
ai_pipeline_core/prompt_builder/prompt_builder.py
CHANGED
@@ -144,7 +144,7 @@ class PromptBuilder(BaseModel):
             options.service_tier = None
             options.cache_ttl = None
             cache_lock = False
-        if "grok-4-fast" in model:
+        if "grok-4.1-fast" in model:
             options.max_completion_tokens = 30000
 
         if self.mode == "test":
@@ -154,7 +154,7 @@ class PromptBuilder(BaseModel):
             options.reasoning_effort = "medium"
             options.verbosity = None
 
-        if model.startswith("gpt-5"):
+        if model.startswith("gpt-5.1"):
             options.service_tier = "flex"
 
         return options, cache_lock
@@ -224,7 +224,7 @@ class PromptBuilder(BaseModel):
         self, model: ModelName, prompt: str | AIMessages, options: ModelOptions | None = None
     ) -> str:
         options, _ = self._get_options(model, options)
-        if "gpt-5" not in model and "grok-4" not in model and "openrouter/" not in model:
+        if "gpt-5.1" not in model and "grok-4.1-fast" not in model and "openrouter/" not in model:
             options.stop = "</document>"
 
         response = await self.call(model, prompt, options)
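One behavioral consequence of the tightened substring checks above, sketched as plain string logic:

    "gpt-5" in "gpt-5-mini"    # True  -> 0.3.0 skipped the stop token for gpt-5-mini
    "gpt-5.1" in "gpt-5-mini"  # False -> 0.3.4 now sets options.stop = "</document>" for it

The startswith("gpt-5.1") narrowing works the same way: plain "gpt-5" and "gpt-5-mini" no longer receive the "flex" service tier.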
ai_pipeline_core/tracing.py
CHANGED
@@ -276,6 +276,9 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+_debug_processor_initialized = False
+
+
 def _initialise_laminar() -> None:
     """Initialize Laminar SDK with project configuration.
 
@@ -287,17 +290,66 @@ def _initialise_laminar() -> None:
     - Uses settings.lmnr_project_api_key for authentication
     - Disables OPENAI instrument to prevent double-tracing
     - Called automatically by trace decorator on first use
+    - Optionally adds local debug processor if TRACE_DEBUG_PATH is set
 
     Note:
         This is an internal function called once per process.
         Multiple calls are safe (Laminar handles idempotency).
     """
+    global _debug_processor_initialized
+
     if settings.lmnr_project_api_key:
         Laminar.initialize(
             project_api_key=settings.lmnr_project_api_key,
             disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
         )
 
+    # Add local debug processor if configured (only once)
+    if not _debug_processor_initialized:
+        _debug_processor_initialized = True
+        debug_path = os.environ.get("TRACE_DEBUG_PATH")
+        if debug_path:
+            _setup_debug_processor(debug_path)
+
+
+def _setup_debug_processor(debug_path: str) -> None:
+    """Set up local debug trace processor."""
+    try:
+        from pathlib import Path  # noqa: PLC0415
+
+        from opentelemetry import trace  # noqa: PLC0415
+
+        from ai_pipeline_core.debug import (  # noqa: PLC0415
+            LocalDebugSpanProcessor,
+            LocalTraceWriter,
+            TraceDebugConfig,
+        )
+
+        config = TraceDebugConfig(
+            path=Path(debug_path),
+            max_element_bytes=int(os.environ.get("TRACE_DEBUG_MAX_INLINE", 10000)),
+            max_traces=int(os.environ.get("TRACE_DEBUG_MAX_TRACES", 20)) or None,
+        )
+
+        writer = LocalTraceWriter(config)
+        processor = LocalDebugSpanProcessor(writer)
+
+        # Add to tracer provider
+        provider = trace.get_tracer_provider()
+        add_processor = getattr(provider, "add_span_processor", None)
+        if add_processor is not None:
+            add_processor(processor)
+
+        # Register shutdown
+        import atexit  # noqa: PLC0415
+
+        atexit.register(processor.shutdown)
+
+    except Exception as e:
+        import logging  # noqa: PLC0415
+
+        logging.getLogger(__name__).warning(f"Failed to setup debug trace processor: {e}")
+
 
 # Overload for calls like @trace(name="...", level="debug")
 @overload
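To opt in to the local debug traces, set the environment variables read above before the first traced call (a sketch; the on-disk layout is defined by the new ai_pipeline_core/debug modules, not shown in this diff):

    import os

    os.environ["TRACE_DEBUG_PATH"] = "/tmp/trace-debug"  # enables the processor
    os.environ["TRACE_DEBUG_MAX_INLINE"] = "10000"       # max inline bytes per element (default 10000)
    os.environ["TRACE_DEBUG_MAX_TRACES"] = "20"          # retained traces; "0" is coerced to None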
@@ -728,7 +780,7 @@ def set_trace_cost(cost: float | str) -> None:
         >>> @pipeline_task
         >>> async def enriched_generation(prompt: str) -> str:
         ...     # LLM cost tracked automatically via ModelResponse
-        ...     response = await llm.generate("gpt-5", messages=prompt)
+        ...     response = await llm.generate("gpt-5.1", messages=prompt)
         ...
         ...     # Add cost for post-processing
         ...     processing_cost = 0.02  # Fixed cost for enrichment