ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_options.py

@@ -41,9 +41,9 @@ class ModelOptions(BaseModel):
         retries: Number of retry attempts on failure (default: 3).
-        retry_delay_seconds: Seconds to wait between retries (default:
+        retry_delay_seconds: Seconds to wait between retries (default: 20).
-        timeout: Maximum seconds to wait for response (default:
+        timeout: Maximum seconds to wait for response (default: 600).
         cache_ttl: Cache TTL for context messages (default: "300s").
             String format like "60s", "5m", or None to disable caching.
@@ -99,77 +99,11 @@ class ModelOptions(BaseModel):
             Merged with usage_tracking if both are set.
             Useful for beta features or provider-specific capabilities.

-        ... )
-        >>>
-        >>> # With system prompt
-        >>> options = ModelOptions(
-        ...     system_prompt="You are a helpful coding assistant",
-        ...     temperature=0.3  # Lower for code generation
-        ... )
-        >>>
-        >>> # With custom cache TTL
-        >>> options = ModelOptions(
-        ...     cache_ttl="300s",  # Cache context for 5 minutes
-        ...     max_completion_tokens=1000
-        ... )
-        >>>
-        >>> # Disable caching
-        >>> options = ModelOptions(
-        ...     cache_ttl=None,  # No context caching
-        ...     temperature=0.5
-        ... )
-        >>>
-        >>> # For search-enabled models
-        >>> options = ModelOptions(
-        ...     search_context_size="high",  # Get more search results
-        ...     max_completion_tokens=2000
-        ... )
-        >>>
-        >>> # For reasoning models
-        >>> options = ModelOptions(
-        ...     reasoning_effort="high",  # Deep reasoning
-        ...     timeout=600  # More time for complex reasoning
-        ... )
-        >>>
-        >>> # With stop sequences
-        >>> options = ModelOptions(
-        ...     stop=["STOP", "END", "\n\n"],  # Stop on these sequences
-        ...     temperature=0.7
-        ... )
-        >>>
-        >>> # With custom extra_body parameters
-        >>> options = ModelOptions(
-        ...     extra_body={"custom_param": "value", "beta_feature": True},
-        ...     usage_tracking=True  # Still tracks usage alongside custom params
-        ... )
-        >>>
-        >>> # With user tracking for cost monitoring
-        >>> options = ModelOptions(
-        ...     user="user_12345",  # Track costs per user
-        ...     temperature=0.7
-        ... )
-        >>>
-        >>> # With metadata for tracking and observability
-        >>> options = ModelOptions(
-        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
-        ...     temperature=0.7
-        ... )
-
-    Note:
-        - Not all options apply to all models
-        - search_context_size only works with search models
-        - reasoning_effort only works with models that support explicit reasoning
-        - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m", "1h" or None (default: "300s")
-        - stop sequences are limited to 4 by most providers
-        - user identifier helps track costs per end-user (max 256 chars)
-        - extra_body allows passing provider-specific parameters
-        - usage_tracking is enabled by default for cost monitoring
+    Not all options apply to all models. search_context_size only works with search models,
+    reasoning_effort only works with models that support explicit reasoning, and
+    response_format is set internally by generate_structured(). cache_ttl accepts formats
+    like "120s", "5m", "1h" or None (default: "300s"). Stop sequences are limited to 4 by
+    most providers.
     """

     temperature: float | None = None
@@ -185,12 +119,13 @@ class ModelOptions(BaseModel):
     stop: str | list[str] | None = None
     response_format: type[BaseModel] | None = None
     verbosity: Literal["low", "medium", "high"] | None = None
+    stream: bool = False
     usage_tracking: bool = True
     user: str | None = None
     metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None

-    def to_openai_completion_kwargs(self) -> dict[str, Any]:
+    def to_openai_completion_kwargs(self) -> dict[str, Any]:  # noqa: C901
         """Convert options to OpenAI API completion parameters.

         Transforms ModelOptions fields into the format expected by
@@ -221,16 +156,9 @@ class ModelOptions(BaseModel):
             {"web_search_options": {"search_context_size": "low|medium|high"}}
             Non-search models silently ignore this parameter.

-            >>> kwargs
-            {'timeout': 60, 'extra_body': {}, 'temperature': 0.5}
-
-        Note:
-            - system_prompt is handled separately in _process_messages()
-            - retries and retry_delay_seconds are used by retry logic
-            - extra_body always includes usage tracking for cost monitoring
+            system_prompt is handled separately in _process_messages().
+            retries and retry_delay_seconds are used by retry logic.
+            extra_body always includes usage tracking for cost monitoring.
         """
         kwargs: dict[str, Any] = {
             "timeout": self.timeout,
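The long docstring example block is replaced in 0.4.0 by the short prose note above, so here is a minimal usage sketch assembled from the fields visible in these hunks (including the new `stream` field and the documented 20s/600s defaults). The import path, and treating `retry_delay_seconds`/`timeout` as constructor fields, are assumptions inferred from the docstring rather than confirmed by this diff.

```python
# Sketch only: built from the ModelOptions fields and defaults shown in the
# hunks above. The import path is assumed from the file location in this diff.
from ai_pipeline_core.llm.model_options import ModelOptions

options = ModelOptions(
    temperature=0.3,
    retry_delay_seconds=20,   # documented 0.4.0 default
    timeout=600,              # documented 0.4.0 default
    cache_ttl="300s",         # "60s", "5m", ... or None to disable caching
    stream=False,             # field added in 0.4.0
)

# Converts the options into the kwargs dict passed to the OpenAI-compatible
# completion endpoint; timeout is always included per the hunk above.
kwargs = options.to_openai_completion_kwargs()
assert kwargs["timeout"] == 600
```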
ai_pipeline_core/llm/model_response.py

@@ -1,13 +1,12 @@
 """Model response structures for LLM interactions.

-@public
-
 Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """

 import json
 from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar

 from openai.types.chat import ChatCompletion
@@ -21,11 +20,17 @@ T = TypeVar(
 """Type parameter for structured response Pydantic models."""


+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.

-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:

     >>> response = await llm.generate("gpt-5.1", messages=messages)
@@ -39,22 +44,9 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.

-        >>> messages = AIMessages(["Explain quantum computing"])
-        >>> response = await llm.generate("gpt-5.1", messages=messages)
-        >>>
-        >>> # Primary usage: add to conversation
-        >>> messages.append(response)
-        >>>
-        >>> # Access generated text
-        >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """

     def __init__(
@@ -77,13 +69,6 @@ class ModelResponse(ChatCompletion):
                 Includes timing information and custom tags.
             usage: Optional usage information from streaming response.

-        Example:
-            >>> # Usually created internally by generate()
-            >>> response = ModelResponse(
-            ...     chat_completion=completion,
-            ...     model_options={"temperature": 0.7, "model": "gpt-5.1"},
-            ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
-            ... )
         """
         data = chat_completion.model_dump()

@@ -95,6 +80,10 @@ class ModelResponse(ChatCompletion):
             current_finish_reason = data["choices"][i].get("finish_reason")
             if current_finish_reason not in valid_finish_reasons:
                 data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]

         super().__init__(**data)

@@ -107,22 +96,12 @@ class ModelResponse(ChatCompletion):
     def content(self) -> str:
         """Get the generated text content.

-        @public
-
         Primary property for accessing the LLM's response text.
         This is the main property you'll use with ModelResponse.

         Returns:
             Generated text from the model, or empty string if none.

-        Example:
-            >>> response = await generate("gpt-5.1", messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.append(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
         content = self.choices[0].message.content or ""
         return content.split("</think>")[-1].strip()
@@ -131,8 +110,6 @@ class ModelResponse(ChatCompletion):
     def reasoning_content(self) -> str:
         """Get the reasoning content.

-        @public
-
         Returns:
             The reasoning content from the model, or empty string if none.
         """
@@ -143,7 +120,19 @@ class ModelResponse(ChatCompletion):
             return ""
         return message.content.split("</think>")[0].strip()

-
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
+
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
+        """
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
+
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
         """Extract metadata for LMNR (Laminar) observability including cost tracking.

         Collects comprehensive metadata about the generation for tracing,
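The new `Citation` dataclass and `citations` property land together, so a hedged sketch of how they might be consumed follows. The `llm.generate(...)` call shape is taken from the ModelResponse docstring above; the `from ai_pipeline_core import llm` import path is an assumption.

```python
from ai_pipeline_core import llm  # assumed import path

async def list_sources() -> None:
    # citations returns [] for models that attach no url_citation annotations,
    # so this loop is safe for non-search models too.
    response = await llm.generate("sonar-pro-search", messages="Latest Python release notes?")
    for citation in response.citations:
        print(f"{citation.title}: {citation.url}")
```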
ai_pipeline_core/llm/model_response.py (continued)

@@ -178,56 +167,26 @@ class ModelResponse(ChatCompletion):
             1. x-litellm-response-cost header (primary)
             2. usage.cost attribute (fallback)

-            Cost is stored in three fields for
-            - gen_ai.usage.output_cost (
-            - gen_ai.usage.cost (
-            - gen_ai.cost (
-
-            ...     context=large_doc,
-            ...     messages="Summarize this"
-            ... )
-            >>>
-            >>> # Get comprehensive metadata
-            >>> metadata = response.get_laminar_metadata()
-            >>>
-            >>> # Track generation cost
-            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
-            >>> if cost > 0:
-            ...     print(f"Generation cost: ${cost:.4f}")
-            >>>
-            >>> # Monitor token usage
-            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
-            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
-            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
-            >>>
-            >>> # Check cache effectiveness
-            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
-            >>> if cached > 0:
-            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
-            ...     savings = (cached / total) * 100
-            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
-            >>>
-            >>> # Calculate cost per token
-            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
-            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
-            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
-
-        Note:
-            - Cost availability depends on LiteLLM proxy configuration
-            - Not all providers return cost information
-            - Cached tokens reduce actual cost but may not be reflected
-            - Used internally by tracing but accessible for cost analysis
+            Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
+
+            Cost availability depends on LiteLLM proxy configuration. Not all providers
+            return cost information. Cached tokens reduce actual cost but may not be reflected.
+            Used internally by tracing but accessible for cost analysis.
         """
         metadata: dict[str, str | int | float] = deepcopy(self._metadata)

         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
             "gen_ai.response.id": self.id,
-            "gen_ai.
-            "get_ai.system": "litellm",
+            "gen_ai.system": "litellm",
         })

         # Add usage metadata if available
@@ -245,21 +204,19 @@ class ModelResponse(ChatCompletion):
             cost = float(self.usage.cost)  # type: ignore[attr-defined]

         # Add reasoning tokens if available
-        if completion_details := self.usage.completion_tokens_details:
-
-            metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+        if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+            metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens

         # Add cached tokens if available
-        if prompt_details := self.usage.prompt_tokens_details:
-
-            metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+        if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+            metadata["gen_ai.usage.cached_tokens"] = cached_tokens

         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "
+                "gen_ai.cost": cost,
             })

         for key, value in self._model_options.items():
@@ -269,7 +226,7 @@ class ModelResponse(ChatCompletion):

         other_fields = self.__dict__
         for key, value in other_fields.items():
-            if key in
+            if key in {"_model_options", "_metadata", "choices"}:
                 continue
             try:
                 metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
@@ -278,7 +235,7 @@ class ModelResponse(ChatCompletion):

         message = self.choices[0].message
         for key, value in message.__dict__.items():
-            if key in
+            if key in {"content"}:
                 continue
             metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

@@ -297,16 +254,13 @@ class ModelResponse(ChatCompletion):
         if not self.content:
             raise ValueError("Empty response content")

-        if response_format := self._model_options.get("response_format"):
-
-            response_format.model_validate_json(self.content)
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)


-class StructuredModelResponse(ModelResponse, Generic[T]):
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.

-    @public
-
     Primary usage is accessing the .parsed property for the structured data.
     """
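The 0.4.0 docstring drops the long cost-tracking example; a condensed sketch of the same idea follows, using only metadata keys that appear in these hunks. The import path is assumed from the file location in this diff.

```python
from ai_pipeline_core.llm.model_response import ModelResponse  # assumed import path

def report_usage(response: ModelResponse) -> None:
    # Keys below (gen_ai.usage.output_cost, gen_ai.usage.cached_tokens,
    # gen_ai.usage.reasoning_tokens, gen_ai.system) are the ones named in
    # the get_laminar_metadata() hunks above.
    metadata = response.get_laminar_metadata()

    cost = metadata.get("gen_ai.usage.output_cost", 0)
    if cost:
        print(f"Generation cost: ${float(cost):.4f}")

    cached = metadata.get("gen_ai.usage.cached_tokens", 0)
    reasoning = metadata.get("gen_ai.usage.reasoning_tokens", 0)
    print(f"cached={cached} reasoning={reasoning} system={metadata.get('gen_ai.system')}")
```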
ai_pipeline_core/llm/model_types.py

@@ -10,9 +10,9 @@ Model categories:
 - Search models: Models with web search capabilities
 """

-from typing import Literal
+from typing import Literal

-ModelName
+type ModelName = (
     Literal[
         # Core models
         "gemini-3-pro",
@@ -23,20 +23,20 @@ ModelName: TypeAlias = (
         "grok-4.1-fast",
         # Search models
         "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
         "sonar-pro-search",
     ]
     | str
 )
 """Type-safe model name identifiers with support for custom models.

-@public
-
 Provides IDE autocompletion for common model names while allowing any
 string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.

-
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.

 Model categories:
@@ -58,22 +58,7 @@ Using custom models:
 - Custom models work seamlessly as strings
 - No need for Union types or additional type aliases

-    >>> # Predefined model with IDE autocomplete
-    >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Custom model works directly
-    >>> model: ModelName = "custom-model-v2"  # Any string is valid
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Both types work seamlessly
-    >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
-
-Note:
-    The ModelName type includes both predefined literals and str,
-    allowing full flexibility while maintaining IDE support for
-    common models.
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """
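The removed doctest block boils down to this: predefined literals get autocompletion, arbitrary strings still type-check against the PEP 695 `type ModelName = Literal[...] | str` alias. A short sketch; the import path is assumed from the file location, and the custom model name is a placeholder.

```python
from ai_pipeline_core.llm.model_types import ModelName  # assumed import path

# Both assignments type-check because ModelName is Literal[...] | str.
model: ModelName = "gemini-3-flash-search"   # predefined literal, autocompleted
custom: ModelName = "my-proxy-model-v2"      # any custom string is also valid

models: list[ModelName] = [model, custom, "sonar-pro-search"]
```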
ai_pipeline_core/logging/__init__.py

@@ -2,11 +2,6 @@

 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
-Example:
-    >>> from ai_pipeline_core import get_pipeline_logger
-    >>> logger = get_pipeline_logger(__name__)
-    >>> logger.info("Processing started")
 """

 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -14,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin

 __all__ = [
     "LoggerMixin",
-    "StructuredLoggerMixin",
     "LoggingConfig",
-    "
+    "StructuredLoggerMixin",
     "get_pipeline_logger",
+    "setup_logging",
 ]
ai_pipeline_core/logging/logging_config.py

@@ -6,7 +6,7 @@ Provides logging configuration management that integrates with Prefect's logging
 import logging.config
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any

 import yaml
 from prefect.logging import get_logger
@@ -16,7 +16,7 @@ DEFAULT_LOG_LEVELS = {
     "ai_pipeline_core": "INFO",
     "ai_pipeline_core.documents": "INFO",
     "ai_pipeline_core.llm": "INFO",
-    "ai_pipeline_core.
+    "ai_pipeline_core.pipeline": "INFO",
     "ai_pipeline_core.testing": "DEBUG",
 }

@@ -32,22 +32,19 @@ class LoggingConfig:
     3. PREFECT_LOGGING_SETTINGS_PATH environment variable
     4. Default configuration

-    Example:
-        >>> config = LoggingConfig()
-        >>> config.apply()
     """

-    def __init__(self, config_path:
+    def __init__(self, config_path: Path | None = None):
         """Initialize logging configuration.

         Args:
             config_path: Optional path to YAML configuration file.
         """
         self.config_path = config_path or self._get_default_config_path()
-        self._config:
+        self._config: dict[str, Any] | None = None

     @staticmethod
-    def _get_default_config_path() ->
+    def _get_default_config_path() -> Path | None:
         """Get default config path from environment variables.

         Returns:
@@ -63,7 +60,7 @@ class LoggingConfig:

         return None

-    def load_config(self) ->
+    def load_config(self) -> dict[str, Any]:
         """Load logging configuration from file or defaults.

         Returns:
@@ -71,7 +68,7 @@ class LoggingConfig:
         """
         if self._config is None:
             if self.config_path and self.config_path.exists():
-                with open(self.config_path, "
+                with open(self.config_path, encoding="utf-8") as f:
                     self._config = yaml.safe_load(f)
             else:
                 self._config = self._get_default_config()
@@ -80,7 +77,7 @@ class LoggingConfig:
         return self._config

     @staticmethod
-    def _get_default_config() ->
+    def _get_default_config() -> dict[str, Any]:
         """Get default logging configuration.

         Returns:
@@ -95,10 +92,7 @@ class LoggingConfig:
                     "datefmt": "%H:%M:%S",
                 },
                 "detailed": {
-                    "format": (
-                        "%(asctime)s | %(levelname)-7s | %(name)s | "
-                        "%(funcName)s:%(lineno)d - %(message)s"
-                    ),
+                    "format": ("%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"),
                     "datefmt": "%Y-%m-%d %H:%M:%S",
                 },
             },
@@ -134,10 +128,10 @@


 # Global configuration instance
-_logging_config:
+_logging_config: LoggingConfig | None = None


-def setup_logging(config_path:
+def setup_logging(config_path: Path | None = None, level: str | None = None):
     """Setup logging for the AI Pipeline Core library.

     Initializes logging configuration for the pipeline system.
@@ -149,18 +143,8 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non
         config_path: Optional path to YAML logging configuration file.
         level: Optional log level override (INFO, DEBUG, WARNING, etc.).

-    Example:
-        >>> # In your main.py or application entry point:
-        >>> def main():
-        ...     setup_logging()  # Call once at startup
-        ...     # Your application code here
-        ...
-        >>> # Or with custom level:
-        >>> if __name__ == "__main__":
-        ...     setup_logging(level="DEBUG")
-        ...     run_application()
     """
-    global _logging_config
+    global _logging_config  # noqa: PLW0603

     _logging_config = LoggingConfig(config_path)
     _logging_config.apply()
@@ -179,22 +163,28 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non
 def get_pipeline_logger(name: str):
     """Get a logger for pipeline components.

-
+    Returns a Prefect-integrated logger with the OTel span-event bridge
+    attached. Any log record at INFO+ emitted while an OTel span is
+    recording will be captured as a span event in the trace.

     Args:
         name: Logger name, typically __name__.

     Returns:
-        Prefect logger instance.
+        Prefect logger instance with bridge handler.

-    Example:
-        >>> logger = get_pipeline_logger(__name__)
-        >>> logger.info("Module initialized")
     """
-    # Ensure logging is setup
     if _logging_config is None:
         setup_logging()

-
+    logger = get_logger(name)
+
+    # Attach the singleton bridge handler so log records become OTel span events.
+    # The handler is a no-op when no span is recording, so early attachment is safe.
+    from ai_pipeline_core.observability._logging_bridge import get_bridge_handler  # noqa: PLC0415
+
+    handler = get_bridge_handler()
+    if handler not in logger.handlers:
+        logger.addHandler(handler)
+
+    return logger