ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +78 -125
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +130 -81
- ai_pipeline_core/llm/client.py +327 -193
- ai_pipeline_core/llm/model_options.py +14 -86
- ai_pipeline_core/llm/model_response.py +60 -103
- ai_pipeline_core/llm/model_types.py +16 -34
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -483
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core/utils/deploy.py +0 -373
- ai_pipeline_core/utils/remote_deployment.py +0 -269
- ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
- ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_options.py

@@ -41,11 +41,11 @@ class ModelOptions(BaseModel):
 
         retries: Number of retry attempts on failure (default: 3).
 
-        retry_delay_seconds: Seconds to wait between retries (default:
+        retry_delay_seconds: Seconds to wait between retries (default: 20).
 
-        timeout: Maximum seconds to wait for response (default:
+        timeout: Maximum seconds to wait for response (default: 600).
 
-        cache_ttl: Cache TTL for context messages (default: "
+        cache_ttl: Cache TTL for context messages (default: "300s").
             String format like "60s", "5m", or None to disable caching.
            Applied to the last context message for efficient token reuse.
 
@@ -99,77 +99,11 @@ class ModelOptions(BaseModel):
             Merged with usage_tracking if both are set.
             Useful for beta features or provider-specific capabilities.
 
-
-
-
-
-
-        ...     )
-        >>>
-        >>> # With system prompt
-        >>> options = ModelOptions(
-        ...     system_prompt="You are a helpful coding assistant",
-        ...     temperature=0.3  # Lower for code generation
-        ...     )
-        >>>
-        >>> # With custom cache TTL
-        >>> options = ModelOptions(
-        ...     cache_ttl="300s",  # Cache context for 5 minutes
-        ...     max_completion_tokens=1000
-        ...     )
-        >>>
-        >>> # Disable caching
-        >>> options = ModelOptions(
-        ...     cache_ttl=None,  # No context caching
-        ...     temperature=0.5
-        ...     )
-        >>>
-        >>> # For search-enabled models
-        >>> options = ModelOptions(
-        ...     search_context_size="high",  # Get more search results
-        ...     max_completion_tokens=2000
-        ...     )
-        >>>
-        >>> # For reasoning models
-        >>> options = ModelOptions(
-        ...     reasoning_effort="high",  # Deep reasoning
-        ...     timeout=600  # More time for complex reasoning
-        ...     )
-        >>>
-        >>> # With stop sequences
-        >>> options = ModelOptions(
-        ...     stop=["STOP", "END", "\n\n"],  # Stop on these sequences
-        ...     temperature=0.7
-        ...     )
-        >>>
-        >>> # With custom extra_body parameters
-        >>> options = ModelOptions(
-        ...     extra_body={"custom_param": "value", "beta_feature": True},
-        ...     usage_tracking=True  # Still tracks usage alongside custom params
-        ...     )
-        >>>
-        >>> # With user tracking for cost monitoring
-        >>> options = ModelOptions(
-        ...     user="user_12345",  # Track costs per user
-        ...     temperature=0.7
-        ...     )
-        >>>
-        >>> # With metadata for tracking and observability
-        >>> options = ModelOptions(
-        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
-        ...     temperature=0.7
-        ...     )
-
-    Note:
-        - Not all options apply to all models
-        - search_context_size only works with search models
-        - reasoning_effort only works with models that support explicit reasoning
-        - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
-        - stop sequences are limited to 4 by most providers
-        - user identifier helps track costs per end-user (max 256 chars)
-        - extra_body allows passing provider-specific parameters
-        - usage_tracking is enabled by default for cost monitoring
+    Not all options apply to all models. search_context_size only works with search models,
+    reasoning_effort only works with models that support explicit reasoning, and
+    response_format is set internally by generate_structured(). cache_ttl accepts formats
+    like "120s", "5m", "1h" or None (default: "300s"). Stop sequences are limited to 4 by
+    most providers.
     """
 
     temperature: float | None = None
@@ -179,18 +113,19 @@ class ModelOptions(BaseModel):
     retries: int = 3
     retry_delay_seconds: int = 20
     timeout: int = 600
-    cache_ttl: str | None = "
+    cache_ttl: str | None = "300s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     stop: str | list[str] | None = None
     response_format: type[BaseModel] | None = None
     verbosity: Literal["low", "medium", "high"] | None = None
+    stream: bool = False
     usage_tracking: bool = True
     user: str | None = None
     metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None
 
-    def to_openai_completion_kwargs(self) -> dict[str, Any]:
+    def to_openai_completion_kwargs(self) -> dict[str, Any]:  # noqa: C901
         """Convert options to OpenAI API completion parameters.
 
         Transforms ModelOptions fields into the format expected by
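For orientation, a minimal sketch of constructing the 0.4.1 option set. The import path and the non-default values are illustrative; the defaults (retries=3, retry_delay_seconds=20, timeout=600, cache_ttl="300s") and the new `stream` field come from the hunk above.

```python
from ai_pipeline_core.llm import ModelOptions  # import path assumed

options = ModelOptions(
    temperature=0.7,   # illustrative value
    cache_ttl="300s",  # explicit, same as the new default
    stream=False,      # new field in 0.4.1, defaults to False
)
```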
@@ -221,16 +156,9 @@ class ModelOptions(BaseModel):
                 {"web_search_options": {"search_context_size": "low|medium|high"}}
             Non-search models silently ignore this parameter.
 
-
-
-
-            >>> kwargs
-            {'timeout': 60, 'extra_body': {}, 'temperature': 0.5}
-
-        Note:
-            - system_prompt is handled separately in _process_messages()
-            - retries and retry_delay_seconds are used by retry logic
-            - extra_body always includes usage tracking for cost monitoring
+        system_prompt is handled separately in _process_messages().
+        retries and retry_delay_seconds are used by retry logic.
+        extra_body always includes usage tracking for cost monitoring.
         """
         kwargs: dict[str, Any] = {
             "timeout": self.timeout,
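A hedged sketch of the conversion helper in use. Per the retained docstring, timeout is always included, extra_body carries usage tracking, and system_prompt/retries are handled by separate logic rather than by these kwargs; the import path is assumed.

```python
from ai_pipeline_core.llm import ModelOptions  # import path assumed

options = ModelOptions(temperature=0.5)
kwargs = options.to_openai_completion_kwargs()

# With the defaults above, the timeout passes through unchanged
# and usage tracking is injected into extra_body.
assert kwargs["timeout"] == 600
assert "extra_body" in kwargs
```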
ai_pipeline_core/llm/model_response.py

@@ -1,13 +1,12 @@
 """Model response structures for LLM interactions.
 
-@public
-
 Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
 import json
 from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar
 
 from openai.types.chat import ChatCompletion
@@ -21,14 +20,20 @@ T = TypeVar(
 """Type parameter for structured response Pydantic models."""
 
 
+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.
 
-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate("gpt-5", messages=messages)
+    >>> response = await llm.generate("gpt-5.1", messages=messages)
     >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
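Because Citation is a frozen dataclass, instances are immutable and hashable; a tiny sketch (module path taken from the file list above):

```python
from ai_pipeline_core.llm.model_response import Citation

citation = Citation(title="Example source", url="https://example.com")
sources = {citation}      # hashable, so usable in sets and as dict keys
# citation.url = "..."    # would raise FrozenInstanceError (frozen=True)
```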
@@ -39,22 +44,9 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.
 
-
-
-
-    >>> messages = AIMessages(["Explain quantum computing"])
-    >>> response = await llm.generate("gpt-5", messages=messages)
-    >>>
-    >>> # Primary usage: add to conversation
-    >>> messages.append(response)
-    >>>
-    >>> # Access generated text
-    >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """
 
     def __init__(
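The conversation pattern dropped from the docstring still applies; a sketch with assumed import paths (the llm module and AIMessages exist elsewhere in the package per the file list, and the model name comes from the updated literals):

```python
import asyncio

from ai_pipeline_core import llm  # import paths assumed
from ai_pipeline_core.llm import AIMessages

async def main() -> None:
    messages = AIMessages(["Explain quantum computing"])
    response = await llm.generate("gpt-5.1", messages=messages)
    messages.append(response)  # add the assistant turn to the conversation
    print(response.content)    # generated text only (anything before </think> is stripped)

asyncio.run(main())
```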
@@ -77,21 +69,21 @@ class ModelResponse(ChatCompletion):
                 Includes timing information and custom tags.
             usage: Optional usage information from streaming response.
 
-        Example:
-            >>> # Usually created internally by generate()
-            >>> response = ModelResponse(
-            ...     chat_completion=completion,
-            ...     model_options={"temperature": 0.7, "model": "gpt-4"},
-            ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
-            ...     )
         """
         data = chat_completion.model_dump()
 
         # fixes issue where the role is "assistantassistant" instead of "assistant"
+        valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
         for i in range(len(data["choices"])):
-
-
-
+            data["choices"][i]["message"]["role"] = "assistant"
+            # Only update finish_reason if it's not already a valid value
+            current_finish_reason = data["choices"][i].get("finish_reason")
+            if current_finish_reason not in valid_finish_reasons:
+                data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]
 
         super().__init__(**data)
 
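To make the new sanitization concrete, a standalone sketch of the same logic applied to an illustrative payload shaped like ChatCompletion.model_dump() output (field values are made up):

```python
data = {
    "choices": [{
        "finish_reason": "completed",      # not a valid OpenAI finish_reason
        "message": {
            "role": "assistantassistant",  # the duplicated-role bug being fixed
            "annotations": [
                {"type": "url_citation", "url_citation": {"title": "Doc", "url": "https://example.com"}},
                {"type": "file"},          # e.g. a Grok PDF annotation, gets dropped
            ],
        },
    }]
}

valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
for choice in data["choices"]:
    choice["message"]["role"] = "assistant"
    if choice.get("finish_reason") not in valid_finish_reasons:
        choice["finish_reason"] = "stop"
    if annotations := choice["message"].get("annotations"):
        choice["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]
```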
@@ -104,22 +96,12 @@ class ModelResponse(ChatCompletion):
     def content(self) -> str:
         """Get the generated text content.
 
-        @public
-
         Primary property for accessing the LLM's response text.
         This is the main property you'll use with ModelResponse.
 
         Returns:
             Generated text from the model, or empty string if none.
 
-        Example:
-            >>> response = await generate("gpt-5", messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.append(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
         content = self.choices[0].message.content or ""
         return content.split("</think>")[-1].strip()
@@ -128,8 +110,6 @@ class ModelResponse(ChatCompletion):
     def reasoning_content(self) -> str:
         """Get the reasoning content.
 
-        @public
-
         Returns:
             The reasoning content from the model, or empty string if none.
         """
@@ -140,7 +120,19 @@ class ModelResponse(ChatCompletion):
             return ""
         return message.content.split("</think>")[0].strip()
 
-    def get_laminar_metadata(self) -> dict[str, str | int | float]:
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
+
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
+        """
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
+
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
         """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
         Collects comprehensive metadata about the generation for tracing,
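Pairing the new citations property with one of the search models from the updated ModelName list; a hedged sketch (import path assumed, string-valued messages as in the old docstring examples):

```python
import asyncio

from ai_pipeline_core import llm  # import path assumed

async def main() -> None:
    response = await llm.generate("gemini-3-flash-search", messages="What changed in the latest Python release?")
    for citation in response.citations:  # empty list on non-search models
        print(f"{citation.title}: {citation.url}")

asyncio.run(main())
```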
@@ -175,56 +167,26 @@ class ModelResponse(ChatCompletion):
         1. x-litellm-response-cost header (primary)
         2. usage.cost attribute (fallback)
 
-        Cost is stored in three fields for
-            - gen_ai.usage.output_cost (
-            - gen_ai.usage.cost (
-            - gen_ai.cost (
-
-
-
-
-            ...     context=large_doc,
-            ...     messages="Summarize this"
-            ...     )
-            >>>
-            >>> # Get comprehensive metadata
-            >>> metadata = response.get_laminar_metadata()
-            >>>
-            >>> # Track generation cost
-            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
-            >>> if cost > 0:
-            ...     print(f"Generation cost: ${cost:.4f}")
-            >>>
-            >>> # Monitor token usage
-            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
-            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
-            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
-            >>>
-            >>> # Check cache effectiveness
-            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
-            >>> if cached > 0:
-            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
-            ...     savings = (cached / total) * 100
-            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
-            >>>
-            >>> # Calculate cost per token
-            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
-            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
-            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
-
-        Note:
-            - Cost availability depends on LiteLLM proxy configuration
-            - Not all providers return cost information
-            - Cached tokens reduce actual cost but may not be reflected
-            - Used internally by tracing but accessible for cost analysis
+        Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
+
+        Cost availability depends on LiteLLM proxy configuration. Not all providers
+        return cost information. Cached tokens reduce actual cost but may not be reflected.
+        Used internally by tracing but accessible for cost analysis.
         """
         metadata: dict[str, str | int | float] = deepcopy(self._metadata)
 
         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
             "gen_ai.response.id": self.id,
-            "gen_ai.
-            "get_ai.system": "litellm",
+            "gen_ai.system": "litellm",
         })
 
         # Add usage metadata if available
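The cost-reading pattern from the removed docstring example still works against these keys; a sketch (import path assumed, and whether a cost is present depends on the LiteLLM proxy as noted above):

```python
import asyncio

from ai_pipeline_core import llm  # import path assumed

async def main() -> None:
    response = await llm.generate("gpt-5.1", messages="Hello")
    metadata = response.get_laminar_metadata()

    cost = metadata.get("gen_ai.usage.output_cost", 0)  # mirrored in gen_ai.usage.cost and gen_ai.cost
    if cost:
        print(f"Generation cost: ${cost:.4f}")
    if cached := metadata.get("gen_ai.usage.cached_tokens", 0):
        print(f"Cache hit: {cached} tokens")

asyncio.run(main())
```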
@@ -242,21 +204,19 @@ class ModelResponse(ChatCompletion):
                 cost = float(self.usage.cost)  # type: ignore[attr-defined]
 
             # Add reasoning tokens if available
-            if completion_details := self.usage.completion_tokens_details:
-
-                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+            if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
 
             # Add cached tokens if available
-            if prompt_details := self.usage.prompt_tokens_details:
-
-                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+            if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
 
         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "
+                "gen_ai.cost": cost,
             })
 
         for key, value in self._model_options.items():
@@ -266,7 +226,7 @@ class ModelResponse(ChatCompletion):
 
         other_fields = self.__dict__
         for key, value in other_fields.items():
-            if key in
+            if key in {"_model_options", "_metadata", "choices"}:
                 continue
             try:
                 metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
@@ -275,7 +235,7 @@ class ModelResponse(ChatCompletion):
 
         message = self.choices[0].message
         for key, value in message.__dict__.items():
-            if key in
+            if key in {"content"}:
                 continue
             metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)
 
@@ -294,16 +254,13 @@ class ModelResponse(ChatCompletion):
         if not self.content:
             raise ValueError("Empty response content")
 
-        if response_format := self._model_options.get("response_format"):
-
-            response_format.model_validate_json(self.content)
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)
 
 
-class StructuredModelResponse(ModelResponse, Generic[T]):
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.
 
-    @public
-
     Primary usage is accessing the .parsed property for the structured data.
     """
 
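StructuredModelResponse keeps .parsed as the primary accessor. A heavily hedged sketch follows; this hunk only shows that response_format is validated internally and that generate_structured() sets it, so the exact generate_structured signature below is an assumption:

```python
import asyncio

from pydantic import BaseModel

from ai_pipeline_core import llm  # import path and call signature assumed

class ReleaseSummary(BaseModel):
    title: str
    highlights: list[str]

async def main() -> None:
    response = await llm.generate_structured("gpt-5.1", ReleaseSummary, messages="Summarize the 0.4.1 changes")
    summary = response.parsed  # typed as ReleaseSummary
    print(summary.title)

asyncio.run(main())
```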
ai_pipeline_core/llm/model_types.py

@@ -10,44 +10,41 @@ Model categories:
 - Search models: Models with web search capabilities
 """
 
-from typing import Literal
+from typing import Literal
 
-ModelName
+type ModelName = (
     Literal[
         # Core models
-        "gemini-
-        "gpt-5",
-        "grok-4",
+        "gemini-3-pro",
+        "gpt-5.1",
         # Small models
-        "gemini-
-        "gpt-5-
-        "grok-4-fast",
+        "gemini-3-flash",
+        "gpt-5-mini",
+        "grok-4.1-fast",
         # Search models
-        "gemini-
+        "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
         "sonar-pro-search",
-        "gpt-4o-search",
-        "grok-4-fast-search",
     ]
     | str
 )
 """Type-safe model name identifiers with support for custom models.
 
-@public
-
 Provides IDE autocompletion for common model names while allowing any
 string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.
 
-
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -61,22 +58,7 @@ Using custom models:
 - Custom models work seamlessly as strings
 - No need for Union types or additional type aliases
 
-
-
-
->>> # Predefined model with IDE autocomplete
->>> model: ModelName = "gpt-5"  # IDE suggests common models
->>> response = await llm.generate(model, messages="Hello")
->>>
->>> # Custom model works directly
->>> model: ModelName = "custom-model-v2"  # Any string is valid
->>> response = await llm.generate(model, messages="Hello")
->>>
->>> # Both types work seamlessly
->>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
-
-Note:
-    The ModelName type includes both predefined literals and str,
-    allowing full flexibility while maintaining IDE support for
-    common models.
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """
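Echoing the usage examples removed from the docstring, updated to the new literals (import path assumed):

```python
from ai_pipeline_core.llm import ModelName  # import path assumed

model: ModelName = "gpt-5.1"              # predefined literal, IDE autocompletes
custom: ModelName = "my-custom-model-v2"  # any string also satisfies the type
models: list[ModelName] = ["gemini-3-pro", "grok-4.1-fast", custom]
```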
ai_pipeline_core/logging/__init__.py

@@ -2,11 +2,6 @@
 
 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
-Example:
-    >>> from ai_pipeline_core import get_pipeline_logger
-    >>> logger = get_pipeline_logger(__name__)
-    >>> logger.info("Processing started")
 """
 
 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -14,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin
 
 __all__ = [
     "LoggerMixin",
-    "StructuredLoggerMixin",
     "LoggingConfig",
-    "
+    "StructuredLoggerMixin",
     "get_pipeline_logger",
+    "setup_logging",
 ]