ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +83 -119
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +14 -15
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +349 -1062
- ai_pipeline_core/documents/mime_type.py +40 -85
- ai_pipeline_core/documents/utils.py +62 -7
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +5 -3
- ai_pipeline_core/llm/ai_messages.py +284 -73
- ai_pipeline_core/llm/client.py +462 -209
- ai_pipeline_core/llm/model_options.py +86 -53
- ai_pipeline_core/llm/model_response.py +187 -241
- ai_pipeline_core/llm/model_types.py +34 -54
- ai_pipeline_core/logging/__init__.py +2 -9
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -43
- ai_pipeline_core/logging/logging_mixin.py +17 -51
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/observability/tracing.py +640 -0
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +26 -105
- ai_pipeline_core/settings.py +41 -32
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core/documents/flow_document.py +0 -128
- ai_pipeline_core/documents/task_document.py +0 -133
- ai_pipeline_core/documents/temporary_document.py +0 -95
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -314
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -717
- ai_pipeline_core/prefect.py +0 -54
- ai_pipeline_core/simple_runner/__init__.py +0 -24
- ai_pipeline_core/simple_runner/cli.py +0 -255
- ai_pipeline_core/simple_runner/simple_runner.py +0 -385
- ai_pipeline_core/tracing.py +0 -475
- ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py

@@ -1,30 +1,40 @@
 """Model response structures for LLM interactions.
 
-
-
-Provides enhanced response classes that wrap OpenAI API responses
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
-import
+import json
+from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar
 
-from openai.types.chat import ChatCompletion
-from
+from openai.types.chat import ChatCompletion
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
 
-T = TypeVar(
+T = TypeVar(
+    "T",
+    bound=BaseModel,
+)
 """Type parameter for structured response Pydantic models."""
 
 
+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.
 
-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate(messages=messages)
-    >>> messages.
+    >>> response = await llm.generate("gpt-5.1", messages=messages)
+    >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
     The two main interactions with ModelResponse:
@@ -34,168 +44,153 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.
 
-
-
-
-    >>> messages = AIMessages("Explain quantum computing")
-    >>> response = await generate(messages=messages)
-    >>>
-    >>> # Primary usage: add to conversation
-    >>> messages.add(response)
-    >>>
-    >>> # Access generated text
-    >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """
 
-
-
-
-
-
+    def __init__(
+        self,
+        chat_completion: ChatCompletion,
+        model_options: dict[str, Any],
+        metadata: dict[str, Any],
+        usage: CompletionUsage | None = None,
+    ) -> None:
+        """Initialize ModelResponse from ChatCompletion.
 
-
-
-        headers dict if not provided.
+        Wraps an OpenAI ChatCompletion object with additional metadata
+        and model options for tracking and observability.
 
         Args:
-            chat_completion:
-
-
-
-
-
-
-            >>>
-            >>> # Direct initialization (mainly for testing)
-            >>> response = ModelResponse(
-            ...     id="test",
-            ...     model="gpt-5",
-            ...     choices=[...]
-            ... )
+            chat_completion: ChatCompletion object from the API.
+            model_options: Model configuration options used for the request.
+                Stored for metadata extraction and tracing.
+            metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+                Includes timing information and custom tags.
+            usage: Optional usage information from streaming response.
+
         """
-
-
-
-
-
-
-        #
-
-
-
+        data = chat_completion.model_dump()
+
+        # fixes issue where the role is "assistantassistant" instead of "assistant"
+        valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
+        for i in range(len(data["choices"])):
+            data["choices"][i]["message"]["role"] = "assistant"
+            # Only update finish_reason if it's not already a valid value
+            current_finish_reason = data["choices"][i].get("finish_reason")
+            if current_finish_reason not in valid_finish_reasons:
+                data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]
+
+        super().__init__(**data)
+
+        self._model_options = model_options
+        self._metadata = metadata
+        if usage:
+            self.usage = usage
 
     @property
     def content(self) -> str:
         """Get the generated text content.
 
-        @public
-
         Primary property for accessing the LLM's response text.
-        This
+        This is the main property you'll use with ModelResponse.
 
         Returns:
             Generated text from the model, or empty string if none.
 
-        Example:
-            >>> response = await generate(messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.add(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
-
-
-    def set_model_options(self, options: dict[str, Any]) -> None:
-        """Store the model configuration used for generation.
-
-        Saves a deep copy of the options used for this generation,
-        excluding the messages for brevity.
+        content = self.choices[0].message.content or ""
+        return content.split("</think>")[-1].strip()
 
-
-
+    @property
+    def reasoning_content(self) -> str:
+        """Get the reasoning content.
 
-
-
-        Called internally by the generation functions.
+        Returns:
+            The reasoning content from the model, or empty string if none.
         """
-
-        if "
-
-
-
-        ""
+        message = self.choices[0].message
+        if reasoning_content := getattr(message, "reasoning_content", None):
+            return reasoning_content
+        if not message.content or "</think>" not in message.content:
+            return ""
+        return message.content.split("</think>")[0].strip()
 
-
-
-
-        Args:
-            headers: Dictionary of HTTP headers from the response.
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
 
-
-
-            - x-litellm-call-id: Unique call identifier
-            - x-litellm-model-id: Actual model used
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
         """
-
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
 
-    def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
            Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
            - litellm.*: All LiteLLM-specific headers
-            - gen_ai.usage
+            - gen_ai.usage.prompt_tokens: Input token count
+            - gen_ai.usage.completion_tokens: Output token count
+            - gen_ai.usage.total_tokens: Total tokens used
+            - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+            - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+            - gen_ai.usage.output_cost: Generation cost in dollars
+            - gen_ai.usage.cost: Alternative cost field (same value)
+            - gen_ai.cost: Simple cost field (same value)
            - gen_ai.response.*: Response identifiers
-            - gen_ai.cost: Cost information
            - model_options.*: Configuration used
 
-
-
-
-
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
 
-
-
-
-
-        metadata: dict[str, str | int | float] = {}
+            Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
 
-
-        cost
-
-
-
-        if header.startswith("x-litellm-"):
-            header_name = header.replace("x-litellm-", "").lower()
-            metadata[f"litellm.{header_name}"] = value
+            Cost availability depends on LiteLLM proxy configuration. Not all providers
+            return cost information. Cached tokens reduce actual cost but may not be reflected.
+            Used internally by tracing but accessible for cost analysis.
+        """
+        metadata: dict[str, str | int | float] = deepcopy(self._metadata)
 
         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
-            "gen_ai.response.id":
-            "gen_ai.
-            "get_ai.system": "litellm",
+            "gen_ai.response.id": self.id,
+            "gen_ai.system": "litellm",
        })
 
         # Add usage metadata if available
+        cost = None
         if self.usage:
             metadata.update({
                 "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -209,145 +204,96 @@ class ModelResponse(ChatCompletion):
                 cost = float(self.usage.cost)  # type: ignore[attr-defined]
 
             # Add reasoning tokens if available
-            if completion_details := self.usage.completion_tokens_details:
-
-                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+            if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
 
             # Add cached tokens if available
-            if prompt_details := self.usage.prompt_tokens_details:
-
-                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+            if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
 
         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "
+                "gen_ai.cost": cost,
             })
 
-
-
-
+        for key, value in self._model_options.items():
+            if "messages" in key:
+                continue
+            metadata[f"model_options.{key}"] = str(value)
+
+        other_fields = self.__dict__
+        for key, value in other_fields.items():
+            if key in {"_model_options", "_metadata", "choices"}:
+                continue
+            try:
+                metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+            except Exception:
+                metadata[f"response.raw.{key}"] = str(value)
+
+        message = self.choices[0].message
+        for key, value in message.__dict__.items():
+            if key in {"content"}:
+                continue
+            metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)
 
         return metadata
 
+    def validate_output(self) -> None:
+        """Validate response output content and format.
+
+        Checks that response has non-empty content and validates against
+        response_format if structured output was requested.
+
+        Raises:
+            ValueError: If response content is empty.
+            ValidationError: If content doesn't match response_format schema.
+        """
+        if not self.content:
+            raise ValueError("Empty response content")
+
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)
 
-
+
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.
 
-
-
-    Primary usage is adding to AIMessages and accessing .parsed property:
-
-    >>> class Analysis(BaseModel):
-    ...     sentiment: float
-    ...     summary: str
-    >>>
-    >>> response = await generate_structured(
-    ...     response_format=Analysis,
-    ...     messages="Analyze this text..."
-    ... )
-    >>>
-    >>> # Primary usage: access parsed model
-    >>> analysis = response.parsed
-    >>> print(f"Sentiment: {analysis.sentiment}")
-    >>>
-    >>> # Can add to messages for conversation
-    >>> messages.add(response)
-
-    The two main interactions:
-    1. Accessing .parsed property for the structured data
-    2. Adding to AIMessages for conversation continuity
-
-    These patterns cover virtually all use cases. Advanced features exist
-    but should only be used when absolutely necessary.
-
-    Type Parameter:
-        T: The Pydantic model type for the structured output.
-
-    Note:
-        Extends ModelResponse with type-safe parsed data access.
-        Other inherited properties should rarely be needed.
+    Primary usage is accessing the .parsed property for the structured data.
     """
 
-
-
-
-        parsed_value: T | None = None,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize with ChatCompletion and parsed value.
+    @classmethod
+    def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+        """Convert a ModelResponse to StructuredModelResponse.
 
-
-
-        from ParsedChatCompletion automatically.
+        Takes an existing ModelResponse and converts it to a StructuredModelResponse
+        for accessing parsed structured output. Used internally by generate_structured().
 
         Args:
-
-            parsed_value: Pre-parsed Pydantic model instance.
-                If None, attempts extraction from
-                ParsedChatCompletion.
-            **kwargs: Additional ChatCompletion parameters.
-
-        Extraction behavior:
-        1. Use provided parsed_value if given
-        2. Extract from ParsedChatCompletion if available
-        3. Store as None (access will raise ValueError)
-
-        Note:
-            Usually created internally by generate_structured().
-            The parsed value is validated by Pydantic automatically.
-        """
-        super().__init__(chat_completion, **kwargs)
-        self._parsed_value: T | None = parsed_value
+            model_response: The ModelResponse to convert.
 
-
-
-
-
-
-        self._parsed_value = message.parsed  # type: ignore[attr-defined]
+        Returns:
+            StructuredModelResponse with lazy parsing support.
+        """
+        model_response.__class__ = cls
+        return model_response  # type: ignore[return-value]
 
     @property
     def parsed(self) -> T:
-        """Get the parsed
+        """Get the parsed structured output.
 
-
-
-        Primary property for accessing structured output.
-        This is the main reason to use generate_structured().
+        Lazily parses the JSON content into the specified Pydantic model.
+        Result is cached after first access.
 
         Returns:
-
+            Parsed Pydantic model instance.
 
         Raises:
-
-
-        Example:
-            >>> class UserInfo(BaseModel):
-            ...     name: str
-            ...     age: int
-            >>>
-            >>> response = await generate_structured(
-            ...     response_format=UserInfo,
-            ...     messages="Extract user info..."
-            ... )
-            >>>
-            >>> # Primary usage: get the parsed model
-            >>> user = response.parsed
-            >>> print(f"{user.name} is {user.age} years old")
-            >>>
-            >>> # Can also add to messages
-            >>> messages.add(response)
-
-        Note:
-            Type-safe with full IDE support. This property covers
-            99% of structured response use cases.
+            ValidationError: If content doesn't match the response_format schema.
         """
-        if self
-
-
-
-            "No parsed content available. This should not happen for StructuredModelResponse."
-        )
+        if not hasattr(self, "_parsed_value"):
+            response_format = self._model_options.get("response_format")
+            self._parsed_value: T = response_format.model_validate_json(self.content)  # type: ignore[return-value]
+        return self._parsed_value
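
Taken together, the reworked ModelResponse surface can be exercised roughly as follows. This is a usage sketch assembled from the docstrings shown in the diff above; the top-level AIMessages re-export and the exact generate_structured() signature are assumptions rather than confirmed 0.4.1 API details.

import asyncio

from pydantic import BaseModel

from ai_pipeline_core import llm  # llm module import taken from the 0.1.12 docstring example
from ai_pipeline_core import AIMessages  # assumed top-level re-export of llm/ai_messages.py


class Analysis(BaseModel):
    sentiment: float
    summary: str


async def main() -> None:
    messages = AIMessages("Explain quantum computing")

    # Plain generation: the 0.4.1 docstring passes the model name first.
    response = await llm.generate("gpt-5.1", messages=messages)
    messages.append(response)          # keep the assistant turn in the conversation
    print(response.content)            # generated text, with any </think> block stripped
    print(response.reasoning_content)  # reasoning text; empty string if the model emits none

    # Search-enabled models (e.g. sonar-pro-search) attach Citation(title, url) objects.
    for citation in response.citations:
        print(citation.title, citation.url)

    # Structured output: .parsed lazily validates the JSON content against response_format.
    analysis = await llm.generate_structured(  # signature assumed by analogy with generate()
        "gpt-5.1", response_format=Analysis, messages=messages
    )
    print(analysis.parsed.summary)


asyncio.run(main())

Because ModelResponse still subclasses OpenAI's ChatCompletion, code written against the OpenAI types keeps working; the wrapper only layers metadata, citation, and validation helpers on top.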
ai_pipeline_core/llm/model_types.py

@@ -10,40 +10,41 @@ Model categories:
 - Search models: Models with web search capabilities
 """
 
-from typing import Literal
+from typing import Literal
 
-ModelName
-
-
-
-
-
-
-
-
-
-
-
-
-
-]
-
+type ModelName = (
+    Literal[
+        # Core models
+        "gemini-3-pro",
+        "gpt-5.1",
+        # Small models
+        "gemini-3-flash",
+        "gpt-5-mini",
+        "grok-4.1-fast",
+        # Search models
+        "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
+        "sonar-pro-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
-
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
-
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
-
-Note: These are example common model names as of Q3 2025. Actual availability
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -51,34 +52,13 @@ Model categories:
         Models with integrated web search capabilities for retrieving
         and synthesizing current information.
 
-
-
-
-
-
-3. Or simply use strings: model = "any-model-via-litellm"
-
-Example:
-    >>> from ai_pipeline_core import llm, ModelName
-    >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
-    >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
-
-Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
-
-
-
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """
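
As a quick illustration of what the Literal-plus-str union buys, the sketch below annotates both a predefined and a custom model name with the same type. The top-level ModelName import is taken from the removed 0.1.12 docstring example and is assumed to still be exported in 0.4.1; the custom model name is hypothetical.

from ai_pipeline_core import ModelName  # assumed re-export, as in the 0.1.12 docstring

model: ModelName = "gpt-5.1"          # predefined literal: appears in IDE autocomplete
custom: ModelName = "company-llm-v1"  # hypothetical custom model: accepted because the alias includes str

The trade-off is that, with str in the union, a type checker can no longer flag a misspelled model name; the alias favors flexibility over strictness, matching the "Using custom models" note above.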