ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py
@@ -1,30 +1,40 @@
 """Model response structures for LLM interactions.
 
-@public
-
-Provides enhanced response classes that wrap OpenAI API responses
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
-import copy
+import json
+from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar
 
-from openai.types.chat import ChatCompletion, ParsedChatCompletion
-from pydantic import BaseModel, Field
+from openai.types.chat import ChatCompletion
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
 
-T = TypeVar("T", bound=BaseModel)
+T = TypeVar(
+    "T",
+    bound=BaseModel,
+)
 """Type parameter for structured response Pydantic models."""
 
 
+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.
 
-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate(messages=messages)
-    >>> messages.add(response)  # Add assistant response to conversation
+    >>> response = await llm.generate("gpt-5.1", messages=messages)
+    >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
     The two main interactions with ModelResponse:
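The hunk above adds the frozen Citation dataclass alongside the new imports. Because it is frozen (and keeps the default eq=True), instances are immutable and hashable, so duplicates can be collapsed with a set. A small sketch; the direct import path is assumed from the file list above, and in practice instances come from ModelResponse.citations rather than manual construction:

```python
from ai_pipeline_core.llm.model_response import Citation  # path assumed from the file list above

sources = [
    Citation(title="LiteLLM docs", url="https://docs.litellm.ai/"),
    Citation(title="LiteLLM docs", url="https://docs.litellm.ai/"),
]

unique = set(sources)  # frozen dataclasses with eq=True are hashable
assert len(unique) == 1
# sources[0].url = "elsewhere"  # would raise dataclasses.FrozenInstanceError
```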
@@ -34,168 +44,153 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.
 
-    Example:
-        >>> from ai_pipeline_core.llm import AIMessages, generate
-        >>>
-        >>> messages = AIMessages("Explain quantum computing")
-        >>> response = await generate(messages=messages)
-        >>>
-        >>> # Primary usage: add to conversation
-        >>> messages.add(response)
-        >>>
-        >>> # Access generated text
-        >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """
 
-    headers: dict[str, str] = Field(default_factory=dict)
-    model_options: dict[str, Any] = Field(default_factory=dict)
-
-    def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
-        """Initialize ModelResponse from ChatCompletion or kwargs.
+    def __init__(
+        self,
+        chat_completion: ChatCompletion,
+        model_options: dict[str, Any],
+        metadata: dict[str, Any],
+        usage: CompletionUsage | None = None,
+    ) -> None:
+        """Initialize ModelResponse from ChatCompletion.
 
-        Can be initialized from an existing ChatCompletion object or
-        directly from keyword arguments. Automatically initializes
-        headers dict if not provided.
+        Wraps an OpenAI ChatCompletion object with additional metadata
+        and model options for tracking and observability.
 
         Args:
-            chat_completion: Optional ChatCompletion to wrap.
-            **kwargs: Direct initialization parameters if no
-                ChatCompletion provided.
-
-        Example:
-            >>> # From ChatCompletion
-            >>> response = ModelResponse(chat_completion_obj)
-            >>>
-            >>> # Direct initialization (mainly for testing)
-            >>> response = ModelResponse(
-            ...     id="test",
-            ...     model="gpt-5",
-            ...     choices=[...]
-            ... )
+            chat_completion: ChatCompletion object from the API.
+            model_options: Model configuration options used for the request.
+                Stored for metadata extraction and tracing.
+            metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+                Includes timing information and custom tags.
+            usage: Optional usage information from streaming response.
+
         """
-        if chat_completion:
-            # Copy all attributes from the ChatCompletion instance
-            data = chat_completion.model_dump()
-            data["headers"] = {}  # Add default headers
-            super().__init__(**data)
-        else:
-            # Initialize from kwargs
-            if "headers" not in kwargs:
-                kwargs["headers"] = {}
-            super().__init__(**kwargs)
+        data = chat_completion.model_dump()
+
+        # fixes issue where the role is "assistantassistant" instead of "assistant"
+        valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
+        for i in range(len(data["choices"])):
+            data["choices"][i]["message"]["role"] = "assistant"
+            # Only update finish_reason if it's not already a valid value
+            current_finish_reason = data["choices"][i].get("finish_reason")
+            if current_finish_reason not in valid_finish_reasons:
+                data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]
+
+        super().__init__(**data)
+
+        self._model_options = model_options
+        self._metadata = metadata
+        if usage:
+            self.usage = usage
 
     @property
     def content(self) -> str:
         """Get the generated text content.
 
-        @public
-
         Primary property for accessing the LLM's response text.
-        This covers 99% of use cases with ModelResponse.
+        This is the main property you'll use with ModelResponse.
 
         Returns:
            Generated text from the model, or empty string if none.
 
-        Example:
-            >>> response = await generate(messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.add(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
-        return self.choices[0].message.content or ""
-
-    def set_model_options(self, options: dict[str, Any]) -> None:
-        """Store the model configuration used for generation.
-
-        Saves a deep copy of the options used for this generation,
-        excluding the messages for brevity.
+        content = self.choices[0].message.content or ""
+        return content.split("</think>")[-1].strip()
 
-        Args:
-            options: Dictionary of model options from the API call.
+    @property
+    def reasoning_content(self) -> str:
+        """Get the reasoning content.
 
-        Note:
-            Messages are removed to avoid storing large prompts.
-            Called internally by the generation functions.
+        Returns:
+            The reasoning content from the model, or empty string if none.
         """
-        self.model_options = copy.deepcopy(options)
-        if "messages" in self.model_options:
-            del self.model_options["messages"]
-
-    def set_headers(self, headers: dict[str, str]) -> None:
-        """Store HTTP response headers.
+        message = self.choices[0].message
+        if reasoning_content := getattr(message, "reasoning_content", None):
+            return reasoning_content
+        if not message.content or "</think>" not in message.content:
+            return ""
+        return message.content.split("</think>")[0].strip()
 
-        Saves response headers which contain LiteLLM metadata
-        including cost information and call IDs.
-
-        Args:
-            headers: Dictionary of HTTP headers from the response.
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
 
-        Headers of interest:
-            - x-litellm-response-cost: Generation cost
-            - x-litellm-call-id: Unique call identifier
-            - x-litellm-model-id: Actual model used
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
         """
-        self.headers = copy.deepcopy(headers)
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
 
-    def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-        tracing and monitoring in the LMNR platform.
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
             Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
             - litellm.*: All LiteLLM-specific headers
-            - gen_ai.usage.*: Token usage statistics
+            - gen_ai.usage.prompt_tokens: Input token count
+            - gen_ai.usage.completion_tokens: Output token count
+            - gen_ai.usage.total_tokens: Total tokens used
+            - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+            - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+            - gen_ai.usage.output_cost: Generation cost in dollars
+            - gen_ai.usage.cost: Alternative cost field (same value)
+            - gen_ai.cost: Simple cost field (same value)
             - gen_ai.response.*: Response identifiers
-            - gen_ai.cost: Cost information
             - model_options.*: Configuration used
 
-        Example:
-            >>> response = await llm.generate(...)
-            >>> metadata = response.get_laminar_metadata()
-            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
 
-        Note:
-            Used internally by the tracing system for observability.
-            Cost is extracted from headers or usage object.
-        """
-        metadata: dict[str, str | int | float] = {}
+            Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
 
-        litellm_id = self.headers.get("x-litellm-call-id")
-        cost = float(self.headers.get("x-litellm-response-cost") or 0)
-
-        # Add all x-litellm-* headers
-        for header, value in self.headers.items():
-            if header.startswith("x-litellm-"):
-                header_name = header.replace("x-litellm-", "").lower()
-                metadata[f"litellm.{header_name}"] = value
+            Cost availability depends on LiteLLM proxy configuration. Not all providers
+            return cost information. Cached tokens reduce actual cost but may not be reflected.
+            Used internally by tracing but accessible for cost analysis.
+        """
+        metadata: dict[str, str | int | float] = deepcopy(self._metadata)
 
         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
-            "gen_ai.response.id": litellm_id or self.id,
-            "gen_ai.response.model": self.model,
-            "get_ai.system": "litellm",
+            "gen_ai.response.id": self.id,
+            "gen_ai.system": "litellm",
         })
 
         # Add usage metadata if available
+        cost = None
         if self.usage:
             metadata.update({
                 "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -209,145 +204,96 @@ class ModelResponse(ChatCompletion):
                 cost = float(self.usage.cost)  # type: ignore[attr-defined]
 
             # Add reasoning tokens if available
-            if completion_details := self.usage.completion_tokens_details:
-                if reasoning_tokens := completion_details.reasoning_tokens:
-                    metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+            if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
 
             # Add cached tokens if available
-            if prompt_details := self.usage.prompt_tokens_details:
-                if cached_tokens := prompt_details.cached_tokens:
-                    metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+            if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
 
         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "get_ai.cost": cost,
+                "gen_ai.cost": cost,
             })
 
-        if self.model_options:
-            for key, value in self.model_options.items():
-                metadata[f"model_options.{key}"] = str(value)
+        for key, value in self._model_options.items():
+            if "messages" in key:
+                continue
+            metadata[f"model_options.{key}"] = str(value)
+
+        other_fields = self.__dict__
+        for key, value in other_fields.items():
+            if key in {"_model_options", "_metadata", "choices"}:
+                continue
+            try:
+                metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+            except Exception:
+                metadata[f"response.raw.{key}"] = str(value)
+
+        message = self.choices[0].message
+        for key, value in message.__dict__.items():
+            if key in {"content"}:
+                continue
+            metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)
 
         return metadata
 
+    def validate_output(self) -> None:
+        """Validate response output content and format.
+
+        Checks that response has non-empty content and validates against
+        response_format if structured output was requested.
+
+        Raises:
+            ValueError: If response content is empty.
+            ValidationError: If content doesn't match response_format schema.
+        """
+        if not self.content:
+            raise ValueError("Empty response content")
+
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)
 
-class StructuredModelResponse(ModelResponse, Generic[T]):
+
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.
 
-    @public
-
-    Primary usage is adding to AIMessages and accessing .parsed property:
-
-    >>> class Analysis(BaseModel):
-    ...     sentiment: float
-    ...     summary: str
-    >>>
-    >>> response = await generate_structured(
-    ...     response_format=Analysis,
-    ...     messages="Analyze this text..."
-    ... )
-    >>>
-    >>> # Primary usage: access parsed model
-    >>> analysis = response.parsed
-    >>> print(f"Sentiment: {analysis.sentiment}")
-    >>>
-    >>> # Can add to messages for conversation
-    >>> messages.add(response)
-
-    The two main interactions:
-    1. Accessing .parsed property for the structured data
-    2. Adding to AIMessages for conversation continuity
-
-    These patterns cover virtually all use cases. Advanced features exist
-    but should only be used when absolutely necessary.
-
-    Type Parameter:
-        T: The Pydantic model type for the structured output.
-
-    Note:
-        Extends ModelResponse with type-safe parsed data access.
-        Other inherited properties should rarely be needed.
+    Primary usage is accessing the .parsed property for the structured data.
     """
 
-    def __init__(
-        self,
-        chat_completion: ChatCompletion | None = None,
-        parsed_value: T | None = None,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize with ChatCompletion and parsed value.
+    @classmethod
+    def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+        """Convert a ModelResponse to StructuredModelResponse.
 
-        Creates a structured response from a base completion and
-        optionally a pre-parsed value. Can extract parsed value
-        from ParsedChatCompletion automatically.
+        Takes an existing ModelResponse and converts it to a StructuredModelResponse
+        for accessing parsed structured output. Used internally by generate_structured().
 
         Args:
-            chat_completion: Base chat completion response.
-            parsed_value: Pre-parsed Pydantic model instance.
-                If None, attempts extraction from
-                ParsedChatCompletion.
-            **kwargs: Additional ChatCompletion parameters.
-
-        Extraction behavior:
-            1. Use provided parsed_value if given
-            2. Extract from ParsedChatCompletion if available
-            3. Store as None (access will raise ValueError)
-
-        Note:
-            Usually created internally by generate_structured().
-            The parsed value is validated by Pydantic automatically.
-        """
-        super().__init__(chat_completion, **kwargs)
-        self._parsed_value: T | None = parsed_value
+            model_response: The ModelResponse to convert.
 
-        # Extract parsed value from ParsedChatCompletion if available
-        if chat_completion and isinstance(chat_completion, ParsedChatCompletion):
-            if chat_completion.choices:  # type: ignore[attr-defined]
-                message = chat_completion.choices[0].message  # type: ignore[attr-defined]
-                if hasattr(message, "parsed"):  # type: ignore
-                    self._parsed_value = message.parsed  # type: ignore[attr-defined]
+        Returns:
+            StructuredModelResponse with lazy parsing support.
+        """
+        model_response.__class__ = cls
+        return model_response  # type: ignore[return-value]
 
     @property
     def parsed(self) -> T:
-        """Get the parsed Pydantic model instance.
+        """Get the parsed structured output.
 
-        @public
-
-        Primary property for accessing structured output.
-        This is the main reason to use generate_structured().
+        Lazily parses the JSON content into the specified Pydantic model.
+        Result is cached after first access.
 
         Returns:
-            Validated instance of the Pydantic model type T.
+            Parsed Pydantic model instance.
 
         Raises:
-            ValueError: If no parsed content available (internal error).
-
-        Example:
-            >>> class UserInfo(BaseModel):
-            ...     name: str
-            ...     age: int
-            >>>
-            >>> response = await generate_structured(
-            ...     response_format=UserInfo,
-            ...     messages="Extract user info..."
-            ... )
-            >>>
-            >>> # Primary usage: get the parsed model
-            >>> user = response.parsed
-            >>> print(f"{user.name} is {user.age} years old")
-            >>>
-            >>> # Can also add to messages
-            >>> messages.add(response)
-
-        Note:
-            Type-safe with full IDE support. This property covers
-            99% of structured response use cases.
+            ValidationError: If content doesn't match the response_format schema.
         """
-        if self._parsed_value is not None:
-            return self._parsed_value
-
-        raise ValueError(
-            "No parsed content available. This should not happen for StructuredModelResponse."
-        )
+        if not hasattr(self, "_parsed_value"):
+            response_format = self._model_options.get("response_format")
+            self._parsed_value: T = response_format.model_validate_json(self.content)  # type: ignore[return-value]
+        return self._parsed_value
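With this change StructuredModelResponse no longer carries a pre-parsed value: from_model_response simply reclasses an existing ModelResponse, .parsed lazily validates the JSON content against the response_format stored in _model_options and caches the result, and validate_output gives an explicit check. A rough sketch of how a caller might use it; the exact generate_structured signature is not shown in this diff and is assumed here to mirror llm.generate:

```python
from pydantic import BaseModel

from ai_pipeline_core import llm


class Analysis(BaseModel):
    sentiment: float
    summary: str


async def analyze(text: str) -> Analysis:
    # Hypothetical call shape: model name first, as with llm.generate("gpt-5.1", ...)
    response = await llm.generate_structured(
        "gpt-5.1",
        messages=f"Analyze this text: {text}",
        response_format=Analysis,
    )
    response.validate_output()  # raises ValueError on empty content
    return response.parsed      # parsed lazily and cached as an Analysis instance
```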
ai_pipeline_core/llm/model_types.py
@@ -10,40 +10,41 @@ Model categories:
 - Search models: Models with web search capabilities
 """
 
-from typing import Literal, TypeAlias
+from typing import Literal
 
-ModelName: TypeAlias = Literal[
-    # Core models
-    "gemini-2.5-pro",
-    "gpt-5",
-    "grok-4",
-    # Small models
-    "gemini-2.5-flash",
-    "gpt-5-mini",
-    "grok-3-mini",
-    # Search models
-    "gemini-2.5-flash-search",
-    "sonar-pro-search",
-    "gpt-4o-search",
-    "grok-3-mini-search",
-]
-"""Type-safe model name identifiers.
+type ModelName = (
+    Literal[
+        # Core models
+        "gemini-3-pro",
+        "gpt-5.1",
+        # Small models
+        "gemini-3-flash",
+        "gpt-5-mini",
+        "grok-4.1-fast",
+        # Search models
+        "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
+        "sonar-pro-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
-@public
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
-Provides compile-time validation and IDE autocompletion for supported
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
-
-Note: These are example common model names as of Q3 2025. Actual availability
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-2.5-pro, gpt-5, grok-4):
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-2.5-flash, gpt-5-mini, grok-3-mini):
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -51,34 +52,13 @@ Model categories:
         Models with integrated web search capabilities for retrieving
         and synthesizing current information.
 
-Extending with custom models:
-    The generate functions accept any string, not just ModelName literals.
-    To add custom models for type safety:
-    1. Create a new type alias: CustomModel = Literal["my-model"]
-    2. Use Union: model: ModelName | CustomModel = "my-model"
-    3. Or simply use strings: model = "any-model-via-litellm"
-
-Example:
-    >>> from ai_pipeline_core import llm, ModelName
-    >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
-    >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
-
-Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
-See Also:
-    - llm.generate: Main generation function
-    - ModelOptions: Model configuration options
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """