ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -41,11 +41,11 @@ class ModelOptions(BaseModel):

  retries: Number of retry attempts on failure (default: 3).

- retry_delay_seconds: Seconds to wait between retries (default: 10).
+ retry_delay_seconds: Seconds to wait between retries (default: 20).

- timeout: Maximum seconds to wait for response (default: 300).
+ timeout: Maximum seconds to wait for response (default: 600).

- cache_ttl: Cache TTL for context messages (default: "5m").
+ cache_ttl: Cache TTL for context messages (default: "300s").
  String format like "60s", "5m", or None to disable caching.
  Applied to the last context message for efficient token reuse.

@@ -99,77 +99,11 @@ class ModelOptions(BaseModel):
  Merged with usage_tracking if both are set.
  Useful for beta features or provider-specific capabilities.

- Example:
- >>> # Basic configuration
- >>> options = ModelOptions(
- ... temperature=0.7,
- ... max_completion_tokens=1000
- ... )
- >>>
- >>> # With system prompt
- >>> options = ModelOptions(
- ... system_prompt="You are a helpful coding assistant",
- ... temperature=0.3 # Lower for code generation
- ... )
- >>>
- >>> # With custom cache TTL
- >>> options = ModelOptions(
- ... cache_ttl="300s", # Cache context for 5 minutes
- ... max_completion_tokens=1000
- ... )
- >>>
- >>> # Disable caching
- >>> options = ModelOptions(
- ... cache_ttl=None, # No context caching
- ... temperature=0.5
- ... )
- >>>
- >>> # For search-enabled models
- >>> options = ModelOptions(
- ... search_context_size="high", # Get more search results
- ... max_completion_tokens=2000
- ... )
- >>>
- >>> # For reasoning models
- >>> options = ModelOptions(
- ... reasoning_effort="high", # Deep reasoning
- ... timeout=600 # More time for complex reasoning
- ... )
- >>>
- >>> # With stop sequences
- >>> options = ModelOptions(
- ... stop=["STOP", "END", "\n\n"], # Stop on these sequences
- ... temperature=0.7
- ... )
- >>>
- >>> # With custom extra_body parameters
- >>> options = ModelOptions(
- ... extra_body={"custom_param": "value", "beta_feature": True},
- ... usage_tracking=True # Still tracks usage alongside custom params
- ... )
- >>>
- >>> # With user tracking for cost monitoring
- >>> options = ModelOptions(
- ... user="user_12345", # Track costs per user
- ... temperature=0.7
- ... )
- >>>
- >>> # With metadata for tracking and observability
- >>> options = ModelOptions(
- ... metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
- ... temperature=0.7
- ... )
-
- Note:
- - Not all options apply to all models
- - search_context_size only works with search models
- - reasoning_effort only works with models that support explicit reasoning
- - response_format is set internally by generate_structured()
- - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
- - stop sequences are limited to 4 by most providers
- - user identifier helps track costs per end-user (max 256 chars)
- - extra_body allows passing provider-specific parameters
- - usage_tracking is enabled by default for cost monitoring
+ Not all options apply to all models. search_context_size only works with search models,
+ reasoning_effort only works with models that support explicit reasoning, and
+ response_format is set internally by generate_structured(). cache_ttl accepts formats
+ like "120s", "5m", "1h" or None (default: "300s"). Stop sequences are limited to 4 by
+ most providers.
  """

  temperature: float | None = None
@@ -179,18 +113,19 @@ class ModelOptions(BaseModel):
  retries: int = 3
  retry_delay_seconds: int = 20
  timeout: int = 600
- cache_ttl: str | None = "5m"
+ cache_ttl: str | None = "300s"
  service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
  max_completion_tokens: int | None = None
  stop: str | list[str] | None = None
  response_format: type[BaseModel] | None = None
  verbosity: Literal["low", "medium", "high"] | None = None
+ stream: bool = False
  usage_tracking: bool = True
  user: str | None = None
  metadata: dict[str, str] | None = None
  extra_body: dict[str, Any] | None = None

- def to_openai_completion_kwargs(self) -> dict[str, Any]:
+ def to_openai_completion_kwargs(self) -> dict[str, Any]: # noqa: C901
  """Convert options to OpenAI API completion parameters.

  Transforms ModelOptions fields into the format expected by
@@ -221,16 +156,9 @@ class ModelOptions(BaseModel):
  {"web_search_options": {"search_context_size": "low|medium|high"}}
  Non-search models silently ignore this parameter.

- Example:
- >>> options = ModelOptions(temperature=0.5, timeout=60)
- >>> kwargs = options.to_openai_completion_kwargs()
- >>> kwargs
- {'timeout': 60, 'extra_body': {}, 'temperature': 0.5}
-
- Note:
- - system_prompt is handled separately in _process_messages()
- - retries and retry_delay_seconds are used by retry logic
- - extra_body always includes usage tracking for cost monitoring
+ system_prompt is handled separately in _process_messages().
+ retries and retry_delay_seconds are used by retry logic.
+ extra_body always includes usage tracking for cost monitoring.
  """
  kwargs: dict[str, Any] = {
  "timeout": self.timeout,
@@ -1,13 +1,12 @@
  """Model response structures for LLM interactions.

- @public
-
  Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
  with additional metadata, cost tracking, and structured output support.
  """

  import json
  from copy import deepcopy
+ from dataclasses import dataclass
  from typing import Any, Generic, TypeVar

  from openai.types.chat import ChatCompletion
@@ -21,14 +20,20 @@ T = TypeVar(
  """Type parameter for structured response Pydantic models."""


+ @dataclass(frozen=True)
+ class Citation:
+ """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+ title: str
+ url: str
+
+
  class ModelResponse(ChatCompletion):
  """Response wrapper for LLM text generation.

- @public
-
  Primary usage is adding to AIMessages for multi-turn conversations:

- >>> response = await llm.generate("gpt-5", messages=messages)
+ >>> response = await llm.generate("gpt-5.1", messages=messages)
  >>> messages.append(response) # Add assistant response to conversation
  >>> print(response.content) # Access generated text

@@ -39,22 +44,9 @@ class ModelResponse(ChatCompletion):
  Almost all use cases are covered by these two patterns. Advanced features
  like token usage and cost tracking are available but rarely needed.

- Example:
- >>> from ai_pipeline_core import llm, AIMessages
- >>>
- >>> messages = AIMessages(["Explain quantum computing"])
- >>> response = await llm.generate("gpt-5", messages=messages)
- >>>
- >>> # Primary usage: add to conversation
- >>> messages.append(response)
- >>>
- >>> # Access generated text
- >>> print(response.content)
-
- Note:
- Inherits from OpenAI's ChatCompletion for compatibility.
- Other properties (usage, model, id) should only be accessed
- when absolutely necessary.
+ Inherits from OpenAI's ChatCompletion for compatibility.
+ Other properties (usage, model, id) should only be accessed
+ when absolutely necessary.
  """

  def __init__(
@@ -77,21 +69,21 @@ class ModelResponse(ChatCompletion):
  Includes timing information and custom tags.
  usage: Optional usage information from streaming response.

- Example:
- >>> # Usually created internally by generate()
- >>> response = ModelResponse(
- ... chat_completion=completion,
- ... model_options={"temperature": 0.7, "model": "gpt-4"},
- ... metadata={"time_taken": 1.5, "first_token_time": 0.3}
- ... )
  """
  data = chat_completion.model_dump()

  # fixes issue where the role is "assistantassistant" instead of "assistant"
+ valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
  for i in range(len(data["choices"])):
- if role := data["choices"][i]["message"].get("role"):
- if role.startswith("assistant") and role != "assistant":
- data["choices"][i]["message"]["role"] = "assistant"
+ data["choices"][i]["message"]["role"] = "assistant"
+ # Only update finish_reason if it's not already a valid value
+ current_finish_reason = data["choices"][i].get("finish_reason")
+ if current_finish_reason not in valid_finish_reasons:
+ data["choices"][i]["finish_reason"] = "stop"
+ # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+ # but OpenAI's ChatCompletion only accepts type="url_citation")
+ if annotations := data["choices"][i]["message"].get("annotations"):
+ data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]

  super().__init__(**data)

@@ -104,22 +96,12 @@ class ModelResponse(ChatCompletion):
  def content(self) -> str:
  """Get the generated text content.

- @public
-
  Primary property for accessing the LLM's response text.
  This is the main property you'll use with ModelResponse.

  Returns:
  Generated text from the model, or empty string if none.

- Example:
- >>> response = await generate("gpt-5", messages="Hello")
- >>> text = response.content # The generated response
- >>>
- >>> # Common pattern: add to messages then use content
- >>> messages.append(response)
- >>> if "error" in response.content.lower():
- ... # Handle error case
  """
  content = self.choices[0].message.content or ""
  return content.split("</think>")[-1].strip()
@@ -128,8 +110,6 @@ class ModelResponse(ChatCompletion):
  def reasoning_content(self) -> str:
  """Get the reasoning content.

- @public
-
  Returns:
  The reasoning content from the model, or empty string if none.
  """
@@ -140,7 +120,19 @@ class ModelResponse(ChatCompletion):
  return ""
  return message.content.split("</think>")[0].strip()

- def get_laminar_metadata(self) -> dict[str, str | int | float]:
+ @property
+ def citations(self) -> list[Citation]:
+ """Get URL citations from search-enabled models.
+
+ Returns:
+ List of Citation objects with title and url. Empty list for non-search models.
+ """
+ annotations = self.choices[0].message.annotations
+ if not annotations:
+ return []
+ return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
+
+ def get_laminar_metadata(self) -> dict[str, str | int | float]: # noqa: C901
  """Extract metadata for LMNR (Laminar) observability including cost tracking.

  Collects comprehensive metadata about the generation for tracing,
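The new citations property pairs with the Citation dataclass added earlier in this diff, and the constructor above now filters annotations down to url_citation entries before they reach it. A hedged usage sketch, assuming llm is re-exported at the package top level as in the removed docstring examples:

    from ai_pipeline_core import llm

    async def summarize_with_sources(question: str) -> None:
        response = await llm.generate("gemini-3-flash-search", messages=question)
        print(response.content)
        for citation in response.citations:  # empty list for non-search models
            print(f"- {citation.title}: {citation.url}")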
@@ -175,56 +167,26 @@ class ModelResponse(ChatCompletion):
  1. x-litellm-response-cost header (primary)
  2. usage.cost attribute (fallback)

- Cost is stored in three fields for compatibility:
- - gen_ai.usage.output_cost (standard)
- - gen_ai.usage.cost (alternative)
- - gen_ai.cost (simple)
-
- Example:
- >>> response = await llm.generate(
- ... "gpt-5",
- ... context=large_doc,
- ... messages="Summarize this"
- ... )
- >>>
- >>> # Get comprehensive metadata
- >>> metadata = response.get_laminar_metadata()
- >>>
- >>> # Track generation cost
- >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
- >>> if cost > 0:
- ... print(f"Generation cost: ${cost:.4f}")
- >>>
- >>> # Monitor token usage
- >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
- >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
- >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
- >>>
- >>> # Check cache effectiveness
- >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
- >>> if cached > 0:
- ... total = metadata.get('gen_ai.usage.total_tokens', 1)
- ... savings = (cached / total) * 100
- ... print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
- >>>
- >>> # Calculate cost per token
- >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
- ... cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
- ... print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
-
- Note:
- - Cost availability depends on LiteLLM proxy configuration
- - Not all providers return cost information
- - Cached tokens reduce actual cost but may not be reflected
- - Used internally by tracing but accessible for cost analysis
+ Cost is stored in three fields for observability tool consumption:
+ - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+ - gen_ai.usage.cost (aggregated cost)
+ - gen_ai.cost (short-form)
+
+ Cost availability depends on LiteLLM proxy configuration. Not all providers
+ return cost information. Cached tokens reduce actual cost but may not be reflected.
+ Used internally by tracing but accessible for cost analysis.
  """
  metadata: dict[str, str | int | float] = deepcopy(self._metadata)

  # Add base metadata
+ # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+ # to override the span display name in the tree view, hiding the actual span name
+ # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+ # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+ # for LLM spans. Restore once Laminar shows both or prefers span name.
  metadata.update({
  "gen_ai.response.id": self.id,
- "gen_ai.response.model": self.model,
- "get_ai.system": "litellm",
+ "gen_ai.system": "litellm",
  })

  # Add usage metadata if available
@@ -242,21 +204,19 @@ class ModelResponse(ChatCompletion):
  cost = float(self.usage.cost) # type: ignore[attr-defined]

  # Add reasoning tokens if available
- if completion_details := self.usage.completion_tokens_details:
- if reasoning_tokens := completion_details.reasoning_tokens:
- metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+ if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+ metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens

  # Add cached tokens if available
- if prompt_details := self.usage.prompt_tokens_details:
- if cached_tokens := prompt_details.cached_tokens:
- metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+ if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+ metadata["gen_ai.usage.cached_tokens"] = cached_tokens

  # Add cost metadata if available
  if cost and cost > 0:
  metadata.update({
  "gen_ai.usage.output_cost": cost,
  "gen_ai.usage.cost": cost,
- "get_ai.cost": cost,
+ "gen_ai.cost": cost,
  })

  for key, value in self._model_options.items():
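The cost keys written above (gen_ai.usage.output_cost, gen_ai.usage.cost, gen_ai.cost) replace the long docstring example that was removed; a short hedged sketch of reading them back, with the top-level imports assumed as in the removed examples:

    from ai_pipeline_core import AIMessages, llm

    async def log_generation_cost(messages: AIMessages) -> None:
        response = await llm.generate("gpt-5.1", messages=messages)
        metadata = response.get_laminar_metadata()
        # Keys are only present when the LiteLLM proxy reports cost or cached tokens
        cost = float(metadata.get("gen_ai.usage.output_cost", 0))
        cached = int(metadata.get("gen_ai.usage.cached_tokens", 0))
        print(f"cost=${cost:.4f} cached_tokens={cached}")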
@@ -266,7 +226,7 @@ class ModelResponse(ChatCompletion):

  other_fields = self.__dict__
  for key, value in other_fields.items():
- if key in ["_model_options", "_metadata", "choices", "usage"]:
+ if key in {"_model_options", "_metadata", "choices"}:
  continue
  try:
  metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
@@ -275,7 +235,7 @@ class ModelResponse(ChatCompletion):

  message = self.choices[0].message
  for key, value in message.__dict__.items():
- if key in ["content"]:
+ if key in {"content"}:
  continue
  metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

@@ -294,16 +254,13 @@ class ModelResponse(ChatCompletion):
  if not self.content:
  raise ValueError("Empty response content")

- if response_format := self._model_options.get("response_format"):
- if isinstance(response_format, BaseModel):
- response_format.model_validate_json(self.content)
+ if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+ response_format.model_validate_json(self.content)


- class StructuredModelResponse(ModelResponse, Generic[T]):
+ class StructuredModelResponse(ModelResponse, Generic[T]): # noqa: UP046
  """Response wrapper for structured/typed LLM output.

- @public
-
  Primary usage is accessing the .parsed property for the structured data.
  """
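StructuredModelResponse is documented here only through its .parsed property, so the following is a heavily hedged sketch: generate_structured() is named elsewhere in this diff, but its exact signature is not shown and the keyword arguments below are assumptions.

    from pydantic import BaseModel

    from ai_pipeline_core import llm

    class Verdict(BaseModel):
        label: str
        confidence: float

    async def classify(text: str) -> Verdict:
        # Call shape is assumed; only response_format and .parsed are attested in the diff
        result = await llm.generate_structured("gpt-5.1", response_format=Verdict, messages=text)
        return result.parsed  # typed access to the structured payload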
 
@@ -10,44 +10,41 @@ Model categories:
  - Search models: Models with web search capabilities
  """

- from typing import Literal, TypeAlias
+ from typing import Literal

- ModelName: TypeAlias = (
+ type ModelName = (
  Literal[
  # Core models
- "gemini-2.5-pro",
- "gpt-5",
- "grok-4",
+ "gemini-3-pro",
+ "gpt-5.1",
  # Small models
- "gemini-2.5-flash",
- "gpt-5-nano",
- "grok-4-fast",
+ "gemini-3-flash",
+ "gpt-5-mini",
+ "grok-4.1-fast",
  # Search models
- "gemini-2.5-flash-search",
+ "gemini-3-flash-search",
+ "gpt-5-mini-search",
+ "grok-4.1-fast-search",
  "sonar-pro-search",
- "gpt-4o-search",
- "grok-4-fast-search",
  ]
  | str
  )
  """Type-safe model name identifiers with support for custom models.

- @public
-
  Provides IDE autocompletion for common model names while allowing any
  string for custom models. The type is a union of predefined literals
  and str, giving you the best of both worlds: suggestions for known
  models and flexibility for custom ones.

- Note: These are example common model names as of Q3 2025. Actual availability
+ These are example common model names as of Q1 2026. Actual availability
  depends on your LiteLLM proxy configuration and provider access.

  Model categories:
- Core models (gemini-2.5-pro, gpt-5, grok-4):
+ Core models (gemini-3-pro, gpt-5.1):
  High-capability models for complex tasks requiring deep reasoning,
  nuanced understanding, or creative generation.

- Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
+ Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
  Efficient models optimized for speed and cost, suitable for
  simpler tasks or high-volume processing.

@@ -61,22 +58,7 @@ Using custom models:
  - Custom models work seamlessly as strings
  - No need for Union types or additional type aliases

- Example:
- >>> from ai_pipeline_core import llm, ModelName
- >>>
- >>> # Predefined model with IDE autocomplete
- >>> model: ModelName = "gpt-5" # IDE suggests common models
- >>> response = await llm.generate(model, messages="Hello")
- >>>
- >>> # Custom model works directly
- >>> model: ModelName = "custom-model-v2" # Any string is valid
- >>> response = await llm.generate(model, messages="Hello")
- >>>
- >>> # Both types work seamlessly
- >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
-
- Note:
- The ModelName type includes both predefined literals and str,
- allowing full flexibility while maintaining IDE support for
- common models.
+ The ModelName type includes both predefined literals and str,
+ allowing full flexibility while maintaining IDE support for
+ common models.
  """
@@ -2,11 +2,6 @@

  Provides a Prefect-integrated logging facade for unified logging across pipelines.
  Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
- Example:
- >>> from ai_pipeline_core import get_pipeline_logger
- >>> logger = get_pipeline_logger(__name__)
- >>> logger.info("Processing started")
  """

  from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -14,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin

  __all__ = [
  "LoggerMixin",
- "StructuredLoggerMixin",
  "LoggingConfig",
- "setup_logging",
+ "StructuredLoggerMixin",
  "get_pipeline_logger",
+ "setup_logging",
  ]
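The module docstring no longer carries a usage example, but the pattern it showed still applies; for reference, roughly as it appeared before removal:

    from ai_pipeline_core import get_pipeline_logger

    logger = get_pipeline_logger(__name__)  # preferred over logging.getLogger for Prefect integration
    logger.info("Processing started")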
@@ -48,7 +48,7 @@ loggers:
  ai_pipeline_core.llm:
  level: INFO

- ai_pipeline_core.flow:
+ ai_pipeline_core.pipeline:
  level: INFO

  ai_pipeline_core.testing: