ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +21 -13
- ai_pipeline_core/documents/document.py +202 -51
- ai_pipeline_core/documents/document_list.py +148 -24
- ai_pipeline_core/documents/flow_document.py +2 -6
- ai_pipeline_core/documents/task_document.py +0 -4
- ai_pipeline_core/documents/temporary_document.py +1 -8
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/llm/__init__.py +1 -6
- ai_pipeline_core/llm/ai_messages.py +137 -4
- ai_pipeline_core/llm/client.py +118 -65
- ai_pipeline_core/llm/model_options.py +6 -7
- ai_pipeline_core/llm/model_response.py +17 -16
- ai_pipeline_core/llm/model_types.py +3 -7
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +54 -68
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +14 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -189
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +234 -30
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
- ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/ai_messages.py
CHANGED

@@ -9,6 +9,8 @@ including text, documents, and model responses.
 import base64
 import hashlib
 import json
+from copy import deepcopy
+from typing import Any, Callable, Iterable, SupportsIndex, Union

 from openai.types.chat import (
     ChatCompletionContentPartParam,

@@ -48,15 +50,25 @@ class AIMessages(list[AIMessageType]):
     - ModelResponse: Becomes {"role": "assistant", "content": response.content}

     Note: Document conversion is automatic. Text content becomes user text messages.
-
-
-
-
+
+    VISION/PDF MODEL COMPATIBILITY WARNING:
+    Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+    Non-vision models will raise ValueError when encountering image documents.
+    PDFs require models with document processing support - check your model's capabilities
+    before including PDF documents in messages. Unsupported models may fall back to
+    text extraction or raise errors depending on provider configuration.
+    LiteLLM proxy handles the specific encoding requirements for each provider.

     IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
     expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
     of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).

+    CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+    `AIMessages(["text"])`) or empty constructor with append (e.g.,
+    `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+    constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
+    accidental character iteration.
+
     Example:
         >>> from ai_pipeline_core import llm
         >>> messages = AIMessages()
@@ -65,6 +77,127 @@ class AIMessages(list[AIMessageType]):
         >>> messages.append(response)  # Add the actual response
     """

+    def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
+        """Initialize AIMessages with optional iterable.
+
+        Args:
+            iterable: Optional iterable of messages (list, tuple, etc.).
+                Must not be a string.
+            frozen: If True, list is immutable from creation.
+
+        Raises:
+            TypeError: If a string is passed directly to the constructor.
+        """
+        if isinstance(iterable, str):
+            raise TypeError(
+                "AIMessages cannot be constructed from a string directly. "
+                "Use AIMessages(['text']) for a single message or "
+                "AIMessages() and then append('text')."
+            )
+        self._frozen = False  # Initialize as unfrozen to allow initial population
+        if iterable is None:
+            super().__init__()
+        else:
+            super().__init__(iterable)
+        self._frozen = frozen  # Set frozen state after initial population
+
+    def freeze(self) -> None:
+        """Permanently freeze the list, preventing modifications.
+
+        Once frozen, the list cannot be unfrozen.
+        """
+        self._frozen = True
+
+    def copy(self) -> "AIMessages":
+        """Create an unfrozen deep copy of the list.
+
+        Returns:
+            New unfrozen AIMessages with deep-copied messages.
+        """
+        copied_messages = deepcopy(list(self))
+        return AIMessages(copied_messages, frozen=False)
+
+    def _check_frozen(self) -> None:
+        """Check if list is frozen and raise if it is.
+
+        Raises:
+            RuntimeError: If the list is frozen.
+        """
+        if self._frozen:
+            raise RuntimeError("Cannot modify frozen AIMessages")
+
+    def append(self, message: AIMessageType) -> None:
+        """Add a message to the end of the list."""
+        self._check_frozen()
+        super().append(message)
+
+    def extend(self, messages: Iterable[AIMessageType]) -> None:
+        """Add multiple messages to the list."""
+        self._check_frozen()
+        super().extend(messages)
+
+    def insert(self, index: SupportsIndex, message: AIMessageType) -> None:
+        """Insert a message at the specified position."""
+        self._check_frozen()
+        super().insert(index, message)
+
+    def __setitem__(
+        self,
+        index: Union[SupportsIndex, slice],
+        value: Union[AIMessageType, Iterable[AIMessageType]],
+    ) -> None:
+        """Set item or slice."""
+        self._check_frozen()
+        super().__setitem__(index, value)  # type: ignore[arg-type]
+
+    def __iadd__(self, other: Iterable[AIMessageType]) -> "AIMessages":
+        """In-place addition (+=).
+
+        Returns:
+            This AIMessages instance after modification.
+        """
+        self._check_frozen()
+        return super().__iadd__(other)
+
+    def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
+        """Delete item or slice from list."""
+        self._check_frozen()
+        super().__delitem__(index)
+
+    def pop(self, index: SupportsIndex = -1) -> AIMessageType:
+        """Remove and return item at index.
+
+        Returns:
+            AIMessageType removed from the list.
+        """
+        self._check_frozen()
+        return super().pop(index)
+
+    def remove(self, message: AIMessageType) -> None:
+        """Remove first occurrence of message."""
+        self._check_frozen()
+        super().remove(message)
+
+    def clear(self) -> None:
+        """Remove all items from list."""
+        self._check_frozen()
+        super().clear()
+
+    def reverse(self) -> None:
+        """Reverse list in place."""
+        self._check_frozen()
+        super().reverse()
+
+    def sort(
+        self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
+    ) -> None:
+        """Sort list in place."""
+        self._check_frozen()
+        if key is None:
+            super().sort(reverse=reverse)  # type: ignore[call-arg]
+        else:
+            super().sort(key=key, reverse=reverse)
+
     def get_last_message(self) -> AIMessageType:
         """Get the last message in the conversation.
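Taken together, the new constructor guard, freeze(), copy(), and the mutator checks give AIMessages opt-in immutability. A minimal sketch of that behavior (the top-level import follows the examples elsewhere in this diff; it is not part of the diff itself):

from ai_pipeline_core import AIMessages

messages = AIMessages(["What is Python?"])        # list construction; AIMessages("text") raises TypeError
messages.append("It is a programming language.")

messages.freeze()                                 # permanent: there is no unfreeze
try:
    messages.append("one more")                   # every mutator checks the frozen flag first
except RuntimeError as exc:
    print(exc)                                    # "Cannot modify frozen AIMessages"

draft = messages.copy()                           # deep copy, always returned unfrozen
draft.append("safe to edit")                      # the frozen original stays unchanged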
ai_pipeline_core/llm/client.py
CHANGED
@@ -24,7 +24,6 @@ from pydantic import BaseModel

 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.tracing import trace

 from .ai_messages import AIMessages
 from .model_options import ModelOptions

@@ -60,9 +59,9 @@ def _process_messages(
     - Regular messages without caching

     System Prompt Location:
-        The system prompt
-
-
+        The system prompt parameter is always injected as the FIRST message
+        with role="system". It is NOT cached with context, allowing dynamic
+        system prompts without breaking cache efficiency.

     Cache behavior:
         The last context message gets ephemeral caching with specified TTL
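The reworked System Prompt Location note implies a fixed ordering for the payload that _process_messages builds. A rough sketch of that ordering (the dict shape is an assumption for illustration; only the order and caching behavior come from the docstring above):

system_prompt = "You are a concise analyst."
context_chunks = ["<large static document>", "<reusable instructions and examples>"]
user_query = "Summarize the key points."

processed = [{"role": "system", "content": system_prompt}]            # always first, never cached
processed += [{"role": "user", "content": c} for c in context_chunks]
# per the docstring, only the LAST context entry carries the ephemeral cache TTL
processed.append({"role": "user", "content": user_query})              # dynamic, never cached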
@@ -221,7 +220,6 @@ async def _generate_with_retry(
     raise LLMError("Unknown error occurred during LLM generation.")


-@trace(ignore_inputs=["context"])
 async def generate(
     model: ModelName,
     *,

@@ -238,9 +236,10 @@ async def generate(
         expensive static content separately from dynamic queries.

     Best Practices:
-        1. OPTIONS:
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
         2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
         3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+        4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

     Args:
         model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").

@@ -250,8 +249,11 @@ async def generate(
         messages: Dynamic messages/queries. AIMessages or str ONLY.
             Do not pass Document or DocumentList directly.
             If string, converted to AIMessages internally.
-        options:
-
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+            Configure model behavior centrally via LiteLLM proxy settings or environment
+            variables, not per API call. Provider-specific settings should be configured
+            at the proxy level.

     Returns:
         ModelResponse containing:

@@ -276,17 +278,26 @@ async def generate(
         # WRONG - don't convert to string yourself
         response = await llm.generate("gpt-5", messages=my_document.text)  # NO!

+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents containing images or PDFs, ensure your model supports these formats:
+        - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+        - PDFs require document processing support (varies by provider)
+        - Non-compatible models will raise ValueError or fall back to text extraction
+        - Check model capabilities before including visual/PDF content
+
     Context vs Messages Strategy:
-        context: Static, reusable content
+        context: Static, reusable content for caching efficiency
             - Large documents, instructions, examples
-            -
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration

-        messages: Dynamic,
+        messages: Dynamic, per-call specific content
             - User questions, current conversation turn
-            - Changes
+            - Changes with each API call
+            - Never cached, always processed fresh

     Example:
-        >>> #
+        >>> # CORRECT - No options parameter (this is the recommended pattern)
         >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
         >>> print(response.content)  # In production, use get_pipeline_logger instead of print

@@ -300,29 +311,6 @@ async def generate(
         >>> # Second call: reuses cache, saves tokens!
         >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")

-        >>> # Custom cache TTL for longer-lived contexts
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     context=static_doc,
-        ...     messages="Analyze this",
-        ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
-        ... )
-
-        >>> # Disable caching when context changes frequently
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     context=dynamic_doc,
-        ...     messages="Process this",
-        ...     options=ModelOptions(cache_ttl=None)  # No caching
-        ... )
-
-        >>> # AVOID unnecessary options (defaults are optimal)
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     messages="Hello",
-        ...     options=ModelOptions(temperature=0.7)  # Default is probably fine!
-        ... )
-
         >>> # Multi-turn conversation
         >>> messages = AIMessages([
         ...     "What is Python?",

@@ -331,31 +319,48 @@ async def generate(
         ... ])
         >>> response = await llm.generate("gpt-5", messages=messages)

+    Configuration via LiteLLM Proxy:
+        >>> # Configure temperature in litellm_config.yaml:
+        >>> # model_list:
+        >>> #   - model_name: gpt-5
+        >>> #     litellm_params:
+        >>> #       model: openai/gpt-4o
+        >>> #       temperature: 0.3
+        >>> #       max_tokens: 1000
+        >>>
+        >>> # Configure retry logic in proxy:
+        >>> # general_settings:
+        >>> #   master_key: sk-1234
+        >>> #   max_retries: 5
+        >>> #   retry_delay: 15
+
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
        - First call: full token cost
        - Subsequent calls (within cache TTL): only messages tokens
-        - Default cache TTL is 120s (
-        - Default retry
+        - Default cache TTL is 120s (production-optimized)
+        - Default retry logic: 3 attempts with 10s delay (production-optimized)

     Caching:
         When enabled in your LiteLLM proxy and supported by the upstream provider,
         context messages may be cached to reduce token usage on repeated calls.
-        Default TTL is 120s
-
-        treat this as an optimization, not a guarantee.
-
+        Default TTL is 120s (optimized for production workloads). Configure caching
+        behavior centrally via your LiteLLM proxy settings, not per API call.
+        Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+    Configuration:
+        All model behavior should be configured at the LiteLLM proxy level:
+        - Temperature, max_tokens: Set in litellm_config.yaml model_list
+        - Retry logic: Configure in proxy general_settings
+        - Timeouts: Set via proxy configuration
+        - Caching: Enable/configure in proxy cache settings
+
+        This centralizes configuration and ensures consistency across all API calls.

     Note:
-        - Context argument is ignored by the tracer to avoid recording large data
         - All models are accessed via LiteLLM proxy
         - Automatic retry with configurable delay between attempts
         - Cost tracking via response headers
-
-    See Also:
-        - generate_structured: For typed/structured output
-        - AIMessages: Message container with document support
-        - ModelOptions: Configuration options
     """
     if isinstance(messages, str):
         messages = AIMessages([messages])
@@ -375,7 +380,6 @@ T = TypeVar("T", bound=BaseModel)
 """Type variable for Pydantic model types in structured generation."""


-@trace(ignore_inputs=["context"])
 async def generate_structured(
     model: ModelName,
     response_format: type[T],

@@ -391,18 +395,71 @@ async def generate_structured(
     Type-safe generation that returns validated Pydantic model instances.
     Uses OpenAI's structured output feature for guaranteed schema compliance.

+    IMPORTANT: Search models (models with '-search' suffix) do not support
+    structured output. Use generate() instead for search models.
+
     Best Practices:
-
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
+        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
+        3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+        4. See generate() documentation for more details
+
+    Context vs Messages Strategy:
+        context: Static, reusable content for caching efficiency
+            - Schemas, examples, instructions
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration
+
+        messages: Dynamic, per-call specific content
+            - Data to be structured, user queries
+            - Changes with each API call
+            - Never cached, always processed fresh
+
+    Complex Task Pattern:
+        For complex tasks like research or deep analysis, it's recommended to use
+        a two-step approach:
+        1. First use generate() with a capable model to perform the analysis
+        2. Then use generate_structured() with a smaller model to convert the
+           response into structured output
+
+        This pattern is more reliable than trying to force complex reasoning
+        directly into structured format:
+
+        >>> # Step 1: Research/analysis with generate() - no options parameter
+        >>> research = await llm.generate(
+        ...     "gpt-5",
+        ...     messages="Research and analyze this complex topic..."
+        ... )
+        >>>
+        >>> # Step 2: Structure the results with generate_structured()
+        >>> structured = await llm.generate_structured(
+        ...     "gpt-5-mini",  # Smaller model is fine for structuring
+        ...     response_format=ResearchSummary,
+        ...     messages=f"Extract key information: {research.content}"
+        ... )

     Args:
         model: Model to use (must support structured output).
+            Search models (models with '-search' suffix) do not support structured output.
         response_format: Pydantic model class defining the output schema.
             The model will generate JSON matching this schema.
         context: Static context to cache (documents, schemas, examples).
             Defaults to None (empty AIMessages).
         messages: Dynamic prompts/queries. AIMessages or str ONLY.
             Do not pass Document or DocumentList directly.
-        options:
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized. Configure model behavior
+            centrally via LiteLLM proxy settings, not per API call.
+            The response_format is set automatically from the response_format parameter.
+
+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents with images/PDFs in structured output:
+        - Images require vision-capable models that also support structured output
+        - PDFs require models with both document processing AND structured output support
+        - Many models support either vision OR structured output, but not both
+        - Test your specific model+document combination before production use
+        - Consider two-step approach: generate() for analysis, then generate_structured()
+          for formatting

     Returns:
         StructuredModelResponse[T] containing:

@@ -412,6 +469,7 @@ async def generate_structured(
     Raises:
         TypeError: If response_format is not a Pydantic model class.
         ValueError: If model doesn't support structured output or no parsed content returned.
+            Structured output support varies by provider and model.
         LLMError: If generation fails after retries.
         ValidationError: If response cannot be parsed into response_format.

@@ -423,8 +481,9 @@ async def generate_structured(
     ...     sentiment: float = Field(ge=-1, le=1)
     ...     key_points: list[str] = Field(max_length=5)
     >>>
+    >>> # CORRECT - No options parameter
     >>> response = await llm.generate_structured(
-    ...
+    ...     "gpt-5",
     ...     response_format=Analysis,
     ...     messages="Analyze this product review: ..."
     ... )

@@ -435,11 +494,13 @@ async def generate_structured(
     ...     print(f"- {point}")

     Supported models:
-
+        Structured output support varies by provider and model. Generally includes:
         - OpenAI: GPT-4 and newer models
         - Anthropic: Claude 3+ models
         - Google: Gemini Pro models
-
+
+        Search models (models with '-search' suffix) do not support structured output.
+        Check provider documentation for specific support.

     Performance:
         - Structured output may use more tokens than free text

@@ -451,11 +512,7 @@ async def generate_structured(
     - The model generates JSON matching the schema
     - Validation happens automatically via Pydantic
     - Use Field() descriptions to guide generation
-
-    See Also:
-        - generate: For unstructured text generation
-        - ModelOptions: Configuration including response_format
-        - StructuredModelResponse: Response wrapper with .parsed property
+    - Search models (models with '-search' suffix) do not support structured output
     """
     if context is None:
         context = AIMessages()

@@ -467,6 +524,8 @@ async def generate_structured(
     if isinstance(messages, str):
         messages = AIMessages([messages])

+    assert isinstance(messages, AIMessages)
+
     # Call the internal generate function with structured output enabled
     try:
         response = await _generate_with_retry(model, context, messages, options)

@@ -498,9 +557,3 @@ async def generate_structured(

     # Create a StructuredModelResponse with the parsed value
     return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
-
-
-# Public aliases for testing internal functions
-# These are exported to allow testing of implementation details
-process_messages_for_testing = _process_messages
-generate_with_retry_for_testing = _generate_with_retry
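Pulling the client.py docstring guidance together, a hedged sketch of the recommended call shape for the new generate(): Documents wrapped in AIMessages, static material in context, the per-call question in messages, and no options argument. Here report_doc stands in for any Document produced elsewhere in the pipeline; it is a hypothetical placeholder, not something defined in this diff.

from ai_pipeline_core import llm, AIMessages

async def summarize(report_doc) -> str:
    context = AIMessages([report_doc])  # static, cacheable material goes in context
    response = await llm.generate(
        "gpt-5",
        context=context,
        messages="List the three main risks mentioned in the report.",
    )
    return response.content

# run with: asyncio.run(summarize(report_doc))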
ai_pipeline_core/llm/model_options.py
CHANGED

@@ -1,7 +1,5 @@
 """Configuration options for LLM generation.

-@public
-
 Provides the ModelOptions class for configuring model behavior,
 retry logic, and advanced features like web search and reasoning.
 """

@@ -14,8 +12,6 @@ from pydantic import BaseModel
 class ModelOptions(BaseModel):
     """Configuration options for LLM generation requests.

-    @public
-
     ModelOptions encapsulates all configuration parameters for model
     generation, including model behavior settings, retry logic, and
     advanced features. All fields are optional with sensible defaults.

@@ -68,7 +64,8 @@ class ModelOptions(BaseModel):

         response_format: Pydantic model class for structured output.
             Pass a Pydantic model; the client converts it to JSON Schema.
-            Set automatically by generate_structured().
+            Set automatically by generate_structured().
+            Structured output support varies by provider and model.

     Example:
         >>> # Basic configuration

@@ -162,11 +159,13 @@ class ModelOptions(BaseModel):
         Note:
             - system_prompt is handled separately in _process_messages()
             - retries and retry_delay_seconds are used by retry logic
-            - extra_body
+            - extra_body always includes usage tracking for cost monitoring
         """
         kwargs: dict[str, Any] = {
             "timeout": self.timeout,
-            "extra_body": {
+            "extra_body": {
+                "usage": {"include": True},  # For openrouter cost tracking
+            },
         }

         if self.temperature:
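The kwargs-building change above means every request body now carries a usage-tracking extra_body. A standalone sketch of the resulting structure; build_kwargs is a hypothetical stand-in for the ModelOptions method (whose name is not shown in this hunk), kept only to make the shape runnable:

from typing import Any

def build_kwargs(timeout: float, temperature: float | None = None) -> dict[str, Any]:
    """Hypothetical stand-in for the ModelOptions kwargs builder changed above."""
    kwargs: dict[str, Any] = {
        "timeout": timeout,
        "extra_body": {
            "usage": {"include": True},  # usage tracking now always present (OpenRouter-style cost reporting)
        },
    }
    if temperature:  # optional fields are only added when set, as in the diff
        kwargs["temperature"] = temperature
    return kwargs

print(build_kwargs(300))  # {'timeout': 300, 'extra_body': {'usage': {'include': True}}}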
ai_pipeline_core/llm/model_response.py
CHANGED

@@ -2,7 +2,7 @@

 @public

-Provides enhanced response classes that
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """

@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):

     Primary usage is adding to AIMessages for multi-turn conversations:

-        >>> response = await llm.generate(messages=messages)
-        >>> messages.
+        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> messages.append(response)  # Add the actual response
         >>> print(response.content)  # Access generated text

     The two main interactions with ModelResponse:

@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
         like token usage and cost tracking are available but rarely needed.

     Example:
-        >>> from ai_pipeline_core
+        >>> from ai_pipeline_core import llm, AIMessages
         >>>
-        >>> messages = AIMessages("Explain quantum computing")
-        >>> response = await generate(messages=messages)
+        >>> messages = AIMessages(["Explain quantum computing"])
+        >>> response = await llm.generate("gpt-5", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
-        >>> messages.
+        >>> messages.append(response)
         >>>
         >>> # Access generated text
         >>> print(response.content)

@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
     @public

     Primary property for accessing the LLM's response text.
-    This
+    This is the main property you'll use with ModelResponse.

     Returns:
         Generated text from the model, or empty string if none.

     Example:
-        >>> response = await generate(messages="Hello")
+        >>> response = await generate("gpt-5", messages="Hello")
         >>> text = response.content  # The generated response
         >>>
         >>> # Common pattern: add to messages then use content
-        >>> messages.
+        >>> messages.append(response)
         >>> if "error" in response.content.lower():
         ...     # Handle error case
     """

@@ -189,8 +189,7 @@ class ModelResponse(ChatCompletion):
     >>> response = await llm.generate(
     ...     "gpt-5",
     ...     context=large_doc,
-    ...     messages="Summarize this"
-    ...     options=ModelOptions(cache_ttl="300s")
+    ...     messages="Summarize this"
     ... )
     >>>
     >>> # Get comprehensive metadata

@@ -292,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
     ...     summary: str
     >>>
     >>> response = await generate_structured(
+    ...     "gpt-5",
     ...     response_format=Analysis,
     ...     messages="Analyze this text..."
     ... )

@@ -301,7 +301,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
     >>> print(f"Sentiment: {analysis.sentiment}")
     >>>
     >>> # Can add to messages for conversation
-    >>> messages.
+    >>> messages.append(response)

     The two main interactions:
     1. Accessing .parsed property for the structured data

@@ -377,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
     ...     age: int
     >>>
     >>> response = await generate_structured(
+    ...     "gpt-5",
     ...     response_format=UserInfo,
     ...     messages="Extract user info..."
     ... )

@@ -386,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
     >>> print(f"{user.name} is {user.age} years old")
     >>>
     >>> # Can also add to messages
-    >>> messages.
+    >>> messages.append(response)

     Note:
-        Type-safe with full IDE support. This property
-
+        Type-safe with full IDE support. This is the main property
+        you'll use with structured responses.
     """
     if self._parsed_value is not None:
         return self._parsed_value
ai_pipeline_core/llm/model_types.py
CHANGED

@@ -21,12 +21,12 @@ ModelName: TypeAlias = (
     # Small models
     "gemini-2.5-flash",
     "gpt-5-mini",
-    "grok-
+    "grok-4-fast",
     # Search models
     "gemini-2.5-flash-search",
     "sonar-pro-search",
     "gpt-4o-search",
-    "grok-
+    "grok-4-fast-search",
 ]
 | str
 )
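Because ModelName is a union of string literals and plain str, the renamed grok-4-fast entries get autocompletion while arbitrary proxy-routed names still type-check. A small sketch (the import path is assumed from the file layout; pick_model is a hypothetical helper):

from ai_pipeline_core.llm.model_types import ModelName

def pick_model(fast: bool) -> ModelName:
    return "grok-4-fast" if fast else "gpt-5"     # literal members get IDE support

custom: ModelName = "my-org/custom-proxy-model"   # plain strings also satisfy the alias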
@@ -47,7 +47,7 @@ Model categories:
     High-capability models for complex tasks requiring deep reasoning,
     nuanced understanding, or creative generation.

-Small models (gemini-2.5-flash, gpt-5-mini, grok-
+Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
     Efficient models optimized for speed and cost, suitable for
     simpler tasks or high-volume processing.

@@ -79,8 +79,4 @@ Note:
     The ModelName type includes both predefined literals and str,
     allowing full flexibility while maintaining IDE support for
     common models.
-
-See Also:
-    - llm.generate: Main generation function
-    - ModelOptions: Model configuration options
 """