ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- ai_pipeline_core/__init__.py +21 -13
- ai_pipeline_core/documents/document.py +93 -50
- ai_pipeline_core/documents/document_list.py +70 -23
- ai_pipeline_core/documents/flow_document.py +2 -6
- ai_pipeline_core/documents/task_document.py +0 -4
- ai_pipeline_core/documents/temporary_document.py +1 -8
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/llm/__init__.py +1 -1
- ai_pipeline_core/llm/ai_messages.py +14 -4
- ai_pipeline_core/llm/client.py +116 -59
- ai_pipeline_core/llm/model_options.py +2 -5
- ai_pipeline_core/llm/model_response.py +17 -16
- ai_pipeline_core/llm/model_types.py +0 -4
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +45 -68
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +6 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -189
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +3 -26
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +19 -17
- ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/flow/config.py
CHANGED

@@ -10,11 +10,16 @@ Best Practice:
 to ensure type safety and proper validation of output documents.
 """
 
+import json
 from abc import ABC
 from typing import Any, ClassVar, Iterable
 
-from ai_pipeline_core.documents import DocumentList, FlowDocument
+from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
 from ai_pipeline_core.exceptions import DocumentValidationError
+from ai_pipeline_core.logging import get_pipeline_logger
+from ai_pipeline_core.storage import Storage
+
+logger = get_pipeline_logger(__name__)
 
 
 class FlowConfig(ABC):
@@ -51,8 +56,10 @@ class FlowConfig(ABC):
    ...     OUTPUT_DOCUMENT_TYPE = ProcessedDocument  # Different type!
    >>>
    >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
-    >>> @pipeline_flow(name="processing")
-    >>> async def process(
+    >>> @pipeline_flow(config=ProcessingFlowConfig, name="processing")
+    >>> async def process(
+    ...     project_name: str, docs: DocumentList, flow_options: FlowOptions
+    ... ) -> DocumentList:
    ...     outputs = []
    ...     # ... processing logic ...
    ...     return config.create_and_validate_output(outputs)
@@ -289,8 +296,10 @@ class FlowConfig(ABC):
            DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.
 
        Example:
-            >>> @pipeline_flow(name="my_flow")
-            >>> async def process_flow(
+            >>> @pipeline_flow(config=MyFlowConfig, name="my_flow")
+            >>> async def process_flow(
+            ...     project_name: str, documents: DocumentList, flow_options: FlowOptions
+            ... ) -> DocumentList:
            >>>     outputs = []
            >>>     # ... processing logic ...
            >>>     outputs.append(OutputDoc(...))
@@ -312,3 +321,163 @@ class FlowConfig(ABC):
        documents = DocumentList(list(output))  # type: ignore[arg-type]
        cls.validate_output_documents(documents)
        return documents
+
+    @classmethod
+    async def load_documents(
+        cls,
+        uri: str,
+    ) -> DocumentList:
+        """Load documents from storage matching INPUT_DOCUMENT_TYPES.
+
+        Loads documents from a storage location based on the class's INPUT_DOCUMENT_TYPES.
+        Supports both local filesystem and Google Cloud Storage backends.
+        Automatically loads metadata (.description.md and .sources.json) when present.
+
+        Args:
+            uri: Storage URI (file://, gs://, or local path)
+
+        Returns:
+            DocumentList containing loaded documents matching INPUT_DOCUMENT_TYPES
+
+        Example:
+            >>> # Load from local filesystem
+            >>> docs = await MyFlowConfig.load_documents("./data")
+            >>>
+            >>> # Load from GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+            >>> docs = await MyFlowConfig.load_documents("gs://bucket/data")
+        """
+        # Use INPUT_DOCUMENT_TYPES if not specified
+        storage = await Storage.from_uri(uri)
+        loaded_documents = DocumentList()
+
+        # Process each document type
+        for doc_type in cls.INPUT_DOCUMENT_TYPES:
+            canonical_name = doc_type.canonical_name()
+            doc_storage = storage.with_base(canonical_name)
+
+            # Check if subdirectory exists
+            if not await doc_storage.exists(""):
+                logger.debug(f"Subdirectory {canonical_name} not found, skipping")
+                continue
+
+            # List files in subdirectory
+            objects = await doc_storage.list("", recursive=False, include_dirs=False)
+
+            # Create lookup set for metadata files
+            object_keys = {obj.key for obj in objects}
+
+            # Filter out metadata files
+            doc_files = [
+                obj
+                for obj in objects
+                if not obj.key.endswith(Document.DESCRIPTION_EXTENSION)
+                and not obj.key.endswith(Document.SOURCES_EXTENSION)
+            ]
+
+            for obj in doc_files:
+                try:
+                    # Load document content
+                    content = await doc_storage.read_bytes(obj.key)
+
+                    # Load metadata if present
+                    description = None
+                    sources: list[str] = []
+
+                    # Check for description in objects list
+                    desc_path = f"{obj.key}{Document.DESCRIPTION_EXTENSION}"
+                    if desc_path in object_keys:
+                        try:
+                            description = await doc_storage.read_text(desc_path)
+                        except Exception as e:
+                            logger.warning(f"Failed to load description for {obj.key}: {e}")
+
+                    # Check for sources in objects list
+                    sources_path = f"{obj.key}{Document.SOURCES_EXTENSION}"
+                    if sources_path in object_keys:
+                        try:
+                            sources_text = await doc_storage.read_text(sources_path)
+                            sources = json.loads(sources_text)
+                        except Exception as e:
+                            logger.warning(f"Failed to load sources for {obj.key}: {e}")
+
+                    # Create document instance
+                    doc = doc_type(
+                        name=obj.key,
+                        content=content,
+                        description=description,
+                        sources=sources,
+                    )
+
+                    loaded_documents.append(doc)
+                    logger.debug(f"Loaded {doc_type.__name__} document: {obj.key}")
+                except Exception as e:
+                    logger.error(f"Failed to load {doc_type.__name__} document {obj.key}: {e}")
+
+        logger.info(f"Loaded {len(loaded_documents)} documents from {uri}")
+        return loaded_documents
+
+    @classmethod
+    async def save_documents(
+        cls,
+        uri: str,
+        documents: DocumentList,
+        *,
+        validate_output_type: bool = True,
+    ) -> None:
+        """Save documents to storage with metadata.
+
+        Saves FlowDocument instances to a storage location with their content
+        and metadata files (Document.DESCRIPTION_EXTENSION and Document.SOURCES_EXTENSION).
+        Non-FlowDocument instances (TaskDocument, TemporaryDocument) are skipped.
+
+        Args:
+            uri: Storage URI (file://, gs://, or local path)
+            documents: DocumentList to save
+            validate_output_type: If True, validate documents match cls.OUTPUT_DOCUMENT_TYPE
+
+        Raises:
+            DocumentValidationError: If validate_output_type=True and documents don't match
+                OUTPUT_DOCUMENT_TYPE
+
+        Example:
+            >>> # Save to local filesystem
+            >>> await MyFlowConfig.save_documents("./output", docs)
+            >>>
+            >>> # Save to GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+            >>> await MyFlowConfig.save_documents("gs://bucket/output", docs)
+        """
+        # Validate output type if requested
+        if validate_output_type:
+            cls.validate_output_documents(documents)
+
+        storage = await Storage.from_uri(uri)
+        saved_count = 0
+
+        for doc in documents:
+            # Skip non-FlowDocument instances
+            if not isinstance(doc, FlowDocument):
+                logger.warning(f"Skipping non-FlowDocument: {type(doc).__name__}")
+                continue
+
+            # Get canonical name for subdirectory
+            canonical_name = doc.canonical_name()
+            doc_storage = storage.with_base(canonical_name)
+
+            # Save document content
+            await doc_storage.write_bytes(doc.name, doc.content)
+            saved_count += 1
+
+            # Save description if present
+            if doc.description:
+                desc_path = f"{doc.name}{Document.DESCRIPTION_EXTENSION}"
+                await doc_storage.write_text(desc_path, doc.description)
+
+            # Save sources if present
+            if doc.sources:
+                sources_path = f"{doc.name}{Document.SOURCES_EXTENSION}"
+                sources_json = json.dumps(doc.sources, indent=2)
+                await doc_storage.write_text(sources_path, sources_json)
+
+            logger.debug(f"Saved {type(doc).__name__} document: {doc.name}")
+
+        logger.info(f"Saved {saved_count} documents to {uri}")
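For orientation, here is a minimal usage sketch of the new load_documents/save_documents helpers added above. The InputDoc, OutputDoc, and MyFlowConfig classes are hypothetical stand-ins, the import paths follow the modules shown in this diff, and real FlowDocument subclasses may need additional configuration.

    from ai_pipeline_core.documents import DocumentList, FlowDocument
    from ai_pipeline_core.flow.config import FlowConfig

    class InputDoc(FlowDocument):    # hypothetical input document type
        pass

    class OutputDoc(FlowDocument):   # hypothetical output document type
        pass

    class MyFlowConfig(FlowConfig):
        INPUT_DOCUMENT_TYPES = [InputDoc]
        OUTPUT_DOCUMENT_TYPE = OutputDoc

    async def copy_project(src_uri: str, dst_uri: str) -> None:
        # Reads from a local path or gs:// bucket; .description.md and
        # .sources.json sidecars are picked up automatically when present.
        docs = await MyFlowConfig.load_documents(src_uri)
        outputs = DocumentList(
            [OutputDoc(name=d.name, content=d.content) for d in docs]
        )
        # Validates against OUTPUT_DOCUMENT_TYPE before writing (the default).
        await MyFlowConfig.save_documents(dst_uri, outputs)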
ai_pipeline_core/llm/ai_messages.py
CHANGED

@@ -48,15 +48,25 @@ class AIMessages(list[AIMessageType]):
     - ModelResponse: Becomes {"role": "assistant", "content": response.content}
 
    Note: Document conversion is automatic. Text content becomes user text messages.
-
-
-
-
+
+    VISION/PDF MODEL COMPATIBILITY WARNING:
+    Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+    Non-vision models will raise ValueError when encountering image documents.
+    PDFs require models with document processing support - check your model's capabilities
+    before including PDF documents in messages. Unsupported models may fall back to
+    text extraction or raise errors depending on provider configuration.
+    LiteLLM proxy handles the specific encoding requirements for each provider.
 
    IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
    expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
    of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).
 
+    CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+    `AIMessages(["text"])`) or empty constructor with append (e.g.,
+    `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+    constructor (`AIMessages("text")`) as this will iterate over the string characters
+    instead of treating it as a single message.
+
    Example:
        >>> from ai_pipeline_core import llm
        >>> messages = AIMessages()
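The constructor caution documented above can be illustrated with a short, hypothetical snippet; the import path follows the module shown in this diff.

    from ai_pipeline_core.llm.ai_messages import AIMessages

    # Correct: wrap the text in a list, or append to an empty AIMessages.
    good = AIMessages(["Summarize the attached report"])
    also_good = AIMessages()
    also_good.append("Summarize the attached report")

    # Wrong: AIMessages subclasses list, so a bare string is iterated
    # character by character instead of becoming a single message.
    bad = AIMessages("Summarize")
    assert len(bad) == len("Summarize")  # nine one-character "messages"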
ai_pipeline_core/llm/client.py
CHANGED

@@ -24,7 +24,6 @@ from pydantic import BaseModel
 
 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.tracing import trace
 
 from .ai_messages import AIMessages
 from .model_options import ModelOptions
@@ -60,9 +59,9 @@ def _process_messages(
    - Regular messages without caching
 
    System Prompt Location:
-        The system prompt
-
-
+        The system prompt parameter is always injected as the FIRST message
+        with role="system". It is NOT cached with context, allowing dynamic
+        system prompts without breaking cache efficiency.
 
    Cache behavior:
        The last context message gets ephemeral caching with specified TTL
@@ -221,7 +220,6 @@ async def _generate_with_retry(
    raise LLMError("Unknown error occurred during LLM generation.")
 
 
-@trace(ignore_inputs=["context"])
 async def generate(
    model: ModelName,
    *,
@@ -238,9 +236,10 @@ async def generate(
    expensive static content separately from dynamic queries.
 
    Best Practices:
-        1. OPTIONS:
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
        3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+        4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
 
    Args:
        model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
@@ -250,8 +249,11 @@ async def generate(
        messages: Dynamic messages/queries. AIMessages or str ONLY.
            Do not pass Document or DocumentList directly.
            If string, converted to AIMessages internally.
-        options:
-
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+            Configure model behavior centrally via LiteLLM proxy settings or environment
+            variables, not per API call. Provider-specific settings should be configured
+            at the proxy level.
 
    Returns:
        ModelResponse containing:
@@ -276,17 +278,26 @@ async def generate(
        # WRONG - don't convert to string yourself
        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
 
+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents containing images or PDFs, ensure your model supports these formats:
+        - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+        - PDFs require document processing support (varies by provider)
+        - Non-compatible models will raise ValueError or fall back to text extraction
+        - Check model capabilities before including visual/PDF content
+
    Context vs Messages Strategy:
-        context: Static, reusable content
+        context: Static, reusable content for caching efficiency
            - Large documents, instructions, examples
-            -
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration
 
-        messages: Dynamic,
+        messages: Dynamic, per-call specific content
            - User questions, current conversation turn
-            - Changes
+            - Changes with each API call
+            - Never cached, always processed fresh
 
    Example:
-        >>> #
+        >>> # CORRECT - No options parameter (this is the recommended pattern)
        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
        >>> print(response.content)  # In production, use get_pipeline_logger instead of print
 
@@ -300,29 +311,6 @@ async def generate(
        >>> # Second call: reuses cache, saves tokens!
        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
 
-        >>> # Custom cache TTL for longer-lived contexts
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     context=static_doc,
-        ...     messages="Analyze this",
-        ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
-        ... )
-
-        >>> # Disable caching when context changes frequently
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     context=dynamic_doc,
-        ...     messages="Process this",
-        ...     options=ModelOptions(cache_ttl=None)  # No caching
-        ... )
-
-        >>> # AVOID unnecessary options (defaults are optimal)
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     messages="Hello",
-        ...     options=ModelOptions(temperature=0.7)  # Default is probably fine!
-        ... )
-
        >>> # Multi-turn conversation
        >>> messages = AIMessages([
        ...     "What is Python?",
@@ -331,31 +319,48 @@ async def generate(
        ... ])
        >>> response = await llm.generate("gpt-5", messages=messages)
 
+    Configuration via LiteLLM Proxy:
+        >>> # Configure temperature in litellm_config.yaml:
+        >>> # model_list:
+        >>> #   - model_name: gpt-5
+        >>> #     litellm_params:
+        >>> #       model: openai/gpt-4o
+        >>> #       temperature: 0.3
+        >>> #       max_tokens: 1000
+        >>>
+        >>> # Configure retry logic in proxy:
+        >>> # general_settings:
+        >>> #   master_key: sk-1234
+        >>> #   max_retries: 5
+        >>> #   retry_delay: 15
+
    Performance:
        - Context caching saves ~50-90% tokens on repeated calls
        - First call: full token cost
        - Subsequent calls (within cache TTL): only messages tokens
-        - Default cache TTL is 120s (
-        - Default retry
+        - Default cache TTL is 120s (production-optimized)
+        - Default retry logic: 3 attempts with 10s delay (production-optimized)
 
    Caching:
        When enabled in your LiteLLM proxy and supported by the upstream provider,
        context messages may be cached to reduce token usage on repeated calls.
-        Default TTL is 120s
-
-        treat this as an optimization, not a guarantee.
-
+        Default TTL is 120s (optimized for production workloads). Configure caching
+        behavior centrally via your LiteLLM proxy settings, not per API call.
+        Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+    Configuration:
+        All model behavior should be configured at the LiteLLM proxy level:
+        - Temperature, max_tokens: Set in litellm_config.yaml model_list
+        - Retry logic: Configure in proxy general_settings
+        - Timeouts: Set via proxy configuration
+        - Caching: Enable/configure in proxy cache settings
+
+        This centralizes configuration and ensures consistency across all API calls.
 
    Note:
-        - Context argument is ignored by the tracer to avoid recording large data
        - All models are accessed via LiteLLM proxy
        - Automatic retry with configurable delay between attempts
        - Cost tracking via response headers
-
-    See Also:
-        - generate_structured: For typed/structured output
-        - AIMessages: Message container with document support
-        - ModelOptions: Configuration options
    """
    if isinstance(messages, str):
        messages = AIMessages([messages])
@@ -375,7 +380,6 @@ T = TypeVar("T", bound=BaseModel)
 """Type variable for Pydantic model types in structured generation."""
 
 
-@trace(ignore_inputs=["context"])
 async def generate_structured(
    model: ModelName,
    response_format: type[T],
@@ -391,18 +395,71 @@ async def generate_structured(
    Type-safe generation that returns validated Pydantic model instances.
    Uses OpenAI's structured output feature for guaranteed schema compliance.
 
+    IMPORTANT: Search models (models with '-search' suffix) do not support
+    structured output. Use generate() instead for search models.
+
    Best Practices:
-
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
+        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
+        3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+        4. See generate() documentation for more details
+
+    Context vs Messages Strategy:
+        context: Static, reusable content for caching efficiency
+            - Schemas, examples, instructions
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration
+
+        messages: Dynamic, per-call specific content
+            - Data to be structured, user queries
+            - Changes with each API call
+            - Never cached, always processed fresh
+
+    Complex Task Pattern:
+        For complex tasks like research or deep analysis, it's recommended to use
+        a two-step approach:
+        1. First use generate() with a capable model to perform the analysis
+        2. Then use generate_structured() with a smaller model to convert the
+           response into structured output
+
+        This pattern is more reliable than trying to force complex reasoning
+        directly into structured format:
+
+        >>> # Step 1: Research/analysis with generate() - no options parameter
+        >>> research = await llm.generate(
+        ...     "gpt-5",
+        ...     messages="Research and analyze this complex topic..."
+        ... )
+        >>>
+        >>> # Step 2: Structure the results with generate_structured()
+        >>> structured = await llm.generate_structured(
+        ...     "gpt-5-mini",  # Smaller model is fine for structuring
+        ...     response_format=ResearchSummary,
+        ...     messages=f"Extract key information: {research.content}"
+        ... )
 
    Args:
        model: Model to use (must support structured output).
+            Search models (models with '-search' suffix) do not support structured output.
        response_format: Pydantic model class defining the output schema.
            The model will generate JSON matching this schema.
        context: Static context to cache (documents, schemas, examples).
            Defaults to None (empty AIMessages).
        messages: Dynamic prompts/queries. AIMessages or str ONLY.
            Do not pass Document or DocumentList directly.
-        options:
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized. Configure model behavior
+            centrally via LiteLLM proxy settings, not per API call.
+            The response_format is set automatically from the response_format parameter.
+
+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents with images/PDFs in structured output:
+        - Images require vision-capable models that also support structured output
+        - PDFs require models with both document processing AND structured output support
+        - Many models support either vision OR structured output, but not both
+        - Test your specific model+document combination before production use
+        - Consider two-step approach: generate() for analysis, then generate_structured()
+          for formatting
 
    Returns:
        StructuredModelResponse[T] containing:
@@ -412,6 +469,7 @@ async def generate_structured(
    Raises:
        TypeError: If response_format is not a Pydantic model class.
        ValueError: If model doesn't support structured output or no parsed content returned.
+            Structured output support varies by provider and model.
        LLMError: If generation fails after retries.
        ValidationError: If response cannot be parsed into response_format.
 
@@ -423,8 +481,9 @@ async def generate_structured(
        ...     sentiment: float = Field(ge=-1, le=1)
        ...     key_points: list[str] = Field(max_length=5)
        >>>
+        >>> # CORRECT - No options parameter
        >>> response = await llm.generate_structured(
-        ...
+        ...     "gpt-5",
        ...     response_format=Analysis,
        ...     messages="Analyze this product review: ..."
        ... )
@@ -435,11 +494,13 @@ async def generate_structured(
        ...     print(f"- {point}")
 
    Supported models:
-
+        Structured output support varies by provider and model. Generally includes:
        - OpenAI: GPT-4 and newer models
        - Anthropic: Claude 3+ models
        - Google: Gemini Pro models
-
+
+        Search models (models with '-search' suffix) do not support structured output.
+        Check provider documentation for specific support.
 
    Performance:
        - Structured output may use more tokens than free text
@@ -451,11 +512,7 @@ async def generate_structured(
        - The model generates JSON matching the schema
        - Validation happens automatically via Pydantic
        - Use Field() descriptions to guide generation
-
-    See Also:
-        - generate: For unstructured text generation
-        - ModelOptions: Configuration including response_format
-        - StructuredModelResponse: Response wrapper with .parsed property
+        - Search models (models with '-search' suffix) do not support structured output
    """
    if context is None:
        context = AIMessages()
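A hedged sketch of the two-step pattern recommended in the generate_structured() docstring above: analyze with generate(), then structure with a smaller model. ResearchSummary is a hypothetical schema; the model names and the .parsed property follow the docstring's own examples.

    from pydantic import BaseModel, Field

    from ai_pipeline_core import llm

    class ResearchSummary(BaseModel):
        # Hypothetical schema used only for illustration.
        topic: str
        key_points: list[str] = Field(max_length=5)

    async def summarize(topic: str) -> ResearchSummary:
        # Step 1: free-form analysis with a capable model (no options parameter).
        research = await llm.generate("gpt-5", messages=f"Research and analyze: {topic}")
        # Step 2: convert the prose into structured output with a smaller model.
        structured = await llm.generate_structured(
            "gpt-5-mini",
            response_format=ResearchSummary,
            messages=f"Extract key information: {research.content}",
        )
        return structured.parsed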
ai_pipeline_core/llm/model_options.py
CHANGED

@@ -1,7 +1,5 @@
 """Configuration options for LLM generation.
 
-@public
-
 Provides the ModelOptions class for configuring model behavior,
 retry logic, and advanced features like web search and reasoning.
 """
@@ -14,8 +12,6 @@ from pydantic import BaseModel
 class ModelOptions(BaseModel):
    """Configuration options for LLM generation requests.
 
-    @public
-
    ModelOptions encapsulates all configuration parameters for model
    generation, including model behavior settings, retry logic, and
    advanced features. All fields are optional with sensible defaults.
@@ -68,7 +64,8 @@ class ModelOptions(BaseModel):
 
        response_format: Pydantic model class for structured output.
            Pass a Pydantic model; the client converts it to JSON Schema.
-            Set automatically by generate_structured().
+            Set automatically by generate_structured().
+            Structured output support varies by provider and model.
 
        Example:
            >>> # Basic configuration
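Finally, a sketch of the context-vs-messages split described in the generate() docstring: the static document goes in context (cacheable), while each question goes in messages. static_doc stands in for any Document built by the caller, and the logger import follows the module used elsewhere in this diff.

    from ai_pipeline_core import llm
    from ai_pipeline_core.logging import get_pipeline_logger

    logger = get_pipeline_logger(__name__)

    async def ask_twice(static_doc, first: str, second: str) -> None:
        # First call pays the full token cost for the static context.
        r1 = await llm.generate("gpt-5", context=static_doc, messages=first)
        # A second call within the default 120s cache TTL can reuse the cached
        # context (when the LiteLLM proxy/provider supports it), so only the
        # new messages tokens are processed fresh.
        r2 = await llm.generate("gpt-5", context=static_doc, messages=second)
        logger.info(r1.content)
        logger.info(r2.content)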
|