ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +25 -14
- ai_pipeline_core/documents/__init__.py +2 -1
- ai_pipeline_core/documents/document.py +317 -49
- ai_pipeline_core/documents/document_list.py +136 -33
- ai_pipeline_core/documents/flow_document.py +8 -29
- ai_pipeline_core/documents/task_document.py +6 -27
- ai_pipeline_core/documents/temporary_document.py +6 -27
- ai_pipeline_core/documents/utils.py +64 -1
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/flow/options.py +2 -2
- ai_pipeline_core/llm/__init__.py +6 -1
- ai_pipeline_core/llm/ai_messages.py +14 -7
- ai_pipeline_core/llm/client.py +143 -55
- ai_pipeline_core/llm/model_options.py +20 -5
- ai_pipeline_core/llm/model_response.py +77 -29
- ai_pipeline_core/llm/model_types.py +38 -40
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +68 -65
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +6 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -172
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +110 -26
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
- ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/flow/config.py
CHANGED
@@ -10,11 +10,16 @@ Best Practice:
 to ensure type safety and proper validation of output documents.
 """

+import json
 from abc import ABC
 from typing import Any, ClassVar, Iterable

-from ai_pipeline_core.documents import DocumentList, FlowDocument
+from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
 from ai_pipeline_core.exceptions import DocumentValidationError
+from ai_pipeline_core.logging import get_pipeline_logger
+from ai_pipeline_core.storage import Storage
+
+logger = get_pipeline_logger(__name__)


 class FlowConfig(ABC):

@@ -51,8 +56,10 @@ class FlowConfig(ABC):
     ...     OUTPUT_DOCUMENT_TYPE = ProcessedDocument  # Different type!
     >>>
     >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
-    >>> @pipeline_flow(name="processing")
-    >>> async def process(
+    >>> @pipeline_flow(config=ProcessingFlowConfig, name="processing")
+    >>> async def process(
+    ...     project_name: str, docs: DocumentList, flow_options: FlowOptions
+    ... ) -> DocumentList:
     ...     outputs = []
     ...     # ... processing logic ...
     ...     return config.create_and_validate_output(outputs)

@@ -289,8 +296,10 @@ class FlowConfig(ABC):
            DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.

        Example:
-            >>> @pipeline_flow(name="my_flow")
-            >>> async def process_flow(
+            >>> @pipeline_flow(config=MyFlowConfig, name="my_flow")
+            >>> async def process_flow(
+            ...     project_name: str, documents: DocumentList, flow_options: FlowOptions
+            ... ) -> DocumentList:
            >>>     outputs = []
            >>>     # ... processing logic ...
            >>>     outputs.append(OutputDoc(...))

@@ -312,3 +321,163 @@ class FlowConfig(ABC):
        documents = DocumentList(list(output))  # type: ignore[arg-type]
        cls.validate_output_documents(documents)
        return documents
+
+    @classmethod
+    async def load_documents(
+        cls,
+        uri: str,
+    ) -> DocumentList:
+        """Load documents from storage matching INPUT_DOCUMENT_TYPES.
+
+        Loads documents from a storage location based on the class's INPUT_DOCUMENT_TYPES.
+        Supports both local filesystem and Google Cloud Storage backends.
+        Automatically loads metadata (.description.md and .sources.json) when present.
+
+        Args:
+            uri: Storage URI (file://, gs://, or local path)
+
+        Returns:
+            DocumentList containing loaded documents matching INPUT_DOCUMENT_TYPES
+
+        Example:
+            >>> # Load from local filesystem
+            >>> docs = await MyFlowConfig.load_documents("./data")
+            >>>
+            >>> # Load from GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+            >>> docs = await MyFlowConfig.load_documents("gs://bucket/data")
+        """
+        # Use INPUT_DOCUMENT_TYPES if not specified
+        storage = await Storage.from_uri(uri)
+        loaded_documents = DocumentList()
+
+        # Process each document type
+        for doc_type in cls.INPUT_DOCUMENT_TYPES:
+            canonical_name = doc_type.canonical_name()
+            doc_storage = storage.with_base(canonical_name)
+
+            # Check if subdirectory exists
+            if not await doc_storage.exists(""):
+                logger.debug(f"Subdirectory {canonical_name} not found, skipping")
+                continue
+
+            # List files in subdirectory
+            objects = await doc_storage.list("", recursive=False, include_dirs=False)
+
+            # Create lookup set for metadata files
+            object_keys = {obj.key for obj in objects}
+
+            # Filter out metadata files
+            doc_files = [
+                obj
+                for obj in objects
+                if not obj.key.endswith(Document.DESCRIPTION_EXTENSION)
+                and not obj.key.endswith(Document.SOURCES_EXTENSION)
+            ]
+
+            for obj in doc_files:
+                try:
+                    # Load document content
+                    content = await doc_storage.read_bytes(obj.key)
+
+                    # Load metadata if present
+                    description = None
+                    sources: list[str] = []
+
+                    # Check for description in objects list
+                    desc_path = f"{obj.key}{Document.DESCRIPTION_EXTENSION}"
+                    if desc_path in object_keys:
+                        try:
+                            description = await doc_storage.read_text(desc_path)
+                        except Exception as e:
+                            logger.warning(f"Failed to load description for {obj.key}: {e}")
+
+                    # Check for sources in objects list
+                    sources_path = f"{obj.key}{Document.SOURCES_EXTENSION}"
+                    if sources_path in object_keys:
+                        try:
+                            sources_text = await doc_storage.read_text(sources_path)
+                            sources = json.loads(sources_text)
+                        except Exception as e:
+                            logger.warning(f"Failed to load sources for {obj.key}: {e}")
+
+                    # Create document instance
+                    doc = doc_type(
+                        name=obj.key,
+                        content=content,
+                        description=description,
+                        sources=sources,
+                    )
+
+                    loaded_documents.append(doc)
+                    logger.debug(f"Loaded {doc_type.__name__} document: {obj.key}")
+                except Exception as e:
+                    logger.error(f"Failed to load {doc_type.__name__} document {obj.key}: {e}")
+
+        logger.info(f"Loaded {len(loaded_documents)} documents from {uri}")
+        return loaded_documents
+
+    @classmethod
+    async def save_documents(
+        cls,
+        uri: str,
+        documents: DocumentList,
+        *,
+        validate_output_type: bool = True,
+    ) -> None:
+        """Save documents to storage with metadata.
+
+        Saves FlowDocument instances to a storage location with their content
+        and metadata files (Document.DESCRIPTION_EXTENSION and Document.SOURCES_EXTENSION).
+        Non-FlowDocument instances (TaskDocument, TemporaryDocument) are skipped.
+
+        Args:
+            uri: Storage URI (file://, gs://, or local path)
+            documents: DocumentList to save
+            validate_output_type: If True, validate documents match cls.OUTPUT_DOCUMENT_TYPE
+
+        Raises:
+            DocumentValidationError: If validate_output_type=True and documents don't match
+                OUTPUT_DOCUMENT_TYPE
+
+        Example:
+            >>> # Save to local filesystem
+            >>> await MyFlowConfig.save_documents("./output", docs)
+            >>>
+            >>> # Save to GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+            >>> await MyFlowConfig.save_documents("gs://bucket/output", docs)
+        """
+        # Validate output type if requested
+        if validate_output_type:
+            cls.validate_output_documents(documents)
+
+        storage = await Storage.from_uri(uri)
+        saved_count = 0
+
+        for doc in documents:
+            # Skip non-FlowDocument instances
+            if not isinstance(doc, FlowDocument):
+                logger.warning(f"Skipping non-FlowDocument: {type(doc).__name__}")
+                continue
+
+            # Get canonical name for subdirectory
+            canonical_name = doc.canonical_name()
+            doc_storage = storage.with_base(canonical_name)
+
+            # Save document content
+            await doc_storage.write_bytes(doc.name, doc.content)
+            saved_count += 1
+
+            # Save description if present
+            if doc.description:
+                desc_path = f"{doc.name}{Document.DESCRIPTION_EXTENSION}"
+                await doc_storage.write_text(desc_path, doc.description)
+
+            # Save sources if present
+            if doc.sources:
+                sources_path = f"{doc.name}{Document.SOURCES_EXTENSION}"
+                sources_json = json.dumps(doc.sources, indent=2)
+                await doc_storage.write_text(sources_path, sources_json)
+
+            logger.debug(f"Saved {type(doc).__name__} document: {doc.name}")
+
+        logger.info(f"Saved {saved_count} documents to {uri}")
ai_pipeline_core/flow/options.py
CHANGED
@@ -60,11 +60,11 @@ class FlowOptions(BaseSettings):
    add flow-specific parameters with appropriate validation.
    """

-    core_model: ModelName
+    core_model: ModelName = Field(
        default="gpt-5",
        description="Primary model for complex analysis and generation tasks.",
    )
-    small_model: ModelName
+    small_model: ModelName = Field(
        default="gpt-5-mini",
        description="Fast, cost-effective model for simple tasks and orchestration.",
    )
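With this change core_model and small_model carry explicit Field defaults, so a project only needs to subclass FlowOptions for its own parameters. A minimal sketch, assuming FlowOptions is importable from ai_pipeline_core.flow.options and behaves as the pydantic-settings model the hunk header indicates:

# Sketch: extending FlowOptions with a flow-specific parameter (assumed import path).
from pydantic import Field

from ai_pipeline_core.flow.options import FlowOptions


class MyFlowOptions(FlowOptions):
    # Inherited defaults: core_model="gpt-5", small_model="gpt-5-mini"
    max_chunks: int = Field(default=10, ge=1, description="Maximum chunks to process per run.")


options = MyFlowOptions()  # picks up environment overrides via BaseSettings
print(options.core_model, options.small_model, options.max_chunks)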
ai_pipeline_core/llm/__init__.py
CHANGED
@@ -8,6 +8,8 @@ from .ai_messages import AIMessages, AIMessageType
 from .client import (
     generate,
     generate_structured,
+    generate_with_retry_for_testing,
+    process_messages_for_testing,
 )
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse

@@ -17,9 +19,12 @@ __all__ = [
     "AIMessages",
     "AIMessageType",
     "ModelName",
-    "ModelOptions",
     "ModelResponse",
+    "ModelOptions",
     "StructuredModelResponse",
     "generate",
     "generate_structured",
+    # Internal functions exposed for testing only
+    "process_messages_for_testing",
+    "generate_with_retry_for_testing",
 ]
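The new *_for_testing exports alias the private client helpers so tests can exercise message assembly directly. A sketch of such a test follows; the exact shape of the returned message dicts is an assumption based on the client.py docstrings further down.

# Sketch of a unit test against the testing-only export (assumed message shape).
from ai_pipeline_core.llm import AIMessages, process_messages_for_testing


def test_system_prompt_is_first_message() -> None:
    processed = process_messages_for_testing(
        context=AIMessages(["static reference material"]),
        messages=AIMessages(["dynamic question"]),
        system_prompt="You are terse.",
        cache_ttl="120s",
    )
    # System prompt is injected as the first message with role="system".
    assert processed[0]["role"] == "system"
    # With a cache_ttl set, the last context message should carry cache_control.
    assert any("cache_control" in m for m in processed)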
ai_pipeline_core/llm/ai_messages.py
CHANGED

@@ -48,22 +48,31 @@ class AIMessages(list[AIMessageType]):
    - ModelResponse: Becomes {"role": "assistant", "content": response.content}

    Note: Document conversion is automatic. Text content becomes user text messages.
-
-
-
-
+
+    VISION/PDF MODEL COMPATIBILITY WARNING:
+    Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+    Non-vision models will raise ValueError when encountering image documents.
+    PDFs require models with document processing support - check your model's capabilities
+    before including PDF documents in messages. Unsupported models may fall back to
+    text extraction or raise errors depending on provider configuration.
+    LiteLLM proxy handles the specific encoding requirements for each provider.

    IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
    expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
    of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).

+    CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+    `AIMessages(["text"])`) or empty constructor with append (e.g.,
+    `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+    constructor (`AIMessages("text")`) as this will iterate over the string characters
+    instead of treating it as a single message.
+
    Example:
        >>> from ai_pipeline_core import llm
        >>> messages = AIMessages()
        >>> messages.append("What is the capital of France?")
        >>> response = await llm.generate("gpt-5", messages=messages)
        >>> messages.append(response)  # Add the actual response
-        >>> prompt = messages.get_last_message_as_str()  # Get the last message as a string
    """

    def get_last_message(self) -> AIMessageType:

@@ -78,8 +87,6 @@ class AIMessages(list[AIMessageType]):
    def get_last_message_as_str(self) -> str:
        """Get the last message as a string, raising if not a string.

-        @public
-
        Returns:
            The last message as a string.

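The new CAUTION above is easy to trip over in practice: because AIMessages subclasses list, a bare string passed to the constructor is iterated character by character. A small sketch contrasting the wrong and right constructions:

# Sketch illustrating the CAUTION: AIMessages("text") iterates the string's characters.
from ai_pipeline_core.llm import AIMessages

wrong = AIMessages("hello")    # five one-character "messages": ['h', 'e', 'l', 'l', 'o']
right = AIMessages(["hello"])  # one message

assert len(wrong) == 5
assert len(right) == 1

incremental = AIMessages()
incremental.append("What is the capital of France?")  # also fine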
ai_pipeline_core/llm/client.py
CHANGED
@@ -24,7 +24,6 @@ from pydantic import BaseModel

 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.tracing import trace

 from .ai_messages import AIMessages
 from .model_options import ModelOptions

@@ -38,6 +37,7 @@ def _process_messages(
     context: AIMessages,
     messages: AIMessages,
     system_prompt: str | None = None,
+    cache_ttl: str | None = "120s",
 ) -> list[ChatCompletionMessageParam]:
     """Process and format messages for LLM API consumption.


@@ -49,21 +49,25 @@ def _process_messages(
        context: Messages to be cached (typically expensive/static content).
        messages: Regular messages without caching (dynamic queries).
        system_prompt: Optional system instructions for the model.
+        cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+            Set to None or empty string to disable caching.

    Returns:
        List of formatted messages ready for API calls, with:
        - System prompt at the beginning (if provided)
-        - Context messages with cache_control on the last one
+        - Context messages with cache_control on the last one (if cache_ttl)
        - Regular messages without caching

    System Prompt Location:
-        The system prompt
-
-
+        The system prompt parameter is always injected as the FIRST message
+        with role="system". It is NOT cached with context, allowing dynamic
+        system prompts without breaking cache efficiency.

    Cache behavior:
-        The last context message gets ephemeral caching
+        The last context message gets ephemeral caching with specified TTL
        to reduce token usage on repeated calls with same context.
+        If cache_ttl is None or empty string (falsy), no caching is applied.
+        Only the last context message receives cache_control to maximize efficiency.

    Note:
        This is an internal function used by _generate_with_retry().

@@ -80,11 +84,12 @@ def _process_messages(
    # Use AIMessages.to_prompt() for context
    context_messages = context.to_prompt()

-    # Apply caching to last context message
-
-    "type
-
-
+    # Apply caching to last context message if cache_ttl is set
+    if cache_ttl:
+        context_messages[-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": cache_ttl,
+        }

    processed_messages.extend(context_messages)

@@ -173,7 +178,9 @@ async def _generate_with_retry(
    if not context and not messages:
        raise ValueError("Either context or messages must be provided")

-    processed_messages = _process_messages(
+    processed_messages = _process_messages(
+        context, messages, options.system_prompt, options.cache_ttl
+    )
    completion_kwargs: dict[str, Any] = {
        "model": model,
        "messages": processed_messages,

@@ -213,9 +220,8 @@ async def _generate_with_retry(
    raise LLMError("Unknown error occurred during LLM generation.")


-@trace(ignore_inputs=["context"])
 async def generate(
-    model: ModelName
+    model: ModelName,
     *,
     context: AIMessages | None = None,
     messages: AIMessages | str,

@@ -230,20 +236,24 @@ async def generate(
    expensive static content separately from dynamic queries.

    Best Practices:
-        1. OPTIONS:
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
        3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+        4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

    Args:
        model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
-
+            Accepts predefined models or any string for custom models.
        context: Static context to cache (documents, examples, instructions).
            Defaults to None (empty context). Cached for 120 seconds.
        messages: Dynamic messages/queries. AIMessages or str ONLY.
            Do not pass Document or DocumentList directly.
            If string, converted to AIMessages internally.
-        options:
-
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+            Configure model behavior centrally via LiteLLM proxy settings or environment
+            variables, not per API call. Provider-specific settings should be configured
+            at the proxy level.

    Returns:
        ModelResponse containing:

@@ -268,17 +278,26 @@ async def generate(
        # WRONG - don't convert to string yourself
        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!

+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents containing images or PDFs, ensure your model supports these formats:
+        - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+        - PDFs require document processing support (varies by provider)
+        - Non-compatible models will raise ValueError or fall back to text extraction
+        - Check model capabilities before including visual/PDF content
+
    Context vs Messages Strategy:
-        context: Static, reusable content
+        context: Static, reusable content for caching efficiency
            - Large documents, instructions, examples
-            -
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration

-        messages: Dynamic,
+        messages: Dynamic, per-call specific content
            - User questions, current conversation turn
-            - Changes
+            - Changes with each API call
+            - Never cached, always processed fresh

    Example:
-        >>> #
+        >>> # CORRECT - No options parameter (this is the recommended pattern)
        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
        >>> print(response.content)  # In production, use get_pipeline_logger instead of print

@@ -292,13 +311,6 @@ async def generate(
        >>> # Second call: reuses cache, saves tokens!
        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")

-        >>> # AVOID unnecessary options (defaults are optimal)
-        >>> response = await llm.generate(
-        ...     "gpt-5",
-        ...     messages="Hello",
-        ...     options=ModelOptions(temperature=0.7)  # Default is probably fine!
-        ... )
-
        >>> # Multi-turn conversation
        >>> messages = AIMessages([
        ...     "What is Python?",

@@ -307,28 +319,48 @@ async def generate(
        ... ])
        >>> response = await llm.generate("gpt-5", messages=messages)

+    Configuration via LiteLLM Proxy:
+        >>> # Configure temperature in litellm_config.yaml:
+        >>> # model_list:
+        >>> #   - model_name: gpt-5
+        >>> #     litellm_params:
+        >>> #       model: openai/gpt-4o
+        >>> #       temperature: 0.3
+        >>> #       max_tokens: 1000
+        >>>
+        >>> # Configure retry logic in proxy:
+        >>> # general_settings:
+        >>> #   master_key: sk-1234
+        >>> #   max_retries: 5
+        >>> #   retry_delay: 15
+
    Performance:
        - Context caching saves ~50-90% tokens on repeated calls
        - First call: full token cost
-        - Subsequent calls (within
-        - Default
+        - Subsequent calls (within cache TTL): only messages tokens
+        - Default cache TTL is 120s (production-optimized)
+        - Default retry logic: 3 attempts with 10s delay (production-optimized)

    Caching:
        When enabled in your LiteLLM proxy and supported by the upstream provider,
-        context messages may be cached
-
-
+        context messages may be cached to reduce token usage on repeated calls.
+        Default TTL is 120s (optimized for production workloads). Configure caching
+        behavior centrally via your LiteLLM proxy settings, not per API call.
+        Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+    Configuration:
+        All model behavior should be configured at the LiteLLM proxy level:
+        - Temperature, max_tokens: Set in litellm_config.yaml model_list
+        - Retry logic: Configure in proxy general_settings
+        - Timeouts: Set via proxy configuration
+        - Caching: Enable/configure in proxy cache settings
+
+        This centralizes configuration and ensures consistency across all API calls.

    Note:
-        - Context argument is ignored by the tracer to avoid recording large data
        - All models are accessed via LiteLLM proxy
        - Automatic retry with configurable delay between attempts
        - Cost tracking via response headers
-
-    See Also:
-        - generate_structured: For typed/structured output
-        - AIMessages: Message container with document support
-        - ModelOptions: Configuration options
    """
    if isinstance(messages, str):
        messages = AIMessages([messages])

@@ -348,9 +380,8 @@ T = TypeVar("T", bound=BaseModel)
 """Type variable for Pydantic model types in structured generation."""


-@trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName
+    model: ModelName,
     response_format: type[T],
     *,
     context: AIMessages | None = None,

@@ -364,20 +395,71 @@ async def generate_structured(
    Type-safe generation that returns validated Pydantic model instances.
    Uses OpenAI's structured output feature for guaranteed schema compliance.

-
-
+    IMPORTANT: Search models (models with '-search' suffix) do not support
+    structured output. Use generate() instead for search models.
+
+    Best Practices:
+        1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
-        3.
+        3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+        4. See generate() documentation for more details
+
+    Context vs Messages Strategy:
+        context: Static, reusable content for caching efficiency
+            - Schemas, examples, instructions
+            - Remains constant across multiple calls
+            - Cached when supported by provider/proxy configuration
+
+        messages: Dynamic, per-call specific content
+            - Data to be structured, user queries
+            - Changes with each API call
+            - Never cached, always processed fresh
+
+    Complex Task Pattern:
+        For complex tasks like research or deep analysis, it's recommended to use
+        a two-step approach:
+        1. First use generate() with a capable model to perform the analysis
+        2. Then use generate_structured() with a smaller model to convert the
+           response into structured output
+
+        This pattern is more reliable than trying to force complex reasoning
+        directly into structured format:
+
+        >>> # Step 1: Research/analysis with generate() - no options parameter
+        >>> research = await llm.generate(
+        ...     "gpt-5",
+        ...     messages="Research and analyze this complex topic..."
+        ... )
+        >>>
+        >>> # Step 2: Structure the results with generate_structured()
+        >>> structured = await llm.generate_structured(
+        ...     "gpt-5-mini",  # Smaller model is fine for structuring
+        ...     response_format=ResearchSummary,
+        ...     messages=f"Extract key information: {research.content}"
+        ... )

    Args:
        model: Model to use (must support structured output).
+            Search models (models with '-search' suffix) do not support structured output.
        response_format: Pydantic model class defining the output schema.
            The model will generate JSON matching this schema.
        context: Static context to cache (documents, schemas, examples).
            Defaults to None (empty AIMessages).
        messages: Dynamic prompts/queries. AIMessages or str ONLY.
            Do not pass Document or DocumentList directly.
-        options:
+        options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+            Framework defaults are production-optimized. Configure model behavior
+            centrally via LiteLLM proxy settings, not per API call.
+            The response_format is set automatically from the response_format parameter.
+
+    VISION/PDF MODEL COMPATIBILITY:
+        When using Documents with images/PDFs in structured output:
+        - Images require vision-capable models that also support structured output
+        - PDFs require models with both document processing AND structured output support
+        - Many models support either vision OR structured output, but not both
+        - Test your specific model+document combination before production use
+        - Consider two-step approach: generate() for analysis, then generate_structured()
+          for formatting

    Returns:
        StructuredModelResponse[T] containing:

@@ -387,6 +469,7 @@ async def generate_structured(
    Raises:
        TypeError: If response_format is not a Pydantic model class.
        ValueError: If model doesn't support structured output or no parsed content returned.
+            Structured output support varies by provider and model.
        LLMError: If generation fails after retries.
        ValidationError: If response cannot be parsed into response_format.

@@ -398,8 +481,9 @@ async def generate_structured(
        ...     sentiment: float = Field(ge=-1, le=1)
        ...     key_points: list[str] = Field(max_length=5)
        >>>
+        >>> # CORRECT - No options parameter
        >>> response = await llm.generate_structured(
-        ...
+        ...     "gpt-5",
        ...     response_format=Analysis,
        ...     messages="Analyze this product review: ..."
        ... )

@@ -410,11 +494,13 @@ async def generate_structured(
        ...     print(f"- {point}")

    Supported models:
-
+        Structured output support varies by provider and model. Generally includes:
        - OpenAI: GPT-4 and newer models
        - Anthropic: Claude 3+ models
        - Google: Gemini Pro models
-
+
+        Search models (models with '-search' suffix) do not support structured output.
+        Check provider documentation for specific support.

    Performance:
        - Structured output may use more tokens than free text

@@ -426,11 +512,7 @@ async def generate_structured(
        - The model generates JSON matching the schema
        - Validation happens automatically via Pydantic
        - Use Field() descriptions to guide generation
-
-    See Also:
-        - generate: For unstructured text generation
-        - ModelOptions: Configuration including response_format
-        - StructuredModelResponse: Response wrapper with .parsed property
+        - Search models (models with '-search' suffix) do not support structured output
    """
    if context is None:
        context = AIMessages()

@@ -473,3 +555,9 @@ async def generate_structured(

    # Create a StructuredModelResponse with the parsed value
    return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+
+
+# Public aliases for testing internal functions
+# These are exported to allow testing of implementation details
+process_messages_for_testing = _process_messages
+generate_with_retry_for_testing = _generate_with_retry