ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. ai_pipeline_core/__init__.py +21 -13
  2. ai_pipeline_core/documents/document.py +202 -51
  3. ai_pipeline_core/documents/document_list.py +148 -24
  4. ai_pipeline_core/documents/flow_document.py +2 -6
  5. ai_pipeline_core/documents/task_document.py +0 -4
  6. ai_pipeline_core/documents/temporary_document.py +1 -8
  7. ai_pipeline_core/flow/config.py +174 -5
  8. ai_pipeline_core/llm/__init__.py +1 -6
  9. ai_pipeline_core/llm/ai_messages.py +137 -4
  10. ai_pipeline_core/llm/client.py +118 -65
  11. ai_pipeline_core/llm/model_options.py +6 -7
  12. ai_pipeline_core/llm/model_response.py +17 -16
  13. ai_pipeline_core/llm/model_types.py +3 -7
  14. ai_pipeline_core/logging/__init__.py +0 -2
  15. ai_pipeline_core/logging/logging_config.py +0 -6
  16. ai_pipeline_core/logging/logging_mixin.py +2 -10
  17. ai_pipeline_core/pipeline.py +54 -68
  18. ai_pipeline_core/prefect.py +12 -3
  19. ai_pipeline_core/prompt_manager.py +14 -7
  20. ai_pipeline_core/settings.py +13 -5
  21. ai_pipeline_core/simple_runner/__init__.py +1 -11
  22. ai_pipeline_core/simple_runner/cli.py +13 -12
  23. ai_pipeline_core/simple_runner/simple_runner.py +34 -189
  24. ai_pipeline_core/storage/__init__.py +8 -0
  25. ai_pipeline_core/storage/storage.py +628 -0
  26. ai_pipeline_core/tracing.py +234 -30
  27. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
  28. ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
  29. ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
  30. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
  31. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -9,6 +9,8 @@ including text, documents, and model responses.
  import base64
  import hashlib
  import json
+ from copy import deepcopy
+ from typing import Any, Callable, Iterable, SupportsIndex, Union

  from openai.types.chat import (
      ChatCompletionContentPartParam,
@@ -48,15 +50,25 @@ class AIMessages(list[AIMessageType]):
      - ModelResponse: Becomes {"role": "assistant", "content": response.content}

      Note: Document conversion is automatic. Text content becomes user text messages.
-     Images are sent to vision-capable models (non-vision models will raise ValueError).
-     PDFs are attached when supported by the model, otherwise a text extraction
-     fallback is used. LiteLLM proxy handles the specific encoding requirements
-     for each provider.
+
+     VISION/PDF MODEL COMPATIBILITY WARNING:
+     Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+     Non-vision models will raise ValueError when encountering image documents.
+     PDFs require models with document processing support - check your model's capabilities
+     before including PDF documents in messages. Unsupported models may fall back to
+     text extraction or raise errors depending on provider configuration.
+     LiteLLM proxy handles the specific encoding requirements for each provider.

      IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
      expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
      of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).

+     CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+     `AIMessages(["text"])`) or empty constructor with append (e.g.,
+     `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+     constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
+     accidental character iteration.
+
      Example:
          >>> from ai_pipeline_core import llm
          >>> messages = AIMessages()
@@ -65,6 +77,127 @@ class AIMessages(list[AIMessageType]):
          >>> messages.append(response)  # Add the actual response
      """

+     def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
+         """Initialize AIMessages with optional iterable.
+
+         Args:
+             iterable: Optional iterable of messages (list, tuple, etc.).
+                 Must not be a string.
+             frozen: If True, list is immutable from creation.
+
+         Raises:
+             TypeError: If a string is passed directly to the constructor.
+         """
+         if isinstance(iterable, str):
+             raise TypeError(
+                 "AIMessages cannot be constructed from a string directly. "
+                 "Use AIMessages(['text']) for a single message or "
+                 "AIMessages() and then append('text')."
+             )
+         self._frozen = False  # Initialize as unfrozen to allow initial population
+         if iterable is None:
+             super().__init__()
+         else:
+             super().__init__(iterable)
+         self._frozen = frozen  # Set frozen state after initial population
+
+     def freeze(self) -> None:
+         """Permanently freeze the list, preventing modifications.
+
+         Once frozen, the list cannot be unfrozen.
+         """
+         self._frozen = True
+
+     def copy(self) -> "AIMessages":
+         """Create an unfrozen deep copy of the list.
+
+         Returns:
+             New unfrozen AIMessages with deep-copied messages.
+         """
+         copied_messages = deepcopy(list(self))
+         return AIMessages(copied_messages, frozen=False)
+
+     def _check_frozen(self) -> None:
+         """Check if list is frozen and raise if it is.
+
+         Raises:
+             RuntimeError: If the list is frozen.
+         """
+         if self._frozen:
+             raise RuntimeError("Cannot modify frozen AIMessages")
+
+     def append(self, message: AIMessageType) -> None:
+         """Add a message to the end of the list."""
+         self._check_frozen()
+         super().append(message)
+
+     def extend(self, messages: Iterable[AIMessageType]) -> None:
+         """Add multiple messages to the list."""
+         self._check_frozen()
+         super().extend(messages)
+
+     def insert(self, index: SupportsIndex, message: AIMessageType) -> None:
+         """Insert a message at the specified position."""
+         self._check_frozen()
+         super().insert(index, message)
+
+     def __setitem__(
+         self,
+         index: Union[SupportsIndex, slice],
+         value: Union[AIMessageType, Iterable[AIMessageType]],
+     ) -> None:
+         """Set item or slice."""
+         self._check_frozen()
+         super().__setitem__(index, value)  # type: ignore[arg-type]
+
+     def __iadd__(self, other: Iterable[AIMessageType]) -> "AIMessages":
+         """In-place addition (+=).
+
+         Returns:
+             This AIMessages instance after modification.
+         """
+         self._check_frozen()
+         return super().__iadd__(other)
+
+     def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
+         """Delete item or slice from list."""
+         self._check_frozen()
+         super().__delitem__(index)
+
+     def pop(self, index: SupportsIndex = -1) -> AIMessageType:
+         """Remove and return item at index.
+
+         Returns:
+             AIMessageType removed from the list.
+         """
+         self._check_frozen()
+         return super().pop(index)
+
+     def remove(self, message: AIMessageType) -> None:
+         """Remove first occurrence of message."""
+         self._check_frozen()
+         super().remove(message)
+
+     def clear(self) -> None:
+         """Remove all items from list."""
+         self._check_frozen()
+         super().clear()
+
+     def reverse(self) -> None:
+         """Reverse list in place."""
+         self._check_frozen()
+         super().reverse()
+
+     def sort(
+         self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
+     ) -> None:
+         """Sort list in place."""
+         self._check_frozen()
+         if key is None:
+             super().sort(reverse=reverse)  # type: ignore[call-arg]
+         else:
+             super().sort(key=key, reverse=reverse)
+
      def get_last_message(self) -> AIMessageType:
          """Get the last message in the conversation.

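The constructor guard and freeze/copy methods added above change how AIMessages instances are built and mutated in 0.2.1. A minimal usage sketch, assuming AIMessages is imported from the package root as in the package's own examples (variable names are illustrative):

    from ai_pipeline_core import AIMessages

    # A bare string now raises TypeError; wrap single messages in a list.
    messages = AIMessages(["What is Python?"])
    messages.append("Follow-up question")  # allowed while unfrozen

    messages.freeze()                      # permanent: there is no unfreeze
    # messages.append("more")              # would raise RuntimeError: Cannot modify frozen AIMessages

    draft = messages.copy()                # deep copy, always returned unfrozen
    draft.append("Safe to extend the copy")
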
@@ -24,7 +24,6 @@ from pydantic import BaseModel

  from ai_pipeline_core.exceptions import LLMError
  from ai_pipeline_core.settings import settings
- from ai_pipeline_core.tracing import trace

  from .ai_messages import AIMessages
  from .model_options import ModelOptions
@@ -60,9 +59,9 @@ def _process_messages(
      - Regular messages without caching

      System Prompt Location:
-         The system prompt from ModelOptions.system_prompt is always injected
-         as the FIRST message with role="system". It is NOT cached with context,
-         allowing dynamic system prompts without breaking cache efficiency.
+         The system prompt parameter is always injected as the FIRST message
+         with role="system". It is NOT cached with context, allowing dynamic
+         system prompts without breaking cache efficiency.

      Cache behavior:
          The last context message gets ephemeral caching with specified TTL
@@ -221,7 +220,6 @@ async def _generate_with_retry(
      raise LLMError("Unknown error occurred during LLM generation.")


- @trace(ignore_inputs=["context"])
  async def generate(
      model: ModelName,
      *,
@@ -238,9 +236,10 @@ async def generate(
      expensive static content separately from dynamic queries.

      Best Practices:
-         1. OPTIONS: Omit in 90% of cases - defaults are optimized
+         1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
          2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
          3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+         4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

      Args:
          model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
@@ -250,8 +249,11 @@ async def generate(
          messages: Dynamic messages/queries. AIMessages or str ONLY.
              Do not pass Document or DocumentList directly.
              If string, converted to AIMessages internally.
-         options: Model configuration (temperature, retries, timeout, etc.).
-             Defaults to None (uses ModelOptions() with standard settings).
+         options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+             Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+             Configure model behavior centrally via LiteLLM proxy settings or environment
+             variables, not per API call. Provider-specific settings should be configured
+             at the proxy level.

      Returns:
          ModelResponse containing:
@@ -276,17 +278,26 @@ async def generate(
          # WRONG - don't convert to string yourself
          response = await llm.generate("gpt-5", messages=my_document.text)  # NO!

+     VISION/PDF MODEL COMPATIBILITY:
+         When using Documents containing images or PDFs, ensure your model supports these formats:
+         - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+         - PDFs require document processing support (varies by provider)
+         - Non-compatible models will raise ValueError or fall back to text extraction
+         - Check model capabilities before including visual/PDF content
+
      Context vs Messages Strategy:
-         context: Static, reusable content (cached 120 seconds)
+         context: Static, reusable content for caching efficiency
              - Large documents, instructions, examples
-             - Same across multiple calls
+             - Remains constant across multiple calls
+             - Cached when supported by provider/proxy configuration

-         messages: Dynamic, query-specific content
+         messages: Dynamic, per-call specific content
              - User questions, current conversation turn
-             - Changes every call
+             - Changes with each API call
+             - Never cached, always processed fresh

      Example:
-         >>> # Simple case - no options needed (90% of cases)
+         >>> # CORRECT - No options parameter (this is the recommended pattern)
          >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
          >>> print(response.content)  # In production, use get_pipeline_logger instead of print

@@ -300,29 +311,6 @@ async def generate(
          >>> # Second call: reuses cache, saves tokens!
          >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")

-         >>> # Custom cache TTL for longer-lived contexts
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     context=static_doc,
-         ...     messages="Analyze this",
-         ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
-         ... )
-
-         >>> # Disable caching when context changes frequently
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     context=dynamic_doc,
-         ...     messages="Process this",
-         ...     options=ModelOptions(cache_ttl=None)  # No caching
-         ... )
-
-         >>> # AVOID unnecessary options (defaults are optimal)
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     messages="Hello",
-         ...     options=ModelOptions(temperature=0.7)  # Default is probably fine!
-         ... )
-
          >>> # Multi-turn conversation
          >>> messages = AIMessages([
          ...     "What is Python?",
@@ -331,31 +319,48 @@ async def generate(
          ... ])
          >>> response = await llm.generate("gpt-5", messages=messages)

+     Configuration via LiteLLM Proxy:
+         >>> # Configure temperature in litellm_config.yaml:
+         >>> # model_list:
+         >>> #   - model_name: gpt-5
+         >>> #     litellm_params:
+         >>> #       model: openai/gpt-4o
+         >>> #       temperature: 0.3
+         >>> #       max_tokens: 1000
+         >>>
+         >>> # Configure retry logic in proxy:
+         >>> # general_settings:
+         >>> #   master_key: sk-1234
+         >>> #   max_retries: 5
+         >>> #   retry_delay: 15
+
      Performance:
          - Context caching saves ~50-90% tokens on repeated calls
          - First call: full token cost
          - Subsequent calls (within cache TTL): only messages tokens
-         - Default cache TTL is 120s (configurable via ModelOptions.cache_ttl)
-         - Default retry delay is 10s (configurable via ModelOptions.retry_delay_seconds)
+         - Default cache TTL is 120s (production-optimized)
+         - Default retry logic: 3 attempts with 10s delay (production-optimized)

      Caching:
          When enabled in your LiteLLM proxy and supported by the upstream provider,
          context messages may be cached to reduce token usage on repeated calls.
-         Default TTL is 120s, configurable via ModelOptions.cache_ttl (e.g. "300s", "5m").
-         Set cache_ttl=None to disable caching. Savings depend on provider and payload;
-         treat this as an optimization, not a guarantee. Cache behavior varies by proxy
-         configuration.
+         Default TTL is 120s (optimized for production workloads). Configure caching
+         behavior centrally via your LiteLLM proxy settings, not per API call.
+         Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+     Configuration:
+         All model behavior should be configured at the LiteLLM proxy level:
+         - Temperature, max_tokens: Set in litellm_config.yaml model_list
+         - Retry logic: Configure in proxy general_settings
+         - Timeouts: Set via proxy configuration
+         - Caching: Enable/configure in proxy cache settings
+
+         This centralizes configuration and ensures consistency across all API calls.

      Note:
-         - Context argument is ignored by the tracer to avoid recording large data
          - All models are accessed via LiteLLM proxy
          - Automatic retry with configurable delay between attempts
          - Cost tracking via response headers
-
-     See Also:
-         - generate_structured: For typed/structured output
-         - AIMessages: Message container with document support
-         - ModelOptions: Configuration options
      """
      if isinstance(messages, str):
          messages = AIMessages([messages])
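Given the deprecation of per-call options above, callers that previously tuned temperature or cache TTL per request should drop the argument and rely on proxy-level configuration. A short sketch of the recommended 0.2.1 call pattern (function and variable names are illustrative):

    from ai_pipeline_core import llm, AIMessages

    async def summarize(doc_text: str) -> str:
        # Static, potentially cacheable content goes in context;
        # the per-call question goes in messages. No options argument.
        context = AIMessages([doc_text])
        response = await llm.generate(
            "gpt-5",
            context=context,
            messages="Summarize the key points.",
        )
        return response.content
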
@@ -375,7 +380,6 @@ T = TypeVar("T", bound=BaseModel)
  """Type variable for Pydantic model types in structured generation."""


- @trace(ignore_inputs=["context"])
  async def generate_structured(
      model: ModelName,
      response_format: type[T],
@@ -391,18 +395,71 @@ async def generate_structured(
      Type-safe generation that returns validated Pydantic model instances.
      Uses OpenAI's structured output feature for guaranteed schema compliance.

+     IMPORTANT: Search models (models with '-search' suffix) do not support
+     structured output. Use generate() instead for search models.
+
      Best Practices:
-         Same as generate() - see generate() documentation for details.
+         1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
+         2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
+         3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+         4. See generate() documentation for more details
+
+     Context vs Messages Strategy:
+         context: Static, reusable content for caching efficiency
+             - Schemas, examples, instructions
+             - Remains constant across multiple calls
+             - Cached when supported by provider/proxy configuration
+
+         messages: Dynamic, per-call specific content
+             - Data to be structured, user queries
+             - Changes with each API call
+             - Never cached, always processed fresh
+
+     Complex Task Pattern:
+         For complex tasks like research or deep analysis, it's recommended to use
+         a two-step approach:
+         1. First use generate() with a capable model to perform the analysis
+         2. Then use generate_structured() with a smaller model to convert the
+            response into structured output
+
+         This pattern is more reliable than trying to force complex reasoning
+         directly into structured format:
+
+         >>> # Step 1: Research/analysis with generate() - no options parameter
+         >>> research = await llm.generate(
+         ...     "gpt-5",
+         ...     messages="Research and analyze this complex topic..."
+         ... )
+         >>>
+         >>> # Step 2: Structure the results with generate_structured()
+         >>> structured = await llm.generate_structured(
+         ...     "gpt-5-mini",  # Smaller model is fine for structuring
+         ...     response_format=ResearchSummary,
+         ...     messages=f"Extract key information: {research.content}"
+         ... )

      Args:
          model: Model to use (must support structured output).
+             Search models (models with '-search' suffix) do not support structured output.
          response_format: Pydantic model class defining the output schema.
              The model will generate JSON matching this schema.
          context: Static context to cache (documents, schemas, examples).
              Defaults to None (empty AIMessages).
          messages: Dynamic prompts/queries. AIMessages or str ONLY.
              Do not pass Document or DocumentList directly.
-         options: Model configuration. response_format is set automatically.
+         options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+             Framework defaults are production-optimized. Configure model behavior
+             centrally via LiteLLM proxy settings, not per API call.
+             The response_format is set automatically from the response_format parameter.
+
+     VISION/PDF MODEL COMPATIBILITY:
+         When using Documents with images/PDFs in structured output:
+         - Images require vision-capable models that also support structured output
+         - PDFs require models with both document processing AND structured output support
+         - Many models support either vision OR structured output, but not both
+         - Test your specific model+document combination before production use
+         - Consider two-step approach: generate() for analysis, then generate_structured()
+           for formatting

      Returns:
          StructuredModelResponse[T] containing:
@@ -412,6 +469,7 @@ async def generate_structured(
      Raises:
          TypeError: If response_format is not a Pydantic model class.
          ValueError: If model doesn't support structured output or no parsed content returned.
+             Structured output support varies by provider and model.
          LLMError: If generation fails after retries.
          ValidationError: If response cannot be parsed into response_format.

@@ -423,8 +481,9 @@ async def generate_structured(
          ...     sentiment: float = Field(ge=-1, le=1)
          ...     key_points: list[str] = Field(max_length=5)
          >>>
+         >>> # CORRECT - No options parameter
          >>> response = await llm.generate_structured(
-         ...     model="gpt-5",
+         ...     "gpt-5",
          ...     response_format=Analysis,
          ...     messages="Analyze this product review: ..."
          ... )
@@ -435,11 +494,13 @@ async def generate_structured(
          ...     print(f"- {point}")

      Supported models:
-         Support varies by provider and model. Generally includes:
+         Structured output support varies by provider and model. Generally includes:
          - OpenAI: GPT-4 and newer models
          - Anthropic: Claude 3+ models
          - Google: Gemini Pro models
-         Check provider documentation for specific model support.
+
+         Search models (models with '-search' suffix) do not support structured output.
+         Check provider documentation for specific support.

      Performance:
          - Structured output may use more tokens than free text
@@ -451,11 +512,7 @@ async def generate_structured(
          - The model generates JSON matching the schema
          - Validation happens automatically via Pydantic
          - Use Field() descriptions to guide generation
-
-     See Also:
-         - generate: For unstructured text generation
-         - ModelOptions: Configuration including response_format
-         - StructuredModelResponse: Response wrapper with .parsed property
+         - Search models (models with '-search' suffix) do not support structured output
      """
      if context is None:
          context = AIMessages()
@@ -467,6 +524,8 @@ async def generate_structured(
      if isinstance(messages, str):
          messages = AIMessages([messages])

+     assert isinstance(messages, AIMessages)
+
      # Call the internal generate function with structured output enabled
      try:
          response = await _generate_with_retry(model, context, messages, options)
@@ -498,9 +557,3 @@ async def generate_structured(

      # Create a StructuredModelResponse with the parsed value
      return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
-
-
- # Public aliases for testing internal functions
- # These are exported to allow testing of implementation details
- process_messages_for_testing = _process_messages
- generate_with_retry_for_testing = _generate_with_retry
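The two-step pattern recommended in the docstring above can be written end to end as follows; the ResearchSummary schema here is illustrative, since the docstring references it without defining it:

    from pydantic import BaseModel, Field
    from ai_pipeline_core import llm

    class ResearchSummary(BaseModel):
        topic: str
        key_findings: list[str] = Field(max_length=5)

    async def research_and_structure(question: str) -> ResearchSummary:
        # Step 1: free-form analysis with a capable model.
        research = await llm.generate("gpt-5", messages=question)
        # Step 2: a smaller model converts the prose into structured output.
        structured = await llm.generate_structured(
            "gpt-5-mini",
            response_format=ResearchSummary,
            messages=f"Extract key information: {research.content}",
        )
        return structured.parsed  # validated ResearchSummary instance
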
@@ -1,7 +1,5 @@
  """Configuration options for LLM generation.

- @public
-
  Provides the ModelOptions class for configuring model behavior,
  retry logic, and advanced features like web search and reasoning.
  """
@@ -14,8 +12,6 @@ from pydantic import BaseModel
  class ModelOptions(BaseModel):
      """Configuration options for LLM generation requests.

-     @public
-
      ModelOptions encapsulates all configuration parameters for model
      generation, including model behavior settings, retry logic, and
      advanced features. All fields are optional with sensible defaults.
@@ -68,7 +64,8 @@ class ModelOptions(BaseModel):

      response_format: Pydantic model class for structured output.
          Pass a Pydantic model; the client converts it to JSON Schema.
-         Set automatically by generate_structured(). Provider support varies.
+         Set automatically by generate_structured().
+         Structured output support varies by provider and model.

      Example:
          >>> # Basic configuration
@@ -162,11 +159,13 @@ class ModelOptions(BaseModel):
          Note:
              - system_prompt is handled separately in _process_messages()
              - retries and retry_delay_seconds are used by retry logic
-             - extra_body is always included for potential extensions
+             - extra_body always includes usage tracking for cost monitoring
          """
          kwargs: dict[str, Any] = {
              "timeout": self.timeout,
-             "extra_body": {},
+             "extra_body": {
+                 "usage": {"include": True},  # For openrouter cost tracking
+             },
          }

          if self.temperature:
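In effect, every request body built from ModelOptions now asks the proxy to report usage. A standalone mirror of the added construction (the 300-second timeout is the default mentioned in the generate() docstring; the real method may add further keys):

    from typing import Any

    timeout = 300  # documented default timeout, in seconds

    kwargs: dict[str, Any] = {
        "timeout": timeout,
        "extra_body": {
            "usage": {"include": True},  # OpenRouter-style cost tracking on every call
        },
    }
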
@@ -2,7 +2,7 @@

  @public

- Provides enhanced response classes that wrap OpenAI API responses
+ Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
  with additional metadata, cost tracking, and structured output support.
  """

@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):

      Primary usage is adding to AIMessages for multi-turn conversations:

-     >>> response = await llm.generate(messages=messages)
-     >>> messages.add(response)  # Add assistant response to conversation
+     >>> response = await llm.generate("gpt-5", messages=messages)
+     >>> messages.append(response)  # Add assistant response to conversation
      >>> print(response.content)  # Access generated text

      The two main interactions with ModelResponse:
@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
      like token usage and cost tracking are available but rarely needed.

      Example:
-         >>> from ai_pipeline_core.llm import AIMessages, generate
+         >>> from ai_pipeline_core import llm, AIMessages
          >>>
-         >>> messages = AIMessages("Explain quantum computing")
-         >>> response = await generate(messages=messages)
+         >>> messages = AIMessages(["Explain quantum computing"])
+         >>> response = await llm.generate("gpt-5", messages=messages)
          >>>
          >>> # Primary usage: add to conversation
-         >>> messages.add(response)
+         >>> messages.append(response)
          >>>
          >>> # Access generated text
          >>> print(response.content)
@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
          @public

          Primary property for accessing the LLM's response text.
-         This covers 99% of use cases with ModelResponse.
+         This is the main property you'll use with ModelResponse.

          Returns:
              Generated text from the model, or empty string if none.

          Example:
-             >>> response = await generate(messages="Hello")
+             >>> response = await generate("gpt-5", messages="Hello")
              >>> text = response.content  # The generated response
              >>>
              >>> # Common pattern: add to messages then use content
-             >>> messages.add(response)
+             >>> messages.append(response)
              >>> if "error" in response.content.lower():
              ...     # Handle error case
          """
@@ -189,8 +189,7 @@ class ModelResponse(ChatCompletion):
          >>> response = await llm.generate(
          ...     "gpt-5",
          ...     context=large_doc,
-         ...     messages="Summarize this",
-         ...     options=ModelOptions(cache_ttl="300s")
+         ...     messages="Summarize this"
          ... )
          >>>
          >>> # Get comprehensive metadata
@@ -292,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          ...     summary: str
          >>>
          >>> response = await generate_structured(
+         ...     "gpt-5",
          ...     response_format=Analysis,
          ...     messages="Analyze this text..."
          ... )
@@ -301,7 +301,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          >>> print(f"Sentiment: {analysis.sentiment}")
          >>>
          >>> # Can add to messages for conversation
-         >>> messages.add(response)
+         >>> messages.append(response)

      The two main interactions:
      1. Accessing .parsed property for the structured data
@@ -377,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          ...     age: int
          >>>
          >>> response = await generate_structured(
+         ...     "gpt-5",
          ...     response_format=UserInfo,
          ...     messages="Extract user info..."
          ... )
@@ -386,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          >>> print(f"{user.name} is {user.age} years old")
          >>>
          >>> # Can also add to messages
-         >>> messages.add(response)
+         >>> messages.append(response)

      Note:
-         Type-safe with full IDE support. This property covers
-         99% of structured response use cases.
+         Type-safe with full IDE support. This is the main property
+         you'll use with structured responses.
      """
      if self._parsed_value is not None:
          return self._parsed_value
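The example updates above also reflect an API rename: responses are now added to a conversation with the standard list append rather than a custom add(). A migration sketch of the multi-turn pattern the docstrings describe (function name is illustrative):

    from ai_pipeline_core import llm, AIMessages

    async def follow_up() -> str:
        messages = AIMessages(["Explain quantum computing"])
        response = await llm.generate("gpt-5", messages=messages)

        # 0.1.x examples used messages.add(response); in 0.2.1 AIMessages is a
        # plain list subclass, so the assistant turn is appended directly.
        messages.append(response)
        messages.append("Now give a one-sentence summary.")

        second = await llm.generate("gpt-5", messages=messages)
        return second.content
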
@@ -21,12 +21,12 @@ ModelName: TypeAlias = (
      # Small models
      "gemini-2.5-flash",
      "gpt-5-mini",
-     "grok-3-mini",
+     "grok-4-fast",
      # Search models
      "gemini-2.5-flash-search",
      "sonar-pro-search",
      "gpt-4o-search",
-     "grok-3-mini-search",
+     "grok-4-fast-search",
  ]
  | str
  )
@@ -47,7 +47,7 @@ Model categories:
      High-capability models for complex tasks requiring deep reasoning,
      nuanced understanding, or creative generation.

- Small models (gemini-2.5-flash, gpt-5-mini, grok-3-mini):
+ Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
      Efficient models optimized for speed and cost, suitable for
      simpler tasks or high-volume processing.

@@ -79,8 +79,4 @@ Note:
      The ModelName type includes both predefined literals and str,
      allowing full flexibility while maintaining IDE support for
      common models.
-
- See Also:
-     - llm.generate: Main generation function
-     - ModelOptions: Model configuration options
  """