ai-pipeline-core 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,24 +29,8 @@ class TaskDocument(Document):
     - Reduces persistent I/O for temporary data
 
     Creating TaskDocuments:
-    **Use the `create` classmethod** for most use cases. It handles automatic
-    conversion of various content types. Only use __init__ when you have bytes.
-
-    >>> from enum import StrEnum
-    >>>
-    >>> # Simple task document:
-    >>> class TempDoc(TaskDocument):
-    ...     pass
-    >>>
-    >>> # With restricted files:
-    >>> class CacheDoc(TaskDocument):
-    ...     class FILES(StrEnum):
-    ...         CACHE = "cache.json"
-    ...         INDEX = "index.dat"
-    >>>
-    >>> # RECOMMENDED - automatic conversion:
-    >>> doc = TempDoc.create(name="temp.json", content={"status": "processing"})
-    >>> doc = CacheDoc.create(name="cache.json", content={"data": [1, 2, 3]})
+    Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
+    See Document.create() for detailed usage examples.
 
     Use Cases:
     - Intermediate transformation results
@@ -71,13 +55,11 @@ class TaskDocument(Document):
         name: str,
         content: bytes,
         description: str | None = None,
+        sources: list[str] = [],
     ) -> None:
         """Initialize a TaskDocument with raw bytes content.
 
-        Important:
-            **Most users should use the `create` classmethod instead of __init__.**
-            The create method provides automatic content conversion for various types
-            (str, dict, list, Pydantic models) while __init__ only accepts bytes.
+        See Document.__init__() for parameter details and usage notes.
 
         Prevents direct instantiation of the abstract TaskDocument class.
         TaskDocument must be subclassed for specific temporary document types.
@@ -86,6 +68,7 @@ class TaskDocument(Document):
             name: Document filename (required, keyword-only)
             content: Document content as raw bytes (required, keyword-only)
             description: Optional human-readable description (keyword-only)
+            sources: Optional list of strings for provenance tracking
 
         Raises:
             TypeError: If attempting to instantiate TaskDocument directly
@@ -114,7 +97,7 @@ class TaskDocument(Document):
         """
         if type(self) is TaskDocument:
            raise TypeError("Cannot instantiate abstract TaskDocument class directly")
-        super().__init__(name=name, content=content, description=description)
+        super().__init__(name=name, content=content, description=description, sources=sources)
 
     @final
     def get_base_type(self) -> Literal["task"]:
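
The new `sources` parameter flows straight through to the base Document
constructor. A minimal sketch of how a subclass might record provenance under
the signature shown above (TempDoc and its values are illustrative, not part
of the package):

    >>> class TempDoc(TaskDocument):
    ...     pass
    >>> doc = TempDoc(
    ...     name="chunk.bin",
    ...     content=b"\x00\x01",
    ...     sources=["input.pdf"],  # free-form provenance strings
    ... )
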
@@ -30,25 +30,11 @@ class TemporaryDocument(Document):
     - Ignored by simple_runner save operations
 
     Creating TemporaryDocuments:
-    **Use the `create` classmethod** for most use cases. It handles automatic
-    conversion of various content types. Only use __init__ when you have bytes.
-
-    >>> # RECOMMENDED - automatic conversion:
-    >>> doc = TemporaryDocument.create(
-    ...     name="api_response.json",
-    ...     content={"status": "ok", "data": [1, 2, 3]}
-    ... )
-    >>> doc = TemporaryDocument.create(
-    ...     name="credentials.txt",
-    ...     content="secret_token_xyz"
-    ... )
-    >>>
-    >>> # Direct constructor - only for bytes:
-    >>> doc = TemporaryDocument(
-    ...     name="binary.dat",
-    ...     content=b"\x00\x01\x02"
-    ... )
-    >>>
+    Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
+    Unlike abstract document types, TemporaryDocument can be instantiated directly.
+    See Document.create() for detailed usage examples.
+
+    >>> doc = TemporaryDocument.create(name="api.json", content={"status": "ok"})
 
     >>> doc.is_temporary  # Always True
 
     Use Cases:
@@ -1,7 +1,7 @@
 """Utility functions for document handling.
 
 Provides helper functions for URL sanitization, naming conventions,
-and canonical key generation used throughout the document system.
+canonical key generation, and hash validation used throughout the document system.
 """
 
 import re
@@ -115,3 +115,66 @@ def canonical_name_key(
             break
 
     return camel_to_snake(name)
+
+
+def is_document_sha256(value: str) -> bool:
+    """Check if a string is a valid base32-encoded SHA256 hash with proper entropy.
+
+    @public
+
+    This function validates that a string is not just formatted like a SHA256 hash,
+    but actually has the entropy characteristics of a real hash. It checks:
+    1. Correct length (52 characters without padding)
+    2. Valid base32 characters (A-Z, 2-7)
+    3. Sufficient entropy (at least 8 unique characters)
+
+    The entropy check prevents false positives like 'AAAAAAA...AAA' from being
+    identified as valid document hashes.
+
+    Args:
+        value: String to check if it's a document SHA256 hash.
+
+    Returns:
+        True if the string appears to be a real base32-encoded SHA256 hash,
+        False otherwise.
+
+    Examples:
+        >>> # Real SHA256 hash
+        >>> is_document_sha256("P3AEMA2PSYILKFYVBUALJLMIYWVZIS2QDI3S5VTMD2X7SOODF2YQ")
+        True
+
+        >>> # Too uniform - lacks entropy
+        >>> is_document_sha256("A" * 52)
+        False
+
+        >>> # Wrong length
+        >>> is_document_sha256("ABC123")
+        False
+
+        >>> # Invalid characters
+        >>> is_document_sha256("a" * 52)  # lowercase
+        False
+    """
+    # Check basic format: exactly 52 uppercase base32 characters
+    try:
+        if not value or len(value) != 52:
+            return False
+    except (TypeError, AttributeError):
+        return False
+
+    # Check if all characters are valid base32 (A-Z, 2-7)
+    try:
+        if not re.match(r"^[A-Z2-7]{52}$", value):
+            return False
+    except TypeError:
+        # re.match raises TypeError for non-string types like bytes
+        return False
+
+    # Check entropy: real SHA256 hashes have high entropy
+    # Require at least 8 unique characters (out of 32 possible in base32)
+    # This prevents patterns like "AAAAAAA..." from being identified as real hashes
+    unique_chars = len(set(value))
+    if unique_chars < 8:
+        return False
+
+    return True
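
The 52-character unpadded form this validator expects is exactly what
base32-encoding a raw SHA256 digest produces; a standard-library sketch (the
package's own hashing helper, if any, is not shown in this diff):

    >>> import base64, hashlib
    >>> digest = hashlib.sha256(b"example content").digest()  # 32 raw bytes
    >>> encoded = base64.b32encode(digest).decode("ascii").rstrip("=")  # drop '====' padding
    >>> len(encoded)
    52
    >>> is_document_sha256(encoded)
    True
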
@@ -60,11 +60,11 @@ class FlowOptions(BaseSettings):
     add flow-specific parameters with appropriate validation.
     """
 
-    core_model: ModelName | str = Field(
+    core_model: ModelName = Field(
         default="gpt-5",
         description="Primary model for complex analysis and generation tasks.",
     )
-    small_model: ModelName | str = Field(
+    small_model: ModelName = Field(
         default="gpt-5-mini",
         description="Fast, cost-effective model for simple tasks and orchestration.",
     )
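
Since ModelName itself now includes str (see the model-name hunk below),
dropping the explicit `| str` tightens the annotation without changing the
accepted values. A sketch of the subclassing pattern the docstring describes,
with a hypothetical extra field:

    >>> from pydantic import Field
    >>> class MyFlowOptions(FlowOptions):
    ...     review_model: ModelName = Field(
    ...         default="gemini-2.5-flash",
    ...         description="Model for lightweight review passes.",
    ...     )
    >>> MyFlowOptions().core_model
    'gpt-5'
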
@@ -8,6 +8,8 @@ from .ai_messages import AIMessages, AIMessageType
 from .client import (
     generate,
     generate_structured,
+    generate_with_retry_for_testing,
+    process_messages_for_testing,
 )
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
@@ -22,4 +24,7 @@ __all__ = [
     "StructuredModelResponse",
     "generate",
     "generate_structured",
+    # Internal functions exposed for testing only
+    "process_messages_for_testing",
+    "generate_with_retry_for_testing",
 ]
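
These names point at the private helpers in .client (see the aliases added at
the end of that module below), so test suites can import them without touching
underscore names. Assuming this package is ai_pipeline_core.llm, which the
diff does not name explicitly:

    >>> from ai_pipeline_core.llm import (
    ...     generate_with_retry_for_testing,
    ...     process_messages_for_testing,
    ... )
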
@@ -63,7 +63,6 @@ class AIMessages(list[AIMessageType]):
     >>> messages.append("What is the capital of France?")
     >>> response = await llm.generate("gpt-5", messages=messages)
     >>> messages.append(response)  # Add the actual response
-    >>> prompt = messages.get_last_message_as_str()  # Get the last message as a string
     """
 
     def get_last_message(self) -> AIMessageType:
@@ -78,8 +77,6 @@ class AIMessages(list[AIMessageType]):
     def get_last_message_as_str(self) -> str:
         """Get the last message as a string, raising if not a string.
 
-        @public
-
         Returns:
             The last message as a string.
 
@@ -38,6 +38,7 @@ def _process_messages(
     context: AIMessages,
     messages: AIMessages,
     system_prompt: str | None = None,
+    cache_ttl: str | None = "120s",
 ) -> list[ChatCompletionMessageParam]:
     """Process and format messages for LLM API consumption.
 
@@ -49,11 +50,13 @@ def _process_messages(
         context: Messages to be cached (typically expensive/static content).
         messages: Regular messages without caching (dynamic queries).
         system_prompt: Optional system instructions for the model.
+        cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+            Set to None or empty string to disable caching.
 
     Returns:
         List of formatted messages ready for API calls, with:
         - System prompt at the beginning (if provided)
-        - Context messages with cache_control on the last one
+        - Context messages with cache_control on the last one (if cache_ttl)
         - Regular messages without caching
 
     System Prompt Location:
@@ -62,8 +65,10 @@ def _process_messages(
         allowing dynamic system prompts without breaking cache efficiency.
 
     Cache behavior:
-        The last context message gets ephemeral caching (120s TTL)
+        The last context message gets ephemeral caching with specified TTL
         to reduce token usage on repeated calls with same context.
+        If cache_ttl is None or empty string (falsy), no caching is applied.
+        Only the last context message receives cache_control to maximize efficiency.
 
     Note:
         This is an internal function used by _generate_with_retry().
@@ -80,11 +85,12 @@ def _process_messages(
     # Use AIMessages.to_prompt() for context
     context_messages = context.to_prompt()
 
-    # Apply caching to last context message
-    context_messages[-1]["cache_control"] = {  # type: ignore
-        "type": "ephemeral",
-        "ttl": "120s",  # Cache for 2m
-    }
+    # Apply caching to last context message if cache_ttl is set
+    if cache_ttl:
+        context_messages[-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": cache_ttl,
+        }
 
     processed_messages.extend(context_messages)
 
@@ -173,7 +179,9 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
-    processed_messages = _process_messages(context, messages, options.system_prompt)
+    processed_messages = _process_messages(
+        context, messages, options.system_prompt, options.cache_ttl
+    )
     completion_kwargs: dict[str, Any] = {
         "model": model,
         "messages": processed_messages,
@@ -215,7 +223,7 @@ async def _generate_with_retry(
 
 @trace(ignore_inputs=["context"])
 async def generate(
-    model: ModelName | str,
+    model: ModelName,
     *,
     context: AIMessages | None = None,
     messages: AIMessages | str,
@@ -236,7 +244,7 @@ async def generate(
 
     Args:
         model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
-            Can be ModelName literal or any string for custom models.
+            Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
             Defaults to None (empty context). Cached for 120 seconds.
         messages: Dynamic messages/queries. AIMessages or str ONLY.
@@ -292,6 +300,22 @@ async def generate(
         >>> # Second call: reuses cache, saves tokens!
         >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
 
+        >>> # Custom cache TTL for longer-lived contexts
+        >>> response = await llm.generate(
+        ...     "gpt-5",
+        ...     context=static_doc,
+        ...     messages="Analyze this",
+        ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
+        ... )
+
+        >>> # Disable caching when context changes frequently
+        >>> response = await llm.generate(
+        ...     "gpt-5",
+        ...     context=dynamic_doc,
+        ...     messages="Process this",
+        ...     options=ModelOptions(cache_ttl=None)  # No caching
+        ... )
+
         >>> # AVOID unnecessary options (defaults are optimal)
         >>> response = await llm.generate(
         ...     "gpt-5",
@@ -310,14 +334,17 @@ async def generate(
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
        - First call: full token cost
-        - Subsequent calls (within 120s): only messages tokens
+        - Subsequent calls (within cache TTL): only messages tokens
+        - Default cache TTL is 120s (configurable via ModelOptions.cache_ttl)
        - Default retry delay is 10s (configurable via ModelOptions.retry_delay_seconds)
 
     Caching:
         When enabled in your LiteLLM proxy and supported by the upstream provider,
-        context messages may be cached (typical TTL ~120s) to reduce token usage on
-        repeated calls. Savings depend on provider and payload; treat this as an
-        optimization, not a guarantee. Cache behavior varies by proxy configuration.
+        context messages may be cached to reduce token usage on repeated calls.
+        Default TTL is 120s, configurable via ModelOptions.cache_ttl (e.g. "300s", "5m").
+        Set cache_ttl=None to disable caching. Savings depend on provider and payload;
+        treat this as an optimization, not a guarantee. Cache behavior varies by proxy
+        configuration.
 
     Note:
         - Context argument is ignored by the tracer to avoid recording large data
@@ -350,7 +377,7 @@ T = TypeVar("T", bound=BaseModel)
 
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName | str,
+    model: ModelName,
     response_format: type[T],
     *,
     context: AIMessages | None = None,
@@ -364,10 +391,8 @@ async def generate_structured(
     Type-safe generation that returns validated Pydantic model instances.
     Uses OpenAI's structured output feature for guaranteed schema compliance.
 
-    Best Practices (same as generate):
-        1. OPTIONS: Omit in 90% of cases - defaults are optimized
-        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
-        3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+    Best Practices:
+        Same as generate() - see generate() documentation for details.
 
     Args:
         model: Model to use (must support structured output).
@@ -473,3 +498,9 @@ async def generate_structured(
 
     # Create a StructuredModelResponse with the parsed value
     return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+
+
+# Public aliases for testing internal functions
+# These are exported to allow testing of implementation details
+process_messages_for_testing = _process_messages
+generate_with_retry_for_testing = _generate_with_retry
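
For orientation, a minimal structured call under the signature above; the
KeyPoints model is illustrative, and the parsed_value accessor is inferred
from the StructuredModelResponse constructor shown in the last context lines,
so the actual property name may differ:

    >>> from pydantic import BaseModel
    >>> class KeyPoints(BaseModel):
    ...     title: str
    ...     points: list[str]
    >>> result = await llm.generate_structured(
    ...     "gpt-5", KeyPoints, messages="List the key changes in 0.1.14"
    ... )
    >>> result.parsed_value.title  # accessor name inferred, may differ
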
@@ -49,6 +49,10 @@ class ModelOptions(BaseModel):
 
         timeout: Maximum seconds to wait for response (default: 300).
 
+        cache_ttl: Cache TTL for context messages (default: "120s").
+            String format like "60s", "5m", or None to disable caching.
+            Applied to the last context message for efficient token reuse.
+
         service_tier: API tier selection for performance/cost trade-offs.
             "auto": Let API choose
             "default": Standard tier
@@ -79,6 +83,18 @@ class ModelOptions(BaseModel):
         ...     temperature=0.3  # Lower for code generation
         ... )
         >>>
+        >>> # With custom cache TTL
+        >>> options = ModelOptions(
+        ...     cache_ttl="300s",  # Cache context for 5 minutes
+        ...     max_completion_tokens=1000
+        ... )
+        >>>
+        >>> # Disable caching
+        >>> options = ModelOptions(
+        ...     cache_ttl=None,  # No context caching
+        ...     temperature=0.5
+        ... )
+        >>>
         >>> # For search-enabled models
         >>> options = ModelOptions(
         ...     search_context_size="high",  # Get more search results
@@ -96,6 +112,7 @@ class ModelOptions(BaseModel):
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
     """
 
     temperature: float | None = None
@@ -105,6 +122,7 @@ class ModelOptions(BaseModel):
     retries: int = 3
     retry_delay_seconds: int = 10
     timeout: int = 300
+    cache_ttl: str | None = "120s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     response_format: type[BaseModel] | None = None
@@ -146,36 +146,83 @@ class ModelResponse(ChatCompletion):
         self.headers = copy.deepcopy(headers)
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-        tracing and monitoring in the LMNR platform.
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
             Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
             - litellm.*: All LiteLLM-specific headers
-            - gen_ai.usage.*: Token usage statistics
+            - gen_ai.usage.prompt_tokens: Input token count
+            - gen_ai.usage.completion_tokens: Output token count
+            - gen_ai.usage.total_tokens: Total tokens used
+            - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+            - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+            - gen_ai.usage.output_cost: Generation cost in dollars
+            - gen_ai.usage.cost: Alternative cost field (same value)
+            - gen_ai.cost: Simple cost field (same value)
             - gen_ai.response.*: Response identifiers
-            - gen_ai.cost: Cost information
             - model_options.*: Configuration used
 
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
+
+            Cost is stored in three fields for compatibility:
+            - gen_ai.usage.output_cost (standard)
+            - gen_ai.usage.cost (alternative)
+            - gen_ai.cost (simple)
+
         Example:
-            >>> response = await llm.generate(...)
+            >>> response = await llm.generate(
+            ...     "gpt-5",
+            ...     context=large_doc,
+            ...     messages="Summarize this",
+            ...     options=ModelOptions(cache_ttl="300s")
+            ... )
+            >>>
+            >>> # Get comprehensive metadata
             >>> metadata = response.get_laminar_metadata()
-            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+            >>>
+            >>> # Track generation cost
+            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
+            >>> if cost > 0:
+            ...     print(f"Generation cost: ${cost:.4f}")
+            >>>
+            >>> # Monitor token usage
+            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
+            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
+            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
+            >>>
+            >>> # Check cache effectiveness
+            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
+            >>> if cached > 0:
+            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
+            ...     savings = (cached / total) * 100
+            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
+            >>>
+            >>> # Calculate cost per token
+            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
+            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
+            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
 
         Note:
-            Used internally by the tracing system for observability.
-            Cost is extracted from headers or usage object.
+            - Cost availability depends on LiteLLM proxy configuration
+            - Not all providers return cost information
+            - Cached tokens reduce actual cost but may not be reflected
+            - Used internally by tracing but accessible for cost analysis
         """
         metadata: dict[str, str | int | float] = {}
 
@@ -12,28 +12,32 @@ Model categories:
 
 from typing import Literal, TypeAlias
 
-ModelName: TypeAlias = Literal[
-    # Core models
-    "gemini-2.5-pro",
-    "gpt-5",
-    "grok-4",
-    # Small models
-    "gemini-2.5-flash",
-    "gpt-5-mini",
-    "grok-3-mini",
-    # Search models
-    "gemini-2.5-flash-search",
-    "sonar-pro-search",
-    "gpt-4o-search",
-    "grok-3-mini-search",
-]
-"""Type-safe model name identifiers.
+ModelName: TypeAlias = (
+    Literal[
+        # Core models
+        "gemini-2.5-pro",
+        "gpt-5",
+        "grok-4",
+        # Small models
+        "gemini-2.5-flash",
+        "gpt-5-mini",
+        "grok-3-mini",
+        # Search models
+        "gemini-2.5-flash-search",
+        "sonar-pro-search",
+        "gpt-4o-search",
+        "grok-3-mini-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
 @public
 
-Provides compile-time validation and IDE autocompletion for supported
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
 Note: These are example common model names as of Q3 2025. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
@@ -51,32 +55,30 @@ Model categories:
     Models with integrated web search capabilities for retrieving
     and synthesizing current information.
 
-Extending with custom models:
-    The generate functions accept any string, not just ModelName literals.
-    To add custom models for type safety:
-    1. Create a new type alias: CustomModel = Literal["my-model"]
-    2. Use Union: model: ModelName | CustomModel = "my-model"
-    3. Or simply use strings: model = "any-model-via-litellm"
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
 Example:
     >>> from ai_pipeline_core import llm, ModelName
    >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
+    >>> # Predefined model with IDE autocomplete
+    >>> model: ModelName = "gpt-5"  # IDE suggests common models
    >>> response = await llm.generate(model, messages="Hello")
    >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
+    >>> # Custom model works directly
+    >>> model: ModelName = "custom-model-v2"  # Any string is valid
+    >>> response = await llm.generate(model, messages="Hello")
    >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
+    >>> # Both types work seamlessly
+    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
 
 Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
+    The ModelName type includes both predefined literals and str,
+    allowing full flexibility while maintaining IDE support for
+    common models.
 
 See Also:
     - llm.generate: Main generation function