ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. ai_pipeline_core/__init__.py +25 -14
  2. ai_pipeline_core/documents/__init__.py +2 -1
  3. ai_pipeline_core/documents/document.py +317 -49
  4. ai_pipeline_core/documents/document_list.py +136 -33
  5. ai_pipeline_core/documents/flow_document.py +8 -29
  6. ai_pipeline_core/documents/task_document.py +6 -27
  7. ai_pipeline_core/documents/temporary_document.py +6 -27
  8. ai_pipeline_core/documents/utils.py +64 -1
  9. ai_pipeline_core/flow/config.py +174 -5
  10. ai_pipeline_core/flow/options.py +2 -2
  11. ai_pipeline_core/llm/__init__.py +6 -1
  12. ai_pipeline_core/llm/ai_messages.py +14 -7
  13. ai_pipeline_core/llm/client.py +143 -55
  14. ai_pipeline_core/llm/model_options.py +20 -5
  15. ai_pipeline_core/llm/model_response.py +77 -29
  16. ai_pipeline_core/llm/model_types.py +38 -40
  17. ai_pipeline_core/logging/__init__.py +0 -2
  18. ai_pipeline_core/logging/logging_config.py +0 -6
  19. ai_pipeline_core/logging/logging_mixin.py +2 -10
  20. ai_pipeline_core/pipeline.py +68 -65
  21. ai_pipeline_core/prefect.py +12 -3
  22. ai_pipeline_core/prompt_manager.py +6 -7
  23. ai_pipeline_core/settings.py +13 -5
  24. ai_pipeline_core/simple_runner/__init__.py +1 -11
  25. ai_pipeline_core/simple_runner/cli.py +13 -12
  26. ai_pipeline_core/simple_runner/simple_runner.py +34 -172
  27. ai_pipeline_core/storage/__init__.py +8 -0
  28. ai_pipeline_core/storage/storage.py +628 -0
  29. ai_pipeline_core/tracing.py +110 -26
  30. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
  31. ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
  32. ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
  33. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
  34. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
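Most of the hunks below are docstring updates, but a few user-facing changes in the llm package are visible in this diff: the model name is now passed as the first positional argument to the generate functions, assistant responses are added to a conversation with AIMessages.append rather than AIMessages.add, ModelOptions gains a cache_ttl field (default "120s"), and ModelName now also accepts arbitrary strings. A minimal sketch of the 0.2.0-style calls, assembled only from the docstring examples in the hunks below; the asyncio wrapper and the ModelOptions import path are assumptions:

    import asyncio

    from ai_pipeline_core import llm, AIMessages
    from ai_pipeline_core.llm.model_options import ModelOptions  # module path taken from the file list above

    async def main() -> None:
        messages = AIMessages(["Explain quantum computing"])

        # 0.2.0: model name is the first positional argument; any string is accepted.
        response = await llm.generate("gpt-5", messages=messages)

        # 0.2.0: append (not add) the assistant response to the conversation history.
        messages.append(response)
        print(response.content)

        # New in 0.2.0: context-cache TTL, enabled by default ("120s").
        options = ModelOptions(cache_ttl="300s", max_completion_tokens=1000)
        print(options.cache_ttl)  # how options are wired into generate is not shown in this diff

    asyncio.run(main())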
ai_pipeline_core/llm/model_options.py
@@ -1,7 +1,5 @@
 """Configuration options for LLM generation.
 
-@public
-
 Provides the ModelOptions class for configuring model behavior,
 retry logic, and advanced features like web search and reasoning.
 """
@@ -14,8 +12,6 @@ from pydantic import BaseModel
 class ModelOptions(BaseModel):
     """Configuration options for LLM generation requests.
 
-    @public
-
     ModelOptions encapsulates all configuration parameters for model
     generation, including model behavior settings, retry logic, and
     advanced features. All fields are optional with sensible defaults.
@@ -49,6 +45,10 @@ class ModelOptions(BaseModel):
 
         timeout: Maximum seconds to wait for response (default: 300).
 
+        cache_ttl: Cache TTL for context messages (default: "120s").
+            String format like "60s", "5m", or None to disable caching.
+            Applied to the last context message for efficient token reuse.
+
         service_tier: API tier selection for performance/cost trade-offs.
             "auto": Let API choose
             "default": Standard tier
@@ -64,7 +64,8 @@
 
         response_format: Pydantic model class for structured output.
             Pass a Pydantic model; the client converts it to JSON Schema.
-            Set automatically by generate_structured(). Provider support varies.
+            Set automatically by generate_structured().
+            Structured output support varies by provider and model.
 
     Example:
         >>> # Basic configuration
@@ -79,6 +80,18 @@
         ...     temperature=0.3  # Lower for code generation
         ... )
         >>>
+        >>> # With custom cache TTL
+        >>> options = ModelOptions(
+        ...     cache_ttl="300s",  # Cache context for 5 minutes
+        ...     max_completion_tokens=1000
+        ... )
+        >>>
+        >>> # Disable caching
+        >>> options = ModelOptions(
+        ...     cache_ttl=None,  # No context caching
+        ...     temperature=0.5
+        ... )
+        >>>
         >>> # For search-enabled models
         >>> options = ModelOptions(
         ...     search_context_size="high",  # Get more search results
@@ -96,6 +109,7 @@
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
     """
 
     temperature: float | None = None
@@ -105,6 +119,7 @@
     retries: int = 3
     retry_delay_seconds: int = 10
     timeout: int = 300
+    cache_ttl: str | None = "120s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     response_format: type[BaseModel] | None = None
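The new cache_ttl field defaults to "120s" and takes duration strings like "60s", "5m", or "1h", or None to disable context caching. A short sketch mirroring the docstring examples above; the import path is inferred from the file list and ModelOptions may also be re-exported elsewhere:

    from ai_pipeline_core.llm.model_options import ModelOptions  # path inferred from the file list

    default_options = ModelOptions()            # context messages cached for 120 seconds
    assert default_options.cache_ttl == "120s"

    long_cache = ModelOptions(cache_ttl="300s", max_completion_tokens=1000)  # 5-minute cache window
    no_cache = ModelOptions(cache_ttl=None, temperature=0.5)                 # disable context caching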
ai_pipeline_core/llm/model_response.py
@@ -2,7 +2,7 @@
 
 @public
 
-Provides enhanced response classes that wrap OpenAI API responses
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):
 
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate(messages=messages)
-    >>> messages.add(response)  # Add assistant response to conversation
+    >>> response = await llm.generate("gpt-5", messages=messages)
+    >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
     The two main interactions with ModelResponse:
@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
     like token usage and cost tracking are available but rarely needed.
 
     Example:
-        >>> from ai_pipeline_core.llm import AIMessages, generate
+        >>> from ai_pipeline_core import llm, AIMessages
         >>>
-        >>> messages = AIMessages("Explain quantum computing")
-        >>> response = await generate(messages=messages)
+        >>> messages = AIMessages(["Explain quantum computing"])
+        >>> response = await llm.generate("gpt-5", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
-        >>> messages.add(response)
+        >>> messages.append(response)
         >>>
         >>> # Access generated text
         >>> print(response.content)
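The updated examples establish the 0.2.0 conversation loop: build an AIMessages container, pass the model name positionally, and append each response back into the history. A hedged two-turn sketch using only the calls shown above; appending a plain string as the next user turn is an assumption:

    import asyncio

    from ai_pipeline_core import llm, AIMessages

    async def two_turns() -> str:
        messages = AIMessages(["Explain quantum computing"])

        first = await llm.generate("gpt-5", messages=messages)
        messages.append(first)  # keep the assistant turn in the history

        messages.append("Now summarize that in one sentence.")  # assumed: str turns, as in AIMessages([...])
        second = await llm.generate("gpt-5", messages=messages)
        return second.content

    print(asyncio.run(two_turns()))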
@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
         @public
 
         Primary property for accessing the LLM's response text.
-        This covers 99% of use cases with ModelResponse.
+        This is the main property you'll use with ModelResponse.
 
         Returns:
             Generated text from the model, or empty string if none.
 
         Example:
-            >>> response = await generate(messages="Hello")
+            >>> response = await generate("gpt-5", messages="Hello")
             >>> text = response.content  # The generated response
             >>>
             >>> # Common pattern: add to messages then use content
-            >>> messages.add(response)
+            >>> messages.append(response)
             >>> if "error" in response.content.lower():
             ...     # Handle error case
         """
@@ -146,36 +146,82 @@ class ModelResponse(ChatCompletion):
         self.headers = copy.deepcopy(headers)
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-        tracing and monitoring in the LMNR platform.
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
             Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
            - litellm.*: All LiteLLM-specific headers
-           - gen_ai.usage.*: Token usage statistics
+           - gen_ai.usage.prompt_tokens: Input token count
+           - gen_ai.usage.completion_tokens: Output token count
+           - gen_ai.usage.total_tokens: Total tokens used
+           - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+           - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+           - gen_ai.usage.output_cost: Generation cost in dollars
+           - gen_ai.usage.cost: Alternative cost field (same value)
+           - gen_ai.cost: Simple cost field (same value)
            - gen_ai.response.*: Response identifiers
-           - gen_ai.cost: Cost information
            - model_options.*: Configuration used
 
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
+
+            Cost is stored in three fields for compatibility:
+            - gen_ai.usage.output_cost (standard)
+            - gen_ai.usage.cost (alternative)
+            - gen_ai.cost (simple)
+
         Example:
-            >>> response = await llm.generate(...)
+            >>> response = await llm.generate(
+            ...     "gpt-5",
+            ...     context=large_doc,
+            ...     messages="Summarize this"
+            ... )
+            >>>
+            >>> # Get comprehensive metadata
             >>> metadata = response.get_laminar_metadata()
-            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+            >>>
+            >>> # Track generation cost
+            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
+            >>> if cost > 0:
+            ...     print(f"Generation cost: ${cost:.4f}")
+            >>>
+            >>> # Monitor token usage
+            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
+            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
+            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
+            >>>
+            >>> # Check cache effectiveness
+            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
+            >>> if cached > 0:
+            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
+            ...     savings = (cached / total) * 100
+            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
+            >>>
+            >>> # Calculate cost per token
+            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
+            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
+            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
 
         Note:
-            Used internally by the tracing system for observability.
-            Cost is extracted from headers or usage object.
+            - Cost availability depends on LiteLLM proxy configuration
+            - Not all providers return cost information
+            - Cached tokens reduce actual cost but may not be reflected
+            - Used internally by tracing but accessible for cost analysis
         """
 
         metadata: dict[str, str | int | float] = {}
@@ -245,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
         ...     summary: str
         >>>
         >>> response = await generate_structured(
+        ...     "gpt-5",
         ...     response_format=Analysis,
         ...     messages="Analyze this text..."
         ... )
@@ -254,7 +301,7 @@
         >>> print(f"Sentiment: {analysis.sentiment}")
         >>>
         >>> # Can add to messages for conversation
-        >>> messages.add(response)
+        >>> messages.append(response)
 
     The two main interactions:
     1. Accessing .parsed property for the structured data
@@ -330,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
             ...     age: int
             >>>
             >>> response = await generate_structured(
+            ...     "gpt-5",
             ...     response_format=UserInfo,
             ...     messages="Extract user info..."
             ... )
@@ -339,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
             >>> print(f"{user.name} is {user.age} years old")
             >>>
             >>> # Can also add to messages
-            >>> messages.add(response)
+            >>> messages.append(response)
 
         Note:
-            Type-safe with full IDE support. This property covers
-            99% of structured response use cases.
+            Type-safe with full IDE support. This is the main property
+            you'll use with structured responses.
         """
         if self._parsed_value is not None:
             return self._parsed_value
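Assembled from the StructuredModelResponse examples above, a self-contained structured-output call could look like this sketch; the generate_structured import location is assumed, since the docstrings do not show it:

    import asyncio

    from pydantic import BaseModel

    from ai_pipeline_core.llm import generate_structured  # import location assumed

    class UserInfo(BaseModel):
        name: str
        age: int

    async def extract() -> UserInfo:
        response = await generate_structured(
            "gpt-5",
            response_format=UserInfo,
            messages="Extract user info: Alice is 30 years old.",
        )
        user = response.parsed  # typed access to the validated structure
        print(f"{user.name} is {user.age} years old")
        return user

    asyncio.run(extract())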
ai_pipeline_core/llm/model_types.py
@@ -12,28 +12,32 @@ Model categories:
 
 from typing import Literal, TypeAlias
 
-ModelName: TypeAlias = Literal[
-    # Core models
-    "gemini-2.5-pro",
-    "gpt-5",
-    "grok-4",
-    # Small models
-    "gemini-2.5-flash",
-    "gpt-5-mini",
-    "grok-3-mini",
-    # Search models
-    "gemini-2.5-flash-search",
-    "sonar-pro-search",
-    "gpt-4o-search",
-    "grok-3-mini-search",
-]
-"""Type-safe model name identifiers.
+ModelName: TypeAlias = (
+    Literal[
+        # Core models
+        "gemini-2.5-pro",
+        "gpt-5",
+        "grok-4",
+        # Small models
+        "gemini-2.5-flash",
+        "gpt-5-mini",
+        "grok-3-mini",
+        # Search models
+        "gemini-2.5-flash-search",
+        "sonar-pro-search",
+        "gpt-4o-search",
+        "grok-3-mini-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
 @public
 
-Provides compile-time validation and IDE autocompletion for supported
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
 Note: These are example common model names as of Q3 2025. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
@@ -51,34 +55,28 @@ Model categories:
     Models with integrated web search capabilities for retrieving
     and synthesizing current information.
 
-Extending with custom models:
-    The generate functions accept any string, not just ModelName literals.
-    To add custom models for type safety:
-    1. Create a new type alias: CustomModel = Literal["my-model"]
-    2. Use Union: model: ModelName | CustomModel = "my-model"
-    3. Or simply use strings: model = "any-model-via-litellm"
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
 Example:
     >>> from ai_pipeline_core import llm, ModelName
     >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
+    >>> # Predefined model with IDE autocomplete
+    >>> model: ModelName = "gpt-5"  # IDE suggests common models
     >>> response = await llm.generate(model, messages="Hello")
     >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
+    >>> # Custom model works directly
+    >>> model: ModelName = "custom-model-v2"  # Any string is valid
+    >>> response = await llm.generate(model, messages="Hello")
     >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
+    >>> # Both types work seamlessly
+    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
 
 Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
-
-See Also:
-    - llm.generate: Main generation function
-    - ModelOptions: Model configuration options
+    The ModelName type includes both predefined literals and str,
+    allowing full flexibility while maintaining IDE support for
+    common models.
 """
ai_pipeline_core/logging/__init__.py
@@ -1,7 +1,5 @@
 """Logging infrastructure for AI Pipeline Core.
 
-@public
-
 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
 
ai_pipeline_core/logging/logging_config.py
@@ -1,7 +1,5 @@
 """Centralized logging configuration for AI Pipeline Core.
 
-@public
-
 Provides logging configuration management that integrates with Prefect's logging system.
 """
 
@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
 class LoggingConfig:
     """Manages logging configuration for the pipeline.
 
-    @public
-
     Provides centralized logging configuration with Prefect integration.
 
     Configuration precedence:
@@ -144,8 +140,6 @@ _logging_config: Optional[LoggingConfig] = None
 def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
     """Setup logging for the AI Pipeline Core library.
 
-    @public
-
     Initializes logging configuration for the pipeline system.
 
     IMPORTANT: Call setup_logging exactly once in your application entry point
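The docstrings spell out the intended pattern: call setup_logging exactly once at the application entry point, then obtain loggers through get_pipeline_logger instead of logging.getLogger. A sketch, assuming both names are importable from ai_pipeline_core.logging (the export location is not shown in this diff):

    # Import locations assumed; adjust to where your installation exports these names.
    from ai_pipeline_core.logging import get_pipeline_logger, setup_logging

    def main() -> None:
        setup_logging(level="INFO")             # call exactly once, at the entry point
        logger = get_pipeline_logger(__name__)  # preferred over logging.getLogger
        logger.info("pipeline starting")

    if __name__ == "__main__":
        main()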
ai_pipeline_core/logging/logging_mixin.py
@@ -1,7 +1,4 @@
-"""Logging mixin for consistent logging across components using Prefect logging.
-
-@public
-"""
+"""Logging mixin for consistent logging across components using Prefect logging."""
 
 import contextlib
 import time
@@ -17,8 +14,6 @@ from prefect.logging import get_logger
 class LoggerMixin:
     """Mixin class that provides consistent logging functionality using Prefect's logging system.
 
-    @public
-
     Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
     The mixin's internal behavior routes to the appropriate backend; you should not call
     logging.getLogger directly.
@@ -94,10 +89,7 @@ class LoggerMixin:
 
 
 class StructuredLoggerMixin(LoggerMixin):
-    """Extended mixin for structured logging with Prefect.
-
-    @public
-    """
+    """Extended mixin for structured logging with Prefect."""
 
     def log_event(self, event: str, **kwargs: Any) -> None:
         """Log a structured event.