ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +86 -4
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +1107 -131
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +95 -0
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +250 -23
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +308 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +215 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +145 -17
- ai_pipeline_core/simple_runner/simple_runner.py +244 -6
- ai_pipeline_core/tracing.py +232 -30
- ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
- ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py

@@ -1,3 +1,11 @@
+"""Model response structures for LLM interactions.
+
+@public
+
+Provides enhanced response classes that wrap OpenAI API responses
+with additional metadata, cost tracking, and structured output support.
+"""
+
 import copy
 from typing import Any, Generic, TypeVar
 
@@ -5,16 +13,71 @@ from openai.types.chat import ChatCompletion, ParsedChatCompletion
 from pydantic import BaseModel, Field
 
 T = TypeVar("T", bound=BaseModel)
+"""Type parameter for structured response Pydantic models."""
 
 
 class ModelResponse(ChatCompletion):
-    """Response
+    """Response wrapper for LLM text generation.
+
+    @public
+
+    Primary usage is adding to AIMessages for multi-turn conversations:
+
+    >>> response = await llm.generate(messages=messages)
+    >>> messages.add(response)  # Add assistant response to conversation
+    >>> print(response.content)  # Access generated text
+
+    The two main interactions with ModelResponse:
+    1. Adding to AIMessages for conversation flow
+    2. Accessing .content property for the generated text
+
+    Almost all use cases are covered by these two patterns. Advanced features
+    like token usage and cost tracking are available but rarely needed.
+
+    Example:
+        >>> from ai_pipeline_core.llm import AIMessages, generate
+        >>>
+        >>> messages = AIMessages("Explain quantum computing")
+        >>> response = await generate(messages=messages)
+        >>>
+        >>> # Primary usage: add to conversation
+        >>> messages.add(response)
+        >>>
+        >>> # Access generated text
+        >>> print(response.content)
+
+    Note:
+        Inherits from OpenAI's ChatCompletion for compatibility.
+        Other properties (usage, model, id) should only be accessed
+        when absolutely necessary.
+    """
 
     headers: dict[str, str] = Field(default_factory=dict)
     model_options: dict[str, Any] = Field(default_factory=dict)
 
     def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
-        """Initialize ModelResponse from
+        """Initialize ModelResponse from ChatCompletion or kwargs.
+
+        Can be initialized from an existing ChatCompletion object or
+        directly from keyword arguments. Automatically initializes
+        headers dict if not provided.
+
+        Args:
+            chat_completion: Optional ChatCompletion to wrap.
+            **kwargs: Direct initialization parameters if no
+                ChatCompletion provided.
+
+        Example:
+            >>> # From ChatCompletion
+            >>> response = ModelResponse(chat_completion_obj)
+            >>>
+            >>> # Direct initialization (mainly for testing)
+            >>> response = ModelResponse(
+            ...     id="test",
+            ...     model="gpt-5",
+            ...     choices=[...]
+            ... )
+        """
         if chat_completion:
             # Copy all attributes from the ChatCompletion instance
             data = chat_completion.model_dump()
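The class docstring above boils usage down to two interactions: add the response to an AIMessages conversation and read `.content`. A minimal end-to-end sketch of that pattern, based only on the calls shown in the docstrings (it assumes a configured LiteLLM proxy behind the library; `generate` is also documented to accept an explicit model name):

```python
import asyncio

from ai_pipeline_core.llm import AIMessages, generate


async def main() -> None:
    # Start a conversation and generate a reply (pattern from the docstring above)
    messages = AIMessages("Explain quantum computing")
    response = await generate(messages=messages)

    # The two documented interactions with ModelResponse:
    messages.add(response)   # 1. keep the assistant turn in the conversation
    print(response.content)  # 2. read the generated text


asyncio.run(main())
```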
@@ -28,21 +91,92 @@ class ModelResponse(ChatCompletion):
 
     @property
     def content(self) -> str:
-        """Get the text content
+        """Get the generated text content.
+
+        @public
+
+        Primary property for accessing the LLM's response text.
+        This covers 99% of use cases with ModelResponse.
+
+        Returns:
+            Generated text from the model, or empty string if none.
+
+        Example:
+            >>> response = await generate(messages="Hello")
+            >>> text = response.content  # The generated response
+            >>>
+            >>> # Common pattern: add to messages then use content
+            >>> messages.add(response)
+            >>> if "error" in response.content.lower():
+            ...     # Handle error case
+        """
         return self.choices[0].message.content or ""
 
     def set_model_options(self, options: dict[str, Any]) -> None:
-        """
+        """Store the model configuration used for generation.
+
+        Saves a deep copy of the options used for this generation,
+        excluding the messages for brevity.
+
+        Args:
+            options: Dictionary of model options from the API call.
+
+        Note:
+            Messages are removed to avoid storing large prompts.
+            Called internally by the generation functions.
+        """
         self.model_options = copy.deepcopy(options)
         if "messages" in self.model_options:
             del self.model_options["messages"]
 
     def set_headers(self, headers: dict[str, str]) -> None:
-        """
+        """Store HTTP response headers.
+
+        Saves response headers which contain LiteLLM metadata
+        including cost information and call IDs.
+
+        Args:
+            headers: Dictionary of HTTP headers from the response.
+
+        Headers of interest:
+            - x-litellm-response-cost: Generation cost
+            - x-litellm-call-id: Unique call identifier
+            - x-litellm-model-id: Actual model used
+        """
         self.headers = copy.deepcopy(headers)
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for Laminar observability
+        """Extract metadata for LMNR (Laminar) observability.
+
+        Collects comprehensive metadata about the generation for
+        tracing and monitoring in the LMNR platform.
+
+        Returns:
+            Dictionary containing:
+            - LiteLLM headers (call ID, costs, etc.)
+            - Token usage statistics
+            - Model configuration
+            - Cost information
+            - Cached token counts
+            - Reasoning token counts (for O1 models)
+
+        Metadata structure:
+            - litellm.*: All LiteLLM-specific headers
+            - gen_ai.usage.*: Token usage statistics
+            - gen_ai.response.*: Response identifiers
+            - gen_ai.cost: Cost information
+            - model_options.*: Configuration used
+
+        Example:
+            >>> response = await llm.generate(...)
+            >>> metadata = response.get_laminar_metadata()
+            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
+            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+
+        Note:
+            Used internally by the tracing system for observability.
+            Cost is extracted from headers or usage object.
+        """
         metadata: dict[str, str | int | float] = {}
 
         litellm_id = self.headers.get("x-litellm-call-id")
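Beyond `.content`, the docstrings above describe where cost and tracing metadata live: LiteLLM headers stored via `set_headers` and the flattened `gen_ai.*` keys returned by `get_laminar_metadata`. A hedged sketch of reading them (header and key names are taken from the docstrings; whether they are populated depends on the LiteLLM proxy):

```python
import asyncio

from ai_pipeline_core.llm import AIMessages, generate


async def show_usage_metadata() -> None:
    response = await generate(messages=AIMessages("Summarize the changelog"))

    # Cost reported by the LiteLLM proxy, if the header was returned
    cost = response.headers.get("x-litellm-response-cost")

    # Flattened metadata consumed by the tracing/observability layer
    metadata = response.get_laminar_metadata()
    print(cost, metadata.get("gen_ai.usage.total_tokens"))


asyncio.run(show_usage_metadata())
```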
@@ -55,23 +189,19 @@
             metadata[f"litellm.{header_name}"] = value
 
         # Add base metadata
-        metadata.update(
-            {
-                "gen_ai.response.id": litellm_id or self.id,
-                "gen_ai.response.model": self.model,
-                "get_ai.system": "litellm",
-            }
-        )
+        metadata.update({
+            "gen_ai.response.id": litellm_id or self.id,
+            "gen_ai.response.model": self.model,
+            "get_ai.system": "litellm",
+        })
 
         # Add usage metadata if available
         if self.usage:
-            metadata.update(
-                {
-                    "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
-                    "gen_ai.usage.completion_tokens": self.usage.completion_tokens,
-                    "gen_ai.usage.total_tokens": self.usage.total_tokens,
-                }
-            )
+            metadata.update({
+                "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
+                "gen_ai.usage.completion_tokens": self.usage.completion_tokens,
+                "gen_ai.usage.total_tokens": self.usage.total_tokens,
+            })
 
         # Check for cost in usage object
         if hasattr(self.usage, "cost"):
@@ -90,13 +220,11 @@
 
         # Add cost metadata if available
         if cost and cost > 0:
-            metadata.update(
-                {
-                    "gen_ai.usage.output_cost": cost,
-                    "gen_ai.usage.cost": cost,
-                    "get_ai.cost": cost,
-                }
-            )
+            metadata.update({
+                "gen_ai.usage.output_cost": cost,
+                "gen_ai.usage.cost": cost,
+                "get_ai.cost": cost,
+            })
 
         if self.model_options:
             for key, value in self.model_options.items():
@@ -106,7 +234,42 @@
 
 
 class StructuredModelResponse(ModelResponse, Generic[T]):
-    """Response
+    """Response wrapper for structured/typed LLM output.
+
+    @public
+
+    Primary usage is adding to AIMessages and accessing .parsed property:
+
+    >>> class Analysis(BaseModel):
+    ...     sentiment: float
+    ...     summary: str
+    >>>
+    >>> response = await generate_structured(
+    ...     response_format=Analysis,
+    ...     messages="Analyze this text..."
+    ... )
+    >>>
+    >>> # Primary usage: access parsed model
+    >>> analysis = response.parsed
+    >>> print(f"Sentiment: {analysis.sentiment}")
+    >>>
+    >>> # Can add to messages for conversation
+    >>> messages.add(response)
+
+    The two main interactions:
+    1. Accessing .parsed property for the structured data
+    2. Adding to AIMessages for conversation continuity
+
+    These patterns cover virtually all use cases. Advanced features exist
+    but should only be used when absolutely necessary.
+
+    Type Parameter:
+        T: The Pydantic model type for the structured output.
+
+    Note:
+        Extends ModelResponse with type-safe parsed data access.
+        Other inherited properties should rarely be needed.
+    """
 
     def __init__(
         self,
@@ -114,12 +277,27 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
         parsed_value: T | None = None,
         **kwargs: Any,
     ) -> None:
-        """Initialize
+        """Initialize with ChatCompletion and parsed value.
+
+        Creates a structured response from a base completion and
+        optionally a pre-parsed value. Can extract parsed value
+        from ParsedChatCompletion automatically.
 
         Args:
-            chat_completion:
-            parsed_value:
-
+            chat_completion: Base chat completion response.
+            parsed_value: Pre-parsed Pydantic model instance.
+                If None, attempts extraction from
+                ParsedChatCompletion.
+            **kwargs: Additional ChatCompletion parameters.
+
+        Extraction behavior:
+            1. Use provided parsed_value if given
+            2. Extract from ParsedChatCompletion if available
+            3. Store as None (access will raise ValueError)
+
+        Note:
+            Usually created internally by generate_structured().
+            The parsed value is validated by Pydantic automatically.
         """
         super().__init__(chat_completion, **kwargs)
         self._parsed_value: T | None = parsed_value
@@ -133,13 +311,39 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
 
     @property
    def parsed(self) -> T:
-        """Get the parsed
+        """Get the parsed Pydantic model instance.
+
+        @public
+
+        Primary property for accessing structured output.
+        This is the main reason to use generate_structured().
 
         Returns:
-
+            Validated instance of the Pydantic model type T.
 
         Raises:
-            ValueError: If no parsed content
+            ValueError: If no parsed content available (internal error).
+
+        Example:
+            >>> class UserInfo(BaseModel):
+            ...     name: str
+            ...     age: int
+            >>>
+            >>> response = await generate_structured(
+            ...     response_format=UserInfo,
+            ...     messages="Extract user info..."
+            ... )
+            >>>
+            >>> # Primary usage: get the parsed model
+            >>> user = response.parsed
+            >>> print(f"{user.name} is {user.age} years old")
+            >>>
+            >>> # Can also add to messages
+            >>> messages.add(response)
+
+        Note:
+            Type-safe with full IDE support. This property covers
+            99% of structured response use cases.
         """
         if self._parsed_value is not None:
             return self._parsed_value
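For structured output, the StructuredModelResponse docstrings center on `generate_structured` plus the `.parsed` property. A short sketch combining the documented examples (it assumes `generate_structured` is exported from `ai_pipeline_core.llm` alongside `generate`):

```python
import asyncio

from pydantic import BaseModel

from ai_pipeline_core.llm import AIMessages, generate_structured


class UserInfo(BaseModel):
    name: str
    age: int


async def extract_user() -> None:
    messages = AIMessages("Extract user info: Ada Lovelace, 36")
    response = await generate_structured(response_format=UserInfo, messages=messages)

    user = response.parsed  # validated UserInfo instance (type-safe access)
    messages.add(response)  # structured replies can stay in the conversation
    print(f"{user.name} is {user.age} years old")


asyncio.run(extract_user())
```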
ai_pipeline_core/llm/model_types.py

@@ -1,3 +1,15 @@
+"""Model type definitions for LLM interactions.
+
+This module defines type aliases for model names used throughout
+the AI Pipeline Core system. The ModelName type provides type safety
+and IDE support for supported model identifiers.
+
+Model categories:
+    - Core models: High-capability general-purpose models
+    - Small models: Efficient, cost-effective models
+    - Search models: Models with web search capabilities
+"""
+
 from typing import Literal, TypeAlias
 
 ModelName: TypeAlias = Literal[
@@ -15,3 +27,58 @@ ModelName: TypeAlias = Literal[
     "gpt-4o-search",
     "grok-3-mini-search",
 ]
+"""Type-safe model name identifiers.
+
+@public
+
+Provides compile-time validation and IDE autocompletion for supported
+language model names. Used throughout the library to prevent typos
+and ensure only valid models are referenced.
+
+Note: These are example common model names as of Q3 2025. Actual availability
+depends on your LiteLLM proxy configuration and provider access.
+
+Model categories:
+    Core models (gemini-2.5-pro, gpt-5, grok-4):
+        High-capability models for complex tasks requiring deep reasoning,
+        nuanced understanding, or creative generation.
+
+    Small models (gemini-2.5-flash, gpt-5-mini, grok-3-mini):
+        Efficient models optimized for speed and cost, suitable for
+        simpler tasks or high-volume processing.
+
+    Search models (*-search suffix):
+        Models with integrated web search capabilities for retrieving
+        and synthesizing current information.
+
+Extending with custom models:
+    The generate functions accept any string, not just ModelName literals.
+    To add custom models for type safety:
+    1. Create a new type alias: CustomModel = Literal["my-model"]
+    2. Use Union: model: ModelName | CustomModel = "my-model"
+    3. Or simply use strings: model = "any-model-via-litellm"
+
+Example:
+    >>> from ai_pipeline_core import llm, ModelName
+    >>>
+    >>> # Type-safe model selection
+    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
+    >>> response = await llm.generate(model, messages="Hello")
+    >>>
+    >>> # Also accepts string for custom models
+    >>> response = await llm.generate("custom-model-v2", messages="Hello")
+    >>>
+    >>> # Custom type safety
+    >>> from typing import Literal
+    >>> MyModel = Literal["company-llm-v1"]
+    >>> model: ModelName | MyModel = "company-llm-v1"
+
+Note:
+    While the type alias provides suggestions for common models,
+    the generate functions also accept string literals to support
+    custom or newer models accessed via LiteLLM proxy.
+
+See Also:
+    - llm.generate: Main generation function
+    - ModelOptions: Model configuration options
+"""
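The ModelName docstring explains that the literal is a convenience, not a gate: any string reaches the LiteLLM proxy, and a custom Literal union restores type safety for in-house models. A small sketch of that extension pattern (the model name `company-llm-v1` is illustrative only):

```python
from typing import Literal

from ai_pipeline_core import ModelName

# Hypothetical in-house model routed through the LiteLLM proxy
CompanyModel = Literal["company-llm-v1"]


def pick_model(fast: bool) -> ModelName | CompanyModel:
    # ModelName keeps IDE autocompletion for the common names;
    # the union keeps the custom name type-checked as well.
    return "gpt-5-mini" if fast else "company-llm-v1"
```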
ai_pipeline_core/logging/__init__.py

@@ -1,3 +1,16 @@
+"""Logging infrastructure for AI Pipeline Core.
+
+@public
+
+Provides a Prefect-integrated logging facade for unified logging across pipelines.
+Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
+
+Example:
+    >>> from ai_pipeline_core import get_pipeline_logger
+    >>> logger = get_pipeline_logger(__name__)
+    >>> logger.info("Processing started")
+"""
+
 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
 from .logging_mixin import LoggerMixin, StructuredLoggerMixin
 
ai_pipeline_core/logging/logging_config.py

@@ -1,5 +1,11 @@
-"""Centralized logging configuration for AI Pipeline Core
+"""Centralized logging configuration for AI Pipeline Core.
 
+@public
+
+Provides logging configuration management that integrates with Prefect's logging system.
+"""
+
+import logging.config
 import os
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -18,15 +24,39 @@ DEFAULT_LOG_LEVELS = {
 
 
 class LoggingConfig:
-    """Manages logging configuration for the pipeline
+    """Manages logging configuration for the pipeline.
+
+    @public
+
+    Provides centralized logging configuration with Prefect integration.
+
+    Configuration precedence:
+        1. Explicit config_path parameter
+        2. AI_PIPELINE_LOGGING_CONFIG environment variable
+        3. PREFECT_LOGGING_SETTINGS_PATH environment variable
+        4. Default configuration
+
+    Example:
+        >>> config = LoggingConfig()
+        >>> config.apply()
+    """
 
     def __init__(self, config_path: Optional[Path] = None):
+        """Initialize logging configuration.
+
+        Args:
+            config_path: Optional path to YAML configuration file.
+        """
         self.config_path = config_path or self._get_default_config_path()
         self._config: Optional[Dict[str, Any]] = None
 
     @staticmethod
     def _get_default_config_path() -> Optional[Path]:
-        """Get default config path from environment
+        """Get default config path from environment variables.
+
+        Returns:
+            Path to the config file or None if not found.
+        """
         # Check environment variable first
         if env_path := os.environ.get("AI_PIPELINE_LOGGING_CONFIG"):
             return Path(env_path)
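The LoggingConfig docstring spells out the lookup order: explicit path, then AI_PIPELINE_LOGGING_CONFIG, then PREFECT_LOGGING_SETTINGS_PATH, then built-in defaults. A brief sketch of both ends of that precedence (the YAML path is illustrative):

```python
from pathlib import Path

from ai_pipeline_core.logging import LoggingConfig

# Highest precedence: an explicit config path
LoggingConfig(config_path=Path("configs/logging.yaml")).apply()

# No arguments: fall back to the environment variables, then the defaults
LoggingConfig().apply()
```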
@@ -38,7 +68,11 @@ class LoggingConfig:
         return None
 
     def load_config(self) -> Dict[str, Any]:
-        """Load logging configuration from file
+        """Load logging configuration from file or defaults.
+
+        Returns:
+            Dictionary containing logging configuration.
+        """
         if self._config is None:
             if self.config_path and self.config_path.exists():
                 with open(self.config_path, "r") as f:
@@ -51,7 +85,11 @@
 
     @staticmethod
     def _get_default_config() -> Dict[str, Any]:
-        """Get default logging configuration
+        """Get default logging configuration.
+
+        Returns:
+            Default logging configuration dictionary.
+        """
         return {
             "version": 1,
             "disable_existing_loggers": False,
@@ -89,9 +127,7 @@
         }
 
     def apply(self):
-        """Apply the logging configuration"""
-        import logging.config
-
+        """Apply the logging configuration."""
         config = self.load_config()
         logging.config.dictConfig(config)
 
@@ -106,16 +142,29 @@ _logging_config: Optional[LoggingConfig] = None
 
 
 def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
-    """
-
+    """Setup logging for the AI Pipeline Core library.
+
+    @public
+
+    Initializes logging configuration for the pipeline system.
+
+    IMPORTANT: Call setup_logging exactly once in your application entry point
+    (for example, in main()). Do not call at import time or in library modules.
 
     Args:
-        config_path: Optional path to logging configuration file
-        level: Optional
+        config_path: Optional path to YAML logging configuration file.
+        level: Optional log level override (INFO, DEBUG, WARNING, etc.).
 
     Example:
-        >>>
-        >>>
+        >>> # In your main.py or application entry point:
+        >>> def main():
+        ...     setup_logging()  # Call once at startup
+        ...     # Your application code here
+        ...
+        >>> # Or with custom level:
+        >>> if __name__ == "__main__":
+        ...     setup_logging(level="DEBUG")
+        ...     run_application()
     """
     global _logging_config
 
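setup_logging is meant to run exactly once at the entry point, while get_pipeline_logger can be used freely at module level. A minimal entry-point sketch following that advice (import paths follow the public exports shown in this diff):

```python
from ai_pipeline_core import get_pipeline_logger
from ai_pipeline_core.logging import setup_logging

logger = get_pipeline_logger(__name__)  # module-level logger is fine


def main() -> None:
    setup_logging(level="INFO")  # call once, at the application entry point
    logger.info("Pipeline starting")


if __name__ == "__main__":
    main()
```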
@@ -134,18 +183,21 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
 
 
 def get_pipeline_logger(name: str):
-    """
-
+    """Get a logger for pipeline components.
+
+    @public
+
+    Returns a Prefect-integrated logger with proper configuration.
 
     Args:
-        name: Logger name
+        name: Logger name, typically __name__.
 
     Returns:
-
+        Prefect logger instance.
 
     Example:
-        >>> logger = get_pipeline_logger(
-        >>> logger.info("
+        >>> logger = get_pipeline_logger(__name__)
+        >>> logger.info("Module initialized")
     """
     # Ensure logging is setup
     if _logging_config is None:
|