ai-pipeline-core 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
# AI Pipeline Core logging configuration.
# Consumed by logging.config.dictConfig; integrates with Prefect's logging system.
version: 1
disable_existing_loggers: false

formatters:
  # Compact format used for console output.
  standard:
    format: "%(asctime)s.%(msecs)03d | %(levelname)-7s | %(name)s - %(message)s"
    datefmt: "%H:%M:%S"

  # Verbose format (adds function name and line number) used for file output.
  detailed:
    format: "%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"
    datefmt: "%Y-%m-%d %H:%M:%S"

handlers:
  console:
    class: logging.StreamHandler
    formatter: standard
    stream: ext://sys.stdout

  # General-purpose rotating log file.
  # NOTE(review): defined but not attached to any logger below — attach it
  # to a logger's `handlers` list to enable file logging.
  file:
    class: logging.handlers.RotatingFileHandler
    formatter: detailed
    filename: ai_pipeline.log
    maxBytes: 10485760  # 10MB
    backupCount: 5
    encoding: utf-8

  # Rotating file capturing ERROR and above only (attached to root below).
  error_file:
    class: logging.handlers.RotatingFileHandler
    formatter: detailed
    filename: ai_pipeline_errors.log
    maxBytes: 10485760  # 10MB
    backupCount: 5
    level: ERROR
    encoding: utf-8

loggers:
  # AI Pipeline Core loggers. The package root logs to console and does not
  # propagate to the root logger (avoids duplicate lines there).
  ai_pipeline_core:
    level: INFO
    handlers: [console]
    propagate: false

  # Child loggers define levels only; records propagate up to
  # ai_pipeline_core, which owns the console handler.
  ai_pipeline_core.documents:
    level: INFO

  ai_pipeline_core.llm:
    level: INFO

  ai_pipeline_core.flow:
    level: INFO

  ai_pipeline_core.testing:
    level: DEBUG

  # External libraries: keep chatty HTTP/LLM clients quiet below WARNING.
  httpx:
    level: WARNING

  openai:
    level: WARNING

root:
  level: WARNING
  handlers: [console, error_file]
@@ -0,0 +1,154 @@
1
+ """Centralized logging configuration for AI Pipeline Core using Prefect logging"""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Optional
6
+
7
+ import yaml
8
+ from prefect.logging import get_logger
9
+
10
# Default log levels for each pipeline component logger.
# setup_logging(level=...) iterates these names when applying a runtime override.
DEFAULT_LOG_LEVELS = {
    "ai_pipeline_core": "INFO",
    "ai_pipeline_core.documents": "INFO",
    "ai_pipeline_core.llm": "INFO",
    "ai_pipeline_core.flow": "INFO",
    "ai_pipeline_core.testing": "DEBUG",
}
18
+
19
+
20
class LoggingConfig:
    """Manages logging configuration for the pipeline using Prefect logging.

    The config file path is resolved with this precedence:
    1. The ``config_path`` constructor argument.
    2. The ``AI_PIPELINE_LOGGING_CONFIG`` environment variable.
    3. The ``PREFECT_LOGGING_SETTINGS_PATH`` environment variable.
    If no path is available, or the file is missing or empty, a built-in
    default configuration is used.
    """

    def __init__(self, config_path: Optional[Path] = None):
        self.config_path = config_path or self._get_default_config_path()
        # Cached configuration dict; populated lazily by load_config().
        self._config: Optional[Dict[str, Any]] = None

    @staticmethod
    def _get_default_config_path() -> Optional[Path]:
        """Get the default config path from the environment, or None if unset."""
        # Our own setting takes precedence over Prefect's.
        if env_path := os.environ.get("AI_PIPELINE_LOGGING_CONFIG"):
            return Path(env_path)

        if prefect_path := os.environ.get("PREFECT_LOGGING_SETTINGS_PATH"):
            return Path(prefect_path)

        return None

    def load_config(self) -> Dict[str, Any]:
        """Load, cache, and return the logging configuration.

        Returns:
            A configuration dict suitable for logging.config.dictConfig.
        """
        if self._config is None:
            loaded: Optional[Dict[str, Any]] = None
            if self.config_path and self.config_path.exists():
                with open(self.config_path, "r") as f:
                    loaded = yaml.safe_load(f)
            # FIX: yaml.safe_load returns None for an empty file; the previous
            # `assert self._config is not None` crashed on that case (and was
            # stripped under -O, silently caching None). Fall back to the
            # built-in defaults for any empty or missing configuration.
            self._config = loaded if loaded else self._get_default_config()
        return self._config

    @staticmethod
    def _get_default_config() -> Dict[str, Any]:
        """Get the built-in default logging configuration (Prefect-compatible)."""
        return {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "standard": {
                    "format": "%(asctime)s.%(msecs)03d | %(levelname)-7s | %(name)s - %(message)s",
                    "datefmt": "%H:%M:%S",
                },
                "detailed": {
                    "format": (
                        "%(asctime)s | %(levelname)-7s | %(name)s | "
                        "%(funcName)s:%(lineno)d - %(message)s"
                    ),
                    "datefmt": "%Y-%m-%d %H:%M:%S",
                },
            },
            "handlers": {
                "console": {
                    "class": "logging.StreamHandler",
                    "formatter": "standard",
                    "stream": "ext://sys.stdout",
                },
            },
            "loggers": {
                "ai_pipeline_core": {
                    # Level is overridable via the environment even without
                    # a config file.
                    "level": os.environ.get("AI_PIPELINE_LOG_LEVEL", "INFO"),
                    "handlers": ["console"],
                    "propagate": False,
                },
            },
            "root": {
                "level": "WARNING",
                "handlers": ["console"],
            },
        }

    def apply(self):
        """Apply the loaded configuration to the logging system."""
        import logging.config

        config = self.load_config()
        logging.config.dictConfig(config)

        # If the config defines a "prefect" logger, mirror its level into
        # Prefect's environment setting (without clobbering an explicit one).
        if "prefect" in config.get("loggers", {}):
            prefect_level = config["loggers"]["prefect"].get("level", "INFO")
            os.environ.setdefault("PREFECT_LOGGING_LEVEL", prefect_level)
102
+
103
+
104
# Global configuration instance; populated by setup_logging() and used by
# get_pipeline_logger() to trigger lazy setup while it is still None.
_logging_config: Optional[LoggingConfig] = None
106
+
107
+
108
def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
    """
    Configure logging for the AI Pipeline Core library.

    Args:
        config_path: Optional path to a logging configuration file.
        level: Optional log level applied to all pipeline loggers after the
            configuration is loaded (overrides the config's levels).

    Example:
        >>> from ai_pipeline_core.logging_config import setup_logging
        >>> setup_logging(level="DEBUG")
    """
    global _logging_config

    _logging_config = LoggingConfig(config_path)
    _logging_config.apply()

    # Nothing further to do unless an explicit level override was given.
    if not level:
        return

    # An explicit level wins over whatever the configuration specified.
    for logger_name in DEFAULT_LOG_LEVELS:
        get_logger(logger_name).setLevel(level)

    # Propagate the override to Prefect's own logging as well.
    os.environ["PREFECT_LOGGING_LEVEL"] = level
134
+
135
+
136
def get_pipeline_logger(name: str):
    """
    Return a Prefect logger for a pipeline component.

    Runs setup_logging() with defaults on first use, so callers never need
    to configure logging explicitly before requesting a logger.

    Args:
        name: Logger name (e.g., "ai_pipeline_core.documents")

    Returns:
        Logger instance

    Example:
        >>> logger = get_pipeline_logger("ai_pipeline_core.llm")
        >>> logger.info("Starting LLM processing")
    """
    # Lazily initialize the global logging configuration on first call.
    if _logging_config is None:
        setup_logging()

    return get_logger(name)
@@ -0,0 +1,223 @@
1
+ """Logging mixin for consistent logging across components using Prefect logging"""
2
+
3
+ import contextlib
4
+ import time
5
+ from contextlib import contextmanager
6
+ from functools import cached_property
7
+ from typing import Any, Dict, Generator, Optional
8
+
9
+ from prefect import get_run_logger
10
+ from prefect.context import FlowRunContext, TaskRunContext
11
+ from prefect.logging import get_logger
12
+
13
+
14
class LoggerMixin:
    """
    Mixin class that provides consistent logging functionality using Prefect's
    logging system.

    The logger is resolved on every access so the correct one is used as
    execution moves in and out of Prefect contexts:
    - get_run_logger() when in flow/task context
    - get_logger() when outside flow/task context
    """

    # Optional explicit name for the fallback logger; defaults to the
    # concrete subclass's module name.
    _logger_name: Optional[str] = None

    @property
    def logger(self):
        """Get the appropriate logger for the current context.

        FIX: this was previously a functools.cached_property, which pinned
        whichever logger the *first* access resolved — an object created
        outside a flow kept its plain logger even when later used inside a
        flow/task run (and vice versa). A plain property re-resolves the
        logger on each access so context changes are honored.
        """
        if run_logger := self._get_run_logger():
            return run_logger
        return get_logger(self._logger_name or self.__class__.__module__)

    def _get_run_logger(self):
        """Return Prefect's run logger, or None when outside a run context."""
        # Intentionally broad: context inspection must never raise here.
        with contextlib.suppress(Exception):
            if FlowRunContext.get() or TaskRunContext.get():
                return get_run_logger()
        return None

    def log_debug(self, message: str, **kwargs: Any) -> None:
        """Log a debug message; kwargs become the record's ``extra``."""
        self.logger.debug(message, extra=kwargs)

    def log_info(self, message: str, **kwargs: Any) -> None:
        """Log an info message; kwargs become the record's ``extra``."""
        self.logger.info(message, extra=kwargs)

    def log_warning(self, message: str, **kwargs: Any) -> None:
        """Log a warning message; kwargs become the record's ``extra``."""
        self.logger.warning(message, extra=kwargs)

    def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
        """Log an error message, optionally with the active exception's traceback."""
        self.logger.error(message, exc_info=exc_info, extra=kwargs)

    def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
        """Log a critical message, optionally with the active exception's traceback."""
        self.logger.critical(message, exc_info=exc_info, extra=kwargs)

    def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
        """
        Log a message with structured context appended as ``k=v`` pairs.

        Args:
            level: Log level (debug, info, warning, error, critical);
                unknown levels fall back to info.
            message: Log message
            context: Additional context as dictionary

        Example:
            self.log_with_context("info", "Processing document", {
                "document_id": doc.id,
                "document_size": doc.size,
                "document_type": doc.type
            })
        """
        log_method = getattr(self.logger, level.lower(), self.logger.info)

        # Render the context inline so it stays visible with plain formatters
        # that ignore the `extra` payload.
        context_str = " | ".join(f"{k}={v}" for k, v in context.items())
        full_message = f"{message} | {context_str}" if context else message

        log_method(full_message, extra={"context": context})
83
+
84
+
85
class StructuredLoggerMixin(LoggerMixin):
    """
    LoggerMixin variant that adds structured-logging helpers for Prefect:
    named events, metrics, and timed spans/operations.
    """

    def log_event(self, event: str, **kwargs: Any) -> None:
        """
        Log a structured event

        Args:
            event: Event name
            **kwargs: Event attributes

        Example:
            self.log_event("document_processed",
                           document_id=doc.id,
                           duration_ms=processing_time,
                           status="success")
        """
        payload = {"event": event, "structured": True, **kwargs}
        self.logger.info(event, extra=payload)

    def log_metric(self, metric_name: str, value: float, unit: str = "", **tags: Any) -> None:
        """
        Log a metric value

        Args:
            metric_name: Name of the metric
            value: Metric value
            unit: Unit of measurement
            **tags: Additional tags

        Example:
            self.log_metric("processing_time", 1.23, "seconds",
                            document_type="pdf", model="gpt-4")
        """
        details = {
            "metric": metric_name,
            "value": value,
            "unit": unit,
            "tags": tags,
            "structured": True,
        }
        self.logger.info(f"Metric: {metric_name}", extra=details)

    def log_span(self, operation: str, duration_ms: float, **attributes: Any) -> None:
        """
        Log a span (an operation together with its duration)

        Args:
            operation: Operation name
            duration_ms: Duration in milliseconds
            **attributes: Additional attributes

        Example:
            self.log_span("llm_generation", 1234.5,
                          model="gpt-4", tokens=500)
        """
        details = {
            "span": operation,
            "duration_ms": duration_ms,
            "attributes": attributes,
            "structured": True,
        }
        self.logger.info(f"Span: {operation}", extra=details)

    @contextmanager
    def log_operation(self, operation: str, **context: Any) -> Generator[None, None, None]:
        """
        Context manager that logs an operation with timing: a debug line on
        entry, an info line with duration on success, and an error line with
        traceback on failure (the exception is re-raised).

        Args:
            operation: Operation name
            **context: Additional context

        Example:
            with self.log_operation("document_processing", doc_id=doc.id):
                process_document(doc)
        """
        started = time.perf_counter()

        self.log_debug(f"Starting {operation}", **context)

        try:
            yield
            elapsed_ms = (time.perf_counter() - started) * 1000
            self.log_info(
                f"Completed {operation}", duration_ms=elapsed_ms, status="success", **context
            )
        except Exception as exc:
            # Broad by design: every failure must be logged before re-raising.
            elapsed_ms = (time.perf_counter() - started) * 1000
            self.log_error(
                f"Failed {operation}: {exc}",
                exc_info=True,
                duration_ms=elapsed_ms,
                status="failure",
                **context,
            )
            raise
188
+
189
+
190
class PrefectLoggerMixin(StructuredLoggerMixin):
    """
    Structured-logging mixin tailored to Prefect flows and tasks: thin
    wrappers that emit consistently-named lifecycle events.
    """

    def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
        """Record that a flow started, including its parameters."""
        self.log_event("flow_started", flow_name=flow_name, parameters=parameters)

    def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
        """Record that a flow finished, with its status and duration."""
        self.log_event(
            "flow_completed",
            flow_name=flow_name,
            status=status,
            duration_ms=duration_ms,
        )

    def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
        """Record that a task started, including its inputs."""
        self.log_event("task_started", task_name=task_name, inputs=inputs)

    def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
        """Record that a task finished, with its status and duration."""
        self.log_event(
            "task_completed",
            task_name=task_name,
            status=status,
            duration_ms=duration_ms,
        )

    def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
        """Record a retry attempt for an operation as a warning."""
        self.log_warning(
            f"Retrying {operation}",
            attempt=attempt,
            max_attempts=max_attempts,
            error=error,
        )

    def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
        """Record a named checkpoint in processing with arbitrary data."""
        self.log_info(f"Checkpoint: {checkpoint_name}", checkpoint=checkpoint_name, **data)
@@ -0,0 +1,115 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ import jinja2
5
+
6
+ from ai_pipeline_core.logging import get_pipeline_logger
7
+
8
+ from .exceptions import PromptNotFoundError, PromptRenderError
9
+
10
+ logger = get_pipeline_logger(__name__)
11
+
12
+
13
class PromptManager:
    """A utility to load and render Jinja2 prompts from a structured directory.

    Searches for a 'prompts' directory in the calling module's directory and
    its parent directories (only while __init__.py exists in the parents,
    i.e. while still inside the Python package).
    """

    def __init__(self, current_dir: str, prompts_dir: str = "prompts"):
        """Initialize PromptManager with the current file path.

        Args:
            current_dir: The __file__ path of the calling module (required)
            prompts_dir: Name of the prompts directory to search for (default: "prompts")
        """
        search_paths: list[Path] = []

        # Start from the directory containing the calling file.
        current_path = Path(current_dir).resolve()
        if current_path.is_file():
            current_path = current_path.parent

        # The immediate 'prompts' subdirectory, if present, takes precedence.
        local_prompts = current_path / prompts_dir
        if local_prompts.is_dir():
            search_paths.append(local_prompts)

        # The directory itself is always searched, for local templates.
        search_paths.append(current_path)

        # Walk up while still inside the package (__init__.py present),
        # collecting any parent-level 'prompts' directories.
        parent_path = current_path.parent
        max_depth = 4  # reasonable limit to prevent runaway traversal
        depth = 0

        while depth < max_depth:
            if not (parent_path / "__init__.py").exists():
                break

            parent_prompts = parent_path / prompts_dir
            if parent_prompts.is_dir():
                search_paths.append(parent_prompts)

            parent_path = parent_path.parent
            depth += 1

        # NOTE: search_paths is never empty here (current_path is always
        # appended above), so no fallback is needed.
        self.search_paths = search_paths

        # Jinja2 environment spanning all discovered search paths.
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(self.search_paths),
            trim_blocks=True,
            lstrip_blocks=True,
            autoescape=False,  # Important for prompt engineering
        )

    def _resolve_template(self, prompt_path: str):
        """Return the first template matching prompt_path, trying the
        '.jinja2' and '.jinja' suffixes when they are not already present.

        Raises:
            PromptNotFoundError: If no candidate name resolves.
        """
        candidates = [prompt_path]
        if not prompt_path.endswith(".jinja2"):
            candidates.append(prompt_path + ".jinja2")
        if not prompt_path.endswith(".jinja"):
            candidates.append(prompt_path + ".jinja")

        for candidate in candidates:
            try:
                return self.env.get_template(candidate)
            except jinja2.TemplateNotFound:
                continue

        raise PromptNotFoundError(
            f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
        )

    def get(self, prompt_path: str, **kwargs: Any) -> str:
        """
        Renders a specific prompt with the given context.

        Args:
            prompt_path: The path to the prompt file relative to the `prompts`
                directory (e.g., 'step_01_process_inputs/summarize_document.jinja2').
                The .jinja2 extension will be added automatically if missing.
            **kwargs: Variables to be injected into the template.

        Returns:
            The rendered prompt string.

        Raises:
            PromptNotFoundError: If the template cannot be located.
            PromptRenderError: If the template fails to load or render.
        """
        # FIX: resolution and rendering were previously mixed in one try
        # block: a dead `except PromptNotFoundError` clause (exceptions
        # raised inside an except handler are never re-matched by the same
        # try), and render errors from the extension-fallback paths escaped
        # as raw jinja2 exceptions. Separating the two stages gives every
        # path a uniform error mapping.
        try:
            template = self._resolve_template(prompt_path)
        except jinja2.TemplateError as e:
            # e.g. a syntax error while loading the template source.
            raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e

        try:
            return template.render(**kwargs)
        except jinja2.TemplateError as e:
            raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
        except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
            logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
            raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e
File without changes
@@ -0,0 +1,24 @@
1
+ """Core configuration settings for pipeline operations."""
2
+
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+
6
class Settings(BaseSettings):
    """Core settings for pipeline operations.

    Values are read from the environment and an optional .env file via
    pydantic-settings; unknown environment variables are ignored
    (extra="ignore"). All fields default to empty strings, so nothing is
    required at import time.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # LLM API Configuration (OpenAI-compatible endpoint and credentials)
    openai_base_url: str = ""
    openai_api_key: str = ""

    # Prefect Configuration
    prefect_api_url: str = ""
    prefect_api_key: str = ""

    # Observability (lmnr project API key)
    lmnr_project_api_key: str = ""


# Create a single, importable instance of the settings
# (environment and .env are read once, at import time).
settings = Settings()