ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +83 -119
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +14 -15
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +349 -1062
- ai_pipeline_core/documents/mime_type.py +40 -85
- ai_pipeline_core/documents/utils.py +62 -7
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +5 -3
- ai_pipeline_core/llm/ai_messages.py +284 -73
- ai_pipeline_core/llm/client.py +462 -209
- ai_pipeline_core/llm/model_options.py +86 -53
- ai_pipeline_core/llm/model_response.py +187 -241
- ai_pipeline_core/llm/model_types.py +34 -54
- ai_pipeline_core/logging/__init__.py +2 -9
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -43
- ai_pipeline_core/logging/logging_mixin.py +17 -51
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/observability/tracing.py +640 -0
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +26 -105
- ai_pipeline_core/settings.py +41 -32
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core/documents/flow_document.py +0 -128
- ai_pipeline_core/documents/task_document.py +0 -133
- ai_pipeline_core/documents/temporary_document.py +0 -95
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -314
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -717
- ai_pipeline_core/prefect.py +0 -54
- ai_pipeline_core/simple_runner/__init__.py +0 -24
- ai_pipeline_core/simple_runner/cli.py +0 -255
- ai_pipeline_core/simple_runner/simple_runner.py +0 -385
- ai_pipeline_core/tracing.py +0 -475
- ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/logging/__init__.py

```diff
@@ -1,14 +1,7 @@
 """Logging infrastructure for AI Pipeline Core.
 
-@public
-
 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
-Example:
-    >>> from ai_pipeline_core import get_pipeline_logger
-    >>> logger = get_pipeline_logger(__name__)
-    >>> logger.info("Processing started")
 """
 
 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -16,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin
 
 __all__ = [
     "LoggerMixin",
-    "StructuredLoggerMixin",
     "LoggingConfig",
-    "setup_logging",
+    "StructuredLoggerMixin",
     "get_pipeline_logger",
+    "setup_logging",
 ]
```
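
The 0.4.1 release strips the doctest-style examples from these docstrings, but the usage they documented is unchanged: the facade functions survive as context lines above. A minimal sketch based on those removed docstrings, using only names exported in `__all__`:

```python
# Sketch based on the docstrings removed in this diff; setup_logging must be
# called exactly once at the application entry point.
from ai_pipeline_core.logging import get_pipeline_logger, setup_logging


def main() -> None:
    setup_logging()  # configure once, before any pipeline code runs
    logger = get_pipeline_logger(__name__)  # preferred over logging.getLogger
    logger.info("Processing started")


if __name__ == "__main__":
    main()
```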
ai_pipeline_core/logging/logging_config.py

```diff
@@ -1,14 +1,12 @@
 """Centralized logging configuration for AI Pipeline Core.
 
-@public
-
 Provides logging configuration management that integrates with Prefect's logging system.
 """
 
 import logging.config
 import os
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any
 
 import yaml
 from prefect.logging import get_logger
@@ -18,7 +16,7 @@ DEFAULT_LOG_LEVELS = {
     "ai_pipeline_core": "INFO",
     "ai_pipeline_core.documents": "INFO",
     "ai_pipeline_core.llm": "INFO",
-    "ai_pipeline_core.
+    "ai_pipeline_core.pipeline": "INFO",
     "ai_pipeline_core.testing": "DEBUG",
 }
 
@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
 class LoggingConfig:
     """Manages logging configuration for the pipeline.
 
-    @public
-
     Provides centralized logging configuration with Prefect integration.
 
     Configuration precedence:
@@ -36,22 +32,19 @@
     3. PREFECT_LOGGING_SETTINGS_PATH environment variable
     4. Default configuration
 
-    Example:
-        >>> config = LoggingConfig()
-        >>> config.apply()
     """
 
-    def __init__(self, config_path: Optional[Path] = None):
+    def __init__(self, config_path: Path | None = None):
         """Initialize logging configuration.
 
         Args:
             config_path: Optional path to YAML configuration file.
         """
         self.config_path = config_path or self._get_default_config_path()
-        self._config: Optional[Dict[str, Any]] = None
+        self._config: dict[str, Any] | None = None
 
     @staticmethod
-    def _get_default_config_path() -> Optional[Path]:
+    def _get_default_config_path() -> Path | None:
         """Get default config path from environment variables.
 
         Returns:
@@ -67,7 +60,7 @@ class LoggingConfig:
 
         return None
 
-    def load_config(self) -> Dict[str, Any]:
+    def load_config(self) -> dict[str, Any]:
         """Load logging configuration from file or defaults.
 
         Returns:
@@ -75,7 +68,7 @@
         """
         if self._config is None:
             if self.config_path and self.config_path.exists():
-                with open(self.config_path, "r") as f:
+                with open(self.config_path, encoding="utf-8") as f:
                     self._config = yaml.safe_load(f)
             else:
                 self._config = self._get_default_config()
@@ -84,7 +77,7 @@
         return self._config
 
     @staticmethod
-    def _get_default_config() -> Dict[str, Any]:
+    def _get_default_config() -> dict[str, Any]:
         """Get default logging configuration.
 
         Returns:
@@ -99,10 +92,7 @@
                     "datefmt": "%H:%M:%S",
                 },
                 "detailed": {
-                    "format": (
-                        "%(asctime)s | %(levelname)-7s | %(name)s | "
-                        "%(funcName)s:%(lineno)d - %(message)s"
-                    ),
+                    "format": ("%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"),
                     "datefmt": "%Y-%m-%d %H:%M:%S",
                 },
             },
@@ -138,14 +128,12 @@
 
 
 # Global configuration instance
-_logging_config: Optional[LoggingConfig] = None
+_logging_config: LoggingConfig | None = None
 
 
-def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
+def setup_logging(config_path: Path | None = None, level: str | None = None):
     """Setup logging for the AI Pipeline Core library.
 
-    @public
-
     Initializes logging configuration for the pipeline system.
 
    IMPORTANT: Call setup_logging exactly once in your application entry point
@@ -155,18 +143,8 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
        config_path: Optional path to YAML logging configuration file.
        level: Optional log level override (INFO, DEBUG, WARNING, etc.).
 
-    Example:
-        >>> # In your main.py or application entry point:
-        >>> def main():
-        ...     setup_logging()  # Call once at startup
-        ...     # Your application code here
-        ...
-        >>> # Or with custom level:
-        >>> if __name__ == "__main__":
-        ...     setup_logging(level="DEBUG")
-        ...     run_application()
    """
-    global _logging_config
+    global _logging_config  # noqa: PLW0603
 
    _logging_config = LoggingConfig(config_path)
    _logging_config.apply()
@@ -185,22 +163,28 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
 def get_pipeline_logger(name: str):
    """Get a logger for pipeline components.
 
-
-
-
+    Returns a Prefect-integrated logger with the OTel span-event bridge
+    attached. Any log record at INFO+ emitted while an OTel span is
+    recording will be captured as a span event in the trace.
 
    Args:
        name: Logger name, typically __name__.
 
    Returns:
-        Prefect logger instance.
+        Prefect logger instance with bridge handler.
 
-    Example:
-        >>> logger = get_pipeline_logger(__name__)
-        >>> logger.info("Module initialized")
    """
-    # Ensure logging is setup
    if _logging_config is None:
        setup_logging()
 
-    return get_logger(name)
+    logger = get_logger(name)
+
+    # Attach the singleton bridge handler so log records become OTel span events.
+    # The handler is a no-op when no span is recording, so early attachment is safe.
+    from ai_pipeline_core.observability._logging_bridge import get_bridge_handler  # noqa: PLC0415
+
+    handler = get_bridge_handler()
+    if handler not in logger.handlers:
+        logger.addHandler(handler)
+
+    return logger
```
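
The deleted `Example:` blocks above still describe the intended call pattern. A short sketch consistent with the 0.4.1 signatures; `run_application` stands in for your own startup code:

```python
from pathlib import Path

from ai_pipeline_core.logging import setup_logging


def run_application() -> None:
    ...  # placeholder for your pipeline code


if __name__ == "__main__":
    # Precedence per the LoggingConfig docstring: explicit config_path first,
    # then PREFECT_LOGGING_SETTINGS_PATH, then the built-in defaults.
    setup_logging(config_path=Path("logging.yml"), level="DEBUG")
    run_application()
```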
ai_pipeline_core/logging/logging_mixin.py

```diff
@@ -1,13 +1,11 @@
-"""Logging mixin for consistent logging across components using Prefect logging.
-
-@public
-"""
+"""Logging mixin for consistent logging across components using Prefect logging."""
 
 import contextlib
 import time
+from collections.abc import Generator
 from contextlib import contextmanager
 from functools import cached_property
-from typing import Any, Dict, Generator, Optional
+from typing import Any
 
 from prefect import get_run_logger
 from prefect.context import FlowRunContext, TaskRunContext
@@ -17,8 +15,6 @@ from prefect.logging import get_logger
 class LoggerMixin:
     """Mixin class that provides consistent logging functionality using Prefect's logging system.
 
-    @public
-
     Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
     The mixin's internal behavior routes to the appropriate backend; you should not call
     logging.getLogger directly.
@@ -28,7 +24,7 @@ class LoggerMixin:
     - Internal routing when outside flow/task context
     """
 
-    _logger_name: Optional[str] = None
+    _logger_name: str | None = None
 
     @cached_property
     def logger(self):
@@ -37,7 +33,8 @@
             return logger
         return get_logger(self._logger_name or self.__class__.__module__)
 
-    def _get_run_logger(self):
+    @staticmethod
+    def _get_run_logger():
         """Attempt to get Prefect run logger.
 
         Returns:
@@ -61,15 +58,15 @@
         """Log warning message with optional context."""
         self.logger.warning(message, extra=kwargs)
 
-    def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+    def log_error(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
         """Log error message with optional exception info."""
         self.logger.error(message, exc_info=exc_info, extra=kwargs)
 
-    def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+    def log_critical(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
         """Log critical message with optional exception info."""
         self.logger.critical(message, exc_info=exc_info, extra=kwargs)
 
-    def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
+    def log_with_context(self, level: str, message: str, context: dict[str, Any]) -> None:
         """Log message with structured context.
 
         Args:
@@ -77,12 +74,6 @@
             message: Log message
             context: Additional context as dictionary
 
-        Example:
-            self.log_with_context("info", "Processing document", {
-                "document_id": doc.id,
-                "document_size": doc.size,
-                "document_type": doc.type
-            })
         """
         log_method = getattr(self.logger, level.lower(), self.logger.info)
 
@@ -94,10 +85,7 @@
 
 
 class StructuredLoggerMixin(LoggerMixin):
-    """Extended mixin for structured logging with Prefect.
-
-    @public
-    """
+    """Extended mixin for structured logging with Prefect."""
 
     def log_event(self, event: str, **kwargs: Any) -> None:
         """Log a structured event.
@@ -106,11 +94,6 @@
             event: Event name
             **kwargs: Event attributes
 
-        Example:
-            self.log_event("document_processed",
-                document_id=doc.id,
-                duration_ms=processing_time,
-                status="success")
         """
         self.logger.info(event, extra={"event": event, "structured": True, **kwargs})
 
@@ -123,9 +106,6 @@
             unit: Unit of measurement
             **tags: Additional tags
 
-        Example:
-            self.log_metric("processing_time", 1.23, "seconds",
-                document_type="pdf", model="gpt-4")
         """
         self.logger.info(
             f"Metric: {metric_name}",
@@ -146,9 +126,6 @@
             duration_ms: Duration in milliseconds
             **attributes: Additional attributes
 
-        Example:
-            self.log_span("llm_generation", 1234.5,
-                model="gpt-4", tokens=500)
         """
         self.logger.info(
             f"Span: {operation}",
@@ -168,9 +145,6 @@
             operation: Operation name
             **context: Additional context
 
-        Example:
-            with self.log_operation("document_processing", doc_id=doc.id):
-                process_document(doc)
         """
         start_time = time.perf_counter()
 
@@ -179,14 +153,12 @@
         try:
             yield
             duration_ms = (time.perf_counter() - start_time) * 1000
-            self.log_info(
-                f"Completed {operation}", duration_ms=duration_ms, status="success", **context
-            )
+            self.log_info(f"Completed {operation}", duration_ms=duration_ms, status="success", **context)
         except Exception as e:
             # Intentionally broad: Context manager must catch all exceptions to log them
             duration_ms = (time.perf_counter() - start_time) * 1000
             self.log_error(
-                f"Failed {operation}: {str(e)}",
+                f"Failed {operation}: {e!s}",
                 exc_info=True,
                 duration_ms=duration_ms,
                 status="failure",
@@ -198,31 +170,25 @@
 class PrefectLoggerMixin(StructuredLoggerMixin):
     """Enhanced mixin specifically for Prefect flows and tasks."""
 
-    def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
+    def log_flow_start(self, flow_name: str, parameters: dict[str, Any]) -> None:
         """Log flow start with parameters."""
         self.log_event("flow_started", flow_name=flow_name, parameters=parameters)
 
     def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
         """Log flow completion."""
-        self.log_event(
-            "flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
-        )
+        self.log_event("flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms)
 
-    def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
+    def log_task_start(self, task_name: str, inputs: dict[str, Any]) -> None:
         """Log task start with inputs."""
         self.log_event("task_started", task_name=task_name, inputs=inputs)
 
     def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
         """Log task completion."""
-        self.log_event(
-            "task_completed", task_name=task_name, status=status, duration_ms=duration_ms
-        )
+        self.log_event("task_completed", task_name=task_name, status=status, duration_ms=duration_ms)
 
     def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
         """Log retry attempt."""
-        self.log_warning(
-            f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
-        )
+        self.log_warning(f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error)
 
     def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
         """Log a checkpoint in processing."""
```
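
The deleted `Example:` blocks show how the mixins are meant to be used; collected into one runnable sketch (the class and document names are illustrative, the method calls are the ones defined above):

```python
from ai_pipeline_core.logging import StructuredLoggerMixin


class DocumentProcessor(StructuredLoggerMixin):
    """Illustrative consumer of the mixin API shown in this diff."""

    def process(self, doc_id: str) -> None:
        # log_operation times the block and emits success/failure with duration_ms.
        with self.log_operation("document_processing", doc_id=doc_id):
            self.log_event("document_processed", document_id=doc_id, status="success")
            self.log_metric("processing_time", 1.23, "seconds", document_type="pdf")


DocumentProcessor().process("doc-1")
```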
ai_pipeline_core/observability/__init__.py (new file)

```diff
@@ -0,0 +1,32 @@
+"""Observability system for AI pipelines.
+
+Contains debug tracing, ClickHouse-based tracking, and initialization utilities.
+"""
+
+from ai_pipeline_core.observability._debug import (
+    ArtifactStore,
+    ContentRef,
+    ContentWriter,
+    LocalDebugSpanProcessor,
+    LocalTraceWriter,
+    SpanInfo,
+    TraceDebugConfig,
+    TraceState,
+    WriteJob,
+    generate_summary,
+)
+from ai_pipeline_core.observability._debug._content import reconstruct_span_content
+
+__all__ = [
+    "ArtifactStore",
+    "ContentRef",
+    "ContentWriter",
+    "LocalDebugSpanProcessor",
+    "LocalTraceWriter",
+    "SpanInfo",
+    "TraceDebugConfig",
+    "TraceState",
+    "WriteJob",
+    "generate_summary",
+    "reconstruct_span_content",
+]
```
ai_pipeline_core/observability/_debug/__init__.py (new file)

```diff
@@ -0,0 +1,30 @@
+"""Local trace debugging system for AI pipelines.
+
+This module provides filesystem-based trace debugging that saves all spans
+with their inputs/outputs for LLM-assisted debugging. Includes static
+summary generation and LLM-powered auto-summary capabilities.
+
+Enabled automatically in CLI mode (``run_cli``), writing to ``<working_dir>/.trace``.
+Disable with ``--no-trace``.
+"""
+
+from ._config import TraceDebugConfig
+from ._content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
+from ._processor import LocalDebugSpanProcessor
+from ._summary import generate_summary
+from ._types import SpanInfo, TraceState, WriteJob
+from ._writer import LocalTraceWriter
+
+__all__ = [
+    "ArtifactStore",
+    "ContentRef",
+    "ContentWriter",
+    "LocalDebugSpanProcessor",
+    "LocalTraceWriter",
+    "SpanInfo",
+    "TraceDebugConfig",
+    "TraceState",
+    "WriteJob",
+    "generate_summary",
+    "reconstruct_span_content",
+]
```
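
A hedged sketch of wiring the debug processor into an OpenTelemetry tracer provider. Only the class names are confirmed by this diff; the `LocalDebugSpanProcessor` constructor argument is an assumption, and in CLI mode (`run_cli`) this wiring happens automatically:

```python
from pathlib import Path

from opentelemetry.sdk.trace import TracerProvider

from ai_pipeline_core.observability import LocalDebugSpanProcessor, TraceDebugConfig

config = TraceDebugConfig(path=Path(".trace"))  # path is the only required field

# Assumption: the processor is constructed from the debug config; its real
# signature is not shown in this diff.
processor = LocalDebugSpanProcessor(config)

provider = TracerProvider()
provider.add_span_processor(processor)  # standard OTel SDK hook
```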
ai_pipeline_core/observability/_debug/_auto_summary.py (new file)

```diff
@@ -0,0 +1,94 @@
+"""LLM-powered auto-summary generation for trace debugging.
+
+Separated from _summary.py to avoid circular imports: this module depends on
+ai_pipeline_core.llm, which cannot be imported during the initial package load
+chain that includes _debug/__init__.py.
+"""
+
+from pydantic import BaseModel, ConfigDict
+
+from ai_pipeline_core.llm import generate_structured
+from ai_pipeline_core.llm.ai_messages import AIMessages
+from ai_pipeline_core.llm.model_options import ModelOptions
+
+from ._types import TraceState
+
+
+class AutoTraceSummary(BaseModel):
+    """LLM-generated trace analysis."""
+
+    model_config = ConfigDict(frozen=True)
+
+    overview: str
+    outcome: str
+    error_analysis: str
+    bottlenecks: tuple[str, ...] = ()
+    cost_assessment: str
+    recommendations: tuple[str, ...] = ()
+
+
+async def generate_auto_summary(
+    trace: TraceState,  # noqa: ARG001
+    static_summary: str,
+    model: str,
+) -> str | None:
+    """Generate LLM-powered auto-summary of the trace.
+
+    Args:
+        trace: Completed trace state with all span data.
+        static_summary: Pre-generated static summary text used as LLM input context.
+        model: LLM model name for summary generation.
+
+    Returns:
+        Formatted markdown auto-summary string, or None if generation fails.
+    """
+    messages = AIMessages()
+    messages.append(static_summary)
+
+    options = ModelOptions(
+        system_prompt=(
+            "You are analyzing an AI pipeline execution trace. "
+            "Provide concise, actionable analysis based on the execution data. "
+            "Focus on cost efficiency, performance bottlenecks, and errors."
+        ),
+    )
+
+    result = await generate_structured(
+        model=model,
+        response_format=AutoTraceSummary,
+        messages=messages,
+        options=options,
+        purpose="trace_auto_summary",
+    )
+
+    if not result or not result.parsed:
+        return None
+
+    summary = result.parsed
+    lines = [
+        "# Auto-Summary (LLM-Generated)",
+        "",
+        f"**Overview:** {summary.overview}",
+        "",
+        f"**Outcome:** {summary.outcome}",
+        "",
+    ]
+
+    if summary.error_analysis:
+        lines.append(f"**Error Analysis:** {summary.error_analysis}")
+        lines.append("")
+
+    if summary.bottlenecks:
+        lines.append("**Bottlenecks:**")
+        lines.extend(f"- {b}" for b in summary.bottlenecks)
+        lines.append("")
+
+    lines.append(f"**Cost Assessment:** {summary.cost_assessment}")
+    lines.append("")
+
+    if summary.recommendations:
+        lines.append("**Recommendations:**")
+        lines.extend(f"- {r}" for r in summary.recommendations)
+        lines.append("")
+
+    return "\n".join(lines)
```
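
`generate_auto_summary` is plain async code, so it can be driven directly once a trace has completed. A sketch under two assumptions: you already hold a finished `TraceState` (its construction is not shown in this diff), and an LLM backend is configured for `generate_structured`:

```python
import asyncio

from ai_pipeline_core.observability._debug._auto_summary import generate_auto_summary


async def summarize(trace) -> None:
    # The static summary normally comes from generate_summary(); a stub is used here.
    static = "# Trace Summary\n\n12 spans, 0 errors, 3 LLM calls, $0.04 total cost"
    markdown = await generate_auto_summary(trace, static_summary=static, model="gemini-3-flash")
    print(markdown or "auto-summary generation failed")


# asyncio.run(summarize(trace))  # trace: a completed TraceState
```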
ai_pipeline_core/observability/_debug/_config.py (new file)

```diff
@@ -0,0 +1,95 @@
+"""Configuration for local trace debugging."""
+
+from pathlib import Path
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class TraceDebugConfig(BaseModel):
+    """Configuration for local trace debugging.
+
+    Controls how traces are written to the local filesystem for debugging.
+    Enabled automatically in CLI mode, writing to ``<working_dir>/.trace``.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    path: Path = Field(description="Directory for debug traces")
+    enabled: bool = Field(default=True, description="Whether debug tracing is enabled")
+
+    # Content size limits (Issue #2)
+    max_file_bytes: int = Field(
+        default=50_000,
+        description="Max bytes for input.yaml or output.yaml. Elements externalized to stay under.",
+    )
+    max_element_bytes: int = Field(
+        default=10_000,
+        description="Max bytes for single element. Above this, partial + artifact ref.",
+    )
+    element_excerpt_bytes: int = Field(
+        default=2_000,
+        description="Bytes of content to keep inline when element exceeds max_element_bytes.",
+    )
+    max_content_bytes: int = Field(
+        default=10_000_000,
+        description="Max bytes for any single artifact. Above this, truncate.",
+    )
+
+    # Image handling (Issue #7 - no changes per user)
+    extract_base64_images: bool = Field(
+        default=True,
+        description="Extract base64 images to artifact files",
+    )
+
+    # Span optimization (Issue #4)
+    merge_wrapper_spans: bool = Field(
+        default=True,
+        description="Merge Prefect wrapper spans with inner traced function spans",
+    )
+
+    # Indexes (Issue #1)
+    include_llm_index: bool = Field(
+        default=True,
+        description="Generate _llm_calls.yaml with LLM-specific details",
+    )
+    include_error_index: bool = Field(
+        default=True,
+        description="Generate _errors.yaml with failed span details",
+    )
+
+    # Cleanup
+    max_traces: int | None = Field(
+        default=None,
+        description="Max number of traces to keep. None for unlimited.",
+    )
+
+    # Security - default redaction patterns for common secrets
+    redact_patterns: tuple[str, ...] = Field(
+        default=(
+            r"sk-[a-zA-Z0-9]{20,}",  # OpenAI API keys
+            r"sk-proj-[a-zA-Z0-9\-_]{20,}",  # OpenAI project keys
+            r"AKIA[0-9A-Z]{16}",  # AWS access keys
+            r"ghp_[a-zA-Z0-9]{36}",  # GitHub personal tokens
+            r"gho_[a-zA-Z0-9]{36}",  # GitHub OAuth tokens
+            r"xoxb-[a-zA-Z0-9\-]+",  # Slack bot tokens
+            r"xoxp-[a-zA-Z0-9\-]+",  # Slack user tokens
+            r"(?i)password\s*[:=]\s*['\"]?[^\s'\"]+",  # Passwords
+            r"(?i)secret\s*[:=]\s*['\"]?[^\s'\"]+",  # Secrets
+            r"(?i)api[_\-]?key\s*[:=]\s*['\"]?[^\s'\"]+",  # API keys
+            r"(?i)bearer\s+[a-zA-Z0-9\-_\.]+",  # Bearer tokens
+        ),
+        description="Regex patterns for secrets to redact",
+    )
+
+    # Summary
+    generate_summary: bool = Field(default=True, description="Generate _summary.md")
+
+    # Auto-summary (LLM-powered)
+    auto_summary_enabled: bool = Field(
+        default=False,
+        description="Generate LLM-powered auto-summary after trace completion",
+    )
+    auto_summary_model: str = Field(
+        default="gemini-3-flash",
+        description="Model to use for auto-summary generation",
+    )
```