ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/logging/__init__.py
@@ -1,14 +1,7 @@
  """Logging infrastructure for AI Pipeline Core.

- @public
-
  Provides a Prefect-integrated logging facade for unified logging across pipelines.
  Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
- Example:
- >>> from ai_pipeline_core import get_pipeline_logger
- >>> logger = get_pipeline_logger(__name__)
- >>> logger.info("Processing started")
  """

  from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -16,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin

  __all__ = [
  "LoggerMixin",
- "StructuredLoggerMixin",
  "LoggingConfig",
- "setup_logging",
+ "StructuredLoggerMixin",
  "get_pipeline_logger",
+ "setup_logging",
  ]

ai_pipeline_core/logging/logging.yml
@@ -48,7 +48,7 @@ loggers:
  ai_pipeline_core.llm:
  level: INFO

- ai_pipeline_core.flow:
+ ai_pipeline_core.pipeline:
  level: INFO

  ai_pipeline_core.testing:

ai_pipeline_core/logging/logging_config.py
@@ -1,14 +1,12 @@
  """Centralized logging configuration for AI Pipeline Core.

- @public
-
  Provides logging configuration management that integrates with Prefect's logging system.
  """

  import logging.config
  import os
  from pathlib import Path
- from typing import Any, Dict, Optional
+ from typing import Any

  import yaml
  from prefect.logging import get_logger
@@ -18,7 +16,7 @@ DEFAULT_LOG_LEVELS = {
  "ai_pipeline_core": "INFO",
  "ai_pipeline_core.documents": "INFO",
  "ai_pipeline_core.llm": "INFO",
- "ai_pipeline_core.flow": "INFO",
+ "ai_pipeline_core.pipeline": "INFO",
  "ai_pipeline_core.testing": "DEBUG",
  }

@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
  class LoggingConfig:
  """Manages logging configuration for the pipeline.

- @public
-
  Provides centralized logging configuration with Prefect integration.

  Configuration precedence:
@@ -36,22 +32,19 @@ class LoggingConfig:
  3. PREFECT_LOGGING_SETTINGS_PATH environment variable
  4. Default configuration

- Example:
- >>> config = LoggingConfig()
- >>> config.apply()
  """

- def __init__(self, config_path: Optional[Path] = None):
+ def __init__(self, config_path: Path | None = None):
  """Initialize logging configuration.

  Args:
  config_path: Optional path to YAML configuration file.
  """
  self.config_path = config_path or self._get_default_config_path()
- self._config: Optional[Dict[str, Any]] = None
+ self._config: dict[str, Any] | None = None

  @staticmethod
- def _get_default_config_path() -> Optional[Path]:
+ def _get_default_config_path() -> Path | None:
  """Get default config path from environment variables.

  Returns:
@@ -67,7 +60,7 @@ class LoggingConfig:

  return None

- def load_config(self) -> Dict[str, Any]:
+ def load_config(self) -> dict[str, Any]:
  """Load logging configuration from file or defaults.

  Returns:
@@ -75,7 +68,7 @@
  """
  if self._config is None:
  if self.config_path and self.config_path.exists():
- with open(self.config_path, "r") as f:
+ with open(self.config_path, encoding="utf-8") as f:
  self._config = yaml.safe_load(f)
  else:
  self._config = self._get_default_config()
@@ -84,7 +77,7 @@
  return self._config

  @staticmethod
- def _get_default_config() -> Dict[str, Any]:
+ def _get_default_config() -> dict[str, Any]:
  """Get default logging configuration.

  Returns:
@@ -99,10 +92,7 @@
  "datefmt": "%H:%M:%S",
  },
  "detailed": {
- "format": (
- "%(asctime)s | %(levelname)-7s | %(name)s | "
- "%(funcName)s:%(lineno)d - %(message)s"
- ),
+ "format": ("%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"),
  "datefmt": "%Y-%m-%d %H:%M:%S",
  },
  },
@@ -138,14 +128,12 @@


  # Global configuration instance
- _logging_config: Optional[LoggingConfig] = None
+ _logging_config: LoggingConfig | None = None


- def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
+ def setup_logging(config_path: Path | None = None, level: str | None = None):
  """Setup logging for the AI Pipeline Core library.

- @public
-
  Initializes logging configuration for the pipeline system.

  IMPORTANT: Call setup_logging exactly once in your application entry point
@@ -155,18 +143,8 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non
  config_path: Optional path to YAML logging configuration file.
  level: Optional log level override (INFO, DEBUG, WARNING, etc.).

- Example:
- >>> # In your main.py or application entry point:
- >>> def main():
- ... setup_logging() # Call once at startup
- ... # Your application code here
- ...
- >>> # Or with custom level:
- >>> if __name__ == "__main__":
- ... setup_logging(level="DEBUG")
- ... run_application()
  """
- global _logging_config
+ global _logging_config # noqa: PLW0603

  _logging_config = LoggingConfig(config_path)
  _logging_config.apply()
@@ -185,22 +163,28 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non

  def get_pipeline_logger(name: str):
  """Get a logger for pipeline components.
- @public
-
- Returns a Prefect-integrated logger with proper configuration.
+ Returns a Prefect-integrated logger with the OTel span-event bridge
+ attached. Any log record at INFO+ emitted while an OTel span is
+ recording will be captured as a span event in the trace.

  Args:
  name: Logger name, typically __name__.

  Returns:
- Prefect logger instance.
+ Prefect logger instance with bridge handler.

- Example:
- >>> logger = get_pipeline_logger(__name__)
- >>> logger.info("Module initialized")
  """
- # Ensure logging is setup
  if _logging_config is None:
  setup_logging()

- return get_logger(name)
+ logger = get_logger(name)
+
+ # Attach the singleton bridge handler so log records become OTel span events.
+ # The handler is a no-op when no span is recording, so early attachment is safe.
+ from ai_pipeline_core.observability._logging_bridge import get_bridge_handler # noqa: PLC0415
+
+ handler = get_bridge_handler()
+ if handler not in logger.handlers:
+ logger.addHandler(handler)
+
+ return logger
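
For orientation, a minimal usage sketch of the logging API after this change (not part of the diff; imports follow the `__all__` of ai_pipeline_core/logging/__init__.py above):

    from ai_pipeline_core.logging import get_pipeline_logger, setup_logging

    def main() -> None:
        setup_logging(level="DEBUG")  # call exactly once at the application entry point
        logger = get_pipeline_logger(__name__)
        # With the new bridge handler, INFO+ records also become OTel span
        # events whenever a span is recording.
        logger.info("Processing started")

    if __name__ == "__main__":
        main()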

ai_pipeline_core/logging/logging_mixin.py
@@ -1,13 +1,11 @@
- """Logging mixin for consistent logging across components using Prefect logging.
-
- @public
- """
+ """Logging mixin for consistent logging across components using Prefect logging."""

  import contextlib
  import time
+ from collections.abc import Generator
  from contextlib import contextmanager
  from functools import cached_property
- from typing import Any, Dict, Generator, Optional
+ from typing import Any

  from prefect import get_run_logger
  from prefect.context import FlowRunContext, TaskRunContext
@@ -17,8 +15,6 @@ from prefect.logging import get_logger
  class LoggerMixin:
  """Mixin class that provides consistent logging functionality using Prefect's logging system.

- @public
-
  Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
  The mixin's internal behavior routes to the appropriate backend; you should not call
  logging.getLogger directly.
@@ -28,7 +24,7 @@ class LoggerMixin:
  - Internal routing when outside flow/task context
  """

- _logger_name: Optional[str] = None
+ _logger_name: str | None = None

  @cached_property
  def logger(self):
@@ -37,7 +33,8 @@ class LoggerMixin:
  return logger
  return get_logger(self._logger_name or self.__class__.__module__)

- def _get_run_logger(self):
+ @staticmethod
+ def _get_run_logger():
  """Attempt to get Prefect run logger.

  Returns:
@@ -61,15 +58,15 @@ class LoggerMixin:
  """Log warning message with optional context."""
  self.logger.warning(message, extra=kwargs)

- def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+ def log_error(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
  """Log error message with optional exception info."""
  self.logger.error(message, exc_info=exc_info, extra=kwargs)

- def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+ def log_critical(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
  """Log critical message with optional exception info."""
  self.logger.critical(message, exc_info=exc_info, extra=kwargs)

- def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
+ def log_with_context(self, level: str, message: str, context: dict[str, Any]) -> None:
  """Log message with structured context.

  Args:
@@ -77,12 +74,6 @@ class LoggerMixin:
  message: Log message
  context: Additional context as dictionary

- Example:
- self.log_with_context("info", "Processing document", {
- "document_id": doc.id,
- "document_size": doc.size,
- "document_type": doc.type
- })
  """
  log_method = getattr(self.logger, level.lower(), self.logger.info)

@@ -94,10 +85,7 @@


  class StructuredLoggerMixin(LoggerMixin):
- """Extended mixin for structured logging with Prefect.
-
- @public
- """
+ """Extended mixin for structured logging with Prefect."""

  def log_event(self, event: str, **kwargs: Any) -> None:
  """Log a structured event.
@@ -106,11 +94,6 @@ class StructuredLoggerMixin(LoggerMixin):
  event: Event name
  **kwargs: Event attributes

- Example:
- self.log_event("document_processed",
- document_id=doc.id,
- duration_ms=processing_time,
- status="success")
  """
  self.logger.info(event, extra={"event": event, "structured": True, **kwargs})

@@ -123,9 +106,6 @@ class StructuredLoggerMixin(LoggerMixin):
  unit: Unit of measurement
  **tags: Additional tags

- Example:
- self.log_metric("processing_time", 1.23, "seconds",
- document_type="pdf", model="gpt-4")
  """
  self.logger.info(
  f"Metric: {metric_name}",
@@ -146,9 +126,6 @@ class StructuredLoggerMixin(LoggerMixin):
  duration_ms: Duration in milliseconds
  **attributes: Additional attributes

- Example:
- self.log_span("llm_generation", 1234.5,
- model="gpt-4", tokens=500)
  """
  self.logger.info(
  f"Span: {operation}",
@@ -168,9 +145,6 @@ class StructuredLoggerMixin(LoggerMixin):
  operation: Operation name
  **context: Additional context

- Example:
- with self.log_operation("document_processing", doc_id=doc.id):
- process_document(doc)
  """
  start_time = time.perf_counter()

@@ -179,14 +153,12 @@
  try:
  yield
  duration_ms = (time.perf_counter() - start_time) * 1000
- self.log_info(
- f"Completed {operation}", duration_ms=duration_ms, status="success", **context
- )
+ self.log_info(f"Completed {operation}", duration_ms=duration_ms, status="success", **context)
  except Exception as e:
  # Intentionally broad: Context manager must catch all exceptions to log them
  duration_ms = (time.perf_counter() - start_time) * 1000
  self.log_error(
- f"Failed {operation}: {str(e)}",
+ f"Failed {operation}: {e!s}",
  exc_info=True,
  duration_ms=duration_ms,
  status="failure",
@@ -198,31 +170,25 @@
  class PrefectLoggerMixin(StructuredLoggerMixin):
  """Enhanced mixin specifically for Prefect flows and tasks."""

- def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
+ def log_flow_start(self, flow_name: str, parameters: dict[str, Any]) -> None:
  """Log flow start with parameters."""
  self.log_event("flow_started", flow_name=flow_name, parameters=parameters)

  def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
  """Log flow completion."""
- self.log_event(
- "flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
- )
+ self.log_event("flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms)

- def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
+ def log_task_start(self, task_name: str, inputs: dict[str, Any]) -> None:
  """Log task start with inputs."""
  self.log_event("task_started", task_name=task_name, inputs=inputs)

  def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
  """Log task completion."""
- self.log_event(
- "task_completed", task_name=task_name, status=status, duration_ms=duration_ms
- )
+ self.log_event("task_completed", task_name=task_name, status=status, duration_ms=duration_ms)

  def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
  """Log retry attempt."""
- self.log_warning(
- f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
- )
+ self.log_warning(f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error)

  def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
  """Log a checkpoint in processing."""

ai_pipeline_core/observability/__init__.py (new file)
@@ -0,0 +1,32 @@
+ """Observability system for AI pipelines.
+
+ Contains debug tracing, ClickHouse-based tracking, and initialization utilities.
+ """
+
+ from ai_pipeline_core.observability._debug import (
+ ArtifactStore,
+ ContentRef,
+ ContentWriter,
+ LocalDebugSpanProcessor,
+ LocalTraceWriter,
+ SpanInfo,
+ TraceDebugConfig,
+ TraceState,
+ WriteJob,
+ generate_summary,
+ )
+ from ai_pipeline_core.observability._debug._content import reconstruct_span_content
+
+ __all__ = [
+ "ArtifactStore",
+ "ContentRef",
+ "ContentWriter",
+ "LocalDebugSpanProcessor",
+ "LocalTraceWriter",
+ "SpanInfo",
+ "TraceDebugConfig",
+ "TraceState",
+ "WriteJob",
+ "generate_summary",
+ "reconstruct_span_content",
+ ]

ai_pipeline_core/observability/_debug/__init__.py (new file)
@@ -0,0 +1,30 @@
+ """Local trace debugging system for AI pipelines.
+
+ This module provides filesystem-based trace debugging that saves all spans
+ with their inputs/outputs for LLM-assisted debugging. Includes static
+ summary generation and LLM-powered auto-summary capabilities.
+
+ Enabled automatically in CLI mode (``run_cli``), writing to ``<working_dir>/.trace``.
+ Disable with ``--no-trace``.
+ """
+
+ from ._config import TraceDebugConfig
+ from ._content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
+ from ._processor import LocalDebugSpanProcessor
+ from ._summary import generate_summary
+ from ._types import SpanInfo, TraceState, WriteJob
+ from ._writer import LocalTraceWriter
+
+ __all__ = [
+ "ArtifactStore",
+ "ContentRef",
+ "ContentWriter",
+ "LocalDebugSpanProcessor",
+ "LocalTraceWriter",
+ "SpanInfo",
+ "TraceDebugConfig",
+ "TraceState",
+ "WriteJob",
+ "generate_summary",
+ "reconstruct_span_content",
+ ]

ai_pipeline_core/observability/_debug/_auto_summary.py (new file)
@@ -0,0 +1,94 @@
+ """LLM-powered auto-summary generation for trace debugging.
+
+ Separated from _summary.py to avoid circular imports: this module depends on
+ ai_pipeline_core.llm, which cannot be imported during the initial package load
+ chain that includes _debug/__init__.py.
+ """
+
+ from pydantic import BaseModel, ConfigDict
+
+ from ai_pipeline_core.llm import generate_structured
+ from ai_pipeline_core.llm.ai_messages import AIMessages
+ from ai_pipeline_core.llm.model_options import ModelOptions
+
+ from ._types import TraceState
+
+
+ class AutoTraceSummary(BaseModel):
+ """LLM-generated trace analysis."""
+
+ model_config = ConfigDict(frozen=True)
+
+ overview: str
+ outcome: str
+ error_analysis: str
+ bottlenecks: tuple[str, ...] = ()
+ cost_assessment: str
+ recommendations: tuple[str, ...] = ()
+
+
+ async def generate_auto_summary(
+ trace: TraceState, # noqa: ARG001
+ static_summary: str,
+ model: str,
+ ) -> str | None:
+ """Generate LLM-powered auto-summary of the trace.
+
+ Args:
+ trace: Completed trace state with all span data.
+ static_summary: Pre-generated static summary text used as LLM input context.
+ model: LLM model name for summary generation.
+
+ Returns:
+ Formatted markdown auto-summary string, or None if generation fails.
+ """
+ messages = AIMessages()
+ messages.append(static_summary)
+
+ options = ModelOptions(
+ system_prompt=(
+ "You are analyzing an AI pipeline execution trace. "
+ "Provide concise, actionable analysis based on the execution data. "
+ "Focus on cost efficiency, performance bottlenecks, and errors."
+ ),
+ )
+
+ result = await generate_structured(
+ model=model,
+ response_format=AutoTraceSummary,
+ messages=messages,
+ options=options,
+ purpose="trace_auto_summary",
+ )
+
+ if not result or not result.parsed:
+ return None
+
+ summary = result.parsed
+ lines = [
+ "# Auto-Summary (LLM-Generated)",
+ "",
+ f"**Overview:** {summary.overview}",
+ "",
+ f"**Outcome:** {summary.outcome}",
+ "",
+ ]
+
+ if summary.error_analysis:
+ lines.append(f"**Error Analysis:** {summary.error_analysis}")
+ lines.append("")
+
+ if summary.bottlenecks:
+ lines.append("**Bottlenecks:**")
+ lines.extend(f"- {b}" for b in summary.bottlenecks)
+ lines.append("")
+
+ lines.append(f"**Cost Assessment:** {summary.cost_assessment}")
+ lines.append("")
+
+ if summary.recommendations:
+ lines.append("**Recommendations:**")
+ lines.extend(f"- {r}" for r in summary.recommendations)
+ lines.append("")
+
+ return "\n".join(lines)

ai_pipeline_core/observability/_debug/_config.py (new file)
@@ -0,0 +1,95 @@
+ """Configuration for local trace debugging."""
+
+ from pathlib import Path
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class TraceDebugConfig(BaseModel):
+ """Configuration for local trace debugging.
+
+ Controls how traces are written to the local filesystem for debugging.
+ Enabled automatically in CLI mode, writing to ``<working_dir>/.trace``.
+ """
+
+ model_config = ConfigDict(frozen=True)
+
+ path: Path = Field(description="Directory for debug traces")
+ enabled: bool = Field(default=True, description="Whether debug tracing is enabled")
+
+ # Content size limits (Issue #2)
+ max_file_bytes: int = Field(
+ default=50_000,
+ description="Max bytes for input.yaml or output.yaml. Elements externalized to stay under.",
+ )
+ max_element_bytes: int = Field(
+ default=10_000,
+ description="Max bytes for single element. Above this, partial + artifact ref.",
+ )
+ element_excerpt_bytes: int = Field(
+ default=2_000,
+ description="Bytes of content to keep inline when element exceeds max_element_bytes.",
+ )
+ max_content_bytes: int = Field(
+ default=10_000_000,
+ description="Max bytes for any single artifact. Above this, truncate.",
+ )
+
+ # Image handling (Issue #7 - no changes per user)
+ extract_base64_images: bool = Field(
+ default=True,
+ description="Extract base64 images to artifact files",
+ )
+
+ # Span optimization (Issue #4)
+ merge_wrapper_spans: bool = Field(
+ default=True,
+ description="Merge Prefect wrapper spans with inner traced function spans",
+ )
+
+ # Indexes (Issue #1)
+ include_llm_index: bool = Field(
+ default=True,
+ description="Generate _llm_calls.yaml with LLM-specific details",
+ )
+ include_error_index: bool = Field(
+ default=True,
+ description="Generate _errors.yaml with failed span details",
+ )
+
+ # Cleanup
+ max_traces: int | None = Field(
+ default=None,
+ description="Max number of traces to keep. None for unlimited.",
+ )
+
+ # Security - default redaction patterns for common secrets
+ redact_patterns: tuple[str, ...] = Field(
+ default=(
+ r"sk-[a-zA-Z0-9]{20,}", # OpenAI API keys
+ r"sk-proj-[a-zA-Z0-9\-_]{20,}", # OpenAI project keys
+ r"AKIA[0-9A-Z]{16}", # AWS access keys
+ r"ghp_[a-zA-Z0-9]{36}", # GitHub personal tokens
+ r"gho_[a-zA-Z0-9]{36}", # GitHub OAuth tokens
+ r"xoxb-[a-zA-Z0-9\-]+", # Slack bot tokens
+ r"xoxp-[a-zA-Z0-9\-]+", # Slack user tokens
+ r"(?i)password\s*[:=]\s*['\"]?[^\s'\"]+", # Passwords
+ r"(?i)secret\s*[:=]\s*['\"]?[^\s'\"]+", # Secrets
+ r"(?i)api[_\-]?key\s*[:=]\s*['\"]?[^\s'\"]+", # API keys
+ r"(?i)bearer\s+[a-zA-Z0-9\-_\.]+", # Bearer tokens
+ ),
+ description="Regex patterns for secrets to redact",
+ )
+
+ # Summary
+ generate_summary: bool = Field(default=True, description="Generate _summary.md")
+
+ # Auto-summary (LLM-powered)
+ auto_summary_enabled: bool = Field(
+ default=False,
+ description="Generate LLM-powered auto-summary after trace completion",
+ )
+ auto_summary_model: str = Field(
+ default="gemini-3-flash",
+ description="Model to use for auto-summary generation",
+ )
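
A construction example using only the fields defined above (not part of the diff; values are illustrative):

    from pathlib import Path

    from ai_pipeline_core.observability import TraceDebugConfig

    config = TraceDebugConfig(
        path=Path(".trace"),        # the only required field: directory for debug traces
        max_traces=20,              # prune to the 20 most recent traces
        auto_summary_enabled=True,  # opt in to the LLM-powered summary
    )
    # The model is frozen (ConfigDict(frozen=True)), so derive variants via model_copy:
    quiet = config.model_copy(update={"generate_summary": False})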