ai-pipeline-core 0.3.3-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (96)
  1. ai_pipeline_core/__init__.py +70 -144
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +106 -81
  37. ai_pipeline_core/llm/client.py +267 -158
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
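
Most of the churn above is a reorganization rather than new surface area: deployment helpers move out of `utils/` into a `deployment/` package, tracing and debug move under `observability/`, and the monolithic `pipeline.py` becomes a `pipeline/` package. A minimal migration sketch, inferred only from the rename entries above (symbol-level re-exports are not visible in this diff):

```python
# Import-path moves in 0.4.0, inferred from the rename entries in the
# file list above. Module paths only; consult each module's __all__ for
# the symbols it actually exports.

# 0.3.3:
#   from ai_pipeline_core import progress, tracing
#   from ai_pipeline_core.utils import deploy, remote_deployment

# 0.4.0:
from ai_pipeline_core.deployment import deploy, progress, remote
from ai_pipeline_core.observability import tracing
```

The hunks below cover the logging mixins, the new `observability` package, and its trace-debug configuration.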
--- a/ai_pipeline_core/logging/logging_mixin.py
+++ b/ai_pipeline_core/logging/logging_mixin.py
@@ -2,9 +2,10 @@
 
 import contextlib
 import time
+from collections.abc import Generator
 from contextlib import contextmanager
 from functools import cached_property
-from typing import Any, Dict, Generator, Optional
+from typing import Any
 
 from prefect import get_run_logger
 from prefect.context import FlowRunContext, TaskRunContext
@@ -23,7 +24,7 @@ class LoggerMixin:
     - Internal routing when outside flow/task context
     """
 
-    _logger_name: Optional[str] = None
+    _logger_name: str | None = None
 
     @cached_property
     def logger(self):
@@ -32,7 +33,8 @@ class LoggerMixin:
             return logger
         return get_logger(self._logger_name or self.__class__.__module__)
 
-    def _get_run_logger(self):
+    @staticmethod
+    def _get_run_logger():
         """Attempt to get Prefect run logger.
 
         Returns:
@@ -56,15 +58,15 @@
         """Log warning message with optional context."""
         self.logger.warning(message, extra=kwargs)
 
-    def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+    def log_error(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
         """Log error message with optional exception info."""
         self.logger.error(message, exc_info=exc_info, extra=kwargs)
 
-    def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+    def log_critical(self, message: str, *, exc_info: bool = False, **kwargs: Any) -> None:
         """Log critical message with optional exception info."""
        self.logger.critical(message, exc_info=exc_info, extra=kwargs)
 
-    def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
+    def log_with_context(self, level: str, message: str, context: dict[str, Any]) -> None:
         """Log message with structured context.
 
         Args:
@@ -72,12 +74,6 @@
             message: Log message
             context: Additional context as dictionary
 
-        Example:
-            self.log_with_context("info", "Processing document", {
-                "document_id": doc.id,
-                "document_size": doc.size,
-                "document_type": doc.type
-            })
         """
         log_method = getattr(self.logger, level.lower(), self.logger.info)
@@ -98,11 +94,6 @@ class StructuredLoggerMixin(LoggerMixin):
             event: Event name
             **kwargs: Event attributes
 
-        Example:
-            self.log_event("document_processed",
-                document_id=doc.id,
-                duration_ms=processing_time,
-                status="success")
         """
         self.logger.info(event, extra={"event": event, "structured": True, **kwargs})
 
@@ -115,9 +106,6 @@
             unit: Unit of measurement
             **tags: Additional tags
 
-        Example:
-            self.log_metric("processing_time", 1.23, "seconds",
-                document_type="pdf", model="gpt-5.1")
         """
         self.logger.info(
             f"Metric: {metric_name}",
@@ -138,9 +126,6 @@
             duration_ms: Duration in milliseconds
             **attributes: Additional attributes
 
-        Example:
-            self.log_span("llm_generation", 1234.5,
-                model="gpt-5.1", tokens=500)
         """
         self.logger.info(
             f"Span: {operation}",
@@ -160,9 +145,6 @@
             operation: Operation name
             **context: Additional context
 
-        Example:
-            with self.log_operation("document_processing", doc_id=doc.id):
-                process_document(doc)
         """
         start_time = time.perf_counter()
 
@@ -171,14 +153,12 @@
         try:
             yield
             duration_ms = (time.perf_counter() - start_time) * 1000
-            self.log_info(
-                f"Completed {operation}", duration_ms=duration_ms, status="success", **context
-            )
+            self.log_info(f"Completed {operation}", duration_ms=duration_ms, status="success", **context)
         except Exception as e:
             # Intentionally broad: Context manager must catch all exceptions to log them
             duration_ms = (time.perf_counter() - start_time) * 1000
             self.log_error(
-                f"Failed {operation}: {str(e)}",
+                f"Failed {operation}: {e!s}",
                 exc_info=True,
                 duration_ms=duration_ms,
                 status="failure",
@@ -190,31 +170,25 @@
 class PrefectLoggerMixin(StructuredLoggerMixin):
     """Enhanced mixin specifically for Prefect flows and tasks."""
 
-    def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
+    def log_flow_start(self, flow_name: str, parameters: dict[str, Any]) -> None:
         """Log flow start with parameters."""
         self.log_event("flow_started", flow_name=flow_name, parameters=parameters)
 
     def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
         """Log flow completion."""
-        self.log_event(
-            "flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
-        )
+        self.log_event("flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms)
 
-    def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
+    def log_task_start(self, task_name: str, inputs: dict[str, Any]) -> None:
         """Log task start with inputs."""
         self.log_event("task_started", task_name=task_name, inputs=inputs)
 
     def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
         """Log task completion."""
-        self.log_event(
-            "task_completed", task_name=task_name, status=status, duration_ms=duration_ms
-        )
+        self.log_event("task_completed", task_name=task_name, status=status, duration_ms=duration_ms)
 
     def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
         """Log retry attempt."""
-        self.log_warning(
-            f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
-        )
+        self.log_warning(f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error)
 
     def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
         """Log a checkpoint in processing."""
--- /dev/null
+++ b/ai_pipeline_core/observability/__init__.py
@@ -0,0 +1,32 @@
+"""Observability system for AI pipelines.
+
+Contains debug tracing, ClickHouse-based tracking, and initialization utilities.
+"""
+
+from ai_pipeline_core.observability._debug import (
+    ArtifactStore,
+    ContentRef,
+    ContentWriter,
+    LocalDebugSpanProcessor,
+    LocalTraceWriter,
+    SpanInfo,
+    TraceDebugConfig,
+    TraceState,
+    WriteJob,
+    generate_summary,
+)
+from ai_pipeline_core.observability._debug._content import reconstruct_span_content
+
+__all__ = [
+    "ArtifactStore",
+    "ContentRef",
+    "ContentWriter",
+    "LocalDebugSpanProcessor",
+    "LocalTraceWriter",
+    "SpanInfo",
+    "TraceDebugConfig",
+    "TraceState",
+    "WriteJob",
+    "generate_summary",
+    "reconstruct_span_content",
+]
--- /dev/null
+++ b/ai_pipeline_core/observability/_debug/__init__.py
@@ -0,0 +1,30 @@
+"""Local trace debugging system for AI pipelines.
+
+This module provides filesystem-based trace debugging that saves all spans
+with their inputs/outputs for LLM-assisted debugging. Includes static
+summary generation and LLM-powered auto-summary capabilities.
+
+Enabled automatically in CLI mode (``run_cli``), writing to ``<working_dir>/.trace``.
+Disable with ``--no-trace``.
+"""
+
+from ._config import TraceDebugConfig
+from ._content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
+from ._processor import LocalDebugSpanProcessor
+from ._summary import generate_summary
+from ._types import SpanInfo, TraceState, WriteJob
+from ._writer import LocalTraceWriter
+
+__all__ = [
+    "ArtifactStore",
+    "ContentRef",
+    "ContentWriter",
+    "LocalDebugSpanProcessor",
+    "LocalTraceWriter",
+    "SpanInfo",
+    "TraceDebugConfig",
+    "TraceState",
+    "WriteJob",
+    "generate_summary",
+    "reconstruct_span_content",
+]
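
Both `__init__` modules export the same eleven names, so downstream code can stay on the public package and avoid the underscore-prefixed subpackage. A minimal sketch:

```python
# Import the trace-debug surface from the public observability package;
# it re-exports everything _debug/__init__.py lists in __all__.
from ai_pipeline_core.observability import (
    LocalDebugSpanProcessor,
    LocalTraceWriter,
    TraceDebugConfig,
    generate_summary,
    reconstruct_span_content,
)
```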
--- /dev/null
+++ b/ai_pipeline_core/observability/_debug/_auto_summary.py
@@ -0,0 +1,94 @@
+"""LLM-powered auto-summary generation for trace debugging.
+
+Separated from _summary.py to avoid circular imports: this module depends on
+ai_pipeline_core.llm, which cannot be imported during the initial package load
+chain that includes _debug/__init__.py.
+"""
+
+from pydantic import BaseModel, ConfigDict
+
+from ai_pipeline_core.llm import generate_structured
+from ai_pipeline_core.llm.ai_messages import AIMessages
+from ai_pipeline_core.llm.model_options import ModelOptions
+
+from ._types import TraceState
+
+
+class AutoTraceSummary(BaseModel):
+    """LLM-generated trace analysis."""
+
+    model_config = ConfigDict(frozen=True)
+
+    overview: str
+    outcome: str
+    error_analysis: str
+    bottlenecks: tuple[str, ...] = ()
+    cost_assessment: str
+    recommendations: tuple[str, ...] = ()
+
+
+async def generate_auto_summary(
+    trace: TraceState,  # noqa: ARG001
+    static_summary: str,
+    model: str,
+) -> str | None:
+    """Generate LLM-powered auto-summary of the trace.
+
+    Args:
+        trace: Completed trace state with all span data.
+        static_summary: Pre-generated static summary text used as LLM input context.
+        model: LLM model name for summary generation.
+
+    Returns:
+        Formatted markdown auto-summary string, or None if generation fails.
+    """
+    messages = AIMessages()
+    messages.append(static_summary)
+
+    options = ModelOptions(
+        system_prompt=(
+            "You are analyzing an AI pipeline execution trace. "
+            "Provide concise, actionable analysis based on the execution data. "
+            "Focus on cost efficiency, performance bottlenecks, and errors."
+        ),
+    )
+
+    result = await generate_structured(
+        model=model,
+        response_format=AutoTraceSummary,
+        messages=messages,
+        options=options,
+        purpose="trace_auto_summary",
+    )
+
+    if not result or not result.parsed:
+        return None
+
+    summary = result.parsed
+    lines = [
+        "# Auto-Summary (LLM-Generated)",
+        "",
+        f"**Overview:** {summary.overview}",
+        "",
+        f"**Outcome:** {summary.outcome}",
+        "",
+    ]
+
+    if summary.error_analysis:
+        lines.append(f"**Error Analysis:** {summary.error_analysis}")
+        lines.append("")
+
+    if summary.bottlenecks:
+        lines.append("**Bottlenecks:**")
+        lines.extend(f"- {b}" for b in summary.bottlenecks)
+        lines.append("")
+
+    lines.append(f"**Cost Assessment:** {summary.cost_assessment}")
+    lines.append("")
+
+    if summary.recommendations:
+        lines.append("**Recommendations:**")
+        lines.extend(f"- {r}" for r in summary.recommendations)
+        lines.append("")
+
+    return "\n".join(lines)
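
A hedged sketch of driving this function directly; the helper name is hypothetical, and it assumes you already hold a completed `TraceState` plus the static summary text the writer produced (the normal path is the automatic invocation after trace completion when auto-summary is enabled):

```python
from ai_pipeline_core.observability import TraceState
from ai_pipeline_core.observability._debug._auto_summary import generate_auto_summary


async def append_auto_summary(trace: TraceState, static_summary: str) -> str:
    # trace is accepted but currently unused (noqa: ARG001 in the source);
    # the LLM sees only the static summary text as context.
    markdown = await generate_auto_summary(
        trace=trace,
        static_summary=static_summary,
        model="gemini-3-flash",  # the TraceDebugConfig.auto_summary_model default
    )
    # None signals that structured generation failed; fall back to the static text.
    return markdown if markdown is not None else static_summary
```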
--- a/ai_pipeline_core/observability/_debug/_config.py
+++ b/ai_pipeline_core/observability/_debug/_config.py
@@ -9,7 +9,7 @@ class TraceDebugConfig(BaseModel):
     """Configuration for local trace debugging.
 
     Controls how traces are written to the local filesystem for debugging.
-    Enable by setting TRACE_DEBUG_PATH environment variable.
+    Enabled automatically in CLI mode, writing to ``<working_dir>/.trace``.
     """
 
     model_config = ConfigDict(frozen=True)
@@ -47,12 +47,6 @@
         description="Merge Prefect wrapper spans with inner traced function spans",
     )
 
-    # Events (Issue #12)
-    events_file_mode: str = Field(
-        default="errors_only",
-        description="When to write events.yaml: 'all', 'errors_only', 'none'",
-    )
-
     # Indexes (Issue #1)
     include_llm_index: bool = Field(
         default=True,
@@ -89,3 +83,13 @@
 
     # Summary
     generate_summary: bool = Field(default=True, description="Generate _summary.md")
+
+    # Auto-summary (LLM-powered)
+    auto_summary_enabled: bool = Field(
+        default=False,
+        description="Generate LLM-powered auto-summary after trace completion",
+    )
+    auto_summary_model: str = Field(
+        default="gemini-3-flash",
+        description="Model to use for auto-summary generation",
+    )
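
Auto-summary is opt-in: `auto_summary_enabled` defaults to False. A minimal sketch of enabling it when constructing the config directly; in CLI mode the config is normally built for you:

```python
from ai_pipeline_core.observability import TraceDebugConfig

# Frozen pydantic model, so all fields are set at construction time.
config = TraceDebugConfig(
    generate_summary=True,                # static _summary.md (default True)
    auto_summary_enabled=True,            # LLM auto-summary (default False)
    auto_summary_model="gemini-3-flash",  # the field's documented default
)
```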