ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/observability/_document_tracking.py (new file)
@@ -0,0 +1,146 @@
"""Document tracking helpers for pipeline instrumentation.

Emits document lifecycle events and sets OTel span attributes for
document lineage. All functions are no-ops when tracking is not initialized.
"""

from typing import cast

from opentelemetry import trace as otel_trace

from ai_pipeline_core.documents import Document
from ai_pipeline_core.logging import get_pipeline_logger
from ai_pipeline_core.observability._initialization import TrackingServiceProtocol, get_tracking_service
from ai_pipeline_core.observability._tracking._models import ATTR_INPUT_DOCUMENT_SHA256S, ATTR_OUTPUT_DOCUMENT_SHA256S, DocumentEventType

logger = get_pipeline_logger(__name__)


def get_current_span_id() -> str:
    """Return the current OTel span ID as hex, or empty string."""
    span = otel_trace.get_current_span()
    ctx = span.get_span_context()
    if ctx and ctx.span_id:
        return format(ctx.span_id, "016x")
    return ""


def _get_tracking_service() -> TrackingServiceProtocol | None:
    """Return the global tracking service, or None if not initialized."""
    return get_tracking_service()


def track_task_io(task_name: str, args: tuple[object, ...], kwargs: dict[str, object], result: object) -> None:  # noqa: ARG001
    """Track input/output documents for a pipeline task."""
    service = _get_tracking_service()
    if service is None:
        return

    span_id = get_current_span_id()
    input_sha256s: list[str] = []
    output_sha256s: list[str] = []

    # Track input documents
    for arg in (*args, *kwargs.values()):
        if isinstance(arg, Document):
            input_sha256s.append(arg.sha256)
            service.track_document_event(
                document_sha256=arg.sha256,
                span_id=span_id,
                event_type=DocumentEventType.TASK_INPUT,
            )
        elif isinstance(arg, list) and arg and isinstance(arg[0], Document):
            for doc in cast(list[Document], arg):
                input_sha256s.append(doc.sha256)
                service.track_document_event(
                    document_sha256=doc.sha256,
                    span_id=span_id,
                    event_type=DocumentEventType.TASK_INPUT,
                )

    # Track output documents
    if isinstance(result, Document):
        output_sha256s.append(result.sha256)
        service.track_document_event(
            document_sha256=result.sha256,
            span_id=span_id,
            event_type=DocumentEventType.TASK_OUTPUT,
        )
    elif isinstance(result, list) and result and isinstance(result[0], Document):
        for doc in cast(list[Document], result):
            output_sha256s.append(doc.sha256)
            service.track_document_event(
                document_sha256=doc.sha256,
                span_id=span_id,
                event_type=DocumentEventType.TASK_OUTPUT,
            )

    # Set span attributes for TrackingSpanProcessor to populate tracked_spans columns
    if input_sha256s or output_sha256s:
        span = otel_trace.get_current_span()
        if input_sha256s:
            span.set_attribute(ATTR_INPUT_DOCUMENT_SHA256S, input_sha256s)
        if output_sha256s:
            span.set_attribute(ATTR_OUTPUT_DOCUMENT_SHA256S, output_sha256s)


def track_flow_io(flow_name: str, input_documents: list[Document], output_documents: list[Document]) -> None:  # noqa: ARG001
    """Track input/output documents for a pipeline flow."""
    service = _get_tracking_service()
    if service is None:
        return

    span_id = get_current_span_id()
    input_sha256s: list[str] = []
    output_sha256s: list[str] = []

    for doc in input_documents:
        input_sha256s.append(doc.sha256)
        service.track_document_event(
            document_sha256=doc.sha256,
            span_id=span_id,
            event_type=DocumentEventType.FLOW_INPUT,
        )

    for doc in output_documents:
        output_sha256s.append(doc.sha256)
        service.track_document_event(
            document_sha256=doc.sha256,
            span_id=span_id,
            event_type=DocumentEventType.FLOW_OUTPUT,
        )

    if input_sha256s or output_sha256s:
        span = otel_trace.get_current_span()
        if input_sha256s:
            span.set_attribute(ATTR_INPUT_DOCUMENT_SHA256S, input_sha256s)
        if output_sha256s:
            span.set_attribute(ATTR_OUTPUT_DOCUMENT_SHA256S, output_sha256s)


def track_llm_documents(context: object | None, messages: object | None) -> None:
    """Track documents used in LLM calls (context and messages)."""
    service = _get_tracking_service()
    if service is None:
        return

    span_id = get_current_span_id()

    if context is not None:
        _track_docs_from_messages(service, context, span_id, DocumentEventType.LLM_CONTEXT)

    if messages is not None:
        _track_docs_from_messages(service, messages, span_id, DocumentEventType.LLM_MESSAGE)


def _track_docs_from_messages(service: TrackingServiceProtocol, messages: object, span_id: str, event_type: DocumentEventType) -> None:
    """Extract and track documents from AIMessages or similar containers."""
    if not isinstance(messages, list):
        return
    for item in cast(list[object], messages):
        if isinstance(item, Document):
            service.track_document_event(
                document_sha256=item.sha256,
                span_id=span_id,
                event_type=event_type,
            )
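
A minimal usage sketch (not part of the diff; the "tracked" wrapper name and wiring are illustrative assumptions, not the framework's actual decorator):

# Hypothetical wrapper showing where track_task_io fits around a task call.
import functools

from ai_pipeline_core.observability._document_tracking import track_task_io

def tracked(fn):
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        result = fn(*args, **kwargs)
        # No-op unless initialize_observability() has set up ClickHouse tracking.
        track_task_io(fn.__name__, args, kwargs, result)
        return result
    return wrapper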

ai_pipeline_core/observability/_initialization.py (new file)
@@ -0,0 +1,194 @@
"""Observability system initialization.

Provides ``initialize_observability()`` as the single entry point for
setting up Laminar and ClickHouse tracking.
"""

import importlib
from typing import Any, Protocol
from uuid import UUID

from lmnr import Laminar
from opentelemetry import trace as otel_trace
from pydantic import BaseModel, ConfigDict

from ai_pipeline_core.logging import get_pipeline_logger
from ai_pipeline_core.observability._tracking._client import ClickHouseClient
from ai_pipeline_core.observability._tracking._models import DocumentEventType, RunStatus
from ai_pipeline_core.observability._tracking._processor import TrackingSpanProcessor
from ai_pipeline_core.observability._tracking._service import TrackingService
from ai_pipeline_core.settings import settings

logger = get_pipeline_logger(__name__)


class TrackingServiceProtocol(Protocol):
    """Protocol for the tracking service methods used by deployment, decorators, and document tracking."""

    # Run lifecycle
    def set_run_context(self, *, run_id: UUID, project_name: str, flow_name: str, run_scope: str = "") -> None:
        """Store run metadata in context vars for downstream span attribution."""
        ...

    def track_run_start(self, *, run_id: UUID, project_name: str, flow_name: str, run_scope: str = "") -> None:
        """Record a pipeline run start event to ClickHouse."""
        ...

    def track_run_end(
        self,
        *,
        run_id: UUID,
        status: RunStatus,
        total_cost: float = ...,
        total_tokens: int = ...,
        metadata: dict[str, object] | None = ...,
    ) -> None:
        """Record a pipeline run completion event with final metrics."""
        ...

    def clear_run_context(self) -> None:
        """Reset run-scoped context vars after a run finishes."""
        ...

    # Document tracking
    def track_document_event(
        self,
        *,
        document_sha256: str,
        span_id: str,
        event_type: DocumentEventType,
        metadata: dict[str, str] | None = ...,
    ) -> None:
        """Record a document lifecycle event (created, read, transformed)."""
        ...

    # Summaries
    def schedule_summary(self, span_id: str, label: str, output_hint: str) -> None:
        """Queue an LLM-generated summary for a span's output."""
        ...

    # Lifecycle
    def flush(self, timeout: float = 30.0) -> None:
        """Flush all pending tracking events to ClickHouse."""
        ...

    def shutdown(self, timeout: float = 30.0) -> None:
        """Flush pending events and release tracking resources."""
        ...


_tracking_service: TrackingServiceProtocol | None = None


def get_tracking_service() -> TrackingServiceProtocol | None:
    """Return the global TrackingService instance, or None if not initialized."""
    return _tracking_service


class ObservabilityConfig(BaseModel):
    """Configuration for the observability system."""

    model_config = ConfigDict(frozen=True)

    # Laminar
    lmnr_project_api_key: str = ""
    lmnr_debug: str = ""

    # ClickHouse tracking
    clickhouse_host: str = ""
    clickhouse_port: int = 8443
    clickhouse_database: str = "default"
    clickhouse_user: str = "default"
    clickhouse_password: str = ""
    clickhouse_secure: bool = True

    # Tracking behavior
    tracking_enabled: bool = True
    tracking_summary_model: str = "gemini-3-flash"

    @property
    def has_clickhouse(self) -> bool:
        """Whether ClickHouse is configured."""
        return bool(self.clickhouse_host)

    @property
    def has_lmnr(self) -> bool:
        """Whether Laminar is configured."""
        return bool(self.lmnr_project_api_key)


def _build_config_from_settings() -> ObservabilityConfig:
    """Build ObservabilityConfig from framework Settings."""
    return ObservabilityConfig(
        lmnr_project_api_key=getattr(settings, "lmnr_project_api_key", ""),
        lmnr_debug=getattr(settings, "lmnr_debug", ""),
        clickhouse_host=getattr(settings, "clickhouse_host", ""),
        clickhouse_port=getattr(settings, "clickhouse_port", 8443),
        clickhouse_database=getattr(settings, "clickhouse_database", "default"),
        clickhouse_user=getattr(settings, "clickhouse_user", "default"),
        clickhouse_password=getattr(settings, "clickhouse_password", ""),
        clickhouse_secure=getattr(settings, "clickhouse_secure", True),
        tracking_enabled=getattr(settings, "tracking_enabled", True),
        tracking_summary_model=getattr(settings, "tracking_summary_model", "gemini-3-flash"),
    )


def _setup_tracking(config: ObservabilityConfig) -> TrackingServiceProtocol | None:
    """Set up ClickHouse tracking if configured. Returns TrackingService or None."""
    if not config.has_clickhouse or not config.tracking_enabled:
        return None

    client = ClickHouseClient(
        host=config.clickhouse_host,
        port=config.clickhouse_port,
        database=config.clickhouse_database,
        username=config.clickhouse_user,
        password=config.clickhouse_password,
        secure=config.clickhouse_secure,
    )
    summary_mod = importlib.import_module("ai_pipeline_core.observability._summary")
    service = TrackingService(
        client,
        summary_model=config.tracking_summary_model,
        span_summary_fn=summary_mod.generate_span_summary,
    )

    # Register span processor with OTel
    try:
        provider: Any = otel_trace.get_tracer_provider()
        if hasattr(provider, "add_span_processor"):
            processor = TrackingSpanProcessor(service)
            provider.add_span_processor(processor)
            logger.info("ClickHouse tracking initialized")
    except Exception as e:
        logger.warning(f"Failed to register TrackingSpanProcessor: {e}")

    return service


def initialize_observability(config: ObservabilityConfig | None = None) -> None:
    """Initialize the full observability stack.

    Call once at pipeline startup. Safe to call multiple times (idempotent
    for Laminar). Reads from Settings if no config provided.
    """
    global _tracking_service  # noqa: PLW0603

    if _tracking_service is not None:
        return  # Already initialized

    if config is None:
        config = _build_config_from_settings()

    # 1. Laminar
    if config.has_lmnr:
        try:
            Laminar.initialize(project_api_key=config.lmnr_project_api_key, export_timeout_seconds=15)
            logger.info("Laminar initialized")
        except Exception as e:
            logger.warning(f"Laminar initialization failed: {e}")

    # 2. ClickHouse tracking
    _tracking_service = _setup_tracking(config)

    # 3. Logging bridge — attached per-logger in get_pipeline_logger(), nothing to do here.
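
A startup sketch (not part of the diff; host and key values are placeholders):

from ai_pipeline_core.observability._initialization import (
    ObservabilityConfig,
    initialize_observability,
)

# Passing no config reads framework Settings instead.
initialize_observability(
    ObservabilityConfig(
        lmnr_project_api_key="lmnr-...",           # enables Laminar when non-empty
        clickhouse_host="clickhouse.example.com",  # enables ClickHouse tracking
        clickhouse_password="...",
    )
)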

ai_pipeline_core/observability/_logging_bridge.py (new file)
@@ -0,0 +1,57 @@
"""Logging bridge — captures Python log records as OTel span events.

Attaches a singleton ``SpanEventLoggingHandler`` to every logger created
via ``get_pipeline_logger()``. The handler is safe to attach eagerly
because ``emit()`` is a no-op when no OTel span is recording.

This is the only module that legitimately needs ``import logging`` directly
to subclass ``logging.Handler``. The ruff ban on ``import logging``
(pyproject.toml) is suppressed with ``# noqa: TID251``.
"""

import contextlib
import logging  # noqa: TID251

from opentelemetry import trace as otel_trace

_MIN_LEVEL = logging.INFO


class SpanEventLoggingHandler(logging.Handler):
    """Logging handler that writes log records as OTel span events.

    Attached to each logger returned by ``get_pipeline_logger()``.
    Only captures records at INFO level and above. Each record becomes
    a span event with ``log.level`` and ``log.message`` attributes.
    """

    def __init__(self) -> None:
        super().__init__(level=_MIN_LEVEL)

    def emit(self, record: logging.LogRecord) -> None:
        """Write a log record as an OTel span event."""
        with contextlib.suppress(Exception):
            # Prevent duplicate events when handler is on both parent and child logger
            if getattr(record, "_span_event_logged", False):
                return
            span = otel_trace.get_current_span()
            if not span.is_recording():
                return
            span.add_event(
                name="log",
                attributes={
                    "log.level": record.levelname,
                    "log.message": self.format(record),
                    "log.logger": record.name,
                },
            )
            record._span_event_logged = True


# Module-level singleton — safe because emit() checks is_recording().
_bridge_handler = SpanEventLoggingHandler()


def get_bridge_handler() -> SpanEventLoggingHandler:
    """Return the singleton bridge handler for attaching to pipeline loggers."""
    return _bridge_handler
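
An attachment sketch (not part of the diff; per the module docstring, get_pipeline_logger() attaches the handler for you, so manual attachment is shown only to illustrate the mechanism):

import logging  # noqa: TID251

from ai_pipeline_core.observability._logging_bridge import get_bridge_handler

log = logging.getLogger("my_pipeline")
handler = get_bridge_handler()
if handler not in log.handlers:  # singleton, so attach at most once
    log.addHandler(handler)

log.info("fetched 3 documents")  # becomes a span event only while a span is recording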

ai_pipeline_core/observability/_summary.py (new file)
@@ -0,0 +1,81 @@
"""LLM-powered summary generation for tracked spans and documents."""

from pydantic import BaseModel, Field

from ai_pipeline_core.llm import generate_structured
from ai_pipeline_core.llm.model_options import ModelOptions
from ai_pipeline_core.logging import get_pipeline_logger
from ai_pipeline_core.observability._tracking._internal import internal_tracking_context

logger = get_pipeline_logger(__name__)

_SPAN_SUMMARY_SYSTEM_PROMPT = (
    "You summarize AI pipeline task results for non-technical users "
    "monitoring a research pipeline.\n"
    "Rules:\n"
    "- Describe the action and outcome, not the content\n"
    "- No internal names, function names, or technical details\n"
    "- No sensitive data (URLs, personal names, company details) from the output\n"
    "- Use present perfect tense"
)

_DOC_SUMMARY_SYSTEM_PROMPT = (
    "You generate metadata for documents in a research pipeline dashboard.\n"
    "Rules:\n"
    "- No sensitive data (URLs, personal names, company details)\n"
    "- Describe purpose and content type, not the content itself\n"
    "- For website documents: short_title must be 'domain.com: Page Title' (shorten title if needed to fit 50 chars)"
)


class SpanSummary(BaseModel):
    """Structured output for span/task summaries."""

    summary: str = Field(description="1-2 sentences (max 50 words) describing what the task accomplished in present perfect tense")


class DocumentSummary(BaseModel):
    """Structured output for document summaries."""

    short_title: str = Field(description="Document title proposition based on content, max 50 characters")
    summary: str = Field(description="1-2 sentences (max 50 words) describing the document's purpose and content type")


async def generate_span_summary(label: str, output_hint: str, model: str = "gemini-3-flash") -> str:
    """Generate a human-readable summary for a span/task output.

    Returns plain summary string (stored in tracked_spans.user_summary).
    """
    try:
        with internal_tracking_context():
            result = await generate_structured(
                model=model,
                response_format=SpanSummary,
                messages=f"Task: {label}\nResult: {output_hint}",
                options=ModelOptions(system_prompt=_SPAN_SUMMARY_SYSTEM_PROMPT, cache_ttl=None, retries=3, timeout=30),
                purpose=f"span_summary: {label}",
            )
        return result.parsed.summary
    except Exception as e:
        logger.warning(f"Span summary failed for '{label}': {e}")
        return ""


async def generate_document_summary(name: str, excerpt: str, model: str = "gemini-3-flash") -> str:
    """Generate structured metadata for a document.

    Returns JSON-serialized DocumentSummary (stored in document_index.summary).
    """
    try:
        with internal_tracking_context():
            result = await generate_structured(
                model=model,
                response_format=DocumentSummary,
                messages=f"Document: {name}\nContent excerpt:\n{excerpt}",
                options=ModelOptions(system_prompt=_DOC_SUMMARY_SYSTEM_PROMPT, cache_ttl=None, retries=3, timeout=30),
                purpose=f"document_summary: {name}",
            )
        return result.parsed.model_dump_json()
    except Exception as e:
        logger.warning(f"Document summary failed for '{name}': {e}")
        return ""
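
A direct-call sketch (not part of the diff; in the framework this path runs via TrackingService.schedule_summary(), and the label/hint strings are placeholders):

import asyncio

from ai_pipeline_core.observability._summary import generate_span_summary

async def main() -> None:
    text = await generate_span_summary(
        label="Competitor scan",
        output_hint="12 pages fetched, 3 summaries produced",
    )
    # On any failure the function logs a warning and returns "".
    print(text or "(no summary)")

asyncio.run(main())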

ai_pipeline_core/observability/_tracking/_client.py (new file)
@@ -0,0 +1,178 @@
"""ClickHouse client with lazy connection and table management."""

import clickhouse_connect
from pydantic import BaseModel

from ai_pipeline_core.logging import get_pipeline_logger

from ._models import (
    TABLE_DOCUMENT_EVENTS,
    TABLE_PIPELINE_RUNS,
    TABLE_SPAN_EVENTS,
    TABLE_TRACKED_SPANS,
    TrackedSpanRow,
)

logger = get_pipeline_logger(__name__)

# SQL statements for table creation
_CREATE_TABLES_SQL = [
    f"""
    CREATE TABLE IF NOT EXISTS {TABLE_PIPELINE_RUNS}
    (
        run_id UUID,
        project_name LowCardinality(String),
        flow_name LowCardinality(String),
        run_scope String DEFAULT '',
        status LowCardinality(String),
        start_time DateTime64(3, 'UTC'),
        end_time Nullable(DateTime64(3, 'UTC')),
        total_cost Float64 DEFAULT 0,
        total_tokens UInt64 DEFAULT 0,
        metadata String DEFAULT '{{}}' CODEC(ZSTD(3)),
        version UInt64 DEFAULT 1
    )
    ENGINE = ReplacingMergeTree(version)
    PARTITION BY toYYYYMM(start_time)
    ORDER BY (run_id)
    SETTINGS index_granularity = 8192
    """,
    f"""
    CREATE TABLE IF NOT EXISTS {TABLE_TRACKED_SPANS}
    (
        span_id String,
        trace_id String,
        run_id UUID,
        parent_span_id Nullable(String),
        name String,
        span_type LowCardinality(String),
        status LowCardinality(String),
        start_time DateTime64(3, 'UTC'),
        end_time Nullable(DateTime64(3, 'UTC')),
        duration_ms UInt64 DEFAULT 0,
        cost Float64 DEFAULT 0,
        tokens_input UInt64 DEFAULT 0,
        tokens_output UInt64 DEFAULT 0,
        llm_model LowCardinality(Nullable(String)),
        user_summary Nullable(String) CODEC(ZSTD(3)),
        user_visible Bool DEFAULT false,
        user_label Nullable(String),
        input_document_sha256s Array(String),
        output_document_sha256s Array(String),
        version UInt64 DEFAULT 1,
        INDEX idx_trace trace_id TYPE bloom_filter GRANULARITY 1
    )
    ENGINE = ReplacingMergeTree(version)
    PARTITION BY toYYYYMM(start_time)
    ORDER BY (run_id, span_id)
    SETTINGS index_granularity = 8192
    """,
    f"""
    CREATE TABLE IF NOT EXISTS {TABLE_DOCUMENT_EVENTS}
    (
        event_id UUID,
        run_id UUID,
        document_sha256 String,
        span_id String,
        event_type LowCardinality(String),
        timestamp DateTime64(3, 'UTC'),
        metadata String DEFAULT '{{}}' CODEC(ZSTD(3))
    )
    ENGINE = MergeTree
    PARTITION BY toYYYYMM(timestamp)
    ORDER BY (run_id, document_sha256, timestamp)
    SETTINGS index_granularity = 8192
    """,
    f"""
    CREATE TABLE IF NOT EXISTS {TABLE_SPAN_EVENTS}
    (
        event_id UUID,
        run_id UUID,
        span_id String,
        name String,
        timestamp DateTime64(3, 'UTC'),
        attributes String DEFAULT '{{}}' CODEC(ZSTD(3)),
        level LowCardinality(Nullable(String))
    )
    ENGINE = MergeTree
    PARTITION BY toYYYYMM(timestamp)
    ORDER BY (run_id, span_id, timestamp)
    SETTINGS index_granularity = 8192
    """,
]


class ClickHouseClient:
    """Synchronous ClickHouse client with lazy connection.

    All methods are synchronous and must be called from the writer background
    thread — never from the async event loop. Connection is deferred to
    ``connect()`` which is called from the writer thread's ``_run()`` startup.
    """

    def __init__(
        self,
        *,
        host: str,
        port: int = 8443,
        database: str = "default",
        username: str = "default",
        password: str = "",
        secure: bool = True,
    ) -> None:
        """Store connection params. Does NOT connect yet."""
        self._params = {
            "host": host,
            "port": port,
            "database": database,
            "username": username,
            "password": password,
            "secure": secure,
        }
        self._client: object | None = None
        self._tables_initialized = False

    def connect(self) -> None:
        """Connect to ClickHouse. Call from writer thread, not async context."""
        self._client = clickhouse_connect.get_client(**self._params)  # pyright: ignore[reportArgumentType, reportUnknownMemberType]
        logger.info(f"Connected to ClickHouse at {self._params['host']}:{self._params['port']}")

    def ensure_tables(self) -> None:
        """Create tables if they don't exist. Call after connect()."""
        if self._client is None:
            raise RuntimeError("Not connected — call connect() first")
        if self._tables_initialized:
            return
        for sql in _CREATE_TABLES_SQL:
            self._client.command(sql)  # type: ignore[union-attr]

        self._tables_initialized = True
        logger.info("ClickHouse tables verified/created")

    def _insert_rows(self, table: str, rows: list[BaseModel]) -> None:
        """Insert rows into a table using columnar format."""
        if not rows or self._client is None:
            return
        column_names = list(type(rows[0]).model_fields.keys())
        data = [[getattr(row, col) for row in rows] for col in column_names]
        self._client.insert(table, data, column_names=column_names, column_oriented=True)  # type: ignore[union-attr]

    def insert_runs(self, rows: list[BaseModel]) -> None:
        """Insert pipeline run rows."""
        self._insert_rows(TABLE_PIPELINE_RUNS, rows)

    def insert_spans(self, rows: list[BaseModel]) -> None:
        """Insert tracked span rows."""
        self._insert_rows(TABLE_TRACKED_SPANS, rows)

    def insert_document_events(self, rows: list[BaseModel]) -> None:
        """Insert document event rows."""
        self._insert_rows(TABLE_DOCUMENT_EVENTS, rows)

    def insert_span_events(self, rows: list[BaseModel]) -> None:
        """Insert span event rows."""
        self._insert_rows(TABLE_SPAN_EVENTS, rows)

    def update_span(self, row: TrackedSpanRow) -> None:
        """Insert a single replacement span row (ReplacingMergeTree update)."""
        self.insert_spans([row])
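
A call-sequence sketch (not part of the diff; host and password are placeholders). Note the update model: update_span() re-inserts a row with a bumped version, and ReplacingMergeTree collapses rows sharing the (run_id, span_id) sort key down to the highest version at merge time, so "updates" never need SQL UPDATE statements.

from ai_pipeline_core.observability._tracking._client import ClickHouseClient

client = ClickHouseClient(host="clickhouse.example.com", password="...")
client.connect()        # lazy: no network I/O before this call
client.ensure_tables()  # idempotent CREATE TABLE IF NOT EXISTS statements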

ai_pipeline_core/observability/_tracking/_internal.py (new file)
@@ -0,0 +1,28 @@
"""Thread-local flag to prevent tracking recursion.

When summary generation calls ``llm.generate()``, the resulting span must NOT
be tracked again (infinite loop). The flag is checked by
``TrackingSpanProcessor.on_end()``.
"""

import threading
from collections.abc import Generator
from contextlib import contextmanager

_internal = threading.local()


def is_internal_tracking() -> bool:
    """Return True if the current thread is inside a tracking-internal LLM call."""
    return getattr(_internal, "active", False)


@contextmanager
def internal_tracking_context() -> Generator[None, None, None]:
    """Mark the current thread as performing internal tracking work."""
    prev = getattr(_internal, "active", False)
    _internal.active = True
    try:
        yield
    finally:
        _internal.active = prev
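
A behavior sketch (not part of the diff):

from ai_pipeline_core.observability._tracking._internal import (
    internal_tracking_context,
    is_internal_tracking,
)

with internal_tracking_context():
    assert is_internal_tracking()   # spans ended here are skipped by on_end()
assert not is_internal_tracking()   # previous value restored on exit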