ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registries.
- ai_pipeline_core/__init__.py +78 -125
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +130 -81
- ai_pipeline_core/llm/client.py +327 -193
- ai_pipeline_core/llm/model_options.py +14 -86
- ai_pipeline_core/llm/model_response.py +60 -103
- ai_pipeline_core/llm/model_types.py +16 -34
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -483
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core/utils/deploy.py +0 -373
- ai_pipeline_core/utils/remote_deployment.py +0 -269
- ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
- ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
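The listing above shows a major restructuring: the top-level `pipeline.py`, `tracing.py`, `flow/`, `simple_runner/`, `storage/`, and `utils/` modules are removed, and new `pipeline/`, `deployment/`, `document_store/`, `images/`, and `observability/` packages take their place. A hedged sketch of what imports against 0.4.1 could look like, inferred only from the module paths above (the names exported by each module are not confirmed by this diff):

    # Hypothetical 0.4.1 import paths, inferred from the file listing above.
    from ai_pipeline_core.observability import tracing    # was ai_pipeline_core/tracing.py
    from ai_pipeline_core.pipeline import decorators      # replaces top-level pipeline.py
    from ai_pipeline_core.document_store import factory   # new document_store package

The three hunks below cover the new `observability/_tracking` package: the row models, the OpenTelemetry span processor, and the coordinating service.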
ai_pipeline_core/observability/_tracking/_models.py (new file)
@@ -0,0 +1,138 @@
+"""Pydantic row models and enums for ClickHouse tracking tables."""
+
+from datetime import datetime
+from enum import StrEnum
+from typing import Protocol
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class RunStatus(StrEnum):
+    """Pipeline run status."""
+
+    RUNNING = "running"
+    COMPLETED = "completed"
+    FAILED = "failed"
+
+
+class SpanType(StrEnum):
+    """Span type classification."""
+
+    TASK = "task"
+    FLOW = "flow"
+    LLM = "llm"
+    TRACE = "trace"
+
+
+class DocumentEventType(StrEnum):
+    """Document lifecycle event types."""
+
+    TASK_INPUT = "task_input"
+    TASK_OUTPUT = "task_output"
+    FLOW_INPUT = "flow_input"
+    FLOW_OUTPUT = "flow_output"
+    LLM_CONTEXT = "llm_context"
+    LLM_MESSAGE = "llm_message"
+    STORE_SAVED = "store_saved"
+    STORE_SAVE_FAILED = "store_save_failed"
+
+
+# --- Table names ---
+
+TABLE_PIPELINE_RUNS = "pipeline_runs"
+TABLE_TRACKED_SPANS = "tracked_spans"
+TABLE_DOCUMENT_EVENTS = "document_events"
+TABLE_SPAN_EVENTS = "span_events"
+
+# --- OTel span attribute names for document lineage ---
+
+ATTR_INPUT_DOCUMENT_SHA256S = "pipeline.input_document_sha256s"
+ATTR_OUTPUT_DOCUMENT_SHA256S = "pipeline.output_document_sha256s"
+
+
+# --- Row models ---
+
+
+class PipelineRunRow(BaseModel):
+    """Row model for pipeline_runs table."""
+
+    model_config = ConfigDict(frozen=True)
+
+    run_id: UUID
+    project_name: str
+    flow_name: str
+    run_scope: str = ""
+    status: RunStatus
+    start_time: datetime
+    end_time: datetime | None = None
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    metadata: str = "{}"
+    version: int = 1
+
+
+class TrackedSpanRow(BaseModel):
+    """Row model for tracked_spans table."""
+
+    model_config = ConfigDict(frozen=True)
+
+    span_id: str
+    trace_id: str
+    run_id: UUID
+    parent_span_id: str | None = None
+    name: str
+    span_type: SpanType
+    status: str
+    start_time: datetime
+    end_time: datetime | None = None
+    duration_ms: int = 0
+    cost: float = 0.0
+    tokens_input: int = 0
+    tokens_output: int = 0
+    llm_model: str | None = None
+    user_summary: str | None = None
+    user_visible: bool = False
+    user_label: str | None = None
+    input_document_sha256s: tuple[str, ...] = Field(default_factory=tuple)
+    output_document_sha256s: tuple[str, ...] = Field(default_factory=tuple)
+    version: int = 1
+
+
+class DocumentEventRow(BaseModel):
+    """Row model for document_events table."""
+
+    model_config = ConfigDict(frozen=True)
+
+    event_id: UUID
+    run_id: UUID
+    document_sha256: str
+    span_id: str
+    event_type: DocumentEventType
+    timestamp: datetime
+    metadata: str = "{}"
+
+
+class SpanEventRow(BaseModel):
+    """Row model for span_events table."""
+
+    model_config = ConfigDict(frozen=True)
+
+    event_id: UUID
+    run_id: UUID
+    span_id: str
+    name: str
+    timestamp: datetime
+    attributes: str = "{}"
+    level: str | None = None
+
+
+# --- Protocol for circular import resolution ---
+
+
+class SummaryRowBuilder(Protocol):
+    """Protocol satisfied by TrackingService for writer callback."""
+
+    def build_span_summary_update(self, span_id: str, summary: str) -> TrackedSpanRow | None:
+        """Build a replacement row with summary filled."""
+        ...
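Each row model is frozen and carries a `version` counter, so updates are expressed as whole replacement rows rather than in-place mutations; this is the usual pattern for append-only stores like ClickHouse, where the reader keeps the highest version. A minimal sketch of that pattern using the models above (values are illustrative):

    from datetime import UTC, datetime
    from uuid import uuid4

    row = PipelineRunRow(
        run_id=uuid4(),
        project_name="demo",
        flow_name="daily_ingest",
        status=RunStatus.RUNNING,
        start_time=datetime.now(UTC),
    )
    # Frozen models are updated via model_copy() with a bumped version,
    # exactly as TrackingService does in the third hunk below.
    finished = row.model_copy(update={
        "status": RunStatus.COMPLETED,
        "end_time": datetime.now(UTC),
        "version": row.version + 1,
    })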
ai_pipeline_core/observability/_tracking/_processor.py (new file)
@@ -0,0 +1,158 @@
+"""OpenTelemetry SpanProcessor that feeds the tracking system."""
+
+from datetime import UTC, datetime
+from typing import Any
+
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+from opentelemetry.trace import StatusCode
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from ._internal import is_internal_tracking
+from ._models import ATTR_INPUT_DOCUMENT_SHA256S, ATTR_OUTPUT_DOCUMENT_SHA256S, SpanType
+from ._service import TrackingService
+
+logger = get_pipeline_logger(__name__)
+
+
+def _hex_span_id(span_id: int) -> str:
+    """Convert integer span ID to hex string."""
+    return format(span_id, "016x")
+
+
+def _hex_trace_id(trace_id: int) -> str:
+    """Convert integer trace ID to hex string."""
+    return format(trace_id, "032x")
+
+
+def _ns_to_datetime(ns: int) -> datetime:
+    """Convert nanosecond timestamp to datetime."""
+    return datetime.fromtimestamp(ns / 1e9, tz=UTC)
+
+
+def _classify_span(attrs: dict[str, Any]) -> SpanType:
+    """Determine span type from attributes."""
+    span_type_str = str(attrs.get("lmnr.span.type", ""))
+    if span_type_str == "LLM":
+        return SpanType.LLM
+    if attrs.get("prefect.flow.name"):
+        return SpanType.FLOW
+    if attrs.get("prefect.task.name"):
+        return SpanType.TASK
+    return SpanType.TRACE
+
+
+class TrackingSpanProcessor(SpanProcessor):
+    """Forwards completed spans to TrackingService.
+
+    Skips internal tracking spans (summary LLM calls) to prevent recursion.
+    """
+
+    def __init__(self, service: TrackingService) -> None:
+        """Initialize with tracking service."""
+        self._service = service
+
+    @staticmethod
+    def _parent_span_id(span: Span | ReadableSpan) -> str | None:
+        """Extract parent span ID as hex string, or None."""
+        parent = span.parent
+        if parent is None:
+            return None
+        return _hex_span_id(parent.span_id)
+
+    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
+        """Record span start."""
+        if is_internal_tracking():
+            return
+        try:
+            ctx = span.get_span_context()
+            if ctx is None:
+                return
+            attrs: dict[str, Any] = dict(span.attributes or {})
+            self._service.track_span_start(
+                span_id=_hex_span_id(ctx.span_id),
+                trace_id=_hex_trace_id(ctx.trace_id),
+                parent_span_id=self._parent_span_id(span),
+                name=span.name,
+                span_type=_classify_span(attrs),
+            )
+        except Exception as e:
+            logger.debug(f"TrackingSpanProcessor.on_start failed: {e}")
+
+    def on_end(self, span: ReadableSpan) -> None:  # noqa: PLR0914
+        """Record span completion with full details."""
+        if is_internal_tracking():
+            return
+        try:
+            ctx = span.get_span_context()
+            if ctx is None:
+                return
+            attrs: dict[str, Any] = dict(span.attributes or {})
+
+            start_ns = span.start_time or 0
+            end_ns = span.end_time or 0
+            start_time = _ns_to_datetime(start_ns)
+            end_time = _ns_to_datetime(end_ns)
+            duration_ms = max(0, (end_ns - start_ns) // 1_000_000)
+
+            status = "failed" if span.status.status_code == StatusCode.ERROR else "completed"
+
+            # Extract LLM-specific attributes
+            cost = float(attrs.get("gen_ai.usage.cost", 0.0))
+            tokens_input = int(attrs.get("gen_ai.usage.input_tokens", 0))
+            tokens_output = int(attrs.get("gen_ai.usage.output_tokens", 0))
+            llm_model = str(attrs.get("gen_ai.request.model", "")) or None
+
+            # Extract document SHA256 arrays set by track_task_io
+            raw_input_sha256s = attrs.get(ATTR_INPUT_DOCUMENT_SHA256S)
+            input_doc_sha256s = list(raw_input_sha256s) if raw_input_sha256s else None
+            raw_output_sha256s = attrs.get(ATTR_OUTPUT_DOCUMENT_SHA256S)
+            output_doc_sha256s = list(raw_output_sha256s) if raw_output_sha256s else None
+
+            span_id = _hex_span_id(ctx.span_id)
+            self._service.track_span_end(
+                span_id=span_id,
+                trace_id=_hex_trace_id(ctx.trace_id),
+                parent_span_id=self._parent_span_id(span),
+                name=span.name,
+                span_type=_classify_span(attrs),
+                status=status,
+                start_time=start_time,
+                end_time=end_time,
+                duration_ms=duration_ms,
+                cost=cost,
+                tokens_input=tokens_input,
+                tokens_output=tokens_output,
+                llm_model=llm_model,
+                input_document_sha256s=input_doc_sha256s,
+                output_document_sha256s=output_doc_sha256s,
+            )
+
+            # Forward span events
+            if span.events:
+                events: list[tuple[str, datetime, dict[str, str], str | None]] = []
+                for event in span.events:
+                    event_attrs = dict(event.attributes) if event.attributes else {}
+                    level = str(event_attrs.pop("log.level", "")) or None
+                    events.append((
+                        event.name,
+                        _ns_to_datetime(event.timestamp),
+                        {k: str(v) for k, v in event_attrs.items()},
+                        level,
+                    ))
+                self._service.track_span_events(
+                    span_id=span_id,
+                    events=events,
+                )
+        except Exception as e:
+            logger.debug(f"TrackingSpanProcessor.on_end failed: {e}")
+
+    def shutdown(self) -> None:
+        """Shutdown the tracking service."""
+        self._service.shutdown()
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
+        """Force flush is a no-op — the writer flushes on its own schedule."""
+        _ = timeout_millis
+        return True
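`TrackingSpanProcessor` implements the standard OpenTelemetry `SpanProcessor` interface, so it can be registered on a `TracerProvider` alongside any exporter. A sketch of the wiring, assuming a `TrackingService` instance named `service` has already been constructed (the framework's own setup lives in `observability/_initialization.py`, which this example does not reproduce):

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    provider = TracerProvider()
    provider.add_span_processor(TrackingSpanProcessor(service))
    trace.set_tracer_provider(provider)

    tracer = trace.get_tracer("demo")
    with tracer.start_as_current_span("example"):
        pass  # on_start and on_end above fire for this span

Note that `on_start` and `on_end` never raise: every failure is swallowed and logged at debug level, so tracking problems cannot fail the traced pipeline.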
ai_pipeline_core/observability/_tracking/_service.py (new file)
@@ -0,0 +1,311 @@
+"""TrackingService — central coordinator for pipeline observability.
+
+Manages run context, version counters, row caching for summary updates,
+and coordinates the ClickHouse writer thread.
+"""
+
+import json
+from datetime import UTC, datetime
+from threading import Lock
+from uuid import UUID, uuid4
+
+from lmnr.opentelemetry_lib.tracing import context as laminar_context
+from opentelemetry import context as otel_context
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from ._client import ClickHouseClient
+from ._models import (
+    TABLE_DOCUMENT_EVENTS,
+    TABLE_PIPELINE_RUNS,
+    TABLE_SPAN_EVENTS,
+    TABLE_TRACKED_SPANS,
+    DocumentEventRow,
+    DocumentEventType,
+    PipelineRunRow,
+    RunStatus,
+    SpanEventRow,
+    SpanType,
+    TrackedSpanRow,
+)
+from ._writer import ClickHouseWriter, SpanSummaryFn, SummaryJob
+
+logger = get_pipeline_logger(__name__)
+
+
+class TrackingService:
+    """Central tracking coordinator.
+
+    Thread-safe — all mutable state is protected by ``_lock``.
+    """
+
+    def __init__(
+        self,
+        client: ClickHouseClient,
+        *,
+        summary_model: str = "gemini-3-flash",
+        span_summary_fn: SpanSummaryFn | None = None,
+    ) -> None:
+        """Initialize tracking service and start writer thread."""
+        self._client = client
+        self._summary_model = summary_model
+
+        self._writer = ClickHouseWriter(
+            client,
+            summary_row_builder=self,
+            span_summary_fn=span_summary_fn,
+        )
+        self._writer.start()
+
+        # Run context
+        self._run_id: UUID | None = None
+        self._project_name: str = ""
+        self._flow_name: str = ""
+        self._run_scope: str = ""
+        self._run_start_time: datetime | None = None
+
+        # Version counters
+        self._versions: dict[str, int] = {}
+        self._lock = Lock()
+
+        # Row caches for summary updates
+        self._span_cache: dict[str, TrackedSpanRow] = {}
+
+    # --- Run context ---
+
+    def set_run_context(self, *, run_id: UUID, project_name: str, flow_name: str, run_scope: str = "") -> None:
+        """Set the current run context. Called at pipeline start."""
+        with self._lock:
+            self._run_id = run_id
+            self._project_name = project_name
+            self._flow_name = flow_name
+            self._run_scope = run_scope
+
+    def clear_run_context(self) -> None:
+        """Clear run context and caches. Called by flush() and shutdown()."""
+        with self._lock:
+            self._run_id = None
+            self._project_name = ""
+            self._flow_name = ""
+            self._run_scope = ""
+            self._run_start_time = None
+            self._versions.clear()
+            self._span_cache.clear()
+
+    # --- Version management ---
+
+    def _next_version(self, key: str) -> int:
+        """Increment and return version counter. Must be called under _lock."""
+        v = self._versions.get(key, 0) + 1
+        self._versions[key] = v
+        return v
+
+    # --- Run tracking ---
+
+    def track_run_start(self, *, run_id: UUID, project_name: str, flow_name: str, run_scope: str = "") -> None:
+        """Record pipeline run start."""
+        now = datetime.now(UTC)
+        with self._lock:
+            self._run_start_time = now
+            version = self._next_version(f"run:{run_id}")
+            row = PipelineRunRow(
+                run_id=run_id,
+                project_name=project_name,
+                flow_name=flow_name,
+                run_scope=run_scope,
+                status=RunStatus.RUNNING,
+                start_time=now,
+                version=version,
+            )
+        self._writer.write(TABLE_PIPELINE_RUNS, [row])
+
+    def track_run_end(
+        self,
+        *,
+        run_id: UUID,
+        status: RunStatus,
+        total_cost: float = 0.0,
+        total_tokens: int = 0,
+        metadata: dict[str, object] | None = None,
+    ) -> None:
+        """Record pipeline run completion or failure."""
+        now = datetime.now(UTC)
+        with self._lock:
+            version = self._next_version(f"run:{run_id}")
+            start_time = self._run_start_time or now
+            row = PipelineRunRow(
+                run_id=run_id,
+                project_name=self._project_name,
+                flow_name=self._flow_name,
+                run_scope=self._run_scope,
+                status=status,
+                start_time=start_time,
+                end_time=now,
+                total_cost=total_cost,
+                total_tokens=total_tokens,
+                metadata=json.dumps(metadata) if metadata else "{}",
+                version=version,
+            )
+        self._writer.write(TABLE_PIPELINE_RUNS, [row])
+
+    # --- Span tracking ---
+
+    def track_span_start(self, *, span_id: str, trace_id: str, parent_span_id: str | None, name: str, span_type: SpanType) -> None:
+        """Record span start."""
+        if self._run_id is None:
+            return
+        now = datetime.now(UTC)
+        with self._lock:
+            version = self._next_version(f"span:{span_id}")
+            row = TrackedSpanRow(
+                span_id=span_id,
+                trace_id=trace_id,
+                run_id=self._run_id,
+                parent_span_id=parent_span_id,
+                name=name,
+                span_type=span_type,
+                status="running",
+                start_time=now,
+                version=version,
+            )
+        self._writer.write(TABLE_TRACKED_SPANS, [row])
+
+    def track_span_end(
+        self,
+        *,
+        span_id: str,
+        trace_id: str,
+        parent_span_id: str | None,
+        name: str,
+        span_type: SpanType,
+        status: str,
+        start_time: datetime,
+        end_time: datetime,
+        duration_ms: int,
+        cost: float = 0.0,
+        tokens_input: int = 0,
+        tokens_output: int = 0,
+        llm_model: str | None = None,
+        user_summary: str | None = None,
+        user_visible: bool = False,
+        user_label: str | None = None,
+        input_document_sha256s: list[str] | None = None,
+        output_document_sha256s: list[str] | None = None,
+    ) -> None:
+        """Record span completion with full details."""
+        if self._run_id is None:
+            return
+        with self._lock:
+            version = self._next_version(f"span:{span_id}")
+            row = TrackedSpanRow(
+                span_id=span_id,
+                trace_id=trace_id,
+                run_id=self._run_id,
+                parent_span_id=parent_span_id,
+                name=name,
+                span_type=span_type,
+                status=status,
+                start_time=start_time,
+                end_time=end_time,
+                duration_ms=duration_ms,
+                cost=cost,
+                tokens_input=tokens_input,
+                tokens_output=tokens_output,
+                llm_model=llm_model,
+                user_summary=user_summary,
+                user_visible=user_visible,
+                user_label=user_label,
+                input_document_sha256s=tuple(input_document_sha256s) if input_document_sha256s else (),
+                output_document_sha256s=tuple(output_document_sha256s) if output_document_sha256s else (),
+                version=version,
+            )
+        self._writer.write(TABLE_TRACKED_SPANS, [row])
+        with self._lock:
+            self._span_cache[span_id] = row
+
+    def track_span_events(self, *, span_id: str, events: list[tuple[str, datetime, dict[str, str], str | None]]) -> None:
+        """Record span events (including bridged log events)."""
+        if self._run_id is None or not events:
+            return
+        rows = [
+            SpanEventRow(
+                event_id=uuid4(),
+                run_id=self._run_id,
+                span_id=span_id,
+                name=name,
+                timestamp=ts,
+                attributes=json.dumps(attrs) if attrs else "{}",
+                level=level,
+            )
+            for name, ts, attrs, level in events
+        ]
+        self._writer.write(TABLE_SPAN_EVENTS, list(rows))
+
+    # --- Document tracking ---
+
+    def track_document_event(
+        self,
+        *,
+        document_sha256: str,
+        span_id: str,
+        event_type: DocumentEventType,
+        metadata: dict[str, str] | None = None,
+    ) -> None:
+        """Record a document lifecycle event."""
+        if self._run_id is None:
+            return
+        row = DocumentEventRow(
+            event_id=uuid4(),
+            run_id=self._run_id,
+            document_sha256=document_sha256,
+            span_id=span_id,
+            event_type=event_type,
+            timestamp=datetime.now(UTC),
+            metadata=json.dumps(metadata) if metadata else "{}",
+        )
+        self._writer.write(TABLE_DOCUMENT_EVENTS, [row])
+
+    # --- Summary scheduling ---
+
+    def schedule_summary(self, span_id: str, label: str, output_hint: str) -> None:
+        """Schedule LLM summary generation for a span."""
+        self._writer.write_job(
+            SummaryJob(
+                span_id=span_id,
+                label=label,
+                output_hint=output_hint,
+                summary_model=self._summary_model,
+                parent_otel_context=otel_context.get_current(),
+                parent_laminar_context=laminar_context.get_current_context(),
+            )
+        )
+
+    # --- Summary row builders (SummaryRowBuilder protocol) ---
+
+    def build_span_summary_update(self, span_id: str, summary: str) -> TrackedSpanRow | None:
+        """Build a replacement row with summary filled. Called from writer thread."""
+        with self._lock:
+            cached = self._span_cache.get(span_id)
+            if cached is None:
+                return None
+            version = self._next_version(f"span:{span_id}")
+            return cached.model_copy(update={"user_summary": summary, "version": version})
+
+    # --- Lifecycle ---
+
+    def flush(self, timeout: float = 30.0) -> None:
+        """Wait for all pending items (including summary LLM jobs) to complete, then clear run context.
+
+        Use between runs in long-lived processes to prevent unbounded cache growth.
+        """
+        self._writer.flush(timeout=timeout)
+        self.clear_run_context()
+
+    def shutdown(self, timeout: float = 30.0) -> None:
+        """Shutdown the writer thread and clear run context.
+
+        Writer drains all pending items (including summary LLM jobs)
+        before caches are cleared, ensuring summaries can look up span data.
+        """
+        self._writer.shutdown(timeout=timeout)
+        self.clear_run_context()
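Read together with the processor above, the intended lifecycle appears to be: construct the service (which starts the writer thread), set the run context, let spans and document events stream in, then `flush()` between runs or `shutdown()` at process exit. A sketch of that sequence; the `client` construction is elided because `_client.py` is not shown in this section:

    from uuid import uuid4

    run_id = uuid4()
    service = TrackingService(client)  # client: ClickHouseClient
    service.set_run_context(run_id=run_id, project_name="demo", flow_name="daily_ingest")
    service.track_run_start(run_id=run_id, project_name="demo", flow_name="daily_ingest")
    try:
        ...  # run the pipeline; spans arrive via TrackingSpanProcessor
        service.track_run_end(run_id=run_id, status=RunStatus.COMPLETED)
    except Exception:
        service.track_run_end(run_id=run_id, status=RunStatus.FAILED)
        raise
    finally:
        service.flush()  # drain pending writes, then clear per-run caches

Because `flush()` clears the version counters and span cache, long-lived processes should call it between runs, as its docstring notes; otherwise the caches grow without bound.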