ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
"""Local trace writer for filesystem-based debugging."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import atexit
|
|
4
5
|
import hashlib
|
|
6
|
+
import importlib
|
|
5
7
|
import json
|
|
6
8
|
import os
|
|
7
9
|
import re
|
|
8
10
|
import shutil
|
|
9
11
|
import socket
|
|
10
|
-
from
|
|
11
|
-
from datetime import datetime, timezone
|
|
12
|
-
from pathlib import Path
|
|
12
|
+
from datetime import UTC, datetime
|
|
13
13
|
from queue import Empty, Queue
|
|
14
14
|
from threading import Lock, Thread
|
|
15
15
|
from typing import Any
|
|
@@ -18,71 +18,22 @@ import yaml
|
|
|
18
18
|
|
|
19
19
|
from ai_pipeline_core.logging import get_pipeline_logger
|
|
20
20
|
|
|
21
|
-
from .
|
|
22
|
-
from .
|
|
21
|
+
from ._config import TraceDebugConfig
|
|
22
|
+
from ._content import ArtifactStore, ContentWriter
|
|
23
|
+
from ._summary import generate_summary
|
|
24
|
+
from ._types import SpanInfo, TraceState, WriteJob
|
|
23
25
|
|
|
24
26
|
logger = get_pipeline_logger(__name__)
|
|
25
27
|
|
|
26
28
|
|
|
27
|
-
@dataclass
|
|
28
|
-
class WriteJob:
|
|
29
|
-
"""Job for background writer thread."""
|
|
30
|
-
|
|
31
|
-
trace_id: str
|
|
32
|
-
span_id: str
|
|
33
|
-
name: str
|
|
34
|
-
parent_id: str | None
|
|
35
|
-
attributes: dict[str, Any]
|
|
36
|
-
events: list[Any]
|
|
37
|
-
status_code: str # "OK" | "ERROR" | "UNSET"
|
|
38
|
-
status_description: str | None
|
|
39
|
-
start_time_ns: int
|
|
40
|
-
end_time_ns: int
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
@dataclass
|
|
44
|
-
class SpanInfo:
|
|
45
|
-
"""Information about a span for index building."""
|
|
46
|
-
|
|
47
|
-
span_id: str
|
|
48
|
-
parent_id: str | None
|
|
49
|
-
name: str
|
|
50
|
-
span_type: str
|
|
51
|
-
status: str
|
|
52
|
-
start_time: datetime
|
|
53
|
-
path: Path # Actual directory path for this span
|
|
54
|
-
depth: int = 0 # Nesting depth (0 for root)
|
|
55
|
-
order: int = 0 # Global execution order within trace
|
|
56
|
-
end_time: datetime | None = None
|
|
57
|
-
duration_ms: int = 0
|
|
58
|
-
children: list[str] = field(default_factory=list)
|
|
59
|
-
llm_info: dict[str, Any] | None = None
|
|
60
|
-
prefect_info: dict[str, Any] | None = None
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
@dataclass
|
|
64
|
-
class TraceState:
|
|
65
|
-
"""State for an active trace."""
|
|
66
|
-
|
|
67
|
-
trace_id: str
|
|
68
|
-
name: str
|
|
69
|
-
path: Path
|
|
70
|
-
start_time: datetime
|
|
71
|
-
spans: dict[str, SpanInfo] = field(default_factory=dict)
|
|
72
|
-
root_span_id: str | None = None
|
|
73
|
-
total_tokens: int = 0
|
|
74
|
-
total_cost: float = 0.0
|
|
75
|
-
llm_call_count: int = 0
|
|
76
|
-
span_counter: int = 0 # Global counter for ordering span directories
|
|
77
|
-
merged_wrapper_ids: set[str] = field(default_factory=set) # IDs of merged wrappers
|
|
78
|
-
|
|
79
|
-
|
|
80
29
|
class LocalTraceWriter:
|
|
81
30
|
"""Writes trace spans to local filesystem via background thread.
|
|
82
31
|
|
|
83
32
|
Uses a hierarchical directory structure where child spans are nested
|
|
84
33
|
inside parent span directories. Directory names use numeric prefixes
|
|
85
34
|
(01_, 02_, etc.) to preserve execution order when viewed with `tree`.
|
|
35
|
+
Generates index files and optionally produces _summary.md and
|
|
36
|
+
_auto_summary.md for trace analysis.
|
|
86
37
|
"""
|
|
87
38
|
|
|
88
39
|
def __init__(self, config: TraceDebugConfig):
|
|
@@ -133,9 +84,7 @@ class LocalTraceWriter:
|
|
|
133
84
|
depth = parent_info.depth + 1
|
|
134
85
|
elif parent_id:
|
|
135
86
|
# Parent ID provided but not found - orphan span, place at root
|
|
136
|
-
logger.warning(
|
|
137
|
-
f"Span {span_id} has unknown parent {parent_id}, placing at trace root"
|
|
138
|
-
)
|
|
87
|
+
logger.warning(f"Span {span_id} has unknown parent {parent_id}, placing at trace root")
|
|
139
88
|
parent_path = trace.path
|
|
140
89
|
depth = 0
|
|
141
90
|
else:
|
|
@@ -152,7 +101,7 @@ class LocalTraceWriter:
|
|
|
152
101
|
span_dir.mkdir(parents=True, exist_ok=True)
|
|
153
102
|
|
|
154
103
|
# Record span info
|
|
155
|
-
now = datetime.now(
|
|
104
|
+
now = datetime.now(UTC)
|
|
156
105
|
span_info = SpanInfo(
|
|
157
106
|
span_id=span_id,
|
|
158
107
|
parent_id=parent_id,
|
|
@@ -174,20 +123,6 @@ class LocalTraceWriter:
|
|
|
174
123
|
if parent_id and parent_id in trace.spans:
|
|
175
124
|
trace.spans[parent_id].children.append(span_id)
|
|
176
125
|
|
|
177
|
-
# Append to event log (lightweight - just appends a line)
|
|
178
|
-
self._append_event(
|
|
179
|
-
trace,
|
|
180
|
-
{
|
|
181
|
-
"type": "span_start",
|
|
182
|
-
"span_id": span_id,
|
|
183
|
-
"parent_id": parent_id,
|
|
184
|
-
"name": name,
|
|
185
|
-
"path": str(span_dir.relative_to(trace.path)),
|
|
186
|
-
},
|
|
187
|
-
)
|
|
188
|
-
# Note: _write_status() moved to on_span_end for performance
|
|
189
|
-
# (avoids blocking I/O in main thread on every span start)
|
|
190
|
-
|
|
191
126
|
def on_span_end(self, job: WriteJob) -> None:
|
|
192
127
|
"""Queue span end job for background processing.
|
|
193
128
|
|
|
@@ -200,13 +135,22 @@ class LocalTraceWriter:
|
|
|
200
135
|
"""Flush queue and stop writer thread."""
|
|
201
136
|
if self._shutdown:
|
|
202
137
|
return
|
|
203
|
-
self._shutdown = True
|
|
204
138
|
|
|
205
|
-
#
|
|
139
|
+
# Send sentinel before setting _shutdown so in-flight on_span_end calls
|
|
140
|
+
# can still queue their jobs (they check _shutdown before putting).
|
|
206
141
|
self._queue.put(None)
|
|
207
|
-
|
|
208
|
-
# Wait for thread to finish
|
|
209
142
|
self._writer_thread.join(timeout=timeout)
|
|
143
|
+
self._shutdown = True
|
|
144
|
+
|
|
145
|
+
# Drain any jobs that arrived after the sentinel (race window between
|
|
146
|
+
# sentinel pickup and thread exit where on_span_end could still queue).
|
|
147
|
+
while True:
|
|
148
|
+
try:
|
|
149
|
+
job = self._queue.get_nowait()
|
|
150
|
+
if job is not None:
|
|
151
|
+
self._process_job(job)
|
|
152
|
+
except Empty:
|
|
153
|
+
break
|
|
210
154
|
|
|
211
155
|
# Finalize any remaining traces (ones that didn't have root span end yet)
|
|
212
156
|
with self._lock:
|
|
@@ -223,7 +167,7 @@ class LocalTraceWriter:
|
|
|
223
167
|
return self._traces[trace_id]
|
|
224
168
|
|
|
225
169
|
# Create new trace
|
|
226
|
-
timestamp = datetime.now(
|
|
170
|
+
timestamp = datetime.now(UTC)
|
|
227
171
|
safe_name = self._sanitize_name(name)
|
|
228
172
|
dir_name = f"{timestamp.strftime('%Y%m%d_%H%M%S')}_{trace_id[:8]}_{safe_name}"
|
|
229
173
|
trace_path = self._config.path / dir_name
|
|
@@ -245,16 +189,6 @@ class LocalTraceWriter:
|
|
|
245
189
|
# Write initial trace metadata
|
|
246
190
|
self._write_trace_yaml(trace)
|
|
247
191
|
|
|
248
|
-
# Append trace start event
|
|
249
|
-
self._append_event(
|
|
250
|
-
trace,
|
|
251
|
-
{
|
|
252
|
-
"type": "trace_start",
|
|
253
|
-
"trace_id": trace_id,
|
|
254
|
-
"name": name,
|
|
255
|
-
},
|
|
256
|
-
)
|
|
257
|
-
|
|
258
192
|
return trace
|
|
259
193
|
|
|
260
194
|
def _writer_loop(self) -> None:
|
|
@@ -274,7 +208,7 @@ class LocalTraceWriter:
|
|
|
274
208
|
except Exception as e:
|
|
275
209
|
logger.warning(f"Trace debug write failed for span {job.span_id}: {e}")
|
|
276
210
|
|
|
277
|
-
def _process_job(self, job: WriteJob) -> None:
|
|
211
|
+
def _process_job(self, job: WriteJob) -> None: # noqa: PLR0914
|
|
278
212
|
"""Process a span end job - write all span data."""
|
|
279
213
|
with self._lock:
|
|
280
214
|
trace = self._traces.get(job.trace_id)
|
|
@@ -309,7 +243,7 @@ class LocalTraceWriter:
|
|
|
309
243
|
prefect_info = self._extract_prefect_info(job.attributes)
|
|
310
244
|
|
|
311
245
|
# Update span info (span_info already validated above)
|
|
312
|
-
end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=
|
|
246
|
+
end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=UTC)
|
|
313
247
|
span_info.end_time = end_time
|
|
314
248
|
span_info.duration_ms = int((job.end_time_ns - job.start_time_ns) / 1e6)
|
|
315
249
|
span_info.status = "failed" if job.status_code == "ERROR" else "completed"
|
|
@@ -317,16 +251,22 @@ class LocalTraceWriter:
|
|
|
317
251
|
span_info.llm_info = llm_info
|
|
318
252
|
span_info.prefect_info = prefect_info
|
|
319
253
|
|
|
254
|
+
# Extract description and expected_cost from span attributes
|
|
255
|
+
span_info.description = job.attributes.get("description")
|
|
256
|
+
ec = job.attributes.get("expected_cost")
|
|
257
|
+
span_info.expected_cost = float(ec) if ec is not None else None
|
|
258
|
+
|
|
320
259
|
# Update trace stats
|
|
321
260
|
if llm_info:
|
|
322
261
|
trace.llm_call_count += 1
|
|
323
262
|
trace.total_tokens += llm_info.get("total_tokens", 0)
|
|
324
263
|
trace.total_cost += llm_info.get("cost", 0.0)
|
|
264
|
+
llm_expected = llm_info.get("expected_cost")
|
|
265
|
+
if llm_expected is not None:
|
|
266
|
+
trace.total_expected_cost += float(llm_expected)
|
|
325
267
|
|
|
326
268
|
# Build span metadata (input_ref and output_ref are now dicts)
|
|
327
|
-
span_meta = self._build_span_metadata_v3(
|
|
328
|
-
job, input_ref, output_ref, span_type, llm_info, prefect_info
|
|
329
|
-
)
|
|
269
|
+
span_meta = self._build_span_metadata_v3(job, input_ref, output_ref, span_type, llm_info, prefect_info)
|
|
330
270
|
|
|
331
271
|
# Write _span.yaml
|
|
332
272
|
span_yaml_path = span_dir / "_span.yaml"
|
|
@@ -335,8 +275,8 @@ class LocalTraceWriter:
|
|
|
335
275
|
encoding="utf-8",
|
|
336
276
|
)
|
|
337
277
|
|
|
338
|
-
# Write events.yaml
|
|
339
|
-
if job.events
|
|
278
|
+
# Write events.yaml (OTel span events including log records from the bridge)
|
|
279
|
+
if job.events:
|
|
340
280
|
events_data = self._format_span_events(job.events)
|
|
341
281
|
events_path = span_dir / "events.yaml"
|
|
342
282
|
events_path.write_text(
|
|
@@ -344,17 +284,6 @@ class LocalTraceWriter:
|
|
|
344
284
|
encoding="utf-8",
|
|
345
285
|
)
|
|
346
286
|
|
|
347
|
-
# Append to trace event log
|
|
348
|
-
self._append_event(
|
|
349
|
-
trace,
|
|
350
|
-
{
|
|
351
|
-
"type": "span_end",
|
|
352
|
-
"span_id": job.span_id,
|
|
353
|
-
"status": span_info.status if span_info else "unknown",
|
|
354
|
-
"duration_ms": span_info.duration_ms if span_info else 0,
|
|
355
|
-
},
|
|
356
|
-
)
|
|
357
|
-
|
|
358
287
|
# Update index
|
|
359
288
|
self._write_index(trace)
|
|
360
289
|
|
|
@@ -368,7 +297,8 @@ class LocalTraceWriter:
|
|
|
368
297
|
if job.trace_id in self._artifact_stores:
|
|
369
298
|
del self._artifact_stores[job.trace_id]
|
|
370
299
|
|
|
371
|
-
|
|
300
|
+
@staticmethod
|
|
301
|
+
def _extract_input(attributes: dict[str, Any]) -> Any:
|
|
372
302
|
"""Extract input from span attributes."""
|
|
373
303
|
input_str = attributes.get("lmnr.span.input")
|
|
374
304
|
if input_str:
|
|
@@ -378,7 +308,8 @@ class LocalTraceWriter:
|
|
|
378
308
|
return input_str
|
|
379
309
|
return None
|
|
380
310
|
|
|
381
|
-
|
|
311
|
+
@staticmethod
|
|
312
|
+
def _extract_output(attributes: dict[str, Any]) -> Any:
|
|
382
313
|
"""Extract output from span attributes."""
|
|
383
314
|
output_str = attributes.get("lmnr.span.output")
|
|
384
315
|
if output_str:
|
|
@@ -388,7 +319,8 @@ class LocalTraceWriter:
|
|
|
388
319
|
return output_str
|
|
389
320
|
return None
|
|
390
321
|
|
|
391
|
-
|
|
322
|
+
@staticmethod
|
|
323
|
+
def _extract_span_type(attributes: dict[str, Any]) -> str:
|
|
392
324
|
"""Extract span type from attributes."""
|
|
393
325
|
span_type = attributes.get("lmnr.span.type", "DEFAULT")
|
|
394
326
|
# Map to our types
|
|
@@ -399,30 +331,29 @@ class LocalTraceWriter:
|
|
|
399
331
|
}
|
|
400
332
|
return type_map.get(span_type, "default")
|
|
401
333
|
|
|
402
|
-
|
|
334
|
+
@staticmethod
|
|
335
|
+
def _extract_llm_info(attributes: dict[str, Any]) -> dict[str, Any] | None:
|
|
403
336
|
"""Extract LLM-specific info from attributes."""
|
|
404
337
|
# Check for LLM attributes
|
|
405
|
-
input_tokens = attributes.get("gen_ai.usage.input_tokens") or attributes.get(
|
|
406
|
-
|
|
407
|
-
)
|
|
408
|
-
output_tokens = attributes.get("gen_ai.usage.output_tokens") or attributes.get(
|
|
409
|
-
"gen_ai.usage.completion_tokens"
|
|
410
|
-
)
|
|
338
|
+
input_tokens = attributes.get("gen_ai.usage.input_tokens") or attributes.get("gen_ai.usage.prompt_tokens")
|
|
339
|
+
output_tokens = attributes.get("gen_ai.usage.output_tokens") or attributes.get("gen_ai.usage.completion_tokens")
|
|
411
340
|
|
|
412
341
|
if input_tokens is None and output_tokens is None:
|
|
413
342
|
return None
|
|
414
343
|
|
|
415
344
|
return {
|
|
416
|
-
"model": attributes.get("gen_ai.response.model")
|
|
417
|
-
or attributes.get("gen_ai.request.model"),
|
|
345
|
+
"model": attributes.get("gen_ai.response.model") or attributes.get("gen_ai.request.model"),
|
|
418
346
|
"provider": attributes.get("gen_ai.system"),
|
|
419
347
|
"input_tokens": input_tokens or 0,
|
|
420
348
|
"output_tokens": output_tokens or 0,
|
|
421
349
|
"total_tokens": (input_tokens or 0) + (output_tokens or 0),
|
|
422
350
|
"cost": attributes.get("gen_ai.usage.cost", 0.0),
|
|
351
|
+
"expected_cost": attributes.get("expected_cost"),
|
|
352
|
+
"purpose": attributes.get("purpose"),
|
|
423
353
|
}
|
|
424
354
|
|
|
425
|
-
|
|
355
|
+
@staticmethod
|
|
356
|
+
def _extract_prefect_info(attributes: dict[str, Any]) -> dict[str, Any] | None:
|
|
426
357
|
"""Extract Prefect-specific info from attributes."""
|
|
427
358
|
run_id = attributes.get("prefect.run.id")
|
|
428
359
|
if not run_id:
|
|
@@ -435,8 +366,10 @@ class LocalTraceWriter:
|
|
|
435
366
|
"tags": attributes.get("prefect.tags", []),
|
|
436
367
|
}
|
|
437
368
|
|
|
438
|
-
|
|
439
|
-
|
|
369
|
+
_EXCLUDED_ATTRIBUTES: frozenset[str] = frozenset({"lmnr.span.input", "lmnr.span.output"})
|
|
370
|
+
|
|
371
|
+
@staticmethod
|
|
372
|
+
def _build_span_metadata_v3( # noqa: PLR0917
|
|
440
373
|
job: WriteJob,
|
|
441
374
|
input_ref: dict[str, Any],
|
|
442
375
|
output_ref: dict[str, Any],
|
|
@@ -445,8 +378,8 @@ class LocalTraceWriter:
|
|
|
445
378
|
prefect_info: dict[str, Any] | None,
|
|
446
379
|
) -> dict[str, Any]:
|
|
447
380
|
"""Build span metadata dictionary (V3 format with dict refs)."""
|
|
448
|
-
start_time = datetime.fromtimestamp(job.start_time_ns / 1e9, tz=
|
|
449
|
-
end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=
|
|
381
|
+
start_time = datetime.fromtimestamp(job.start_time_ns / 1e9, tz=UTC)
|
|
382
|
+
end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=UTC)
|
|
450
383
|
duration_ms = int((job.end_time_ns - job.start_time_ns) / 1e6)
|
|
451
384
|
|
|
452
385
|
meta: dict[str, Any] = {
|
|
@@ -470,6 +403,14 @@ class LocalTraceWriter:
|
|
|
470
403
|
if llm_info:
|
|
471
404
|
meta["llm"] = llm_info
|
|
472
405
|
|
|
406
|
+
# Add observability metadata
|
|
407
|
+
description = job.attributes.get("description")
|
|
408
|
+
if description:
|
|
409
|
+
meta["description"] = description
|
|
410
|
+
expected_cost = job.attributes.get("expected_cost")
|
|
411
|
+
if expected_cost is not None:
|
|
412
|
+
meta["expected_cost"] = float(expected_cost)
|
|
413
|
+
|
|
473
414
|
# Add content references (input_ref and output_ref are dicts from ContentWriter.write())
|
|
474
415
|
meta["input"] = input_ref
|
|
475
416
|
meta["output"] = output_ref
|
|
@@ -480,18 +421,22 @@ class LocalTraceWriter:
|
|
|
480
421
|
"message": job.status_description,
|
|
481
422
|
}
|
|
482
423
|
|
|
424
|
+
# Add raw span attributes (excluding input/output which are in separate files)
|
|
425
|
+
filtered_attrs = {k: v for k, v in job.attributes.items() if k not in LocalTraceWriter._EXCLUDED_ATTRIBUTES}
|
|
426
|
+
if filtered_attrs:
|
|
427
|
+
meta["attributes"] = filtered_attrs
|
|
428
|
+
|
|
483
429
|
return meta
|
|
484
430
|
|
|
485
|
-
|
|
431
|
+
@staticmethod
|
|
432
|
+
def _format_span_events(events: list[Any]) -> list[dict[str, Any]]:
|
|
486
433
|
"""Format span events for YAML output."""
|
|
487
|
-
result = []
|
|
434
|
+
result: list[dict[str, Any]] = []
|
|
488
435
|
for event in events:
|
|
489
436
|
try:
|
|
490
437
|
event_dict = {
|
|
491
438
|
"name": event.name,
|
|
492
|
-
"timestamp": datetime.fromtimestamp(
|
|
493
|
-
event.timestamp / 1e9, tz=timezone.utc
|
|
494
|
-
).isoformat(),
|
|
439
|
+
"timestamp": datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).isoformat(),
|
|
495
440
|
}
|
|
496
441
|
if event.attributes:
|
|
497
442
|
event_dict["attributes"] = dict(event.attributes)
|
|
@@ -500,28 +445,8 @@ class LocalTraceWriter:
|
|
|
500
445
|
continue
|
|
501
446
|
return result
|
|
502
447
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
mode = self._config.events_file_mode
|
|
506
|
-
|
|
507
|
-
if mode == "none":
|
|
508
|
-
return False
|
|
509
|
-
elif mode == "errors_only":
|
|
510
|
-
return status_code == "ERROR"
|
|
511
|
-
elif mode == "all":
|
|
512
|
-
return True
|
|
513
|
-
else:
|
|
514
|
-
# Default to errors_only if unknown mode
|
|
515
|
-
return status_code == "ERROR"
|
|
516
|
-
|
|
517
|
-
def _append_event(self, trace: TraceState, event: dict[str, Any]) -> None:
|
|
518
|
-
"""Append event to trace event log (JSONL format)."""
|
|
519
|
-
event["ts"] = datetime.now(timezone.utc).isoformat()
|
|
520
|
-
events_path = trace.path / "_events.jsonl"
|
|
521
|
-
with events_path.open("a", encoding="utf-8") as f:
|
|
522
|
-
f.write(json.dumps(event) + "\n")
|
|
523
|
-
|
|
524
|
-
def _write_trace_yaml(self, trace: TraceState) -> None:
|
|
448
|
+
@staticmethod
|
|
449
|
+
def _write_trace_yaml(trace: TraceState) -> None:
|
|
525
450
|
"""Write _trace.yaml file."""
|
|
526
451
|
trace_meta = {
|
|
527
452
|
"trace_id": trace.trace_id,
|
|
@@ -564,10 +489,11 @@ class LocalTraceWriter:
|
|
|
564
489
|
if self._config.include_error_index:
|
|
565
490
|
self._write_errors_index(trace, sorted_spans)
|
|
566
491
|
|
|
567
|
-
|
|
492
|
+
@staticmethod
|
|
493
|
+
def _write_tree_index(trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
|
|
568
494
|
"""Write _tree.yaml - lightweight tree structure (~5KB)."""
|
|
569
495
|
span_paths: dict[str, str] = {}
|
|
570
|
-
tree_entries = []
|
|
496
|
+
tree_entries: list[dict[str, Any]] = []
|
|
571
497
|
|
|
572
498
|
for span in sorted_spans:
|
|
573
499
|
# Skip spans that were identified as wrappers during merge
|
|
@@ -596,7 +522,7 @@ class LocalTraceWriter:
|
|
|
596
522
|
|
|
597
523
|
tree_entries.append(entry)
|
|
598
524
|
|
|
599
|
-
tree_data = {
|
|
525
|
+
tree_data: dict[str, Any] = {
|
|
600
526
|
"format_version": 3,
|
|
601
527
|
"trace_id": trace.trace_id,
|
|
602
528
|
"root_span_id": trace.root_span_id,
|
|
@@ -611,9 +537,10 @@ class LocalTraceWriter:
|
|
|
611
537
|
encoding="utf-8",
|
|
612
538
|
)
|
|
613
539
|
|
|
614
|
-
|
|
540
|
+
@staticmethod
|
|
541
|
+
def _write_llm_index(trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
|
|
615
542
|
"""Write _llm_calls.yaml - LLM-specific details."""
|
|
616
|
-
llm_calls = []
|
|
543
|
+
llm_calls: list[dict[str, Any]] = []
|
|
617
544
|
|
|
618
545
|
for span in sorted_spans:
|
|
619
546
|
if span.llm_info:
|
|
@@ -634,6 +561,8 @@ class LocalTraceWriter:
|
|
|
634
561
|
"output_tokens": span.llm_info.get("output_tokens", 0),
|
|
635
562
|
"total_tokens": span.llm_info.get("total_tokens", 0),
|
|
636
563
|
"cost": span.llm_info.get("cost", 0.0),
|
|
564
|
+
"expected_cost": span.llm_info.get("expected_cost"),
|
|
565
|
+
"purpose": span.llm_info.get("purpose"),
|
|
637
566
|
"duration_ms": span.duration_ms,
|
|
638
567
|
"status": span.status,
|
|
639
568
|
"path": relative_path,
|
|
@@ -644,12 +573,13 @@ class LocalTraceWriter:
|
|
|
644
573
|
|
|
645
574
|
llm_calls.append(llm_entry)
|
|
646
575
|
|
|
647
|
-
llm_data = {
|
|
576
|
+
llm_data: dict[str, Any] = {
|
|
648
577
|
"format_version": 3,
|
|
649
578
|
"trace_id": trace.trace_id,
|
|
650
579
|
"llm_call_count": len(llm_calls),
|
|
651
580
|
"total_tokens": trace.total_tokens,
|
|
652
581
|
"total_cost": round(trace.total_cost, 6),
|
|
582
|
+
"total_expected_cost": round(trace.total_expected_cost, 6),
|
|
653
583
|
"calls": llm_calls,
|
|
654
584
|
}
|
|
655
585
|
|
|
@@ -659,9 +589,10 @@ class LocalTraceWriter:
|
|
|
659
589
|
encoding="utf-8",
|
|
660
590
|
)
|
|
661
591
|
|
|
662
|
-
|
|
592
|
+
@staticmethod
|
|
593
|
+
def _write_errors_index(trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
|
|
663
594
|
"""Write _errors.yaml - failed spans only."""
|
|
664
|
-
error_spans = []
|
|
595
|
+
error_spans: list[dict[str, Any]] = []
|
|
665
596
|
|
|
666
597
|
for span in sorted_spans:
|
|
667
598
|
if span.status == "failed":
|
|
@@ -682,7 +613,7 @@ class LocalTraceWriter:
|
|
|
682
613
|
error_entry["end_time"] = span.end_time.isoformat()
|
|
683
614
|
|
|
684
615
|
# Get parent chain for context
|
|
685
|
-
parent_chain = []
|
|
616
|
+
parent_chain: list[str] = []
|
|
686
617
|
current_id = span.parent_id
|
|
687
618
|
while current_id and current_id in trace.spans:
|
|
688
619
|
parent = trace.spans[current_id]
|
|
@@ -694,7 +625,7 @@ class LocalTraceWriter:
|
|
|
694
625
|
error_spans.append(error_entry)
|
|
695
626
|
|
|
696
627
|
if error_spans: # Only write if there are errors
|
|
697
|
-
errors_data = {
|
|
628
|
+
errors_data: dict[str, Any] = {
|
|
698
629
|
"format_version": 3,
|
|
699
630
|
"trace_id": trace.trace_id,
|
|
700
631
|
"error_count": len(error_spans),
|
|
@@ -703,13 +634,12 @@ class LocalTraceWriter:
|
|
|
703
634
|
|
|
704
635
|
errors_path = trace.path / "_errors.yaml"
|
|
705
636
|
errors_path.write_text(
|
|
706
|
-
yaml.dump(
|
|
707
|
-
errors_data, default_flow_style=False, allow_unicode=True, sort_keys=False
|
|
708
|
-
),
|
|
637
|
+
yaml.dump(errors_data, default_flow_style=False, allow_unicode=True, sort_keys=False),
|
|
709
638
|
encoding="utf-8",
|
|
710
639
|
)
|
|
711
640
|
|
|
712
|
-
|
|
641
|
+
@staticmethod
|
|
642
|
+
def _detect_wrapper_spans(trace: TraceState) -> set[str]:
|
|
713
643
|
"""Detect Prefect wrapper spans that should be merged with their inner spans.
|
|
714
644
|
|
|
715
645
|
Detection criteria:
|
|
@@ -718,7 +648,7 @@ class LocalTraceWriter:
|
|
|
718
648
|
3. Parent has no I/O (input type is "none")
|
|
719
649
|
4. Parent has prefect.run.id, child does not
|
|
720
650
|
"""
|
|
721
|
-
wrappers = set()
|
|
651
|
+
wrappers: set[str] = set()
|
|
722
652
|
|
|
723
653
|
for span_id, span in trace.spans.items():
|
|
724
654
|
# Must have exactly one child
|
|
@@ -798,17 +728,16 @@ class LocalTraceWriter:
|
|
|
798
728
|
if wrapper_id in grandparent.children:
|
|
799
729
|
idx = grandparent.children.index(wrapper_id)
|
|
800
730
|
grandparent.children[idx] = child_id
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
trace.root_span_id = child_id
|
|
731
|
+
# Wrapper was root - child becomes new root
|
|
732
|
+
elif trace.root_span_id == wrapper_id:
|
|
733
|
+
trace.root_span_id = child_id
|
|
805
734
|
|
|
806
735
|
# Mark wrapper as merged (used in index generation)
|
|
807
736
|
wrapper.children = [] # Clear to indicate it's merged
|
|
808
737
|
|
|
809
738
|
def _finalize_trace(self, trace: TraceState) -> None:
|
|
810
739
|
"""Finalize a trace - update metadata and generate summary."""
|
|
811
|
-
end_time = datetime.now(
|
|
740
|
+
end_time = datetime.now(UTC)
|
|
812
741
|
duration = (end_time - trace.start_time).total_seconds()
|
|
813
742
|
|
|
814
743
|
# Determine final status
|
|
@@ -835,6 +764,7 @@ class LocalTraceWriter:
|
|
|
835
764
|
"llm_calls": trace.llm_call_count,
|
|
836
765
|
"total_tokens": trace.total_tokens,
|
|
837
766
|
"total_cost": round(trace.total_cost, 6),
|
|
767
|
+
"total_expected_cost": round(trace.total_expected_cost, 6),
|
|
838
768
|
},
|
|
839
769
|
}
|
|
840
770
|
|
|
@@ -848,24 +778,30 @@ class LocalTraceWriter:
|
|
|
848
778
|
self._write_index(trace)
|
|
849
779
|
|
|
850
780
|
# Generate summary if enabled
|
|
781
|
+
summary: str | None = None
|
|
851
782
|
if self._config.generate_summary:
|
|
852
|
-
# Lazy import to avoid circular dependency
|
|
853
|
-
from .summary import generate_summary # noqa: PLC0415
|
|
854
|
-
|
|
855
783
|
summary = generate_summary(trace)
|
|
856
784
|
summary_path = trace.path / "_summary.md"
|
|
857
785
|
summary_path.write_text(summary, encoding="utf-8")
|
|
858
786
|
|
|
859
|
-
#
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
787
|
+
# Generate LLM-powered auto-summary if enabled.
|
|
788
|
+
# asyncio.run() is unsafe when the current thread already has a running event loop.
|
|
789
|
+
# Skip if static summary is unavailable: auto-summary uses it as context input.
|
|
790
|
+
has_running_loop = False
|
|
791
|
+
try:
|
|
792
|
+
asyncio.get_running_loop()
|
|
793
|
+
has_running_loop = True
|
|
794
|
+
except RuntimeError:
|
|
795
|
+
pass
|
|
796
|
+
if self._config.auto_summary_enabled and not has_running_loop and summary is not None:
|
|
797
|
+
try:
|
|
798
|
+
auto_mod = importlib.import_module("ai_pipeline_core.observability._debug._auto_summary")
|
|
799
|
+
auto_summary_text = asyncio.run(auto_mod.generate_auto_summary(trace, summary, self._config.auto_summary_model))
|
|
800
|
+
if auto_summary_text:
|
|
801
|
+
auto_summary_path = trace.path / "_auto_summary.md"
|
|
802
|
+
auto_summary_path.write_text(auto_summary_text, encoding="utf-8")
|
|
803
|
+
except Exception as e:
|
|
804
|
+
logger.warning(f"Auto-summary generation failed: {e}")
|
|
869
805
|
|
|
870
806
|
def _cleanup_old_traces(self) -> None:
|
|
871
807
|
"""Delete old traces beyond max_traces limit."""
|
|
@@ -873,10 +809,7 @@ class LocalTraceWriter:
|
|
|
873
809
|
return
|
|
874
810
|
|
|
875
811
|
# Get all trace directories sorted by modification time
|
|
876
|
-
trace_dirs = []
|
|
877
|
-
for path in self._config.path.iterdir():
|
|
878
|
-
if path.is_dir() and (path / "_trace.yaml").exists():
|
|
879
|
-
trace_dirs.append((path.stat().st_mtime, path))
|
|
812
|
+
trace_dirs = [(path.stat().st_mtime, path) for path in self._config.path.iterdir() if path.is_dir() and (path / "_trace.yaml").exists()]
|
|
880
813
|
|
|
881
814
|
trace_dirs.sort(reverse=True) # Newest first
|
|
882
815
|
|
|
@@ -887,7 +820,8 @@ class LocalTraceWriter:
|
|
|
887
820
|
except Exception as e:
|
|
888
821
|
logger.warning(f"Failed to delete old trace {path}: {e}")
|
|
889
822
|
|
|
890
|
-
|
|
823
|
+
@staticmethod
|
|
824
|
+
def _sanitize_name(name: str) -> str:
|
|
891
825
|
"""Sanitize name for safe filesystem use.
|
|
892
826
|
|
|
893
827
|
Truncates to 24 chars + 4-char hash to avoid collisions and keep
|
|
@@ -897,11 +831,7 @@ class LocalTraceWriter:
|
|
|
897
831
|
safe = safe.strip(". ")
|
|
898
832
|
|
|
899
833
|
# Handle Windows reserved names (CON, PRN, AUX, NUL, COM1-9, LPT1-9)
|
|
900
|
-
reserved = (
|
|
901
|
-
{"CON", "PRN", "AUX", "NUL"}
|
|
902
|
-
| {f"COM{i}" for i in range(1, 10)}
|
|
903
|
-
| {f"LPT{i}" for i in range(1, 10)}
|
|
904
|
-
)
|
|
834
|
+
reserved = {"CON", "PRN", "AUX", "NUL"} | {f"COM{i}" for i in range(1, 10)} | {f"LPT{i}" for i in range(1, 10)}
|
|
905
835
|
if safe.upper() in reserved:
|
|
906
836
|
safe = f"_{safe}"
|
|
907
837
|
|