ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,913 @@
1
+ """Local trace writer for filesystem-based debugging."""
2
+
3
+ import atexit
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import re
8
+ import shutil
9
+ import socket
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+ from queue import Empty, Queue
14
+ from threading import Lock, Thread
15
+ from typing import Any
16
+
17
+ import yaml
18
+
19
+ from ai_pipeline_core.logging import get_pipeline_logger
20
+
21
+ from .config import TraceDebugConfig
22
+ from .content import ArtifactStore, ContentWriter
23
+
24
+ logger = get_pipeline_logger(__name__)
25
+
26
+
27
@dataclass
class WriteJob:
    """Job for background writer thread.

    A snapshot of a finished span, built in the main thread and handed to
    the writer thread via the queue so all filesystem work happens off the
    hot path.
    """

    trace_id: str
    span_id: str
    name: str
    parent_id: str | None  # None for a root span
    attributes: dict[str, Any]  # raw span attributes (e.g. "lmnr.span.*", "gen_ai.*" keys)
    events: list[Any]  # span events; formatted lazily by _format_span_events
    status_code: str  # "OK" | "ERROR" | "UNSET"
    status_description: str | None  # error message when status_code != "OK"
    start_time_ns: int  # epoch nanoseconds
    end_time_ns: int  # epoch nanoseconds
41
+
42
+
43
@dataclass
class SpanInfo:
    """Information about a span for index building.

    Created at span start with "running" status; end-time fields and
    type-specific info are filled in when the span-end job is processed.
    """

    span_id: str
    parent_id: str | None  # None for the trace's root span
    name: str
    span_type: str  # "llm" | "tool" | "default"
    status: str  # "running", then "completed" or "failed"
    start_time: datetime
    path: Path  # Actual directory path for this span
    depth: int = 0  # Nesting depth (0 for root)
    order: int = 0  # Global execution order within trace
    end_time: datetime | None = None  # set by _process_job on span end
    duration_ms: int = 0
    children: list[str] = field(default_factory=list)  # child span IDs, in start order
    llm_info: dict[str, Any] | None = None  # token/cost summary for LLM spans
    prefect_info: dict[str, Any] | None = None  # Prefect run metadata, if present
61
+
62
+
63
@dataclass
class TraceState:
    """State for an active trace.

    Held in LocalTraceWriter._traces while any span is still running and
    dropped from memory once the trace is finalized.
    """

    trace_id: str
    name: str
    path: Path  # root directory for this trace on disk
    start_time: datetime
    spans: dict[str, SpanInfo] = field(default_factory=dict)  # span_id -> info
    root_span_id: str | None = None
    total_tokens: int = 0  # accumulated across all LLM spans
    total_cost: float = 0.0  # accumulated across all LLM spans
    llm_call_count: int = 0
    span_counter: int = 0  # Global counter for ordering span directories
    merged_wrapper_ids: set[str] = field(default_factory=set)  # IDs of merged wrappers
78
+
79
+
80
class LocalTraceWriter:
    """Writes trace spans to local filesystem via background thread.

    Uses a hierarchical directory structure where child spans are nested
    inside parent span directories. Directory names use numeric prefixes
    (01_, 02_, etc.) to preserve execution order when viewed with `tree`.
    """

    def __init__(self, config: TraceDebugConfig):
        """Initialize trace writer with config.

        Ensures the base output directory exists, prunes traces beyond the
        configured limit, starts the daemon writer thread, and registers an
        atexit hook so pending writes are flushed on interpreter exit.
        """
        self._config = config
        # None is the shutdown sentinel for the writer thread.
        self._queue: Queue[WriteJob | None] = Queue()
        self._traces: dict[str, TraceState] = {}  # active traces, keyed by trace_id
        self._artifact_stores: dict[str, ArtifactStore] = {}  # One per trace for deduplication
        self._lock = Lock()  # guards _traces/_artifact_stores across main + writer threads
        self._shutdown = False

        # Ensure base path exists
        config.path.mkdir(parents=True, exist_ok=True)

        # Clean up old traces if needed
        self._cleanup_old_traces()

        # Start background writer thread (daemon: never blocks interpreter exit)
        self._writer_thread = Thread(
            target=self._writer_loop,
            name="trace-debug-writer",
            daemon=True,
        )
        self._writer_thread.start()

        # Register shutdown handler
        atexit.register(self.shutdown)
113
+
114
    def on_span_start(
        self,
        trace_id: str,
        span_id: str,
        parent_id: str | None,
        name: str,
    ) -> None:
        """Handle span start - create directories and record metadata.

        Called from SpanProcessor.on_start() in the main thread.
        Creates hierarchical directories nested under parent spans and
        registers a SpanInfo with "running" status. Only lightweight I/O
        (mkdir + one JSONL append) happens here; heavy writes are deferred
        to the background thread at span end.
        """
        with self._lock:
            trace = self._get_or_create_trace(trace_id, name)

            # Determine parent path and depth
            if parent_id and parent_id in trace.spans:
                parent_info = trace.spans[parent_id]
                parent_path = parent_info.path
                depth = parent_info.depth + 1
            elif parent_id:
                # Parent ID provided but not found - orphan span, place at root
                logger.warning(
                    f"Span {span_id} has unknown parent {parent_id}, placing at trace root"
                )
                parent_path = trace.path
                depth = 0
            else:
                parent_path = trace.path
                depth = 0

            # Generate ordered directory name (4 digits supports up to 9999 spans)
            trace.span_counter += 1
            safe_name = self._sanitize_name(name)
            dir_name = f"{trace.span_counter:04d}_{safe_name}"

            # Create nested directory
            span_dir = parent_path / dir_name
            span_dir.mkdir(parents=True, exist_ok=True)

            # Record span info; span_type is refined later in _process_job
            now = datetime.now(timezone.utc)
            span_info = SpanInfo(
                span_id=span_id,
                parent_id=parent_id,
                name=name,
                span_type="default",
                status="running",
                start_time=now,
                path=span_dir,
                depth=depth,
                order=trace.span_counter,
            )
            trace.spans[span_id] = span_info

            # Track root span
            if parent_id is None:
                trace.root_span_id = span_id

            # Update parent's children list
            if parent_id and parent_id in trace.spans:
                trace.spans[parent_id].children.append(span_id)

            # Append to event log (lightweight - just appends a line)
            self._append_event(
                trace,
                {
                    "type": "span_start",
                    "span_id": span_id,
                    "parent_id": parent_id,
                    "name": name,
                    "path": str(span_dir.relative_to(trace.path)),
                },
            )
            # Note: _write_status() moved to on_span_end for performance
            # (avoids blocking I/O in main thread on every span start)
190
+
191
+ def on_span_end(self, job: WriteJob) -> None:
192
+ """Queue span end job for background processing.
193
+
194
+ Called from SpanProcessor.on_end() in the main thread.
195
+ """
196
+ if not self._shutdown:
197
+ self._queue.put(job)
198
+
199
    def shutdown(self, timeout: float = 30.0) -> None:
        """Flush queue and stop writer thread.

        Idempotent (also invoked via atexit). Puts the None sentinel on
        the queue so _writer_loop exits after draining queued jobs, waits
        up to *timeout* seconds for the thread, then finalizes whatever
        traces are still in memory.
        """
        if self._shutdown:
            return
        self._shutdown = True

        # Signal shutdown
        self._queue.put(None)

        # Wait for thread to finish
        self._writer_thread.join(timeout=timeout)

        # Finalize any remaining traces (ones that didn't have root span end yet)
        with self._lock:
            for trace in list(self._traces.values()):
                try:
                    self._finalize_trace(trace)
                except Exception as e:
                    logger.warning(f"Failed to finalize trace {trace.trace_id}: {e}")
            self._traces.clear()
219
+
220
    def _get_or_create_trace(self, trace_id: str, name: str) -> TraceState:
        """Get existing trace or create new one.

        New traces get a directory named
        ``<YYYYMMDD_HHMMSS>_<trace_id[:8]>_<sanitized name>`` under the
        configured base path, their own ArtifactStore, an initial
        _trace.yaml, and a trace_start event. Called with self._lock held
        (see on_span_start).
        """
        if trace_id in self._traces:
            return self._traces[trace_id]

        # Create new trace
        timestamp = datetime.now(timezone.utc)
        safe_name = self._sanitize_name(name)
        dir_name = f"{timestamp.strftime('%Y%m%d_%H%M%S')}_{trace_id[:8]}_{safe_name}"
        trace_path = self._config.path / dir_name

        trace_path.mkdir(parents=True, exist_ok=True)
        # Note: No 'spans/' subdirectory - spans are nested hierarchically

        trace = TraceState(
            trace_id=trace_id,
            name=name,
            path=trace_path,
            start_time=timestamp,
        )
        self._traces[trace_id] = trace

        # Create artifact store for this trace
        self._artifact_stores[trace_id] = ArtifactStore(trace_path)

        # Write initial trace metadata
        self._write_trace_yaml(trace)

        # Append trace start event
        self._append_event(
            trace,
            {
                "type": "trace_start",
                "trace_id": trace_id,
                "name": name,
            },
        )

        return trace
259
+
260
    def _writer_loop(self) -> None:
        """Background thread loop for processing write jobs.

        Polls the queue with a 1s timeout so the thread periodically wakes
        even when idle, and exits on the None sentinel pushed by
        shutdown(). Per-job failures are logged and swallowed so tracing
        can never crash the host process.
        """
        while True:
            try:
                job = self._queue.get(timeout=1.0)
            except Empty:
                continue

            if job is None:
                # Shutdown signal
                break

            try:
                self._process_job(job)
            except Exception as e:
                logger.warning(f"Trace debug write failed for span {job.span_id}: {e}")
276
+
277
    def _process_job(self, job: WriteJob) -> None:
        """Process a span end job - write all span data.

        Runs on the writer thread under self._lock. Writes the span's
        input/output content, _span.yaml metadata, and optional
        events.yaml; updates in-memory SpanInfo and trace stats; rewrites
        the trace indexes; and finalizes + evicts the trace once no span
        is left in "running" state.
        """
        with self._lock:
            trace = self._traces.get(job.trace_id)
            if not trace:
                logger.warning(f"Trace {job.trace_id} not found for span {job.span_id}")
                return

            span_info = trace.spans.get(job.span_id)
            if not span_info:
                logger.warning(f"Span {job.span_id} not found in trace {job.trace_id}")
                return

            span_dir = span_info.path

            # Extract input/output from attributes
            input_content = self._extract_input(job.attributes)
            output_content = self._extract_output(job.attributes)

            # Get artifact store for this trace
            artifact_store = self._artifact_stores.get(job.trace_id)

            # Create content writer with artifact store
            content_writer = ContentWriter(self._config, artifact_store)

            # Write input/output
            input_ref = content_writer.write(input_content, span_dir, "input")
            output_ref = content_writer.write(output_content, span_dir, "output")

            # Extract span type and metadata
            span_type = self._extract_span_type(job.attributes)
            llm_info = self._extract_llm_info(job.attributes)
            prefect_info = self._extract_prefect_info(job.attributes)

            # Update span info (span_info already validated above)
            end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=timezone.utc)
            span_info.end_time = end_time
            span_info.duration_ms = int((job.end_time_ns - job.start_time_ns) / 1e6)
            span_info.status = "failed" if job.status_code == "ERROR" else "completed"
            span_info.span_type = span_type
            span_info.llm_info = llm_info
            span_info.prefect_info = prefect_info

            # Update trace stats
            if llm_info:
                trace.llm_call_count += 1
                trace.total_tokens += llm_info.get("total_tokens", 0)
                trace.total_cost += llm_info.get("cost", 0.0)

            # Build span metadata (input_ref and output_ref are now dicts)
            span_meta = self._build_span_metadata_v3(
                job, input_ref, output_ref, span_type, llm_info, prefect_info
            )

            # Write _span.yaml
            span_yaml_path = span_dir / "_span.yaml"
            span_yaml_path.write_text(
                yaml.dump(span_meta, default_flow_style=False, allow_unicode=True, sort_keys=False),
                encoding="utf-8",
            )

            # Write events.yaml based on config
            if job.events and self._should_write_events(job.status_code):
                events_data = self._format_span_events(job.events)
                events_path = span_dir / "events.yaml"
                events_path.write_text(
                    yaml.dump(events_data, default_flow_style=False, allow_unicode=True),
                    encoding="utf-8",
                )

            # Append to trace event log
            # NOTE(review): span_info is guaranteed non-None here (early return
            # above), so the conditional fallbacks below are dead but harmless.
            self._append_event(
                trace,
                {
                    "type": "span_end",
                    "span_id": job.span_id,
                    "status": span_info.status if span_info else "unknown",
                    "duration_ms": span_info.duration_ms if span_info else 0,
                },
            )

            # Update index
            self._write_index(trace)

            # Finalize trace when ALL spans are completed (not just root)
            # This handles the case where child span end jobs arrive after root
            running_spans = [s for s in trace.spans.values() if s.status == "running"]
            if not running_spans:
                self._finalize_trace(trace)
                # Remove from memory to prevent memory leak
                del self._traces[job.trace_id]
                if job.trace_id in self._artifact_stores:
                    del self._artifact_stores[job.trace_id]
370
+
371
+ def _extract_input(self, attributes: dict[str, Any]) -> Any:
372
+ """Extract input from span attributes."""
373
+ input_str = attributes.get("lmnr.span.input")
374
+ if input_str:
375
+ try:
376
+ return json.loads(input_str)
377
+ except (json.JSONDecodeError, TypeError):
378
+ return input_str
379
+ return None
380
+
381
+ def _extract_output(self, attributes: dict[str, Any]) -> Any:
382
+ """Extract output from span attributes."""
383
+ output_str = attributes.get("lmnr.span.output")
384
+ if output_str:
385
+ try:
386
+ return json.loads(output_str)
387
+ except (json.JSONDecodeError, TypeError):
388
+ return output_str
389
+ return None
390
+
391
+ def _extract_span_type(self, attributes: dict[str, Any]) -> str:
392
+ """Extract span type from attributes."""
393
+ span_type = attributes.get("lmnr.span.type", "DEFAULT")
394
+ # Map to our types
395
+ type_map = {
396
+ "LLM": "llm",
397
+ "TOOL": "tool",
398
+ "DEFAULT": "default",
399
+ }
400
+ return type_map.get(span_type, "default")
401
+
402
+ def _extract_llm_info(self, attributes: dict[str, Any]) -> dict[str, Any] | None:
403
+ """Extract LLM-specific info from attributes."""
404
+ # Check for LLM attributes
405
+ input_tokens = attributes.get("gen_ai.usage.input_tokens") or attributes.get(
406
+ "gen_ai.usage.prompt_tokens"
407
+ )
408
+ output_tokens = attributes.get("gen_ai.usage.output_tokens") or attributes.get(
409
+ "gen_ai.usage.completion_tokens"
410
+ )
411
+
412
+ if input_tokens is None and output_tokens is None:
413
+ return None
414
+
415
+ return {
416
+ "model": attributes.get("gen_ai.response.model")
417
+ or attributes.get("gen_ai.request.model"),
418
+ "provider": attributes.get("gen_ai.system"),
419
+ "input_tokens": input_tokens or 0,
420
+ "output_tokens": output_tokens or 0,
421
+ "total_tokens": (input_tokens or 0) + (output_tokens or 0),
422
+ "cost": attributes.get("gen_ai.usage.cost", 0.0),
423
+ }
424
+
425
+ def _extract_prefect_info(self, attributes: dict[str, Any]) -> dict[str, Any] | None:
426
+ """Extract Prefect-specific info from attributes."""
427
+ run_id = attributes.get("prefect.run.id")
428
+ if not run_id:
429
+ return None
430
+
431
+ return {
432
+ "run_id": run_id,
433
+ "run_name": attributes.get("prefect.run.name"),
434
+ "run_type": attributes.get("prefect.run.type"),
435
+ "tags": attributes.get("prefect.tags", []),
436
+ }
437
+
438
+ def _build_span_metadata_v3(
439
+ self,
440
+ job: WriteJob,
441
+ input_ref: dict[str, Any],
442
+ output_ref: dict[str, Any],
443
+ span_type: str,
444
+ llm_info: dict[str, Any] | None,
445
+ prefect_info: dict[str, Any] | None,
446
+ ) -> dict[str, Any]:
447
+ """Build span metadata dictionary (V3 format with dict refs)."""
448
+ start_time = datetime.fromtimestamp(job.start_time_ns / 1e9, tz=timezone.utc)
449
+ end_time = datetime.fromtimestamp(job.end_time_ns / 1e9, tz=timezone.utc)
450
+ duration_ms = int((job.end_time_ns - job.start_time_ns) / 1e6)
451
+
452
+ meta: dict[str, Any] = {
453
+ "span_id": job.span_id,
454
+ "trace_id": job.trace_id,
455
+ "parent_id": job.parent_id,
456
+ "name": job.name,
457
+ "type": span_type,
458
+ "timing": {
459
+ "start": start_time.isoformat(),
460
+ "end": end_time.isoformat(),
461
+ "duration_ms": duration_ms,
462
+ },
463
+ "status": "failed" if job.status_code == "ERROR" else "completed",
464
+ }
465
+
466
+ # Add type-specific metadata
467
+ if prefect_info:
468
+ meta["prefect"] = prefect_info
469
+
470
+ if llm_info:
471
+ meta["llm"] = llm_info
472
+
473
+ # Add content references (input_ref and output_ref are dicts from ContentWriter.write())
474
+ meta["input"] = input_ref
475
+ meta["output"] = output_ref
476
+
477
+ # Add error info if failed
478
+ if job.status_code != "OK" and job.status_description:
479
+ meta["error"] = {
480
+ "message": job.status_description,
481
+ }
482
+
483
+ return meta
484
+
485
+ def _format_span_events(self, events: list[Any]) -> list[dict[str, Any]]:
486
+ """Format span events for YAML output."""
487
+ result = []
488
+ for event in events:
489
+ try:
490
+ event_dict = {
491
+ "name": event.name,
492
+ "timestamp": datetime.fromtimestamp(
493
+ event.timestamp / 1e9, tz=timezone.utc
494
+ ).isoformat(),
495
+ }
496
+ if event.attributes:
497
+ event_dict["attributes"] = dict(event.attributes)
498
+ result.append(event_dict)
499
+ except Exception:
500
+ continue
501
+ return result
502
+
503
+ def _should_write_events(self, status_code: str) -> bool:
504
+ """Check if events.yaml should be written based on config."""
505
+ mode = self._config.events_file_mode
506
+
507
+ if mode == "none":
508
+ return False
509
+ elif mode == "errors_only":
510
+ return status_code == "ERROR"
511
+ elif mode == "all":
512
+ return True
513
+ else:
514
+ # Default to errors_only if unknown mode
515
+ return status_code == "ERROR"
516
+
517
+ def _append_event(self, trace: TraceState, event: dict[str, Any]) -> None:
518
+ """Append event to trace event log (JSONL format)."""
519
+ event["ts"] = datetime.now(timezone.utc).isoformat()
520
+ events_path = trace.path / "_events.jsonl"
521
+ with events_path.open("a", encoding="utf-8") as f:
522
+ f.write(json.dumps(event) + "\n")
523
+
524
    def _write_trace_yaml(self, trace: TraceState) -> None:
        """Write _trace.yaml file.

        This is the initial "running" snapshot written at trace creation;
        _finalize_trace overwrites it later with end time, duration, and
        final status. Hostname/PID are recorded for correlating the trace
        with the producing process.
        """
        trace_meta = {
            "trace_id": trace.trace_id,
            "name": trace.name,
            "start_time": trace.start_time.isoformat(),
            "end_time": None,
            "duration_seconds": None,
            "status": "running",
            "correlation": {
                "hostname": socket.gethostname(),
                "pid": os.getpid(),
            },
            "stats": {
                "total_spans": len(trace.spans),
                "llm_calls": trace.llm_call_count,
                "total_tokens": trace.total_tokens,
                "total_cost": round(trace.total_cost, 6),
            },
        }

        trace_yaml_path = trace.path / "_trace.yaml"
        trace_yaml_path.write_text(
            yaml.dump(trace_meta, default_flow_style=False, allow_unicode=True, sort_keys=False),
            encoding="utf-8",
        )
550
+
551
    def _write_index(self, trace: TraceState) -> None:
        """Write split index files: _tree.yaml, _llm_calls.yaml, _errors.yaml.

        The tree index is always written; the LLM and error indexes are
        gated by config flags. Spans are pre-sorted by execution order so
        each index lists them chronologically.
        """
        # Sort spans by execution order
        sorted_spans = sorted(trace.spans.values(), key=lambda s: s.order)

        # Write lightweight tree index (always)
        self._write_tree_index(trace, sorted_spans)

        # Write LLM calls index (if enabled)
        if self._config.include_llm_index:
            self._write_llm_index(trace, sorted_spans)

        # Write errors index (if enabled)
        if self._config.include_error_index:
            self._write_errors_index(trace, sorted_spans)
567
    def _write_tree_index(self, trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
        """Write _tree.yaml - lightweight tree structure (~5KB).

        Contains only hierarchy and navigation data (ids, names, status,
        relative paths). Spans recorded in trace.merged_wrapper_ids are
        omitted so merged Prefect wrappers do not appear in the logical
        tree, even though their directories remain on disk.
        """
        span_paths: dict[str, str] = {}
        tree_entries = []

        for span in sorted_spans:
            # Skip spans that were identified as wrappers during merge
            if span.span_id in trace.merged_wrapper_ids:
                continue

            # POSIX-style relative path with trailing slash, for navigation
            relative_path = span.path.relative_to(trace.path).as_posix() + "/"
            span_paths[span.span_id] = relative_path

            # Minimal entry - just hierarchy and navigation
            entry: dict[str, Any] = {
                "span_id": span.span_id,
                "name": span.name,
                "type": span.span_type,
                "status": span.status,
                "path": relative_path,
            }

            # Add parent_id if not root
            if span.parent_id:
                entry["parent_id"] = span.parent_id

            # Add children if any
            if span.children:
                entry["children"] = span.children

            tree_entries.append(entry)

        tree_data = {
            "format_version": 3,
            "trace_id": trace.trace_id,
            "root_span_id": trace.root_span_id,
            "span_count": len(tree_entries),
            "span_paths": span_paths,
            "tree": tree_entries,
        }

        tree_path = trace.path / "_tree.yaml"
        tree_path.write_text(
            yaml.dump(tree_data, default_flow_style=False, allow_unicode=True, sort_keys=False),
            encoding="utf-8",
        )
613
+
614
    def _write_llm_index(self, trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
        """Write _llm_calls.yaml - LLM-specific details.

        Lists every span that has llm_info (model, provider, token counts,
        cost, duration) plus trace-level token/cost totals. Each call name
        is suffixed with its parent span's name to disambiguate repeated
        calls.
        """
        llm_calls = []

        for span in sorted_spans:
            if span.llm_info:
                relative_path = span.path.relative_to(trace.path).as_posix() + "/"

                # Get parent context for better identification
                parent_context = ""
                if span.parent_id and span.parent_id in trace.spans:
                    parent_span = trace.spans[span.parent_id]
                    parent_context = f" (in {parent_span.name})"

                llm_entry = {
                    "span_id": span.span_id,
                    "name": span.name + parent_context,  # Add context to distinguish
                    "model": span.llm_info.get("model"),
                    "provider": span.llm_info.get("provider"),
                    "input_tokens": span.llm_info.get("input_tokens", 0),
                    "output_tokens": span.llm_info.get("output_tokens", 0),
                    "total_tokens": span.llm_info.get("total_tokens", 0),
                    "cost": span.llm_info.get("cost", 0.0),
                    "duration_ms": span.duration_ms,
                    "status": span.status,
                    "path": relative_path,
                }

                if span.start_time:
                    llm_entry["start_time"] = span.start_time.isoformat()

                llm_calls.append(llm_entry)

        llm_data = {
            "format_version": 3,
            "trace_id": trace.trace_id,
            "llm_call_count": len(llm_calls),
            "total_tokens": trace.total_tokens,
            "total_cost": round(trace.total_cost, 6),
            "calls": llm_calls,
        }

        llm_path = trace.path / "_llm_calls.yaml"
        llm_path.write_text(
            yaml.dump(llm_data, default_flow_style=False, allow_unicode=True, sort_keys=False),
            encoding="utf-8",
        )
661
+
662
    def _write_errors_index(self, trace: TraceState, sorted_spans: list[SpanInfo]) -> None:
        """Write _errors.yaml - failed spans only.

        Each failed span entry includes its timing, path, and the chain of
        ancestor span names (root first) for context. The file is only
        written when at least one span failed.
        """
        error_spans = []

        for span in sorted_spans:
            if span.status == "failed":
                relative_path = span.path.relative_to(trace.path).as_posix() + "/"

                error_entry: dict[str, Any] = {
                    "span_id": span.span_id,
                    "name": span.name,
                    "type": span.span_type,
                    "depth": span.depth,
                    "duration_ms": span.duration_ms,
                    "path": relative_path,
                }

                if span.start_time:
                    error_entry["start_time"] = span.start_time.isoformat()
                if span.end_time:
                    error_entry["end_time"] = span.end_time.isoformat()

                # Get parent chain for context (walk up, then reverse to root-first)
                parent_chain = []
                current_id = span.parent_id
                while current_id and current_id in trace.spans:
                    parent = trace.spans[current_id]
                    parent_chain.append(parent.name)
                    current_id = parent.parent_id
                if parent_chain:
                    error_entry["parent_chain"] = list(reversed(parent_chain))

                error_spans.append(error_entry)

        if error_spans:  # Only write if there are errors
            errors_data = {
                "format_version": 3,
                "trace_id": trace.trace_id,
                "error_count": len(error_spans),
                "errors": error_spans,
            }

            errors_path = trace.path / "_errors.yaml"
            errors_path.write_text(
                yaml.dump(
                    errors_data, default_flow_style=False, allow_unicode=True, sort_keys=False
                ),
                encoding="utf-8",
            )
711
+
712
    def _detect_wrapper_spans(self, trace: TraceState) -> set[str]:
        """Detect Prefect wrapper spans that should be merged with their inner spans.

        Detection criteria:
        1. Parent has exactly one child
        2. Names match after stripping hash suffix (e.g., "task-abc123" matches "task")
        3. Parent has no I/O (input type is "none")
        4. Parent has prefect.run.id, child does not

        Returns the set of wrapper span IDs. Reads each candidate's
        _span.yaml from disk for the I/O check, so this must run after the
        spans' end jobs have been written.
        """
        wrappers = set()

        for span_id, span in trace.spans.items():
            # Must have exactly one child
            if len(span.children) != 1:
                continue

            child_id = span.children[0]
            child = trace.spans.get(child_id)
            if not child:
                continue

            # Names must match after stripping hash suffix
            parent_base = re.sub(r"-[a-f0-9]{3,}$", "", span.name)
            child_base = re.sub(r"-[a-f0-9]{3,}$", "", child.name)
            if parent_base != child_base:
                continue

            # Parent must have no I/O (check _span.yaml)
            # NOTE(review): a missing _span.yaml skips the I/O check entirely
            # rather than disqualifying the span - confirm this is intended.
            span_yaml = span.path / "_span.yaml"
            if span_yaml.exists():
                try:
                    span_meta = yaml.safe_load(span_yaml.read_text())
                    if span_meta.get("input", {}).get("type") != "none":
                        continue
                except Exception:
                    continue

            # Parent must have prefect info
            if not span.prefect_info:
                continue

            # Child may have prefect_info if it inherited context from Prefect wrapper
            # Only skip merge if child has DIFFERENT run_id (indicates nested task/flow)
            if child.prefect_info:
                child_run_id = child.prefect_info.get("run_id")
                parent_run_id = span.prefect_info.get("run_id")
                if child_run_id != parent_run_id:
                    # Different run IDs = truly nested Prefect task/flow, don't merge
                    continue

            wrappers.add(span_id)

        return wrappers
765
+
766
    def _merge_wrapper_spans(self, trace: TraceState) -> None:
        """Merge wrapper spans with their inner spans (virtual merge).

        This modifies the span hierarchy so wrappers are skipped in index output.
        Physical directories remain unchanged - only the logical view changes.
        No-op unless config.merge_wrapper_spans is enabled.
        """
        if not self._config.merge_wrapper_spans:
            return

        wrappers = self._detect_wrapper_spans(trace)
        if not wrappers:
            return

        logger.debug(f"Merging {len(wrappers)} wrapper spans in trace {trace.trace_id}")

        # Cache wrapper IDs for use in tree index writing
        trace.merged_wrapper_ids = wrappers

        # For each wrapper, reparent its child to the wrapper's parent
        for wrapper_id in wrappers:
            wrapper = trace.spans[wrapper_id]
            child_id = wrapper.children[0]
            child = trace.spans[child_id]
            grandparent_id = wrapper.parent_id

            # Update child's parent
            child.parent_id = grandparent_id

            # Update grandparent's children (if grandparent exists)
            if grandparent_id and grandparent_id in trace.spans:
                grandparent = trace.spans[grandparent_id]
                # Remove wrapper, add child (in-place swap preserves sibling order)
                if wrapper_id in grandparent.children:
                    idx = grandparent.children.index(wrapper_id)
                    grandparent.children[idx] = child_id
            else:
                # Wrapper was root - child becomes new root
                if trace.root_span_id == wrapper_id:
                    trace.root_span_id = child_id

            # Mark wrapper as merged (used in index generation)
            wrapper.children = []  # Clear to indicate it's merged
808
+
809
    def _finalize_trace(self, trace: TraceState) -> None:
        """Finalize a trace - update metadata and generate summary.

        Overwrites _trace.yaml with final status/duration/stats, merges
        Prefect wrapper spans, rewrites the indexes, optionally writes
        _summary.md, and appends the trace_end event. The overall status
        is "failed" if any span failed, otherwise "completed".
        """
        end_time = datetime.now(timezone.utc)
        duration = (end_time - trace.start_time).total_seconds()

        # Determine final status
        failed_spans = [s for s in trace.spans.values() if s.status == "failed"]
        status = "failed" if failed_spans else "completed"

        # Merge wrapper spans before generating indexes
        self._merge_wrapper_spans(trace)

        # Update _trace.yaml
        trace_meta = {
            "trace_id": trace.trace_id,
            "name": trace.name,
            "start_time": trace.start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "duration_seconds": round(duration, 2),
            "status": status,
            "correlation": {
                "hostname": socket.gethostname(),
                "pid": os.getpid(),
            },
            "stats": {
                "total_spans": len(trace.spans),
                "llm_calls": trace.llm_call_count,
                "total_tokens": trace.total_tokens,
                "total_cost": round(trace.total_cost, 6),
            },
        }

        trace_yaml_path = trace.path / "_trace.yaml"
        trace_yaml_path.write_text(
            yaml.dump(trace_meta, default_flow_style=False, allow_unicode=True, sort_keys=False),
            encoding="utf-8",
        )

        # Final index update
        self._write_index(trace)

        # Generate summary if enabled
        if self._config.generate_summary:
            # Lazy import to avoid circular dependency
            from .summary import generate_summary  # noqa: PLC0415

            summary = generate_summary(trace)
            summary_path = trace.path / "_summary.md"
            summary_path.write_text(summary, encoding="utf-8")

        # Append trace end event
        self._append_event(
            trace,
            {
                "type": "trace_end",
                "trace_id": trace.trace_id,
                "status": status,
                "duration_seconds": round(duration, 2),
            },
        )
869
+
870
+ def _cleanup_old_traces(self) -> None:
871
+ """Delete old traces beyond max_traces limit."""
872
+ if self._config.max_traces is None:
873
+ return
874
+
875
+ # Get all trace directories sorted by modification time
876
+ trace_dirs = []
877
+ for path in self._config.path.iterdir():
878
+ if path.is_dir() and (path / "_trace.yaml").exists():
879
+ trace_dirs.append((path.stat().st_mtime, path))
880
+
881
+ trace_dirs.sort(reverse=True) # Newest first
882
+
883
+ # Delete excess traces
884
+ for _, path in trace_dirs[self._config.max_traces :]:
885
+ try:
886
+ shutil.rmtree(path)
887
+ except Exception as e:
888
+ logger.warning(f"Failed to delete old trace {path}: {e}")
889
+
890
+ def _sanitize_name(self, name: str) -> str:
891
+ """Sanitize name for safe filesystem use.
892
+
893
+ Truncates to 24 chars + 4-char hash to avoid collisions and keep
894
+ paths manageable with deep nesting.
895
+ """
896
+ safe = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name)
897
+ safe = safe.strip(". ")
898
+
899
+ # Handle Windows reserved names (CON, PRN, AUX, NUL, COM1-9, LPT1-9)
900
+ reserved = (
901
+ {"CON", "PRN", "AUX", "NUL"}
902
+ | {f"COM{i}" for i in range(1, 10)}
903
+ | {f"LPT{i}" for i in range(1, 10)}
904
+ )
905
+ if safe.upper() in reserved:
906
+ safe = f"_{safe}"
907
+
908
+ # Truncate with hash suffix to avoid collisions
909
+ if len(safe) > 28:
910
+ name_hash = hashlib.md5(name.encode()).hexdigest()[:4]
911
+ safe = f"{safe[:24]}_{name_hash}"
912
+
913
+ return safe or "span"