ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ """OpenTelemetry SpanProcessor for local trace debugging."""
2
+
3
+ from opentelemetry.context import Context
4
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
5
+ from opentelemetry.trace import StatusCode
6
+
7
+ from .writer import LocalTraceWriter, WriteJob
8
+
9
+
10
class LocalDebugSpanProcessor(SpanProcessor):
    """Span processor that mirrors OpenTelemetry spans to a local trace writer.

    Span lifecycle callbacks from the OpenTelemetry SDK are forwarded to a
    ``LocalTraceWriter`` so spans can be inspected on the local filesystem
    while debugging.

    Usage:
        writer = LocalTraceWriter(config)
        processor = LocalDebugSpanProcessor(writer)
        tracer_provider.add_span_processor(processor)
    """

    def __init__(self, writer: LocalTraceWriter):
        """Remember the writer that receives span start/end notifications."""
        self._writer = writer

    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
        """Report a freshly started span to the writer.

        The writer is told right away so that "running" spans are visible.
        Input/output data does not exist yet; it is collected in on_end().
        Spans without a context are ignored, and any exception is swallowed.
        """
        try:
            ctx = span.context
            if ctx is not None:
                self._writer.on_span_start(
                    format(ctx.trace_id, "032x"),
                    format(ctx.span_id, "016x"),
                    self._get_parent_span_id(span),
                    span.name,
                )
        except Exception:
            # Debug tracing must stay transparent: never fail the real span.
            pass

    def on_end(self, span: ReadableSpan) -> None:
        """Package a finished span into a WriteJob and hand it to the writer.

        Everything (input, output, attributes, events) is read here because
        Laminar sets these attributes after span start. Spans lacking a
        context, start time or end time are skipped; exceptions are swallowed.
        """
        try:
            ctx = span.context
            if ctx is None or span.start_time is None or span.end_time is None:
                return

            trace_hex = format(ctx.trace_id, "032x")
            span_hex = format(ctx.span_id, "016x")
            span_name = span.name
            parent_hex = self._get_parent_span_id_from_readable(span)
            # Copy into plain containers; empty/None values become empty ones.
            attribute_copy = dict(span.attributes) if span.attributes else {}
            event_copy = list(span.events) if span.events else []

            self._writer.on_span_end(
                WriteJob(
                    trace_id=trace_hex,
                    span_id=span_hex,
                    name=span_name,
                    parent_id=parent_hex,
                    attributes=attribute_copy,
                    events=event_copy,
                    status_code=self._get_status_code(span),
                    status_description=span.status.description,
                    start_time_ns=span.start_time,
                    end_time_ns=span.end_time,
                )
            )
        except Exception:
            # Never fail the actual span.
            pass

    def shutdown(self) -> None:
        """Shut down the underlying writer."""
        self._writer.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Report success immediately; this processor performs no flush."""
        return True

    def _get_parent_span_id(self, span: Span) -> str | None:
        """Return the parent span ID of a writable Span as 16 hex digits, or None."""
        parent = getattr(span, "parent", None)
        if not parent:
            return None
        parent_span_id = getattr(parent, "span_id", None)
        if not parent_span_id:
            return None
        return format(parent_span_id, "016x")

    def _get_parent_span_id_from_readable(self, span: ReadableSpan) -> str | None:
        """Return the parent span ID of a ReadableSpan as 16 hex digits, or None."""
        parent = span.parent
        if not parent:
            return None
        parent_span_id = getattr(parent, "span_id", None)
        if not parent_span_id:
            return None
        return format(parent_span_id, "016x")

    def _get_status_code(self, span: ReadableSpan) -> str:
        """Translate the span status into "OK", "ERROR" or "UNSET"."""
        code = span.status.status_code
        if code == StatusCode.OK:
            return "OK"
        return "ERROR" if code == StatusCode.ERROR else "UNSET"
@@ -0,0 +1,236 @@
1
+ """Summary generation for trace debugging.
2
+
3
+ Generates a single _summary.md file that serves both human inspection and LLM debugging.
4
+ Combines high-level overview with detailed navigation for comprehensive trace analysis.
5
+ """
6
+
7
+ from .writer import SpanInfo, TraceState
8
+
9
+
10
def generate_summary(trace: TraceState) -> str:
    """Generate the unified ``_summary.md`` content for a trace.

    Single document meant for both human inspection and LLM debugger context.
    Structure: Overview → Tree → Root Span → LLM Calls → Errors → Navigation.

    Args:
        trace: Trace state providing the spans, the root span ID, the trace
            path and the aggregate LLM counters.

    Returns:
        The Markdown document as one newline-joined string.
    """
    failed_spans = [s for s in trace.spans.values() if s.status == "failed"]
    has_root = bool(trace.root_span_id) and trace.root_span_id in trace.spans

    lines = [f"# Trace Summary: {trace.name}", ""]
    lines.extend(_summary_overview(trace, failed_spans))
    lines.extend(_summary_tree(trace, has_root))
    if has_root:
        lines.extend(_summary_root(trace))
    lines.extend(_summary_llm_calls(trace))
    lines.extend(_summary_errors(trace, failed_spans))
    lines.extend([
        "## Navigation",
        "",
        "- Each span directory contains `_span.yaml` (metadata), `input.yaml`, `output.yaml`",
        "- LLM span inputs contain the full message list",
        "- `_tree.yaml` has span_id → path mapping and full hierarchy",
        "",
    ])
    return "\n".join(lines)


def _summary_span_path(trace: TraceState, span: SpanInfo) -> str:
    """Return the span directory relative to the trace directory, POSIX style."""
    return span.path.relative_to(trace.path).as_posix()


def _summary_number(info: dict, key: str):
    """Return ``info[key]``, counting a missing or None value as 0."""
    # A None here would otherwise break both sorting and the numeric
    # format specs used in the LLM table.
    return info.get(key) or 0


def _summary_overview(trace: TraceState, failed_spans: list) -> list[str]:
    """Build the one-line status/statistics overview."""
    if failed_spans:
        status = f"❌ Failed ({len(failed_spans)} errors)"
    else:
        status = "✅ Completed"
    fields = [
        f"**Status**: {status}",
        f"**Duration**: {_format_duration(trace)}",
        f"**Spans**: {len(trace.spans)}",
        f"**LLM Calls**: {trace.llm_call_count}",
        f"**Total Tokens**: {trace.total_tokens:,}",
        f"**Total Cost**: ${trace.total_cost:.4f}",
    ]
    return [" | ".join(fields), ""]


def _summary_tree(trace: TraceState, has_root: bool) -> list[str]:
    """Build the fenced "Execution Tree" section."""
    section = ["## Execution Tree", "", "```"]
    if has_root:
        section.extend(_build_tree(trace, trace.root_span_id, ""))
    else:
        # Fallback when no root span is known: flat list ordered by start time.
        ordered = sorted(trace.spans.values(), key=lambda s: s.start_time)
        section.extend(_format_span_line(span) for span in ordered)
    section.extend(["```", ""])
    return section


def _summary_root(trace: TraceState) -> list[str]:
    """Build the "Root Span" details section (caller ensures the root exists)."""
    root = trace.spans[trace.root_span_id]
    root_path = _summary_span_path(trace, root)
    return [
        "## Root Span",
        "",
        f"- **Name**: {root.name}",
        f"- **Type**: {root.span_type}",
        f"- **Duration**: {root.duration_ms}ms",
        f"- **Input**: `{root_path}/input.yaml`",
        f"- **Output**: `{root_path}/output.yaml`",
        "",
    ]


def _summary_llm_calls(trace: TraceState) -> list[str]:
    """Build the LLM calls table, most expensive first; empty if no LLM spans."""
    llm_spans = [s for s in trace.spans.values() if s.llm_info]
    if not llm_spans:
        return []
    llm_spans.sort(key=lambda s: _summary_number(s.llm_info, "cost"), reverse=True)

    section = [
        "## LLM Calls (by cost)",
        "",
        "| # | Span | Model | Input→Output | Total | Cost | Path |",
        "|---|------|-------|--------------|-------|------|------|",
    ]
    for i, span in enumerate(llm_spans, 1):
        info = span.llm_info
        model = info.get("model", "unknown")
        in_tokens = _summary_number(info, "input_tokens")
        out_tokens = _summary_number(info, "output_tokens")
        total_tokens = _summary_number(info, "total_tokens")
        cost = _summary_number(info, "cost")
        span_path = _summary_span_path(trace, span)
        section.append(
            f"| {i} | {span.name} | {model} | "
            f"{in_tokens:,}→{out_tokens:,} | {total_tokens:,} | ${cost:.4f} | "
            f"`{span_path}/` |"
        )
    section.append("")
    return section


def _summary_errors(trace: TraceState, failed_spans: list) -> list[str]:
    """Build the "Errors" section listing each failed span's metadata file."""
    section = ["## Errors", ""]
    if failed_spans:
        for span in failed_spans:
            span_path = _summary_span_path(trace, span)
            section.append(f"- **{span.name}**: `{span_path}/_span.yaml`")
    else:
        section.append("None - all spans completed successfully.")
    section.append("")
    return section
130
+
131
+
132
def _format_duration(trace: TraceState) -> str:
    """Render the overall trace duration as a short human-readable string.

    The duration runs from the earliest span start to the latest span end.
    Returns "unknown" for a trace without spans and "running..." when no
    span has an end time yet. Otherwise the result is milliseconds (under a
    second), seconds with one decimal (under a minute), "Xm Ys" (under an
    hour) or "Xh Ym".
    """
    if not trace.spans:
        return "unknown"

    recorded = list(trace.spans.values())
    earliest_start = min(sp.start_time for sp in recorded)
    finished_at = [sp.end_time for sp in recorded if sp.end_time]
    if not finished_at:
        return "running..."

    elapsed = (max(finished_at) - earliest_start).total_seconds()

    if elapsed >= 3600:
        hours, remainder = divmod(elapsed, 3600)
        return f"{int(hours)}h {int(remainder // 60)}m"
    if elapsed >= 60:
        minutes, seconds = divmod(elapsed, 60)
        return f"{int(minutes)}m {int(seconds)}s"
    if elapsed >= 1:
        return f"{elapsed:.1f}s"
    return f"{int(elapsed * 1000)}ms"
160
+
161
+
162
def _format_span_line(span: SpanInfo) -> str:
    """Render one span as a tree line body: name, duration, status icon, LLM note.

    The tree prefix is not included. Durations below 1000 ms are shown in
    milliseconds, otherwise in seconds with one decimal. Spans carrying LLM
    info get a suffix with the model name and total token count.
    """
    icon_by_status = {"completed": "✅", "failed": "❌"}
    # Any status other than completed/failed is shown as still in progress.
    icon = icon_by_status.get(span.status, "⏳")

    if span.duration_ms < 1000:
        elapsed = f"{span.duration_ms}ms"
    else:
        elapsed = f"{span.duration_ms / 1000:.1f}s"

    pieces = [f"{span.name} ({elapsed}) {icon}"]

    if span.llm_info:
        model_name = span.llm_info.get("model", "?")
        token_total = span.llm_info.get("total_tokens", 0)
        pieces.append(f" [LLM: {model_name}, {token_total:,} tokens]")

    return "".join(pieces)
176
+
177
+
178
def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
    """Build the text tree for a span and all of its descendants.

    Args:
        trace: Trace state whose ``spans`` mapping is looked up by span ID.
        span_id: ID of the span to render as the top of the tree.
        prefix: Text placed before the top line; also the base indentation
            for every descendant line.

    Returns:
        One line per rendered span, parents before children. Empty when
        ``span_id`` is not present in the trace.
    """
    # The top line and its descendants share the same base indentation, so
    # the recursive helper covers every level with continuation == prefix.
    return _build_tree_recursive(trace, span_id, prefix, prefix)


def _build_tree_recursive(
    trace: TraceState, span_id: str, prefix: str, continuation: str
) -> list[str]:
    """Recursively render a span and its descendants as tree lines.

    Args:
        trace: Trace state whose ``spans`` mapping is looked up by span ID.
        span_id: ID of the span to render.
        prefix: Text placed before this span's own line (indentation plus
            branch connector).
        continuation: Indentation that lines of this span's children start
            from.

    Returns:
        The line for this span followed by the lines of all descendants.
        Span IDs missing from the trace are skipped and produce no lines.
    """
    lines: list[str] = []
    span = trace.spans.get(span_id)
    if not span:
        return lines

    lines.append(f"{prefix}{_format_span_line(span)}")

    children = span.children
    last_index = len(children) - 1
    for index, child_id in enumerate(children):
        is_last = index == last_index
        connector = "└── " if is_last else "├── "
        # Padding is as wide as the connector so deeper levels stay aligned
        # under their parent; a vertical bar marks siblings still to come.
        padding = "    " if is_last else "│   "
        lines.extend(
            _build_tree_recursive(
                trace, child_id, continuation + connector, continuation + padding
            )
        )

    return lines