ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,98 @@
1
+ """OpenTelemetry SpanProcessor for local trace debugging."""
2
+
3
+ import contextlib
4
+
5
+ from opentelemetry.context import Context
6
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
7
+ from opentelemetry.trace import StatusCode
8
+
9
+ from ._writer import LocalTraceWriter, WriteJob
10
+
11
+
12
class LocalDebugSpanProcessor(SpanProcessor):
    """OpenTelemetry SpanProcessor that writes spans to local filesystem.

    Integrates with the OpenTelemetry SDK to capture all spans and write them
    to a structured directory hierarchy for debugging.

    Usage:
        writer = LocalTraceWriter(config)
        processor = LocalDebugSpanProcessor(writer)
        tracer_provider.add_span_processor(processor)
    """

    def __init__(self, writer: LocalTraceWriter):
        """Initialize span processor with the destination writer."""
        self._writer = writer

    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
        """Handle span start - create directories.

        Creates the span directory early so we can see "running" spans.
        Input/output data is not available yet - will be captured in on_end().
        Every exception is suppressed so debug tracing can never break the
        traced pipeline itself.
        """
        with contextlib.suppress(Exception):
            ctx = span.context
            if ctx is None:
                return
            self._writer.on_span_start(
                format(ctx.trace_id, "032x"),
                format(ctx.span_id, "016x"),
                self._get_parent_span_id(span),
                span.name,
            )

    def on_end(self, span: ReadableSpan) -> None:
        """Handle span end - queue full span data for background write.

        All data (input, output, attributes, events) is captured here because
        Laminar sets these attributes after span start.
        """
        with contextlib.suppress(Exception):
            ctx = span.context
            if ctx is None or span.start_time is None or span.end_time is None:
                return
            self._writer.on_span_end(
                WriteJob(
                    trace_id=format(ctx.trace_id, "032x"),
                    span_id=format(ctx.span_id, "016x"),
                    name=span.name,
                    parent_id=self._get_parent_span_id_from_readable(span),
                    attributes=dict(span.attributes) if span.attributes else {},
                    events=list(span.events) if span.events else [],
                    status_code=self._get_status_code(span),
                    status_description=span.status.description,
                    start_time_ns=span.start_time,
                    end_time_ns=span.end_time,
                )
            )

    def shutdown(self) -> None:
        """Shutdown the processor and its writer."""
        self._writer.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
        """Force flush is not needed for this processor."""
        _ = timeout_millis
        return True

    @staticmethod
    def _get_parent_span_id(span: Span) -> str | None:
        """Extract the parent span id (hex) from a writable Span, if any."""
        parent_ctx = getattr(span, "parent", None)
        if parent_ctx and getattr(parent_ctx, "span_id", None):
            return format(parent_ctx.span_id, "016x")
        return None

    @staticmethod
    def _get_parent_span_id_from_readable(span: ReadableSpan) -> str | None:
        """Extract the parent span id (hex) from a ReadableSpan, if any."""
        parent_ctx = span.parent
        if parent_ctx and getattr(parent_ctx, "span_id", None):
            return format(parent_ctx.span_id, "016x")
        return None

    @staticmethod
    def _get_status_code(span: ReadableSpan) -> str:
        """Map the span's StatusCode to "OK", "ERROR", or "UNSET"."""
        code = span.status.status_code
        if code == StatusCode.OK:
            return "OK"
        return "ERROR" if code == StatusCode.ERROR else "UNSET"
@@ -0,0 +1,312 @@
1
+ """Static summary generation for trace debugging.
2
+
3
+ Generates _summary.md files with execution tree, LLM calls, cost breakdown,
4
+ and navigation guide. No LLM dependencies — pure text formatting.
5
+
6
+ For LLM-powered auto-summary, see _auto_summary.py.
7
+ """
8
+
9
+ from typing import Any
10
+
11
+ from ._types import SpanInfo, TraceState
12
+
13
+
14
def generate_summary(trace: TraceState) -> str:
    """Generate unified _summary.md file.

    Single file optimized for both human inspection and LLM debugger context.
    Structure: Overview -> Tree -> Root Span -> LLM Calls -> Cost by Task -> Errors -> Navigation.
    Cost by Task table includes expected cost comparison with OVER/OK status indicators.
    """
    failed_spans = [s for s in trace.spans.values() if s.status == "failed"]

    lines = [f"# Trace Summary: {trace.name}", ""]
    lines.extend(_overview_section(trace, failed_spans))
    lines.extend(_tree_section(trace))
    lines.extend(_root_span_section(trace))
    lines.extend(_llm_calls_section(trace))
    lines.extend(_cost_by_task_section(trace))
    lines.extend(_errors_section(trace, failed_spans))
    lines.extend(_navigation_section())
    return "\n".join(lines)


def _overview_section(trace: TraceState, failed_spans: list[SpanInfo]) -> list[str]:
    """One-line overview: status, duration, span/LLM counts, tokens, cost."""
    status_emoji = "\u274c" if failed_spans else "\u2705"
    status_text = f"Failed ({len(failed_spans)} errors)" if failed_spans else "Completed"
    cost_str = f"**Total Cost**: ${trace.total_cost:.4f}"
    if trace.total_expected_cost > 0:
        cost_str += f" (expected: ${trace.total_expected_cost:.4f})"
    return [
        f"**Status**: {status_emoji} {status_text} | "
        f"**Duration**: {_format_duration(trace)} | "
        f"**Spans**: {len(trace.spans)} | "
        f"**LLM Calls**: {trace.llm_call_count} | "
        f"**Total Tokens**: {trace.total_tokens:,} | "
        f"{cost_str}",
        "",
    ]


def _tree_section(trace: TraceState) -> list[str]:
    """Fenced execution tree; flat start-time-sorted list when no root is known."""
    lines = ["## Execution Tree", "", "```"]
    if trace.root_span_id and trace.root_span_id in trace.spans:
        lines.extend(_build_tree(trace, trace.root_span_id, ""))
    else:
        # Fallback: list all spans
        lines.extend(
            _format_span_line(span)
            for span in sorted(trace.spans.values(), key=lambda s: s.start_time)
        )
    lines.extend(["```", ""])
    return lines


def _root_span_section(trace: TraceState) -> list[str]:
    """Root span details; empty when the root span is unknown."""
    if not (trace.root_span_id and trace.root_span_id in trace.spans):
        return []
    root = trace.spans[trace.root_span_id]
    root_path = root.path.relative_to(trace.path).as_posix()
    return [
        "## Root Span",
        "",
        f"- **Name**: {root.name}",
        f"- **Type**: {root.span_type}",
        f"- **Duration**: {root.duration_ms}ms",
        f"- **Input**: `{root_path}/input.yaml`",
        f"- **Output**: `{root_path}/output.yaml`",
        "",
    ]


def _llm_calls_section(trace: TraceState) -> list[str]:
    """Per-call LLM table, sorted by descending cost; empty when no LLM spans."""
    llm_spans = [s for s in trace.spans.values() if s.llm_info]
    if not llm_spans:
        return []
    llm_spans.sort(key=lambda s: s.llm_info.get("cost", 0) if s.llm_info else 0, reverse=True)

    lines = [
        "## LLM Calls (by cost)",
        "",
        "| # | Span | Purpose | Model | Input\u2192Output | Total | Cost | Expected | Path |",
        "|---|------|---------|-------|--------------|-------|------|----------|------|",
    ]
    for i, span in enumerate(llm_spans, 1):
        info = span.llm_info
        if info:
            expected = info.get("expected_cost")
            # Zero/absent expected cost renders as an empty cell.
            expected_str = f"${expected:.4f}" if expected else ""
            span_path = span.path.relative_to(trace.path).as_posix()
            lines.append(
                f"| {i} | {span.name} | {info.get('purpose', '')} | {info.get('model', 'unknown')} | "
                f"{info.get('input_tokens', 0):,}\u2192{info.get('output_tokens', 0):,} | "
                f"{info.get('total_tokens', 0):,} | ${info.get('cost', 0):.4f} | "
                f"{expected_str} | `{span_path}/` |"
            )
    lines.append("")
    return lines


def _cost_by_task_section(trace: TraceState) -> list[str]:
    """Cost aggregated per parent task/flow with OVER/OK budget status."""
    cost_by_parent = _aggregate_costs_by_parent(trace)
    if not cost_by_parent:
        return []
    lines = [
        "## Cost by Task",
        "",
        "| Name | Type | LLM Calls | Cost | Expected | Status |",
        "|------|------|-----------|------|----------|--------|",
    ]
    for entry in cost_by_parent:
        expected_str = f"${entry['expected_cost']:.4f}" if entry["expected_cost"] else ""
        status = ""
        if entry["expected_cost"] and entry["actual_cost"] > 0:
            # Allow a 10% overrun before flagging the task as over budget.
            status = "OVER" if entry["actual_cost"] / entry["expected_cost"] > 1.1 else "OK"
        lines.append(
            f"| {entry['name']} | {entry['type']} | {entry['llm_calls']} | "
            f"${entry['actual_cost']:.4f} | {expected_str} | {status} |"
        )
    lines.append("")
    return lines


def _errors_section(trace: TraceState, failed_spans: list[SpanInfo]) -> list[str]:
    """Error listing linking each failed span's metadata file."""
    lines = ["## Errors", ""]
    if failed_spans:
        for span in failed_spans:
            span_path = span.path.relative_to(trace.path).as_posix()
            lines.append(f"- **{span.name}**: `{span_path}/_span.yaml`")
    else:
        lines.append("None - all spans completed successfully.")
    lines.append("")
    return lines


def _navigation_section() -> list[str]:
    """Static guide for navigating the trace directory layout."""
    return [
        "## Navigation",
        "",
        "- Each span directory contains `_span.yaml` (metadata), `input.yaml`, `output.yaml`",
        "- LLM span inputs contain the full message list",
        "- `_tree.yaml` has span_id \u2192 path mapping and full hierarchy",
        "",
    ]
159
+
160
+
161
+ def _aggregate_costs_by_parent(trace: TraceState) -> list[dict[str, Any]]:
162
+ """Aggregate LLM costs by parent task/flow span."""
163
+ parent_costs: dict[str, dict[str, Any]] = {}
164
+
165
+ for span in trace.spans.values():
166
+ if not span.llm_info:
167
+ continue
168
+ cost = span.llm_info.get("cost", 0.0)
169
+ if not cost:
170
+ continue
171
+
172
+ # Find parent (task or flow span)
173
+ parent_id = span.parent_id
174
+ if not parent_id or parent_id not in trace.spans:
175
+ continue
176
+ parent = trace.spans[parent_id]
177
+
178
+ if parent_id not in parent_costs:
179
+ run_type = "unknown"
180
+ if parent.prefect_info:
181
+ run_type = parent.prefect_info.get("run_type", "unknown")
182
+ parent_costs[parent_id] = {
183
+ "name": parent.name,
184
+ "type": run_type,
185
+ "actual_cost": 0.0,
186
+ "expected_cost": parent.expected_cost,
187
+ "llm_calls": 0,
188
+ }
189
+ parent_costs[parent_id]["actual_cost"] += cost
190
+ parent_costs[parent_id]["llm_calls"] += 1
191
+
192
+ # Sort by cost descending
193
+ return sorted(parent_costs.values(), key=lambda x: x["actual_cost"], reverse=True)
194
+
195
+
196
+ def _format_duration(trace: TraceState) -> str:
197
+ """Format trace duration as human-readable string."""
198
+ # Calculate from spans if we have them
199
+ if not trace.spans:
200
+ return "unknown"
201
+
202
+ spans_list = list(trace.spans.values())
203
+ start = min(s.start_time for s in spans_list)
204
+ end_times = [s.end_time for s in spans_list if s.end_time]
205
+
206
+ if not end_times:
207
+ return "running..."
208
+
209
+ end = max(end_times)
210
+ duration = (end - start).total_seconds()
211
+
212
+ if duration < 1:
213
+ return f"{int(duration * 1000)}ms"
214
+ if duration < 60:
215
+ return f"{duration:.1f}s"
216
+ if duration < 3600:
217
+ minutes = int(duration // 60)
218
+ seconds = int(duration % 60)
219
+ return f"{minutes}m {seconds}s"
220
+ hours = int(duration // 3600)
221
+ minutes = int((duration % 3600) // 60)
222
+ return f"{hours}h {minutes}m"
223
+
224
+
225
+ def _format_span_line(span: SpanInfo) -> str:
226
+ """Format a single span as a tree line (without prefix)."""
227
+ if span.status == "completed":
228
+ status_icon = "\u2705"
229
+ elif span.status == "failed":
230
+ status_icon = "\u274c"
231
+ else:
232
+ status_icon = "\u23f3"
233
+ duration = f"{span.duration_ms}ms" if span.duration_ms < 1000 else f"{span.duration_ms / 1000:.1f}s"
234
+
235
+ # Description suffix for task/flow spans
236
+ desc_suffix = ""
237
+ if span.description and span.span_type != "llm":
238
+ desc_suffix = f" -- {span.description}"
239
+
240
+ # LLM suffix: show purpose (if available) alongside model, plus cost
241
+ llm_suffix = ""
242
+ if span.llm_info:
243
+ model = span.llm_info.get("model", "?")
244
+ tokens = span.llm_info.get("total_tokens", 0)
245
+ cost = span.llm_info.get("cost", 0)
246
+ purpose = span.llm_info.get("purpose")
247
+
248
+ purpose_part = f"{purpose} | " if purpose else ""
249
+ cost_part = f", ${cost:.4f}" if cost else ""
250
+ llm_suffix = f" [LLM: {purpose_part}{model}, {tokens:,} tokens{cost_part}]"
251
+
252
+ return f"{span.name} ({duration}) {status_icon}{desc_suffix}{llm_suffix}"
253
+
254
+
255
+ def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
256
+ """Build tree representation of span hierarchy (fully recursive)."""
257
+ lines: list[str] = []
258
+ span = trace.spans.get(span_id)
259
+ if not span:
260
+ return lines
261
+
262
+ # Add this span's line
263
+ lines.append(f"{prefix}{_format_span_line(span)}")
264
+
265
+ # Process children recursively
266
+ children = span.children
267
+ for i, child_id in enumerate(children):
268
+ is_last = i == len(children) - 1
269
+ child_prefix = prefix + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
270
+ continuation_prefix = prefix + (" " if is_last else "\u2502 ")
271
+
272
+ child_span = trace.spans.get(child_id)
273
+ if child_span:
274
+ # Add child line
275
+ lines.append(f"{child_prefix}{_format_span_line(child_span)}")
276
+
277
+ # Recursively add all descendants
278
+ for j, grandchild_id in enumerate(child_span.children):
279
+ gc_is_last = j == len(child_span.children) - 1
280
+ gc_connector = "\u2514\u2500\u2500 " if gc_is_last else "\u251c\u2500\u2500 "
281
+ gc_prefix = continuation_prefix + gc_connector
282
+ gc_continuation = continuation_prefix + (" " if gc_is_last else "\u2502 ")
283
+
284
+ # Recursively build subtree for grandchild and all its descendants
285
+ subtree = _build_tree_recursive(trace, grandchild_id, gc_prefix, gc_continuation)
286
+ lines.extend(subtree)
287
+
288
+ return lines
289
+
290
+
291
+ def _build_tree_recursive(trace: TraceState, span_id: str, prefix: str, continuation: str) -> list[str]:
292
+ """Recursively build tree for a span and all descendants."""
293
+ lines: list[str] = []
294
+ span = trace.spans.get(span_id)
295
+ if not span:
296
+ return lines
297
+
298
+ # Add this span's line with the given prefix
299
+ lines.append(f"{prefix}{_format_span_line(span)}")
300
+
301
+ # Process children
302
+ children = span.children
303
+ for i, child_id in enumerate(children):
304
+ is_last = i == len(children) - 1
305
+ child_prefix = continuation + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
306
+ child_continuation = continuation + (" " if is_last else "\u2502 ")
307
+
308
+ # Recurse for all children
309
+ subtree = _build_tree_recursive(trace, child_id, child_prefix, child_continuation)
310
+ lines.extend(subtree)
311
+
312
+ return lines
@@ -0,0 +1,75 @@
1
+ """Shared data types for the debug tracing system.
2
+
3
+ Extracted to break the circular dependency between _writer.py and _summary.py:
4
+ _writer needs summary generation functions, _summary needs SpanInfo/TraceState.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
@dataclass
class WriteJob:
    """Job for background writer thread.

    Immutable-by-convention snapshot of a finished span, built in
    LocalDebugSpanProcessor.on_end() and queued for the background writer.
    """

    trace_id: str  # hex trace id (processor formats with "032x")
    span_id: str  # hex span id (processor formats with "016x")
    name: str  # span name as reported by OpenTelemetry
    parent_id: str | None  # hex parent span id; None for a root span
    attributes: dict[str, Any]  # span attributes captured at span end
    events: list[Any]  # span events captured at span end
    status_code: str  # "OK" | "ERROR" | "UNSET"
    status_description: str | None  # status description text, if the span set one
    start_time_ns: int  # span start timestamp in nanoseconds (ReadableSpan.start_time)
    end_time_ns: int  # span end timestamp in nanoseconds (ReadableSpan.end_time)
27
+
28
+
29
@dataclass
class SpanInfo:
    """Information about a span for index building.

    Tracks execution details including timing, LLM metrics (tokens, cost,
    expected_cost, purpose), and Prefect context for observability and cost
    tracking across the trace hierarchy.
    """

    span_id: str  # hex span id
    parent_id: str | None  # hex parent span id; None for the root span
    name: str  # span display name
    span_type: str  # e.g. "llm"; non-llm spans show their description in tree lines
    status: str  # "completed" | "failed" | other (rendered as still running)
    start_time: datetime  # span start time
    path: Path  # Actual directory path for this span
    depth: int = 0  # Nesting depth (0 for root)
    order: int = 0  # Global execution order within trace
    end_time: datetime | None = None  # None while the span is still running
    duration_ms: int = 0  # wall-clock duration in milliseconds
    children: list[str] = field(default_factory=list)  # child span ids, in order
    llm_info: dict[str, Any] | None = None  # model/tokens/cost/purpose for LLM spans
    prefect_info: dict[str, Any] | None = None  # Prefect run metadata (e.g. "run_type")
    description: str | None = None  # human-readable description for task/flow spans
    expected_cost: float | None = None  # budgeted cost, compared against actual in summaries
53
+
54
+
55
@dataclass
class TraceState:
    """State for an active trace.

    Maintains trace metadata and span hierarchy with accumulated cost
    metrics (total_cost, total_expected_cost) for monitoring resource
    usage and budget tracking during trace execution.
    """

    trace_id: str  # hex trace id
    name: str  # trace display name (used as the summary title)
    path: Path  # root directory for this trace's debug output
    start_time: datetime  # trace start time
    spans: dict[str, SpanInfo] = field(default_factory=dict)  # span_id -> SpanInfo
    root_span_id: str | None = None  # id of the root span, once known
    total_tokens: int = 0  # accumulated LLM token count across all spans
    total_cost: float = 0.0  # accumulated actual LLM cost ($, per summary formatting)
    total_expected_cost: float = 0.0  # accumulated budgeted LLM cost ($)
    llm_call_count: int = 0  # number of LLM spans observed
    span_counter: int = 0  # Global counter for ordering span directories
    merged_wrapper_ids: set[str] = field(default_factory=set)  # IDs of merged wrappers