ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff reflects the changes between two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- ai_pipeline_core/__init__.py +32 -5
- ai_pipeline_core/debug/__init__.py +26 -0
- ai_pipeline_core/debug/config.py +91 -0
- ai_pipeline_core/debug/content.py +705 -0
- ai_pipeline_core/debug/processor.py +99 -0
- ai_pipeline_core/debug/summary.py +236 -0
- ai_pipeline_core/debug/writer.py +913 -0
- ai_pipeline_core/deployment/__init__.py +46 -0
- ai_pipeline_core/deployment/base.py +681 -0
- ai_pipeline_core/deployment/contract.py +84 -0
- ai_pipeline_core/deployment/helpers.py +98 -0
- ai_pipeline_core/documents/flow_document.py +1 -1
- ai_pipeline_core/documents/task_document.py +1 -1
- ai_pipeline_core/documents/temporary_document.py +1 -1
- ai_pipeline_core/flow/config.py +13 -2
- ai_pipeline_core/flow/options.py +4 -4
- ai_pipeline_core/images/__init__.py +362 -0
- ai_pipeline_core/images/_processing.py +157 -0
- ai_pipeline_core/llm/ai_messages.py +25 -4
- ai_pipeline_core/llm/client.py +15 -19
- ai_pipeline_core/llm/model_response.py +5 -5
- ai_pipeline_core/llm/model_types.py +10 -13
- ai_pipeline_core/logging/logging_mixin.py +2 -2
- ai_pipeline_core/pipeline.py +1 -1
- ai_pipeline_core/progress.py +127 -0
- ai_pipeline_core/prompt_builder/__init__.py +5 -0
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
- ai_pipeline_core/prompt_builder/global_cache.py +78 -0
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
- ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
- ai_pipeline_core/tracing.py +54 -2
- ai_pipeline_core/utils/deploy.py +214 -6
- ai_pipeline_core/utils/remote_deployment.py +37 -187
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +96 -27
- ai_pipeline_core-0.3.3.dist-info/RECORD +57 -0
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +1 -1
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/debug/processor.py (new file)
@@ -0,0 +1,99 @@
+"""OpenTelemetry SpanProcessor for local trace debugging."""
+
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+from opentelemetry.trace import StatusCode
+
+from .writer import LocalTraceWriter, WriteJob
+
+
+class LocalDebugSpanProcessor(SpanProcessor):
+    """OpenTelemetry SpanProcessor that writes spans to local filesystem.
+
+    Integrates with the OpenTelemetry SDK to capture all spans and write them
+    to a structured directory hierarchy for debugging.
+
+    Usage:
+        writer = LocalTraceWriter(config)
+        processor = LocalDebugSpanProcessor(writer)
+        tracer_provider.add_span_processor(processor)
+    """
+
+    def __init__(self, writer: LocalTraceWriter):
+        """Initialize span processor with writer."""
+        self._writer = writer
+
+    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
+        """Handle span start - create directories.
+
+        Creates the span directory early so we can see "running" spans.
+        Input/output data is not available yet - will be captured in on_end().
+        """
+        try:
+            if span.context is None:
+                return
+            trace_id = format(span.context.trace_id, "032x")
+            span_id = format(span.context.span_id, "016x")
+            parent_id = self._get_parent_span_id(span)
+
+            self._writer.on_span_start(trace_id, span_id, parent_id, span.name)
+        except Exception:
+            # Never fail the actual span - debug tracing should be transparent
+            pass
+
+    def on_end(self, span: ReadableSpan) -> None:
+        """Handle span end - queue full span data for background write.
+
+        All data (input, output, attributes, events) is captured here because
+        Laminar sets these attributes after span start.
+        """
+        try:
+            if span.context is None or span.start_time is None or span.end_time is None:
+                return
+            job = WriteJob(
+                trace_id=format(span.context.trace_id, "032x"),
+                span_id=format(span.context.span_id, "016x"),
+                name=span.name,
+                parent_id=self._get_parent_span_id_from_readable(span),
+                attributes=dict(span.attributes) if span.attributes else {},
+                events=list(span.events) if span.events else [],
+                status_code=self._get_status_code(span),
+                status_description=span.status.description,
+                start_time_ns=span.start_time,
+                end_time_ns=span.end_time,
+            )
+            self._writer.on_span_end(job)
+        except Exception:
+            # Never fail the actual span
+            pass
+
+    def shutdown(self) -> None:
+        """Shutdown the processor and writer."""
+        self._writer.shutdown()
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush is not needed for this processor."""
+        return True
+
+    def _get_parent_span_id(self, span: Span) -> str | None:
+        """Extract parent span ID from a writable Span."""
+        if hasattr(span, "parent") and span.parent:
+            parent_ctx = span.parent
+            if hasattr(parent_ctx, "span_id") and parent_ctx.span_id:
+                return format(parent_ctx.span_id, "016x")
+        return None
+
+    def _get_parent_span_id_from_readable(self, span: ReadableSpan) -> str | None:
+        """Extract parent span ID from a ReadableSpan."""
+        if span.parent:
+            if hasattr(span.parent, "span_id") and span.parent.span_id:
+                return format(span.parent.span_id, "016x")
+        return None
+
+    def _get_status_code(self, span: ReadableSpan) -> str:
+        """Get status code as string."""
+        if span.status.status_code == StatusCode.OK:
+            return "OK"
+        elif span.status.status_code == StatusCode.ERROR:
+            return "ERROR"
+        return "UNSET"
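
The processor's docstring shows the minimal wiring: construct a LocalTraceWriter, wrap it in a LocalDebugSpanProcessor, and register it on a tracer provider. A slightly fuller sketch of that setup follows; the OpenTelemetry SDK calls (TracerProvider, add_span_processor, set_tracer_provider) are standard, but the argument passed to LocalTraceWriter is an assumption, since the shape of the debug config in ai_pipeline_core/debug/config.py is not shown in this hunk.

# Sketch only: wiring the local-debug processor into an OpenTelemetry tracer
# provider, following the Usage block in the class docstring above.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

from ai_pipeline_core.debug.processor import LocalDebugSpanProcessor
from ai_pipeline_core.debug.writer import LocalTraceWriter


def enable_local_debug_tracing(debug_config) -> TracerProvider:
    """Attach LocalDebugSpanProcessor to a fresh TracerProvider."""
    writer = LocalTraceWriter(debug_config)  # constructor argument assumed, per the docstring
    processor = LocalDebugSpanProcessor(writer)

    provider = TracerProvider()
    provider.add_span_processor(processor)  # standard OpenTelemetry SDK API
    trace.set_tracer_provider(provider)
    return provider

On shutdown, provider.shutdown() propagates to processor.shutdown(), which in turn calls writer.shutdown(), presumably giving the background writer a chance to finish queued write jobs.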
ai_pipeline_core/debug/summary.py (new file)
@@ -0,0 +1,236 @@
+"""Summary generation for trace debugging.
+
+Generates a single _summary.md file that serves both human inspection and LLM debugging.
+Combines high-level overview with detailed navigation for comprehensive trace analysis.
+"""
+
+from .writer import SpanInfo, TraceState
+
+
+def generate_summary(trace: TraceState) -> str:
+    """Generate unified _summary.md file.
+
+    Single file optimized for both human inspection and LLM debugger context.
+    Structure: Overview → Tree → Root Span → LLM Calls → Errors → Navigation.
+    """
+    lines = [
+        f"# Trace Summary: {trace.name}",
+        "",
+    ]
+
+    # Status and stats
+    failed_spans = [s for s in trace.spans.values() if s.status == "failed"]
+    status_emoji = "❌" if failed_spans else "✅"
+    status_text = f"Failed ({len(failed_spans)} errors)" if failed_spans else "Completed"
+    duration_str = _format_duration(trace)
+
+    lines.extend([
+        f"**Status**: {status_emoji} {status_text} | "
+        f"**Duration**: {duration_str} | "
+        f"**Spans**: {len(trace.spans)} | "
+        f"**LLM Calls**: {trace.llm_call_count} | "
+        f"**Total Tokens**: {trace.total_tokens:,} | "
+        f"**Total Cost**: ${trace.total_cost:.4f}",
+        "",
+    ])
+
+    # Execution tree
+    lines.extend([
+        "## Execution Tree",
+        "",
+        "```",
+    ])
+
+    if trace.root_span_id and trace.root_span_id in trace.spans:
+        tree_lines = _build_tree(trace, trace.root_span_id, "")
+        lines.extend(tree_lines)
+    else:
+        # Fallback: list all spans
+        for span in sorted(trace.spans.values(), key=lambda s: s.start_time):
+            lines.append(_format_span_line(span))
+
+    lines.extend([
+        "```",
+        "",
+    ])
+
+    # Root span details
+    if trace.root_span_id and trace.root_span_id in trace.spans:
+        root = trace.spans[trace.root_span_id]
+        root_path = root.path.relative_to(trace.path).as_posix()
+        lines.extend([
+            "## Root Span",
+            "",
+            f"- **Name**: {root.name}",
+            f"- **Type**: {root.span_type}",
+            f"- **Duration**: {root.duration_ms}ms",
+            f"- **Input**: `{root_path}/input.yaml`",
+            f"- **Output**: `{root_path}/output.yaml`",
+            "",
+        ])
+
+    # LLM calls table with path column
+    llm_spans = [s for s in trace.spans.values() if s.llm_info]
+    if llm_spans:
+        llm_spans.sort(key=lambda s: s.llm_info.get("cost", 0) if s.llm_info else 0, reverse=True)
+
+        lines.extend([
+            "## LLM Calls (by cost)",
+            "",
+            "| # | Span | Model | Input→Output | Total | Cost | Path |",
+            "|---|------|-------|--------------|-------|------|------|",
+        ])
+
+        for i, span in enumerate(llm_spans, 1):
+            info = span.llm_info
+            if info:
+                model = info.get("model", "unknown")
+                in_tokens = info.get("input_tokens", 0)
+                out_tokens = info.get("output_tokens", 0)
+                total_tokens = info.get("total_tokens", 0)
+                cost = info.get("cost", 0)
+                span_path = span.path.relative_to(trace.path).as_posix()
+                lines.append(
+                    f"| {i} | {span.name} | {model} | "
+                    f"{in_tokens:,}→{out_tokens:,} | {total_tokens:,} | ${cost:.4f} | "
+                    f"`{span_path}/` |"
+                )
+
+        lines.append("")
+
+    # Errors
+    if failed_spans:
+        lines.extend([
+            "## Errors",
+            "",
+        ])
+        for span in failed_spans:
+            span_path = span.path.relative_to(trace.path).as_posix()
+            lines.append(f"- **{span.name}**: `{span_path}/_span.yaml`")
+        lines.append("")
+    else:
+        lines.extend([
+            "## Errors",
+            "",
+            "None - all spans completed successfully.",
+            "",
+        ])
+
+    # Navigation guide
+    lines.extend([
+        "## Navigation",
+        "",
+        "- Each span directory contains `_span.yaml` (metadata), `input.yaml`, `output.yaml`",
+        "- LLM span inputs contain the full message list",
+        "- `_tree.yaml` has span_id → path mapping and full hierarchy",
+        "",
+    ])
+
+    return "\n".join(lines)
+
+
+def _format_duration(trace: TraceState) -> str:
+    """Format trace duration as human-readable string."""
+    # Calculate from spans if we have them
+    if not trace.spans:
+        return "unknown"
+
+    spans_list = list(trace.spans.values())
+    start = min(s.start_time for s in spans_list)
+    end_times = [s.end_time for s in spans_list if s.end_time]
+
+    if not end_times:
+        return "running..."
+
+    end = max(end_times)
+    duration = (end - start).total_seconds()
+
+    if duration < 1:
+        return f"{int(duration * 1000)}ms"
+    elif duration < 60:
+        return f"{duration:.1f}s"
+    elif duration < 3600:
+        minutes = int(duration // 60)
+        seconds = int(duration % 60)
+        return f"{minutes}m {seconds}s"
+    else:
+        hours = int(duration // 3600)
+        minutes = int((duration % 3600) // 60)
+        return f"{hours}h {minutes}m"
+
+
+def _format_span_line(span: SpanInfo) -> str:
+    """Format a single span as a tree line (without prefix)."""
+    status_icon = "✅" if span.status == "completed" else "❌" if span.status == "failed" else "⏳"
+    duration = (
+        f"{span.duration_ms}ms" if span.duration_ms < 1000 else f"{span.duration_ms / 1000:.1f}s"
+    )
+
+    llm_suffix = ""
+    if span.llm_info:
+        model = span.llm_info.get("model", "?")
+        tokens = span.llm_info.get("total_tokens", 0)
+        llm_suffix = f" [LLM: {model}, {tokens:,} tokens]"
+
+    return f"{span.name} ({duration}) {status_icon}{llm_suffix}"
+
+
+def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
+    """Build tree representation of span hierarchy (fully recursive)."""
+    lines: list[str] = []
+    span = trace.spans.get(span_id)
+    if not span:
+        return lines
+
+    # Add this span's line
+    lines.append(f"{prefix}{_format_span_line(span)}")
+
+    # Process children recursively
+    children = span.children
+    for i, child_id in enumerate(children):
+        is_last = i == len(children) - 1
+        child_prefix = prefix + ("└── " if is_last else "├── ")
+        continuation_prefix = prefix + ("    " if is_last else "│   ")
+
+        child_span = trace.spans.get(child_id)
+        if child_span:
+            # Add child line
+            lines.append(f"{child_prefix}{_format_span_line(child_span)}")
+
+            # Recursively add all descendants
+            for j, grandchild_id in enumerate(child_span.children):
+                gc_is_last = j == len(child_span.children) - 1
+                gc_prefix = continuation_prefix + ("└── " if gc_is_last else "├── ")
+                gc_continuation = continuation_prefix + ("    " if gc_is_last else "│   ")
+
+                # Recursively build subtree for grandchild and all its descendants
+                subtree = _build_tree_recursive(trace, grandchild_id, gc_prefix, gc_continuation)
+                lines.extend(subtree)
+
+    return lines
+
+
+def _build_tree_recursive(
+    trace: TraceState, span_id: str, prefix: str, continuation: str
+) -> list[str]:
+    """Recursively build tree for a span and all descendants."""
+    lines: list[str] = []
+    span = trace.spans.get(span_id)
+    if not span:
+        return lines
+
+    # Add this span's line with the given prefix
+    lines.append(f"{prefix}{_format_span_line(span)}")
+
+    # Process children
+    children = span.children
+    for i, child_id in enumerate(children):
+        is_last = i == len(children) - 1
+        child_prefix = continuation + ("└── " if is_last else "├── ")
+        child_continuation = continuation + ("    " if is_last else "│   ")
+
+        # Recurse for all children
+        subtree = _build_tree_recursive(trace, child_id, child_prefix, child_continuation)
+        lines.extend(subtree)
+
+    return lines
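
_build_tree and _build_tree_recursive thread two strings through the recursion: the prefix that decorates the current line ("├── " or "└── ") and the continuation that every descendant inherits ("│   " or spaces, depending on whether the parent was the last child). The standalone sketch below shows just that bookkeeping over a plain dict of child IDs; the children mapping and node names are made up for illustration, and the SpanInfo/TraceState types from writer.py are deliberately left out.

# Illustration only: the prefix/continuation bookkeeping behind the Execution
# Tree rendering, reduced to a plain dict of hypothetical span names.
def render_tree(children: dict[str, list[str]], node: str,
                prefix: str = "", continuation: str = "") -> list[str]:
    lines = [f"{prefix}{node}"]
    kids = children.get(node, [])
    for i, kid in enumerate(kids):
        is_last = i == len(kids) - 1
        # The branch marker appears only on the child's own line...
        child_prefix = continuation + ("└── " if is_last else "├── ")
        # ...while the continuation is what all of its descendants inherit.
        child_continuation = continuation + ("    " if is_last else "│   ")
        lines.extend(render_tree(children, kid, child_prefix, child_continuation))
    return lines


children = {"my_flow": ["step_a", "step_b"], "step_a": ["llm_call"]}
print("\n".join(render_tree(children, "my_flow")))
# my_flow
# ├── step_a
# │   └── llm_call
# └── step_b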