ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,98 @@
1
+ """OpenTelemetry SpanProcessor for local trace debugging."""
2
+
3
+ import contextlib
4
+
5
+ from opentelemetry.context import Context
6
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
7
+ from opentelemetry.trace import StatusCode
8
+
9
+ from ._writer import LocalTraceWriter, WriteJob
10
+
11
+
12
class LocalDebugSpanProcessor(SpanProcessor):
    """OpenTelemetry SpanProcessor that writes spans to local filesystem.

    Integrates with the OpenTelemetry SDK to capture all spans and write them
    to a structured directory hierarchy for debugging.

    Usage:
        writer = LocalTraceWriter(config)
        processor = LocalDebugSpanProcessor(writer)
        tracer_provider.add_span_processor(processor)
    """

    def __init__(self, writer: LocalTraceWriter):
        """Initialize span processor with the destination writer."""
        self._writer = writer

    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
        """Handle span start - create directories.

        Creates the span directory early so we can see "running" spans.
        Input/output data is not available yet - will be captured in on_end().
        Every exception is suppressed so debug tracing can never break the
        traced pipeline itself.
        """
        with contextlib.suppress(Exception):
            ctx = span.context
            if ctx is None:
                return
            self._writer.on_span_start(
                format(ctx.trace_id, "032x"),
                format(ctx.span_id, "016x"),
                self._get_parent_span_id(span),
                span.name,
            )

    def on_end(self, span: ReadableSpan) -> None:
        """Handle span end - queue full span data for background write.

        All data (input, output, attributes, events) is captured here because
        Laminar sets these attributes after span start.
        """
        with contextlib.suppress(Exception):
            ctx = span.context
            if ctx is None or span.start_time is None or span.end_time is None:
                return
            self._writer.on_span_end(
                WriteJob(
                    trace_id=format(ctx.trace_id, "032x"),
                    span_id=format(ctx.span_id, "016x"),
                    name=span.name,
                    parent_id=self._get_parent_span_id_from_readable(span),
                    attributes=dict(span.attributes) if span.attributes else {},
                    events=list(span.events) if span.events else [],
                    status_code=self._get_status_code(span),
                    status_description=span.status.description,
                    start_time_ns=span.start_time,
                    end_time_ns=span.end_time,
                )
            )

    def shutdown(self) -> None:
        """Shutdown the processor and its writer."""
        self._writer.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:  # noqa: PLR6301
        """Force flush is not needed for this processor."""
        _ = timeout_millis
        return True

    @staticmethod
    def _get_parent_span_id(span: Span) -> str | None:
        """Extract the parent span id (hex) from a writable Span, if any."""
        parent_ctx = getattr(span, "parent", None)
        if parent_ctx and getattr(parent_ctx, "span_id", None):
            return format(parent_ctx.span_id, "016x")
        return None

    @staticmethod
    def _get_parent_span_id_from_readable(span: ReadableSpan) -> str | None:
        """Extract the parent span id (hex) from a ReadableSpan, if any."""
        parent_ctx = span.parent
        if parent_ctx and getattr(parent_ctx, "span_id", None):
            return format(parent_ctx.span_id, "016x")
        return None

    @staticmethod
    def _get_status_code(span: ReadableSpan) -> str:
        """Map the span's StatusCode to "OK", "ERROR", or "UNSET"."""
        code = span.status.status_code
        if code == StatusCode.OK:
            return "OK"
        return "ERROR" if code == StatusCode.ERROR else "UNSET"
@@ -0,0 +1,312 @@
1
+ """Static summary generation for trace debugging.
2
+
3
+ Generates _summary.md files with execution tree, LLM calls, cost breakdown,
4
+ and navigation guide. No LLM dependencies — pure text formatting.
5
+
6
+ For LLM-powered auto-summary, see _auto_summary.py.
7
+ """
8
+
9
+ from typing import Any
10
+
11
+ from ._types import SpanInfo, TraceState
12
+
13
+
14
def generate_summary(trace: TraceState) -> str:
    """Generate unified _summary.md file.

    Single file optimized for both human inspection and LLM debugger context.
    Structure: Overview -> Tree -> Root Span -> LLM Calls -> Cost by Task -> Errors -> Navigation.
    Cost by Task table includes expected cost comparison with OVER/OK status indicators.
    """
    failed_spans = [s for s in trace.spans.values() if s.status == "failed"]

    lines = [f"# Trace Summary: {trace.name}", ""]
    lines.extend(_overview_section(trace, failed_spans))
    lines.extend(_tree_section(trace))
    lines.extend(_root_span_section(trace))
    lines.extend(_llm_calls_section(trace))
    lines.extend(_cost_by_task_section(trace))
    lines.extend(_errors_section(trace, failed_spans))
    lines.extend(_navigation_section())
    return "\n".join(lines)


def _overview_section(trace: TraceState, failed_spans: list[SpanInfo]) -> list[str]:
    """One-line overview: status, duration, span/LLM counts, tokens, cost."""
    status_emoji = "\u274c" if failed_spans else "\u2705"
    status_text = f"Failed ({len(failed_spans)} errors)" if failed_spans else "Completed"
    cost_str = f"**Total Cost**: ${trace.total_cost:.4f}"
    if trace.total_expected_cost > 0:
        cost_str += f" (expected: ${trace.total_expected_cost:.4f})"
    return [
        f"**Status**: {status_emoji} {status_text} | "
        f"**Duration**: {_format_duration(trace)} | "
        f"**Spans**: {len(trace.spans)} | "
        f"**LLM Calls**: {trace.llm_call_count} | "
        f"**Total Tokens**: {trace.total_tokens:,} | "
        f"{cost_str}",
        "",
    ]


def _tree_section(trace: TraceState) -> list[str]:
    """Fenced execution tree; flat start-time-sorted list when no root is known."""
    lines = ["## Execution Tree", "", "```"]
    if trace.root_span_id and trace.root_span_id in trace.spans:
        lines.extend(_build_tree(trace, trace.root_span_id, ""))
    else:
        # Fallback: list all spans
        lines.extend(
            _format_span_line(span)
            for span in sorted(trace.spans.values(), key=lambda s: s.start_time)
        )
    lines.extend(["```", ""])
    return lines


def _root_span_section(trace: TraceState) -> list[str]:
    """Root span details; empty when the root span is unknown."""
    if not (trace.root_span_id and trace.root_span_id in trace.spans):
        return []
    root = trace.spans[trace.root_span_id]
    root_path = root.path.relative_to(trace.path).as_posix()
    return [
        "## Root Span",
        "",
        f"- **Name**: {root.name}",
        f"- **Type**: {root.span_type}",
        f"- **Duration**: {root.duration_ms}ms",
        f"- **Input**: `{root_path}/input.yaml`",
        f"- **Output**: `{root_path}/output.yaml`",
        "",
    ]


def _llm_calls_section(trace: TraceState) -> list[str]:
    """Per-call LLM table, sorted by descending cost; empty when no LLM spans."""
    llm_spans = [s for s in trace.spans.values() if s.llm_info]
    if not llm_spans:
        return []
    llm_spans.sort(key=lambda s: s.llm_info.get("cost", 0) if s.llm_info else 0, reverse=True)

    lines = [
        "## LLM Calls (by cost)",
        "",
        "| # | Span | Purpose | Model | Input\u2192Output | Total | Cost | Expected | Path |",
        "|---|------|---------|-------|--------------|-------|------|----------|------|",
    ]
    for i, span in enumerate(llm_spans, 1):
        info = span.llm_info
        if info:
            expected = info.get("expected_cost")
            # Zero/absent expected cost renders as an empty cell.
            expected_str = f"${expected:.4f}" if expected else ""
            span_path = span.path.relative_to(trace.path).as_posix()
            lines.append(
                f"| {i} | {span.name} | {info.get('purpose', '')} | {info.get('model', 'unknown')} | "
                f"{info.get('input_tokens', 0):,}\u2192{info.get('output_tokens', 0):,} | "
                f"{info.get('total_tokens', 0):,} | ${info.get('cost', 0):.4f} | "
                f"{expected_str} | `{span_path}/` |"
            )
    lines.append("")
    return lines


def _cost_by_task_section(trace: TraceState) -> list[str]:
    """Cost aggregated per parent task/flow with OVER/OK budget status."""
    cost_by_parent = _aggregate_costs_by_parent(trace)
    if not cost_by_parent:
        return []
    lines = [
        "## Cost by Task",
        "",
        "| Name | Type | LLM Calls | Cost | Expected | Status |",
        "|------|------|-----------|------|----------|--------|",
    ]
    for entry in cost_by_parent:
        expected_str = f"${entry['expected_cost']:.4f}" if entry["expected_cost"] else ""
        status = ""
        if entry["expected_cost"] and entry["actual_cost"] > 0:
            # Allow a 10% overrun before flagging the task as over budget.
            status = "OVER" if entry["actual_cost"] / entry["expected_cost"] > 1.1 else "OK"
        lines.append(
            f"| {entry['name']} | {entry['type']} | {entry['llm_calls']} | "
            f"${entry['actual_cost']:.4f} | {expected_str} | {status} |"
        )
    lines.append("")
    return lines


def _errors_section(trace: TraceState, failed_spans: list[SpanInfo]) -> list[str]:
    """Error listing linking each failed span's metadata file."""
    lines = ["## Errors", ""]
    if failed_spans:
        for span in failed_spans:
            span_path = span.path.relative_to(trace.path).as_posix()
            lines.append(f"- **{span.name}**: `{span_path}/_span.yaml`")
    else:
        lines.append("None - all spans completed successfully.")
    lines.append("")
    return lines


def _navigation_section() -> list[str]:
    """Static guide for navigating the trace directory layout."""
    return [
        "## Navigation",
        "",
        "- Each span directory contains `_span.yaml` (metadata), `input.yaml`, `output.yaml`",
        "- LLM span inputs contain the full message list",
        "- `_tree.yaml` has span_id \u2192 path mapping and full hierarchy",
        "",
    ]
159
+
160
+
161
+ def _aggregate_costs_by_parent(trace: TraceState) -> list[dict[str, Any]]:
162
+ """Aggregate LLM costs by parent task/flow span."""
163
+ parent_costs: dict[str, dict[str, Any]] = {}
164
+
165
+ for span in trace.spans.values():
166
+ if not span.llm_info:
167
+ continue
168
+ cost = span.llm_info.get("cost", 0.0)
169
+ if not cost:
170
+ continue
171
+
172
+ # Find parent (task or flow span)
173
+ parent_id = span.parent_id
174
+ if not parent_id or parent_id not in trace.spans:
175
+ continue
176
+ parent = trace.spans[parent_id]
177
+
178
+ if parent_id not in parent_costs:
179
+ run_type = "unknown"
180
+ if parent.prefect_info:
181
+ run_type = parent.prefect_info.get("run_type", "unknown")
182
+ parent_costs[parent_id] = {
183
+ "name": parent.name,
184
+ "type": run_type,
185
+ "actual_cost": 0.0,
186
+ "expected_cost": parent.expected_cost,
187
+ "llm_calls": 0,
188
+ }
189
+ parent_costs[parent_id]["actual_cost"] += cost
190
+ parent_costs[parent_id]["llm_calls"] += 1
191
+
192
+ # Sort by cost descending
193
+ return sorted(parent_costs.values(), key=lambda x: x["actual_cost"], reverse=True)
194
+
195
+
196
+ def _format_duration(trace: TraceState) -> str:
197
+ """Format trace duration as human-readable string."""
198
+ # Calculate from spans if we have them
199
+ if not trace.spans:
200
+ return "unknown"
201
+
202
+ spans_list = list(trace.spans.values())
203
+ start = min(s.start_time for s in spans_list)
204
+ end_times = [s.end_time for s in spans_list if s.end_time]
205
+
206
+ if not end_times:
207
+ return "running..."
208
+
209
+ end = max(end_times)
210
+ duration = (end - start).total_seconds()
211
+
212
+ if duration < 1:
213
+ return f"{int(duration * 1000)}ms"
214
+ if duration < 60:
215
+ return f"{duration:.1f}s"
216
+ if duration < 3600:
217
+ minutes = int(duration // 60)
218
+ seconds = int(duration % 60)
219
+ return f"{minutes}m {seconds}s"
220
+ hours = int(duration // 3600)
221
+ minutes = int((duration % 3600) // 60)
222
+ return f"{hours}h {minutes}m"
223
+
224
+
225
+ def _format_span_line(span: SpanInfo) -> str:
226
+ """Format a single span as a tree line (without prefix)."""
227
+ if span.status == "completed":
228
+ status_icon = "\u2705"
229
+ elif span.status == "failed":
230
+ status_icon = "\u274c"
231
+ else:
232
+ status_icon = "\u23f3"
233
+ duration = f"{span.duration_ms}ms" if span.duration_ms < 1000 else f"{span.duration_ms / 1000:.1f}s"
234
+
235
+ # Description suffix for task/flow spans
236
+ desc_suffix = ""
237
+ if span.description and span.span_type != "llm":
238
+ desc_suffix = f" -- {span.description}"
239
+
240
+ # LLM suffix: show purpose (if available) alongside model, plus cost
241
+ llm_suffix = ""
242
+ if span.llm_info:
243
+ model = span.llm_info.get("model", "?")
244
+ tokens = span.llm_info.get("total_tokens", 0)
245
+ cost = span.llm_info.get("cost", 0)
246
+ purpose = span.llm_info.get("purpose")
247
+
248
+ purpose_part = f"{purpose} | " if purpose else ""
249
+ cost_part = f", ${cost:.4f}" if cost else ""
250
+ llm_suffix = f" [LLM: {purpose_part}{model}, {tokens:,} tokens{cost_part}]"
251
+
252
+ return f"{span.name} ({duration}) {status_icon}{desc_suffix}{llm_suffix}"
253
+
254
+
255
+ def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
256
+ """Build tree representation of span hierarchy (fully recursive)."""
257
+ lines: list[str] = []
258
+ span = trace.spans.get(span_id)
259
+ if not span:
260
+ return lines
261
+
262
+ # Add this span's line
263
+ lines.append(f"{prefix}{_format_span_line(span)}")
264
+
265
+ # Process children recursively
266
+ children = span.children
267
+ for i, child_id in enumerate(children):
268
+ is_last = i == len(children) - 1
269
+ child_prefix = prefix + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
270
+ continuation_prefix = prefix + (" " if is_last else "\u2502 ")
271
+
272
+ child_span = trace.spans.get(child_id)
273
+ if child_span:
274
+ # Add child line
275
+ lines.append(f"{child_prefix}{_format_span_line(child_span)}")
276
+
277
+ # Recursively add all descendants
278
+ for j, grandchild_id in enumerate(child_span.children):
279
+ gc_is_last = j == len(child_span.children) - 1
280
+ gc_connector = "\u2514\u2500\u2500 " if gc_is_last else "\u251c\u2500\u2500 "
281
+ gc_prefix = continuation_prefix + gc_connector
282
+ gc_continuation = continuation_prefix + (" " if gc_is_last else "\u2502 ")
283
+
284
+ # Recursively build subtree for grandchild and all its descendants
285
+ subtree = _build_tree_recursive(trace, grandchild_id, gc_prefix, gc_continuation)
286
+ lines.extend(subtree)
287
+
288
+ return lines
289
+
290
+
291
+ def _build_tree_recursive(trace: TraceState, span_id: str, prefix: str, continuation: str) -> list[str]:
292
+ """Recursively build tree for a span and all descendants."""
293
+ lines: list[str] = []
294
+ span = trace.spans.get(span_id)
295
+ if not span:
296
+ return lines
297
+
298
+ # Add this span's line with the given prefix
299
+ lines.append(f"{prefix}{_format_span_line(span)}")
300
+
301
+ # Process children
302
+ children = span.children
303
+ for i, child_id in enumerate(children):
304
+ is_last = i == len(children) - 1
305
+ child_prefix = continuation + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
306
+ child_continuation = continuation + (" " if is_last else "\u2502 ")
307
+
308
+ # Recurse for all children
309
+ subtree = _build_tree_recursive(trace, child_id, child_prefix, child_continuation)
310
+ lines.extend(subtree)
311
+
312
+ return lines
@@ -0,0 +1,75 @@
1
+ """Shared data types for the debug tracing system.
2
+
3
+ Extracted to break the circular dependency between _writer.py and _summary.py:
4
+ _writer needs summary generation functions, _summary needs SpanInfo/TraceState.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
@dataclass
class WriteJob:
    """Job for background writer thread.

    Immutable-by-convention snapshot of a finished span, built in
    LocalDebugSpanProcessor.on_end() and queued for the background writer.
    """

    trace_id: str  # hex trace id (processor formats with "032x")
    span_id: str  # hex span id (processor formats with "016x")
    name: str  # span name as reported by OpenTelemetry
    parent_id: str | None  # hex parent span id; None for a root span
    attributes: dict[str, Any]  # span attributes captured at span end
    events: list[Any]  # span events captured at span end
    status_code: str  # "OK" | "ERROR" | "UNSET"
    status_description: str | None  # status description text, if the span set one
    start_time_ns: int  # span start timestamp in nanoseconds (ReadableSpan.start_time)
    end_time_ns: int  # span end timestamp in nanoseconds (ReadableSpan.end_time)
27
+
28
+
29
@dataclass
class SpanInfo:
    """Information about a span for index building.

    Tracks execution details including timing, LLM metrics (tokens, cost,
    expected_cost, purpose), and Prefect context for observability and cost
    tracking across the trace hierarchy.
    """

    span_id: str  # hex span id
    parent_id: str | None  # hex parent span id; None for the root span
    name: str  # span display name
    span_type: str  # e.g. "llm"; non-llm spans show their description in tree lines
    status: str  # "completed" | "failed" | other (rendered as still running)
    start_time: datetime  # span start time
    path: Path  # Actual directory path for this span
    depth: int = 0  # Nesting depth (0 for root)
    order: int = 0  # Global execution order within trace
    end_time: datetime | None = None  # None while the span is still running
    duration_ms: int = 0  # wall-clock duration in milliseconds
    children: list[str] = field(default_factory=list)  # child span ids, in order
    llm_info: dict[str, Any] | None = None  # model/tokens/cost/purpose for LLM spans
    prefect_info: dict[str, Any] | None = None  # Prefect run metadata (e.g. "run_type")
    description: str | None = None  # human-readable description for task/flow spans
    expected_cost: float | None = None  # budgeted cost, compared against actual in summaries
53
+
54
+
55
@dataclass
class TraceState:
    """State for an active trace.

    Maintains trace metadata and span hierarchy with accumulated cost
    metrics (total_cost, total_expected_cost) for monitoring resource
    usage and budget tracking during trace execution.
    """

    trace_id: str  # hex trace id
    name: str  # trace display name (used as the summary title)
    path: Path  # root directory for this trace's debug output
    start_time: datetime  # trace start time
    spans: dict[str, SpanInfo] = field(default_factory=dict)  # span_id -> SpanInfo
    root_span_id: str | None = None  # id of the root span, once known
    total_tokens: int = 0  # accumulated LLM token count across all spans
    total_cost: float = 0.0  # accumulated actual LLM cost ($, per summary formatting)
    total_expected_cost: float = 0.0  # accumulated budgeted LLM cost ($)
    llm_call_count: int = 0  # number of LLM spans observed
    span_counter: int = 0  # Global counter for ordering span directories
    merged_wrapper_ids: set[str] = field(default_factory=set)  # IDs of merged wrappers