ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
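The listing above amounts to a package-level reorganization: `debug/`, `tracing.py`, and `progress.py` move under new `observability/` and `deployment/` packages, `pipeline.py` becomes a `pipeline/` package, and the `flow/`, `prompt_builder/`, `storage/`, and `utils/` modules are removed. A hypothetical sketch of what the moves mean at the module-path level (paths are taken from the listing; whether these modules are public API or re-exported from the package root is not visible in this diff):

```python
# Hypothetical illustration only - module paths come from the file listing above;
# the package may re-export these names elsewhere.
import importlib

moved = {
    "ai_pipeline_core.tracing": "ai_pipeline_core.observability.tracing",
    "ai_pipeline_core.progress": "ai_pipeline_core.deployment.progress",
    "ai_pipeline_core.utils.remote_deployment": "ai_pipeline_core.deployment.remote",
}

for old, new in moved.items():
    print(f"{old} -> {new}")
    importlib.import_module(new)  # would raise ModuleNotFoundError on 0.3.4
```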
ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py}
@@ -1,17 +1,22 @@
-"""Summary generation for trace debugging.
+"""Static summary generation for trace debugging.
 
-Generates a single _summary.md file that serves both human inspection and LLM debugging.
-Combines high-level overview with detailed navigation for comprehensive trace analysis.
+Generates _summary.md files with execution tree, LLM calls, cost breakdown,
+and navigation guide. No LLM dependencies - pure text formatting.
+
+For LLM-powered auto-summary, see _auto_summary.py.
 """
 
-from .writer import SpanInfo, TraceState
+from typing import Any
+
+from ._types import SpanInfo, TraceState
 
 
-def generate_summary(trace: TraceState) -> str:
+def generate_summary(trace: TraceState) -> str:  # noqa: PLR0912, PLR0914, PLR0915
     """Generate unified _summary.md file.
 
     Single file optimized for both human inspection and LLM debugger context.
-    Structure: Overview → Tree → Root Span → LLM Calls → Errors → Navigation.
+    Structure: Overview -> Tree -> Root Span -> LLM Calls -> Cost by Task -> Errors -> Navigation.
+    Cost by Task table includes expected cost comparison with OVER/OK status indicators.
     """
     lines = [
         f"# Trace Summary: {trace.name}",
@@ -20,17 +25,21 @@ def generate_summary(trace: TraceState) -> str:
 
     # Status and stats
     failed_spans = [s for s in trace.spans.values() if s.status == "failed"]
-    status_emoji = "❌" if failed_spans else "✅"
+    status_emoji = "\u274c" if failed_spans else "\u2705"
     status_text = f"Failed ({len(failed_spans)} errors)" if failed_spans else "Completed"
     duration_str = _format_duration(trace)
 
+    cost_str = f"**Total Cost**: ${trace.total_cost:.4f}"
+    if trace.total_expected_cost > 0:
+        cost_str += f" (expected: ${trace.total_expected_cost:.4f})"
+
     lines.extend([
         f"**Status**: {status_emoji} {status_text} | "
         f"**Duration**: {duration_str} | "
         f"**Spans**: {len(trace.spans)} | "
         f"**LLM Calls**: {trace.llm_call_count} | "
         f"**Total Tokens**: {trace.total_tokens:,} | "
-        f"**Total Cost**: ${trace.total_cost:.4f}",
+        f"{cost_str}",
         "",
     ])
 
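The new `cost_str` logic only appends the expected-cost figure when a budget was recorded. A minimal sketch of the resulting header fragment, using made-up totals:

```python
# Sketch with made-up totals; mirrors the cost_str logic added above.
total_cost, total_expected_cost = 0.0312, 0.05

cost_str = f"**Total Cost**: ${total_cost:.4f}"
if total_expected_cost > 0:
    cost_str += f" (expected: ${total_expected_cost:.4f})"

print(cost_str)  # **Total Cost**: $0.0312 (expected: $0.0500)
```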
@@ -46,8 +55,7 @@ def generate_summary(trace: TraceState) -> str:
         lines.extend(tree_lines)
     else:
         # Fallback: list all spans
-        for span in sorted(trace.spans.values(), key=lambda s: s.start_time):
-            lines.append(_format_span_line(span))
+        lines.extend(_format_span_line(span) for span in sorted(trace.spans.values(), key=lambda s: s.start_time))
 
     lines.extend([
         "```",
@@ -77,27 +85,48 @@ def generate_summary(trace: TraceState) -> str:
         lines.extend([
             "## LLM Calls (by cost)",
             "",
-            "| # | Span | Model | Input→Output | Total | Cost | Path |",
-            "|---|------|-------|--------------|-------|------|------|",
+            "| # | Span | Purpose | Model | Input\u2192Output | Total | Cost | Expected | Path |",
+            "|---|------|---------|-------|--------------|-------|------|----------|------|",
         ])
 
         for i, span in enumerate(llm_spans, 1):
             info = span.llm_info
             if info:
                 model = info.get("model", "unknown")
+                purpose = info.get("purpose", "")
                 in_tokens = info.get("input_tokens", 0)
                 out_tokens = info.get("output_tokens", 0)
                 total_tokens = info.get("total_tokens", 0)
                 cost = info.get("cost", 0)
+                expected = info.get("expected_cost")
+                expected_str = f"${expected:.4f}" if expected else ""
                 span_path = span.path.relative_to(trace.path).as_posix()
                 lines.append(
-                    f"| {i} | {span.name} | {model} | "
-                    f"{in_tokens:,}→{out_tokens:,} | {total_tokens:,} | ${cost:.4f} | "
-                    f"`{span_path}/` |"
+                    f"| {i} | {span.name} | {purpose} | {model} | "
+                    f"{in_tokens:,}\u2192{out_tokens:,} | {total_tokens:,} | ${cost:.4f} | "
+                    f"{expected_str} | `{span_path}/` |"
                 )
 
         lines.append("")
 
+    # Cost aggregation by parent task/flow
+    cost_by_parent = _aggregate_costs_by_parent(trace)
+    if cost_by_parent:
+        lines.extend([
+            "## Cost by Task",
+            "",
+            "| Name | Type | LLM Calls | Cost | Expected | Status |",
+            "|------|------|-----------|------|----------|--------|",
+        ])
+        for entry in cost_by_parent:
+            expected_str = f"${entry['expected_cost']:.4f}" if entry["expected_cost"] else ""
+            status = ""
+            if entry["expected_cost"] and entry["actual_cost"] > 0:
+                ratio = entry["actual_cost"] / entry["expected_cost"]
+                status = "OVER" if ratio > 1.1 else "OK"
+            lines.append(f"| {entry['name']} | {entry['type']} | {entry['llm_calls']} | ${entry['actual_cost']:.4f} | {expected_str} | {status} |")
+        lines.append("")
+
     # Errors
     if failed_spans:
         lines.extend([
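The Cost by Task status column compares actual against expected cost and flags anything more than 10% over budget. A worked example of that threshold with sample entries (values are invented):

```python
# Sample entries only; reproduces the OVER/OK rule from the hunk above.
entries = [
    {"name": "summarize", "actual_cost": 0.080, "expected_cost": 0.05},  # ratio 1.60
    {"name": "classify", "actual_cost": 0.021, "expected_cost": 0.02},   # ratio 1.05
]
for entry in entries:
    ratio = entry["actual_cost"] / entry["expected_cost"]
    print(entry["name"], "OVER" if ratio > 1.1 else "OK")
# summarize OVER
# classify OK
```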
@@ -122,13 +151,48 @@ def generate_summary(trace: TraceState) -> str:
         "",
         "- Each span directory contains `_span.yaml` (metadata), `input.yaml`, `output.yaml`",
         "- LLM span inputs contain the full message list",
-        "- `_tree.yaml` has span_id → path mapping and full hierarchy",
+        "- `_tree.yaml` has span_id \u2192 path mapping and full hierarchy",
         "",
     ])
 
     return "\n".join(lines)
 
 
+def _aggregate_costs_by_parent(trace: TraceState) -> list[dict[str, Any]]:
+    """Aggregate LLM costs by parent task/flow span."""
+    parent_costs: dict[str, dict[str, Any]] = {}
+
+    for span in trace.spans.values():
+        if not span.llm_info:
+            continue
+        cost = span.llm_info.get("cost", 0.0)
+        if not cost:
+            continue
+
+        # Find parent (task or flow span)
+        parent_id = span.parent_id
+        if not parent_id or parent_id not in trace.spans:
+            continue
+        parent = trace.spans[parent_id]
+
+        if parent_id not in parent_costs:
+            run_type = "unknown"
+            if parent.prefect_info:
+                run_type = parent.prefect_info.get("run_type", "unknown")
+            parent_costs[parent_id] = {
+                "name": parent.name,
+                "type": run_type,
+                "actual_cost": 0.0,
+                "expected_cost": parent.expected_cost,
+                "llm_calls": 0,
+            }
+        parent_costs[parent_id]["actual_cost"] += cost
+        parent_costs[parent_id]["llm_calls"] += 1
+
+    # Sort by cost descending
+    return sorted(parent_costs.values(), key=lambda x: x["actual_cost"], reverse=True)
+
+
 def _format_duration(trace: TraceState) -> str:
     """Format trace duration as human-readable string."""
     # Calculate from spans if we have them
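`_aggregate_costs_by_parent` rolls LLM-call costs up to the enclosing task or flow span. An illustrative sketch that exercises it with the internal dataclasses added in this release; these are underscore-prefixed modules rather than public API, the paths and signatures come from this diff, and all span values below are invented:

```python
from datetime import datetime, timezone
from pathlib import Path

from ai_pipeline_core.observability._debug._summary import _aggregate_costs_by_parent
from ai_pipeline_core.observability._debug._types import SpanInfo, TraceState

now = datetime.now(timezone.utc)
root = Path("/tmp/trace")

trace = TraceState(trace_id="t1", name="demo", path=root, start_time=now)
# A task span with an expected-cost budget of $0.05...
trace.spans["task"] = SpanInfo(
    span_id="task", parent_id=None, name="summarize", span_type="task",
    status="completed", start_time=now, path=root / "001_summarize",
    prefect_info={"run_type": "task"}, expected_cost=0.05,
)
# ...and one LLM call beneath it that actually cost $0.08.
trace.spans["llm"] = SpanInfo(
    span_id="llm", parent_id="task", name="llm_call", span_type="llm",
    status="completed", start_time=now, path=root / "001_summarize" / "001_llm_call",
    llm_info={"model": "example-model", "cost": 0.08, "total_tokens": 1200},
)

print(_aggregate_costs_by_parent(trace))
# [{'name': 'summarize', 'type': 'task', 'actual_cost': 0.08,
#   'expected_cost': 0.05, 'llm_calls': 1}]
```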
@@ -147,32 +211,45 @@ def _format_duration(trace: TraceState) -> str:
 
     if duration < 1:
         return f"{int(duration * 1000)}ms"
-    elif duration < 60:
+    if duration < 60:
         return f"{duration:.1f}s"
-    elif duration < 3600:
+    if duration < 3600:
         minutes = int(duration // 60)
         seconds = int(duration % 60)
         return f"{minutes}m {seconds}s"
-    else:
-        hours = int(duration // 3600)
-        minutes = int((duration % 3600) // 60)
-        return f"{hours}h {minutes}m"
+    hours = int(duration // 3600)
+    minutes = int((duration % 3600) // 60)
+    return f"{hours}h {minutes}m"
 
 
 def _format_span_line(span: SpanInfo) -> str:
     """Format a single span as a tree line (without prefix)."""
-    status_icon = "✅" if span.status == "completed" else "❌" if span.status == "failed" else "⏳"
-    duration = (
-        f"{span.duration_ms}ms" if span.duration_ms < 1000 else f"{span.duration_ms / 1000:.1f}s"
-    )
+    if span.status == "completed":
+        status_icon = "\u2705"
+    elif span.status == "failed":
+        status_icon = "\u274c"
+    else:
+        status_icon = "\u23f3"
+    duration = f"{span.duration_ms}ms" if span.duration_ms < 1000 else f"{span.duration_ms / 1000:.1f}s"
+
+    # Description suffix for task/flow spans
+    desc_suffix = ""
+    if span.description and span.span_type != "llm":
+        desc_suffix = f" -- {span.description}"
 
+    # LLM suffix: show purpose (if available) alongside model, plus cost
     llm_suffix = ""
     if span.llm_info:
         model = span.llm_info.get("model", "?")
         tokens = span.llm_info.get("total_tokens", 0)
-        llm_suffix = f" [LLM: {model}, {tokens:,} tokens]"
+        cost = span.llm_info.get("cost", 0)
+        purpose = span.llm_info.get("purpose")
+
+        purpose_part = f"{purpose} | " if purpose else ""
+        cost_part = f", ${cost:.4f}" if cost else ""
+        llm_suffix = f" [LLM: {purpose_part}{model}, {tokens:,} tokens{cost_part}]"
 
-    return f"{span.name} ({duration}) {status_icon}{llm_suffix}"
+    return f"{span.name} ({duration}) {status_icon}{desc_suffix}{llm_suffix}"
 
 
 def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
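`_format_span_line` now folds purpose and cost into the LLM suffix and appends a description for task/flow spans. A small sketch of the new output; the function and dataclass are internal API taken from this diff, and the values are invented:

```python
from datetime import datetime, timezone
from pathlib import Path

from ai_pipeline_core.observability._debug._summary import _format_span_line
from ai_pipeline_core.observability._debug._types import SpanInfo

span = SpanInfo(
    span_id="llm", parent_id=None, name="llm_call", span_type="llm",
    status="completed", start_time=datetime.now(timezone.utc), path=Path("."),
    duration_ms=2300,
    llm_info={"model": "example-model", "total_tokens": 1200, "cost": 0.08,
              "purpose": "extract key points"},
)
print(_format_span_line(span))
# llm_call (2.3s) ✅ [LLM: extract key points | example-model, 1,200 tokens, $0.0800]
```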
@@ -189,8 +266,8 @@ def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
     children = span.children
     for i, child_id in enumerate(children):
         is_last = i == len(children) - 1
-        child_prefix = prefix + ("└── " if is_last else "├── ")
-        continuation_prefix = prefix + ("    " if is_last else "│   ")
+        child_prefix = prefix + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
+        continuation_prefix = prefix + ("    " if is_last else "\u2502   ")
 
         child_span = trace.spans.get(child_id)
         if child_span:
@@ -200,8 +277,9 @@ def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
             # Recursively add all descendants
             for j, grandchild_id in enumerate(child_span.children):
                 gc_is_last = j == len(child_span.children) - 1
-                gc_prefix = continuation_prefix + ("└── " if gc_is_last else "├── ")
-                gc_continuation = continuation_prefix + ("    " if gc_is_last else "│   ")
+                gc_connector = "\u2514\u2500\u2500 " if gc_is_last else "\u251c\u2500\u2500 "
+                gc_prefix = continuation_prefix + gc_connector
+                gc_continuation = continuation_prefix + ("    " if gc_is_last else "\u2502   ")
 
                 # Recursively build subtree for grandchild and all its descendants
                 subtree = _build_tree_recursive(trace, grandchild_id, gc_prefix, gc_continuation)
@@ -210,9 +288,7 @@ def _build_tree(trace: TraceState, span_id: str, prefix: str = "") -> list[str]:
     return lines
 
 
-def _build_tree_recursive(
-    trace: TraceState, span_id: str, prefix: str, continuation: str
-) -> list[str]:
+def _build_tree_recursive(trace: TraceState, span_id: str, prefix: str, continuation: str) -> list[str]:
     """Recursively build tree for a span and all descendants."""
     lines: list[str] = []
     span = trace.spans.get(span_id)
@@ -226,8 +302,8 @@ def _build_tree_recursive(
     children = span.children
     for i, child_id in enumerate(children):
         is_last = i == len(children) - 1
-        child_prefix = continuation + ("└── " if is_last else "├── ")
-        child_continuation = continuation + ("    " if is_last else "│   ")
+        child_prefix = continuation + ("\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 ")
+        child_continuation = continuation + ("    " if is_last else "\u2502   ")
 
         # Recurse for all children
         subtree = _build_tree_recursive(trace, child_id, child_prefix, child_continuation)
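The tree-drawing hunks above swap literal box-drawing glyphs for `\u` escape sequences; the rendered output is unchanged. A quick check of what the escapes produce, followed by a hypothetical rendered tree (span names and figures are invented):

```python
# The escapes used above are the usual box-drawing characters:
print("\u2514\u2500\u2500 ")  # "└── "
print("\u251c\u2500\u2500 ")  # "├── "
print("\u2502   ")            # "│   "

# Hypothetical rendering of a small trace tree built from these prefixes:
#   my_flow (4.1s) ✅
#   ├── load_documents (0.8s) ✅
#   │   └── parse_pdf (0.5s) ✅
#   └── summarize (2.3s) ✅ [LLM: example-model, 1,200 tokens, $0.0800]
```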
ai_pipeline_core/observability/_debug/_types.py (new file)
@@ -0,0 +1,75 @@
+"""Shared data types for the debug tracing system.
+
+Extracted to break the circular dependency between _writer.py and _summary.py:
+_writer needs summary generation functions, _summary needs SpanInfo/TraceState.
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+
+@dataclass
+class WriteJob:
+    """Job for background writer thread."""
+
+    trace_id: str
+    span_id: str
+    name: str
+    parent_id: str | None
+    attributes: dict[str, Any]
+    events: list[Any]
+    status_code: str  # "OK" | "ERROR" | "UNSET"
+    status_description: str | None
+    start_time_ns: int
+    end_time_ns: int
+
+
+@dataclass
+class SpanInfo:
+    """Information about a span for index building.
+
+    Tracks execution details including timing, LLM metrics (tokens, cost, expected_cost, purpose),
+    and Prefect context for observability and cost tracking across the trace hierarchy.
+    """
+
+    span_id: str
+    parent_id: str | None
+    name: str
+    span_type: str
+    status: str
+    start_time: datetime
+    path: Path  # Actual directory path for this span
+    depth: int = 0  # Nesting depth (0 for root)
+    order: int = 0  # Global execution order within trace
+    end_time: datetime | None = None
+    duration_ms: int = 0
+    children: list[str] = field(default_factory=list)
+    llm_info: dict[str, Any] | None = None
+    prefect_info: dict[str, Any] | None = None
+    description: str | None = None
+    expected_cost: float | None = None
+
+
+@dataclass
+class TraceState:
+    """State for an active trace.
+
+    Maintains trace metadata and span hierarchy with accumulated cost
+    metrics (total_cost, total_expected_cost) for monitoring resource
+    usage and budget tracking during trace execution.
+    """
+
+    trace_id: str
+    name: str
+    path: Path
+    start_time: datetime
+    spans: dict[str, SpanInfo] = field(default_factory=dict)
+    root_span_id: str | None = None
+    total_tokens: int = 0
+    total_cost: float = 0.0
+    total_expected_cost: float = 0.0
+    llm_call_count: int = 0
+    span_counter: int = 0  # Global counter for ordering span directories
+    merged_wrapper_ids: set[str] = field(default_factory=set)  # IDs of merged wrappers
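As the docstring says, `_types.py` exists to break the `_writer.py` / `_summary.py` import cycle: both modules now depend only on this leaf module. A small sketch showing the dataclasses can be used standalone (the module path is internal and taken from this diff):

```python
from dataclasses import fields

from ai_pipeline_core.observability._debug._types import TraceState

# _types.py imports nothing from its sibling modules, so its dataclasses can be
# constructed in isolation (e.g. in tests) without pulling in the background writer.
print([f.name for f in fields(TraceState)][:5])
# ['trace_id', 'name', 'path', 'start_time', 'spans']
```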