PyPI - agentdelta - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentdelta 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

agentdelta/__init__.py +22 -0
agentdelta/api.py +131 -0
agentdelta/cli.py +138 -0
agentdelta/diff.py +191 -0
agentdelta/embed.py +112 -0
agentdelta/instrument.py +140 -0
agentdelta/mcp_server.py +200 -0
agentdelta/py.typed +0 -0
agentdelta/report.py +216 -0
agentdelta/trace.py +175 -0
agentdelta-0.1.0.dist-info/METADATA +463 -0
agentdelta-0.1.0.dist-info/RECORD +15 -0
agentdelta-0.1.0.dist-info/WHEEL +4 -0
agentdelta-0.1.0.dist-info/entry_points.txt +3 -0
agentdelta-0.1.0.dist-info/licenses/LICENSE +21 -0

agentdelta/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""agentdelta — semantic diff engine for AI agent behavior."""
+from agentdelta.diff import DiffResult, ForkPoint, diff_traces
+from agentdelta.instrument import AgentdeltaCallback, record
+from agentdelta.trace import AgentTrace, EdgeType, NodeType, TraceEdge, TraceNode
+# Names exported by ``from agentdelta import *``; every entry is imported
+# above from the diff, instrument, or trace submodule.
+__all__ = [
+    "AgentTrace",
+    "AgentdeltaCallback",
+    "DiffResult",
+    "EdgeType",
+    "ForkPoint",
+    "NodeType",
+    "TraceEdge",
+    "TraceNode",
+    "diff_traces",
+    "record",
+]
+from importlib.metadata import version as _version
+__version__ = _version("agentdelta")

agentdelta/api.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""FastAPI REST wrapper for agentdelta.
+Start with: uvicorn agentdelta.api:app --reload
+Install:    pip install "agentdelta[api]"
+Implements the openapi.yaml contract:
+    POST /diff      — compare two JSONL traces
+    POST /inspect   — summarise a single trace
+    GET  /health    — liveness probe
+"""
+from __future__ import annotations
+import json
+import tempfile
+from pathlib import Path
+from typing import Any
+try:
+    from fastapi import FastAPI, HTTPException
+    from pydantic import BaseModel, Field
+except ImportError as exc:
+    raise ImportError(
+        "API server requires: pip install 'agentdelta[api]'"
+    ) from exc
+from agentdelta import AgentTrace, diff_traces
+from agentdelta.report import to_json
+from agentdelta.trace import NodeType
+# ASGI application object; this is the ``agentdelta.api:app`` target named in
+# the module docstring's uvicorn command.
+app = FastAPI(
+    title="agentdelta API",
+    description="Semantic diff engine for AI agent behavior.",
+    version="0.1.0",  # NOTE(review): hard-coded, while /health reports agentdelta.__version__ — confirm they stay in sync
+    license_info={"name": "MIT", "url": "https://github.com/sandeep-alluru/agentdelta/blob/main/LICENSE"},
+)
+# ── Request / Response models ─────────────────────────────────────────────────
+# Request body for POST /diff. Both traces are sent inline as JSONL text, and
+# both thresholds are validated to lie within [0.0, 1.0] (ge/le bounds).
+# Comments are used instead of a class docstring on purpose: a docstring on a
+# model would surface in the generated schema.
+class DiffRequest(BaseModel):
+    trace_a: str = Field(..., description="Baseline JSONL trace content")
+    trace_b: str = Field(..., description="Candidate JSONL trace content")
+    fork_threshold: float = Field(0.70, ge=0.0, le=1.0, description="Fork detection threshold")
+    match_threshold: float = Field(0.85, ge=0.0, le=1.0, description="Match detection threshold")
+# Request body for POST /inspect: one trace, sent inline as JSONL text.
+class InspectRequest(BaseModel):
+    trace: str = Field(..., description="JSONL trace content to inspect")
+# Response schema for GET /health (used as that route's ``response_model``).
+class HealthResponse(BaseModel):
+    status: str  # the route returns the literal "ok"
+    version: str  # the route returns agentdelta.__version__
+# ── Helpers ───────────────────────────────────────────────────────────────────
+def _load_trace_from_string(content: str, name: str) -> AgentTrace:
+    """Write content to a temp file and load it as an AgentTrace."""
+    tmp_path: str | None = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".jsonl", delete=False, prefix=f"agentdelta_{name}_"
+        ) as f:
+            f.write(content)
+            tmp_path = f.name
+        return AgentTrace.load(tmp_path)
+    except Exception as exc:
+        raise HTTPException(status_code=422, detail=f"Invalid {name} trace: {exc}") from exc
+    finally:
+        if tmp_path:
+            Path(tmp_path).unlink(missing_ok=True)
+# ── Routes ────────────────────────────────────────────────────────────────────
+@app.get("/health", response_model=HealthResponse)
+async def health() -> dict[str, str]:
+    """Liveness probe."""
+    from agentdelta import __version__
+    return {"status": "ok", "version": __version__}
+@app.post("/diff")
+async def diff(request: DiffRequest) -> Any:
+    """Compare two agent traces and return a DiffResult with fork point."""
+    trace_a = _load_trace_from_string(request.trace_a, "trace_a")
+    trace_b = _load_trace_from_string(request.trace_b, "trace_b")
+    result = diff_traces(
+        trace_a,
+        trace_b,
+        fork_threshold=request.fork_threshold,
+        match_threshold=request.match_threshold,
+    )
+    return json.loads(to_json(result))
+@app.post("/inspect")
+async def inspect(request: InspectRequest) -> Any:
+    """Summarise a single agent trace."""
+    trace = _load_trace_from_string(request.trace, "trace")
+    steps = [
+        {
+            "step": node.step,
+            "type": node.node_type.value,
+            "content_preview": node.content[:120],
+            "id": node.id,
+        }
+        for node in trace.nodes
+    ]
+    node_type_counts: dict[str, int] = {}
+    for node in trace.nodes:
+        key = node.node_type.value
+        node_type_counts[key] = node_type_counts.get(key, 0) + 1
+    return {
+        "run_id": trace.run_id,
+        "total_nodes": len(trace.nodes),
+        "total_edges": len(trace.edges),
+        "node_type_counts": node_type_counts,
+        "has_tool_calls": any(n.node_type == NodeType.TOOL_CALL for n in trace.nodes),
+        "steps": steps,
+        "metadata": trace.metadata,
+    }

agentdelta/cli.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""Command-line interface for agentdelta."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import click
+from agentdelta.diff import diff_traces
+from agentdelta.report import print_diff, to_json, to_markdown
+from agentdelta.trace import AgentTrace
+# Root click command group; the ``diff`` and ``inspect`` subcommands below
+# attach to it via ``@main.command()``. The docstring is left untouched
+# because it is the function's only body and click reads it as help text.
+@click.group()
+@click.version_option(package_name="agentdelta")
+def main() -> None:
+    """Semantic diff engine for AI agent behavior traces."""
+@main.command()
+@click.argument("trace_a", type=click.Path(exists=True, path_type=Path))
+@click.argument("trace_b", type=click.Path(exists=True, path_type=Path))
+@click.option(
+    "--format",
+    "fmt",
+    type=click.Choice(["rich", "json", "markdown"]),
+    default="rich",
+    show_default=True,
+    help="Output format.",
+)
+@click.option(
+    "--fork-threshold",
+    type=float,
+    default=0.70,
+    show_default=True,
+    help="Similarity below this marks a fork point.",
+)
+@click.option(
+    "--match-threshold",
+    type=float,
+    default=0.85,
+    show_default=True,
+    help="Similarity above this is a match.",
+)
+@click.option(
+    "--show-matches",
+    is_flag=True,
+    default=False,
+    help="Include matched (unchanged) steps in output.",
+)
+@click.option(
+    "--exit-code",
+    is_flag=True,
+    default=False,
+    help="Exit with code 1 if a regression is detected (useful in CI).",
+)
+def diff(
+    trace_a: Path,
+    trace_b: Path,
+    fmt: str,
+    fork_threshold: float,
+    match_threshold: float,
+    show_matches: bool,
+    exit_code: bool,
+) -> None:
+    """Diff two agent trace files and report behavioral divergence.
+    TRACE_A is the baseline run; TRACE_B is the candidate run.
+    """
+    run_a = AgentTrace.load(trace_a)
+    run_b = AgentTrace.load(trace_b)
+    result = diff_traces(
+        run_a,
+        run_b,
+        fork_threshold=fork_threshold,
+        match_threshold=match_threshold,
+    )
+    if fmt == "rich":
+        print_diff(result, show_matches=show_matches)
+    elif fmt == "json":
+        click.echo(to_json(result))
+    elif fmt == "markdown":
+        click.echo(to_markdown(result))
+    if exit_code and result.has_regression:
+        sys.exit(1)
+@main.command()
+@click.argument("trace_file", type=click.Path(path_type=Path))
+@click.option("--run-id", default=None, help="Explicit run identifier.")
+def inspect(trace_file: Path, run_id: str | None) -> None:
+    """Print a summary of a single trace file."""
+    from rich.console import Console
+    from rich.table import Table
+    from agentdelta.trace import NodeType
+    _NODE_ICONS = {
+        NodeType.START: "▶",
+        NodeType.LLM: "🧠",
+        NodeType.TOOL_CALL: "🔧",
+        NodeType.TOOL_RETURN: "↩",
+        NodeType.END: "■",
+    }
+    console = Console()
+    if not trace_file.exists():
+        raise click.ClickException(f"File not found: {trace_file}")
+    trace = AgentTrace.load(trace_file)
+    console.print(f"\n[bold]Trace:[/bold] [dim]{trace.run_id}[/dim]  "
+                  f"[dim]{len(trace.nodes)} nodes / {len(trace.edges)} edges[/dim]\n")
+    table = Table(show_header=True, header_style="bold", box=None, padding=(0, 1))
+    table.add_column("Step", style="dim", width=5)
+    table.add_column("Type", width=14)
+    table.add_column("Content")
+    for node in trace.nodes:
+        icon = _NODE_ICONS.get(node.node_type, "?")
+        content = node.content[:100].replace("\n", " ")
+        if len(node.content) > 100:
+            content += "…"
+        table.add_row(str(node.step), f"{icon} {node.node_type.value}", content)
+    console.print(table)
+    console.print()
+# Run the click group when this module is executed as a script.
+if __name__ == "__main__":
+    main()

agentdelta/diff.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""Core diff algorithm: align two agent traces and find the first semantic fork."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+from agentdelta.embed import align_traces, embed_trace
+from agentdelta.trace import AgentTrace, NodeType, TraceNode
+@dataclass
+class ForkPoint:
+    """The first step where two traces take meaningfully different paths.
+    Attributes:
+        step_a: Step number in trace A where the fork occurred.
+        step_b: Step number in trace B where the fork occurred.
+        node_a: The divergent node from trace A.
+        node_b: The divergent node from trace B.
+        similarity: Cosine similarity between the two nodes at the fork (< fork_threshold).
+        description: Human-readable explanation of why this step diverged.
+    """
+    step_a: int
+    step_b: int
+    node_a: TraceNode
+    node_b: TraceNode
+    similarity: float
+    description: str
+    def is_tool_change(self) -> bool:
+        """Return True if the fork is a tool-selection or tool-return change."""
+        return (
+            self.node_a.node_type in (NodeType.TOOL_CALL, NodeType.TOOL_RETURN)
+            and self.node_b.node_type in (NodeType.TOOL_CALL, NodeType.TOOL_RETURN)
+        )
+    def is_reasoning_change(self) -> bool:
+        """Return True if the fork is an LLM reasoning divergence."""
+        return (
+            self.node_a.node_type == NodeType.LLM
+            and self.node_b.node_type == NodeType.LLM
+        )
+@dataclass
+class StepDiff:
+    """One entry of a trace alignment together with its classification.
+    Attributes:
+        step_a: Node from trace A, or ``None`` if this step was added in B.
+        step_b: Node from trace B, or ``None`` if this step was removed in A.
+        similarity: Similarity score of the pair; ``diff_traces`` stores 0.0
+            for added/removed entries.
+        status: One of ``"match"``, ``"changed"``, ``"added"``, or ``"removed"``.
+        summary: Human-readable one-line description of this diff entry.
+            ``diff_traces`` leaves it empty for ``"match"`` entries.
+    """
+    step_a: TraceNode | None
+    step_b: TraceNode | None
+    similarity: float
+    status: str
+    summary: str = ""
+@dataclass
+class DiffResult:
+    """Full diff result between two agent traces.
+    Attributes:
+        run_id_a: Run identifier of the baseline trace.
+        run_id_b: Run identifier of the candidate trace.
+        steps: All aligned step pairs, in order.
+        fork_point: The first divergent step, or ``None`` if the traces are equivalent.
+        summary: Pre-computed aggregate statistics (total, matched, changed, etc.).
+    """
+    run_id_a: str
+    run_id_b: str
+    steps: list[StepDiff] = field(default_factory=list)
+    fork_point: ForkPoint | None = None
+    summary: dict[str, Any] = field(default_factory=dict)
+    @property
+    def has_regression(self) -> bool:
+        """True if the traces diverged (a fork point was detected)."""
+        return self.fork_point is not None
+    @property
+    def changed_steps(self) -> list[StepDiff]:
+        """Steps where both traces have a node but they diverged semantically."""
+        return [s for s in self.steps if s.status == "changed"]
+    @property
+    def added_steps(self) -> list[StepDiff]:
+        """Steps present only in trace B (inserted relative to baseline)."""
+        return [s for s in self.steps if s.status == "added"]
+    @property
+    def removed_steps(self) -> list[StepDiff]:
+        """Steps present only in trace A (removed relative to baseline)."""
+        return [s for s in self.steps if s.status == "removed"]
+def _describe_fork(na: TraceNode, nb: TraceNode, similarity: float) -> str:
+    if na.node_type == NodeType.TOOL_CALL and nb.node_type == NodeType.TOOL_CALL:
+        tool_a = na.content.split("(")[0].strip()
+        tool_b = nb.content.split("(")[0].strip()
+        if tool_a != tool_b:
+            return f"Tool selection changed: '{tool_a}' → '{tool_b}'"
+        return f"Tool call arguments diverged (similarity: {similarity:.2f})"
+    if na.node_type == NodeType.LLM and nb.node_type == NodeType.LLM:
+        return f"Reasoning path diverged (similarity: {similarity:.2f})"
+    return (
+        f"Step type changed: {na.node_type.value} → {nb.node_type.value} "
+        f"(similarity: {similarity:.2f})"
+    )
+def diff_traces(
+    trace_a: AgentTrace,
+    trace_b: AgentTrace,
+    fork_threshold: float = 0.70,
+    match_threshold: float = 0.85,
+) -> DiffResult:
+    """
+    Compute a semantic diff between two agent traces.
+    Args:
+        trace_a: Baseline trace.
+        trace_b: Comparison trace.
+        fork_threshold: Similarity below this triggers a fork point.
+        match_threshold: Similarity above this is considered a match.
+    Returns:
+        DiffResult with aligned steps and the first fork point if found.
+    """
+    # Ensure both traces are embedded
+    embed_trace(trace_a)
+    embed_trace(trace_b)
+    alignment = align_traces(trace_a, trace_b, threshold=fork_threshold)
+    steps: list[StepDiff] = []
+    fork_point: ForkPoint | None = None
+    for na, nb, score in alignment:
+        if na is None:
+            summary = f"+ [{nb.node_type.value}] {nb.content[:80]}"
+            steps.append(StepDiff(None, nb, 0.0, "added", summary))
+        elif nb is None:
+            summary = f"- [{na.node_type.value}] {na.content[:80]}"
+            steps.append(StepDiff(na, None, 0.0, "removed", summary))
+        elif score >= match_threshold:
+            steps.append(StepDiff(na, nb, score, "match"))
+        else:
+            desc = _describe_fork(na, nb, score)
+            step = StepDiff(na, nb, score, "changed", desc)
+            steps.append(step)
+            # Record the first fork point
+            if fork_point is None:
+                fork_point = ForkPoint(
+                    step_a=na.step,
+                    step_b=nb.step,
+                    node_a=na,
+                    node_b=nb,
+                    similarity=score,
+                    description=desc,
+                )
+    total = len(alignment)
+    matched = sum(1 for s in steps if s.status == "match")
+    changed = sum(1 for s in steps if s.status == "changed")
+    result = DiffResult(
+        run_id_a=trace_a.run_id,
+        run_id_b=trace_b.run_id,
+        steps=steps,
+        fork_point=fork_point,
+        summary={
+            "total_steps": total,
+            "matched": matched,
+            "changed": changed,
+            "added": len([s for s in steps if s.status == "added"]),
+            "removed": len([s for s in steps if s.status == "removed"]),
+            "similarity_pct": round(matched / total * 100, 1) if total else 100.0,
+            "has_regression": fork_point is not None or (total > 0 and matched == 0),
+            "fork_step": fork_point.step_a if fork_point else None,
+        },
+    )
+    return result

agentdelta/embed.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""Embedding and semantic alignment for trace nodes."""
+from __future__ import annotations
+import threading
+import numpy as np
+from agentdelta.trace import AgentTrace, TraceNode
+_lock = threading.Lock()
+_model = None
+def _get_model():
+    """Return the sentence-transformer singleton, initialising it on first call (thread-safe)."""
+    global _model
+    if _model is None:
+        with _lock:
+            if _model is None:
+                from sentence_transformers import SentenceTransformer
+                _model = SentenceTransformer("all-MiniLM-L6-v2")
+    return _model
+def embed_trace(trace: AgentTrace, batch_size: int = 64) -> AgentTrace:
+    """Compute embeddings for all nodes in a trace (in-place) and return the trace."""
+    model = _get_model()
+    contents = [node.content for node in trace.nodes]
+    if not contents:
+        return trace
+    embeddings = model.encode(contents, batch_size=batch_size, show_progress_bar=False)
+    for node, emb in zip(trace.nodes, embeddings, strict=False):
+        node.embedding = emb.tolist()
+    return trace
+def cosine_similarity(a: list[float], b: list[float]) -> float:
+    """Return cosine similarity between two embedding vectors. Returns 0.0 for zero vectors."""
+    va, vb = np.array(a), np.array(b)
+    denom = np.linalg.norm(va) * np.linalg.norm(vb)
+    if denom == 0:
+        return 0.0
+    return float(np.dot(va, vb) / denom)
+def find_best_match(
+    node: TraceNode,
+    candidates: list[TraceNode],
+    threshold: float = 0.75,
+) -> tuple[TraceNode | None, float]:
+    """Find the candidate most semantically similar to *node*.
+    Returns ``(best_node, score)``. If the best score is below *threshold*,
+    returns ``(None, best_score)`` rather than a low-confidence match.
+    """
+    if node.embedding is None or not candidates:
+        return None, 0.0
+    best_node, best_score = None, -1.0
+    for candidate in candidates:
+        if candidate.embedding is None:
+            continue
+        score = cosine_similarity(node.embedding, candidate.embedding)
+        if score > best_score:
+            best_score = score
+            best_node = candidate
+    if best_score < threshold:
+        return None, best_score
+    return best_node, best_score
+def align_traces(
+    trace_a: AgentTrace,
+    trace_b: AgentTrace,
+    window: int = 5,
+    threshold: float = 0.75,
+) -> list[tuple[TraceNode | None, TraceNode | None, float]]:
+    """Align nodes from two traces by semantic similarity within a sliding window.
+    Uses greedy 1:1 matching: each node in *trace_a* is paired with the
+    closest unmatched node in *trace_b* within ±*window* positions.
+    Returns:
+        List of ``(node_a, node_b, similarity)`` triples.
+        Unmatched nodes appear as ``(node, None, 0.0)`` or ``(None, node, 0.0)``.
+    """
+    nodes_a = trace_a.nodes
+    nodes_b = trace_b.nodes
+    alignment: list[tuple[TraceNode | None, TraceNode | None, float]] = []
+    used_b: set[int] = set()
+    node_to_idx: dict[int, int] = {id(nb): j for j, nb in enumerate(nodes_b)}
+    for i, na in enumerate(nodes_a):
+        start = max(0, i - window)
+        end = min(len(nodes_b), i + window + 1)
+        candidates = [nodes_b[j] for j in range(start, end) if j not in used_b]
+        match, score = find_best_match(na, candidates, threshold)
+        if match is not None:
+            used_b.add(node_to_idx[id(match)])
+            alignment.append((na, match, score))
+        else:
+            alignment.append((na, None, 0.0))
+    for j, nb in enumerate(nodes_b):
+        if j not in used_b:
+            alignment.append((None, nb, 0.0))
+    return alignment