agentdelta 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentdelta/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """agentdelta — semantic diff engine for AI agent behavior."""
2
+
3
+ from agentdelta.diff import DiffResult, ForkPoint, diff_traces
4
+ from agentdelta.instrument import AgentdeltaCallback, record
5
+ from agentdelta.trace import AgentTrace, EdgeType, NodeType, TraceEdge, TraceNode
6
+
7
# Names re-exported as the package's public API: classes first, then functions.
__all__ = [
    "AgentTrace", "AgentdeltaCallback", "DiffResult", "EdgeType", "ForkPoint",
    "NodeType", "TraceEdge", "TraceNode",
    "diff_traces", "record",
]
19
+
20
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _version

# Resolve the version from the installed distribution metadata. When the package
# is importable but has no metadata (e.g. a source checkout placed on PYTHONPATH),
# fall back to a placeholder so that ``import agentdelta`` does not fail outright.
try:
    __version__ = _version("agentdelta")
except _PackageNotFoundError:
    __version__ = "0.0.0+unknown"
agentdelta/api.py ADDED
@@ -0,0 +1,131 @@
1
+ """FastAPI REST wrapper for agentdelta.
2
+
3
+ Start with: uvicorn agentdelta.api:app --reload
4
+ Install: pip install "agentdelta[api]"
5
+
6
+ Implements the openapi.yaml contract:
7
+ POST /diff — compare two JSONL traces
8
+ POST /inspect — summarise a single trace
9
+ GET /health — liveness probe
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import tempfile
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ try:
20
+ from fastapi import FastAPI, HTTPException
21
+ from pydantic import BaseModel, Field
22
+ except ImportError as exc:
23
+ raise ImportError(
24
+ "API server requires: pip install 'agentdelta[api]'"
25
+ ) from exc
26
+
27
+ from agentdelta import AgentTrace, diff_traces
28
+ from agentdelta.report import to_json
29
+ from agentdelta.trace import NodeType
30
+
31
# Take the advertised API version from the package itself so the OpenAPI
# metadata cannot drift from the value reported by the /health endpoint.
from agentdelta import __version__ as _package_version

app = FastAPI(
    title="agentdelta API",
    description="Semantic diff engine for AI agent behavior.",
    version=_package_version,
    license_info={"name": "MIT", "url": "https://github.com/sandeep-alluru/agentdelta/blob/main/LICENSE"},
)
37
+
38
+
39
+ # ── Request / Response models ─────────────────────────────────────────────────
40
+
41
+
42
class DiffRequest(BaseModel):
    # Request body for POST /diff. Both traces are passed as raw JSONL text and
    # both thresholds are constrained to the closed interval [0, 1].
    # (Documented with comments rather than a docstring so the generated schema
    # description is left untouched.)
    trace_a: str = Field(default=..., description="Baseline JSONL trace content")
    trace_b: str = Field(default=..., description="Candidate JSONL trace content")
    fork_threshold: float = Field(
        default=0.70, ge=0.0, le=1.0, description="Fork detection threshold"
    )
    match_threshold: float = Field(
        default=0.85, ge=0.0, le=1.0, description="Match detection threshold"
    )
47
+
48
+
49
class InspectRequest(BaseModel):
    # Request body for POST /inspect: a single trace passed as raw JSONL text.
    trace: str = Field(default=..., description="JSONL trace content to inspect")
51
+
52
+
53
class HealthResponse(BaseModel):
    # Response body for GET /health.
    status: str   # the health route in this file always sends "ok"
    version: str  # package version string reported by the health route
56
+
57
+
58
+ # ── Helpers ───────────────────────────────────────────────────────────────────
59
+
60
+
61
def _load_trace_from_string(content: str, name: str) -> AgentTrace:
    """Write *content* to a temporary ``.jsonl`` file and load it as an AgentTrace.

    Args:
        content: Raw JSONL trace text received in the request body.
        name: Label for the trace; used in the temp-file prefix and in the
            error message.

    Returns:
        The trace produced by ``AgentTrace.load`` for the temporary file.

    Raises:
        HTTPException: With status 422 if writing the file or loading the
            trace fails for any reason.
    """
    tmp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, prefix=f"agentdelta_{name}_"
        ) as f:
            # Record the path *before* writing: the file is created with
            # delete=False, so if the write raises (e.g. an unencodable
            # character) the ``finally`` clause must still be able to remove it.
            tmp_path = f.name
            f.write(content)
        return AgentTrace.load(tmp_path)
    except Exception as exc:
        # Boundary handler: every failure is reported to the client as a 422.
        raise HTTPException(status_code=422, detail=f"Invalid {name} trace: {exc}") from exc
    finally:
        if tmp_path:
            Path(tmp_path).unlink(missing_ok=True)
76
+
77
+
78
+ # ── Routes ────────────────────────────────────────────────────────────────────
79
+
80
+
81
@app.get("/health", response_model=HealthResponse)
async def health() -> dict[str, str]:
    """Liveness probe."""
    # Imported at call time, exactly as before, and aliased for readability.
    from agentdelta import __version__ as package_version

    payload: dict[str, str] = {"status": "ok", "version": package_version}
    return payload
86
+
87
+
88
@app.post("/diff")
async def diff(request: DiffRequest) -> Any:
    """Compare two agent traces and return a DiffResult with fork point."""
    import asyncio

    def _compute() -> Any:
        # Temp-file I/O, embedding and alignment are all blocking calls, so
        # they are grouped here and executed off the event loop.
        trace_a = _load_trace_from_string(request.trace_a, "trace_a")
        trace_b = _load_trace_from_string(request.trace_b, "trace_b")
        result = diff_traces(
            trace_a,
            trace_b,
            fork_threshold=request.fork_threshold,
            match_threshold=request.match_threshold,
        )
        # Round-trip through the report serializer so the response body is
        # plain JSON-compatible data.
        return json.loads(to_json(result))

    # Running the blocking work in a worker thread keeps other requests
    # (including /health) responsive while a diff is being computed.
    # Exceptions raised inside the worker (e.g. the 422 HTTPException from
    # trace loading) propagate through the await unchanged.
    return await asyncio.to_thread(_compute)
101
+
102
+
103
@app.post("/inspect")
async def inspect(request: InspectRequest) -> Any:
    """Summarise a single agent trace."""
    trace = _load_trace_from_string(request.trace, "trace")
    nodes = trace.nodes

    # One pass builds both the per-step listing and the per-type tally.
    steps = []
    node_type_counts: dict[str, int] = {}
    for node in nodes:
        kind = node.node_type.value
        steps.append(
            {
                "step": node.step,
                "type": kind,
                # Only the first 120 characters of the content are returned.
                "content_preview": node.content[:120],
                "id": node.id,
            }
        )
        node_type_counts[kind] = node_type_counts.get(kind, 0) + 1

    has_tool_calls = any(n.node_type == NodeType.TOOL_CALL for n in nodes)

    return {
        "run_id": trace.run_id,
        "total_nodes": len(nodes),
        "total_edges": len(trace.edges),
        "node_type_counts": node_type_counts,
        "has_tool_calls": has_tool_calls,
        "steps": steps,
        "metadata": trace.metadata,
    }
agentdelta/cli.py ADDED
@@ -0,0 +1,138 @@
1
+ """Command-line interface for agentdelta."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from agentdelta.diff import diff_traces
11
+ from agentdelta.report import print_diff, to_json, to_markdown
12
+ from agentdelta.trace import AgentTrace
13
+
14
+
15
# Root command group. Subcommands register themselves with ``@main.command()``;
# the docstring below doubles as the top-level ``--help`` text.
@click.group()
@click.version_option(package_name="agentdelta")
def main() -> None:
    """Semantic diff engine for AI agent behavior traces."""
19
+
20
+
21
@main.command()
@click.argument("trace_a", type=click.Path(exists=True, path_type=Path))
@click.argument("trace_b", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["rich", "json", "markdown"]),
    default="rich",
    show_default=True,
    help="Output format.",
)
@click.option(
    "--fork-threshold",
    # Restricted to [0, 1], matching the bounds the REST API enforces on the
    # same parameter; out-of-range values are rejected with a usage error.
    type=click.FloatRange(min=0.0, max=1.0),
    default=0.70,
    show_default=True,
    help="Similarity below this marks a fork point.",
)
@click.option(
    "--match-threshold",
    type=click.FloatRange(min=0.0, max=1.0),
    default=0.85,
    show_default=True,
    help="Similarity above this is a match.",
)
@click.option(
    "--show-matches",
    is_flag=True,
    default=False,
    help="Include matched (unchanged) steps in output.",
)
@click.option(
    "--exit-code",
    is_flag=True,
    default=False,
    help="Exit with code 1 if a regression is detected (useful in CI).",
)
def diff(
    trace_a: Path,
    trace_b: Path,
    fmt: str,
    fork_threshold: float,
    match_threshold: float,
    show_matches: bool,
    exit_code: bool,
) -> None:
    """Diff two agent trace files and report behavioral divergence.

    TRACE_A is the baseline run; TRACE_B is the candidate run.
    """
    # NOTE: the docstring above is the command's --help text; keep it user-facing.
    run_a = AgentTrace.load(trace_a)
    run_b = AgentTrace.load(trace_b)

    result = diff_traces(
        run_a,
        run_b,
        fork_threshold=fork_threshold,
        match_threshold=match_threshold,
    )

    # ``fmt`` is limited to these three values by click.Choice above.
    if fmt == "rich":
        print_diff(result, show_matches=show_matches)
    elif fmt == "json":
        click.echo(to_json(result))
    elif fmt == "markdown":
        click.echo(to_markdown(result))

    # Non-zero exit is opt-in so that plain invocations never fail a shell pipeline.
    if exit_code and result.has_regression:
        sys.exit(1)
90
+
91
+
92
@main.command()
@click.argument("trace_file", type=click.Path(path_type=Path))
@click.option("--run-id", default=None, help="Explicit run identifier.")
def inspect(trace_file: Path, run_id: str | None) -> None:
    """Print a summary of a single trace file."""
    # NOTE: the docstring above is the command's --help text.

    from rich.console import Console
    from rich.markup import escape
    from rich.table import Table

    from agentdelta.trace import NodeType

    _NODE_ICONS = {
        NodeType.START: "▶",
        NodeType.LLM: "🧠",
        NodeType.TOOL_CALL: "🔧",
        NodeType.TOOL_RETURN: "↩",
        NodeType.END: "■",
    }

    console = Console()

    if not trace_file.exists():
        raise click.ClickException(f"File not found: {trace_file}")

    trace = AgentTrace.load(trace_file)

    # --run-id was previously accepted but silently ignored. This command only
    # displays the trace, so an explicit identifier overrides the displayed one.
    shown_run_id = run_id if run_id is not None else trace.run_id

    # Trace-derived text is escaped so that square brackets inside it are
    # printed literally instead of being parsed as rich console markup.
    console.print(f"\n[bold]Trace:[/bold] [dim]{escape(str(shown_run_id))}[/dim] "
                  f"[dim]{len(trace.nodes)} nodes / {len(trace.edges)} edges[/dim]\n")

    table = Table(show_header=True, header_style="bold", box=None, padding=(0, 1))
    table.add_column("Step", style="dim", width=5)
    table.add_column("Type", width=14)
    table.add_column("Content")

    for node in trace.nodes:
        icon = _NODE_ICONS.get(node.node_type, "?")
        # Show at most 100 characters on a single line; mark truncation with an ellipsis.
        content = node.content[:100].replace("\n", " ")
        if len(node.content) > 100:
            content += "…"
        table.add_row(str(node.step), f"{icon} {node.node_type.value}", escape(content))

    console.print(table)
    console.print()
135
+
136
+
137
+ if __name__ == "__main__":
138
+ main()
agentdelta/diff.py ADDED
@@ -0,0 +1,191 @@
1
+ """Core diff algorithm: align two agent traces and find the first semantic fork."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ from agentdelta.embed import align_traces, embed_trace
9
+ from agentdelta.trace import AgentTrace, NodeType, TraceNode
10
+
11
+
12
@dataclass
class ForkPoint:
    """Record of the first aligned step pair that was classified as divergent.

    Attributes:
        step_a: Step number of the divergent node in trace A.
        step_b: Step number of the divergent node in trace B.
        node_a: The divergent node taken from trace A.
        node_b: The divergent node taken from trace B.
        similarity: Similarity score of the two nodes. As built by
            ``diff_traces`` in this module, the pair was aligned (score at or
            above ``fork_threshold``) but scored below ``match_threshold``.
        description: Human-readable explanation of the divergence.
    """

    step_a: int
    step_b: int
    node_a: TraceNode
    node_b: TraceNode
    similarity: float
    description: str

    def is_tool_change(self) -> bool:
        """Return True when both nodes are tool calls or tool returns."""
        tool_kinds = (NodeType.TOOL_CALL, NodeType.TOOL_RETURN)
        if self.node_a.node_type not in tool_kinds:
            return False
        return self.node_b.node_type in tool_kinds

    def is_reasoning_change(self) -> bool:
        """Return True when both nodes are LLM steps."""
        if self.node_a.node_type != NodeType.LLM:
            return False
        return self.node_b.node_type == NodeType.LLM
45
+
46
+
47
@dataclass
class StepDiff:
    """One entry of the aligned diff: a node pair plus its classification.

    Attributes:
        step_a: Node from trace A; ``None`` when the entry exists only in B.
        step_b: Node from trace B; ``None`` when the entry exists only in A.
        similarity: Similarity score of the pair (``0.0`` for added/removed entries).
        status: ``"match"``, ``"changed"``, ``"added"`` or ``"removed"``.
        summary: Optional one-line, human-readable description (empty by default).
    """

    step_a: TraceNode | None
    step_b: TraceNode | None
    similarity: float
    status: str
    summary: str = field(default="")
64
+
65
+
66
@dataclass
class DiffResult:
    """Full diff result between two agent traces.

    Attributes:
        run_id_a: Run identifier of the baseline trace.
        run_id_b: Run identifier of the candidate trace.
        steps: Every diff entry produced by the alignment.
        fork_point: The first step pair classified as ``"changed"``, or ``None``.
        summary: Pre-computed aggregate statistics (total, matched, changed, etc.).
    """

    run_id_a: str
    run_id_b: str
    steps: list[StepDiff] = field(default_factory=list)
    fork_point: ForkPoint | None = None
    summary: dict[str, Any] = field(default_factory=dict)

    @property
    def has_regression(self) -> bool:
        """True if a fork point was found, or if no step at all matched.

        This mirrors the ``"has_regression"`` entry that ``diff_traces`` stores
        in ``summary``. Previously the property looked at ``fork_point`` only,
        so two traces with nothing in common (all steps added/removed) were
        flagged in the summary but not by this property.
        """
        if self.fork_point is not None:
            return True
        # Non-empty diff without a single matched step: total divergence.
        return bool(self.steps) and not any(s.status == "match" for s in self.steps)

    @property
    def changed_steps(self) -> list[StepDiff]:
        """Steps where both traces have a node but they diverged semantically."""
        return [s for s in self.steps if s.status == "changed"]

    @property
    def added_steps(self) -> list[StepDiff]:
        """Steps present only in trace B (inserted relative to baseline)."""
        return [s for s in self.steps if s.status == "added"]

    @property
    def removed_steps(self) -> list[StepDiff]:
        """Steps present only in trace A (removed relative to baseline)."""
        return [s for s in self.steps if s.status == "removed"]
103
+
104
+
105
def _describe_fork(na: TraceNode, nb: TraceNode, similarity: float) -> str:
    """Build a one-line, human-readable explanation for a divergent node pair.

    Args:
        na: Node from trace A.
        nb: Node from trace B.
        similarity: Similarity score of the pair, shown with two decimals.

    Returns:
        A description that depends on the node types of the pair.
    """
    type_a, type_b = na.node_type, nb.node_type

    if type_a == NodeType.TOOL_CALL and type_b == NodeType.TOOL_CALL:
        # The text before the first "(" is treated as the tool name.
        tool_a = na.content.split("(")[0].strip()
        tool_b = nb.content.split("(")[0].strip()
        if tool_a != tool_b:
            return f"Tool selection changed: '{tool_a}' → '{tool_b}'"
        return f"Tool call arguments diverged (similarity: {similarity:.2f})"

    if type_a == NodeType.LLM and type_b == NodeType.LLM:
        return f"Reasoning path diverged (similarity: {similarity:.2f})"

    if type_a == type_b:
        # Same node type on both sides: previously this fell through and was
        # reported as "Step type changed: x → x", which is misleading.
        return f"Step content diverged: {type_a.value} (similarity: {similarity:.2f})"

    return (
        f"Step type changed: {type_a.value} → {type_b.value} "
        f"(similarity: {similarity:.2f})"
    )
118
+
119
+
120
def diff_traces(
    trace_a: AgentTrace,
    trace_b: AgentTrace,
    fork_threshold: float = 0.70,
    match_threshold: float = 0.85,
) -> DiffResult:
    """
    Compute a semantic diff between two agent traces.

    Both traces are embedded in place, aligned, and every aligned entry is
    classified as ``"match"``, ``"changed"``, ``"added"`` or ``"removed"``.

    Args:
        trace_a: Baseline trace.
        trace_b: Comparison trace.
        fork_threshold: Passed to the aligner as its pairing threshold.
        match_threshold: Paired nodes scoring at or above this are a match;
            paired nodes scoring below it are ``"changed"``.

    Returns:
        DiffResult with the classified steps, the first ``"changed"`` pair as
        fork point (if any), and aggregate statistics in ``summary``.
    """
    # Embeddings are (re)computed for both traces before aligning.
    embed_trace(trace_a)
    embed_trace(trace_b)

    alignment = align_traces(trace_a, trace_b, threshold=fork_threshold)

    entries: list[StepDiff] = []
    first_fork: ForkPoint | None = None

    for node_a, node_b, score in alignment:
        if node_a is None:
            # Present only in trace B.
            entries.append(
                StepDiff(None, node_b, 0.0, "added",
                         f"+ [{node_b.node_type.value}] {node_b.content[:80]}")
            )
            continue
        if node_b is None:
            # Present only in trace A.
            entries.append(
                StepDiff(node_a, None, 0.0, "removed",
                         f"- [{node_a.node_type.value}] {node_a.content[:80]}")
            )
            continue
        if score >= match_threshold:
            entries.append(StepDiff(node_a, node_b, score, "match"))
            continue

        explanation = _describe_fork(node_a, node_b, score)
        entries.append(StepDiff(node_a, node_b, score, "changed", explanation))
        # Only the first changed pair is kept as the fork point.
        if first_fork is None:
            first_fork = ForkPoint(
                step_a=node_a.step,
                step_b=node_b.step,
                node_a=node_a,
                node_b=node_b,
                similarity=score,
                description=explanation,
            )

    # Tally the entries per status in a single pass.
    tally = {"match": 0, "changed": 0, "added": 0, "removed": 0}
    for entry in entries:
        tally[entry.status] += 1

    total = len(alignment)
    matched = tally["match"]

    if total:
        similarity_pct = round(matched / total * 100, 1)
    else:
        similarity_pct = 100.0

    return DiffResult(
        run_id_a=trace_a.run_id,
        run_id_b=trace_b.run_id,
        steps=entries,
        fork_point=first_fork,
        summary={
            "total_steps": total,
            "matched": matched,
            "changed": tally["changed"],
            "added": tally["added"],
            "removed": tally["removed"],
            "similarity_pct": similarity_pct,
            "has_regression": first_fork is not None or (total > 0 and matched == 0),
            "fork_step": first_fork.step_a if first_fork else None,
        },
    )
agentdelta/embed.py ADDED
@@ -0,0 +1,112 @@
1
+ """Embedding and semantic alignment for trace nodes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+
7
+ import numpy as np
8
+
9
+ from agentdelta.trace import AgentTrace, TraceNode
10
+
11
+ _lock = threading.Lock()
12
+ _model = None
13
+
14
+
15
def _get_model():
    """Return the shared sentence-transformer instance, creating it on first use.

    Creation happens under the module lock, and the instance is re-checked
    inside the lock so it is only constructed once.
    """
    global _model
    # Fast path: already initialised, no lock needed.
    if _model is not None:
        return _model
    with _lock:
        # Another thread may have finished initialisation while we waited.
        if _model is None:
            from sentence_transformers import SentenceTransformer
            _model = SentenceTransformer("all-MiniLM-L6-v2")
    return _model
24
+
25
+
26
def embed_trace(trace: AgentTrace, batch_size: int = 64) -> AgentTrace:
    """Compute embeddings for all nodes in a trace (in-place) and return the trace.

    Args:
        trace: Trace whose nodes receive an ``embedding`` (a list of floats).
        batch_size: Batch size forwarded to the model's ``encode`` call.

    Returns:
        The same trace object that was passed in.
    """
    contents = [node.content for node in trace.nodes]
    if not contents:
        # Nothing to encode: return before touching the model, so an empty
        # trace never triggers the (expensive) first-time model load.
        return trace
    model = _get_model()
    embeddings = model.encode(contents, batch_size=batch_size, show_progress_bar=False)
    for node, emb in zip(trace.nodes, embeddings, strict=False):
        node.embedding = emb.tolist()
    return trace
36
+
37
+
38
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Compute the cosine similarity of two vectors; ``0.0`` if either has zero norm."""
    vec_a = np.array(a)
    vec_b = np.array(b)
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    # Guard against division by zero when one of the vectors is all zeros.
    return 0.0 if norm_product == 0 else float(np.dot(vec_a, vec_b) / norm_product)
45
+
46
+
47
def find_best_match(
    node: TraceNode,
    candidates: list[TraceNode],
    threshold: float = 0.75,
) -> tuple[TraceNode | None, float]:
    """Pick the candidate whose embedding is most similar to that of *node*.

    Returns a ``(candidate, score)`` pair. The candidate is ``None`` when the
    best score is below *threshold*; the score is still reported in that case.
    When *node* has no embedding or *candidates* is empty, ``(None, 0.0)`` is
    returned. Candidates without an embedding are skipped.
    """
    if not candidates or node.embedding is None:
        return None, 0.0

    top_candidate = None
    top_score = -1.0
    for other in candidates:
        if other.embedding is not None:
            similarity = cosine_similarity(node.embedding, other.embedding)
            # Strict comparison: on ties the earliest candidate wins.
            if similarity > top_score:
                top_candidate, top_score = other, similarity

    return (None if top_score < threshold else top_candidate), top_score
72
+
73
+
74
def align_traces(
    trace_a: AgentTrace,
    trace_b: AgentTrace,
    window: int = 5,
    threshold: float = 0.75,
) -> list[tuple[TraceNode | None, TraceNode | None, float]]:
    """Greedily pair the nodes of two traces by semantic similarity.

    Nodes of *trace_a* are visited in order. Each one is offered the not yet
    paired nodes of *trace_b* whose position lies within ±*window* of its own
    position, and takes the best match that reaches *threshold*.

    Returns:
        ``(node_a, node_b, similarity)`` triples: first one per node of
        *trace_a* (``(node, None, 0.0)`` when unpaired), followed by
        ``(None, node, 0.0)`` for every node of *trace_b* left unpaired.
    """
    nodes_a, nodes_b = trace_a.nodes, trace_b.nodes
    total_b = len(nodes_b)

    # Maps object identity of each B node back to its position in the trace.
    position_of: dict[int, int] = {id(node): pos for pos, node in enumerate(nodes_b)}
    claimed: set[int] = set()
    pairs: list[tuple[TraceNode | None, TraceNode | None, float]] = []

    for pos_a, node_a in enumerate(nodes_a):
        lo = max(0, pos_a - window)
        hi = min(total_b, pos_a + window + 1)
        pool = [nodes_b[k] for k in range(lo, hi) if k not in claimed]

        partner, similarity = find_best_match(node_a, pool, threshold)
        if partner is None:
            pairs.append((node_a, None, 0.0))
            continue
        claimed.add(position_of[id(partner)])
        pairs.append((node_a, partner, similarity))

    # Whatever remains unclaimed in B is appended at the end.
    pairs.extend(
        (None, node_b, 0.0) for pos_b, node_b in enumerate(nodes_b) if pos_b not in claimed
    )
    return pairs