PyPI - agent-forensics - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agent-forensics 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

agent_forensics/__init__.py +38 -0
agent_forensics/core.py +183 -0
agent_forensics/dashboard.py +267 -0
agent_forensics/integrations/__init__.py +0 -0
agent_forensics/integrations/crewai.py +108 -0
agent_forensics/integrations/langchain.py +162 -0
agent_forensics/integrations/openai_agents.py +180 -0
agent_forensics/report.py +423 -0
agent_forensics/store.py +126 -0
agent_forensics-0.1.0.dist-info/METADATA +186 -0
agent_forensics-0.1.0.dist-info/RECORD +14 -0
agent_forensics-0.1.0.dist-info/WHEEL +5 -0
agent_forensics-0.1.0.dist-info/licenses/LICENSE +21 -0
agent_forensics-0.1.0.dist-info/top_level.txt +1 -0

agent_forensics/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Agent Forensics — Black box for AI agents.
+Usage:
+    from agent_forensics import Forensics
+    # 1. Initialize
+    f = Forensics(session="order-123", agent="shopping-agent")
+    # 2a. Manual recording (framework-agnostic)
+    f.decision("search_products", input={"query": "mouse"}, reasoning="User request")
+    f.tool_call("search_api", input={"q": "mouse"}, output={"results": [...]})
+    f.error("purchase_failed", output={"reason": "Out of stock"})
+    # 2b. LangChain auto-recording
+    agent.invoke(..., config={"callbacks": [f.langchain()]})
+    # 2c. OpenAI Agents SDK auto-recording
+    agent = Agent(name="...", hooks=f.openai_agents())
+    # 2d. CrewAI auto-recording
+    hooks = f.crewai()
+    agent = Agent(role="...", step_callback=hooks.step_callback)
+    # 3. Report
+    print(f.report())       # Markdown
+    f.save_markdown()       # forensics-report-order-123.md
+    f.save_pdf()            # forensics-report-order-123.pdf
+    # 4. Dashboard
+    f.dashboard(port=8080)  # http://localhost:8080
+"""
+from .core import Forensics
+from .store import Event, EventStore
+__version__ = "0.1.0"  # Package version string; matches the 0.1.0 wheel this file ships in.
+__all__ = ["Forensics", "Event", "EventStore"]  # Names exported by `from agent_forensics import *`.

agent_forensics/core.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""
+Forensics — Main interface for AI agent forensics.
+Provides all functionality through a single class.
+Framework-agnostic, with integrations for LangChain/CrewAI/OpenAI and more.
+"""
+from .store import EventStore, Event, now
+from .report import generate_report, save_report, save_pdf
+class Forensics:
+    """AI Agent Forensics — Black box + report generator."""
+    def __init__(
+        self,
+        session: str = "default",
+        agent: str = "default-agent",
+        db_path: str = "forensics.db",
+    ):
+        self.session = session
+        self.agent = agent
+        self.store = EventStore(db_path)
+    # -- Manual Recording API --
+    def decision(self, action: str, *, input: dict = None, reasoning: str = "") -> str:
+        """Record when the agent makes a decision."""
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type="decision",
+            agent_id=self.agent,
+            action=action,
+            input_data=input or {},
+            output_data={},
+            reasoning=reasoning,
+            session_id=self.session,
+        ))
+    def tool_call(self, action: str, *, input: dict = None, output: dict = None, reasoning: str = "") -> str:
+        """Record a tool call."""
+        # start
+        self.store.save(Event(
+            timestamp=now(),
+            event_type="tool_call_start",
+            agent_id=self.agent,
+            action=f"tool:{action}",
+            input_data=input or {},
+            output_data={},
+            reasoning=reasoning or f"Calling tool: {action}",
+            session_id=self.session,
+        ))
+        # end
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type="tool_call_end",
+            agent_id=self.agent,
+            action="tool_result",
+            input_data={},
+            output_data=output or {},
+            reasoning="Tool execution completed",
+            session_id=self.session,
+        ))
+    def llm_call(self, *, input: dict = None, output: str = "", reasoning: str = "") -> str:
+        """Record an LLM call."""
+        self.store.save(Event(
+            timestamp=now(),
+            event_type="llm_call_start",
+            agent_id=self.agent,
+            action="llm_call",
+            input_data=input or {},
+            output_data={},
+            reasoning=reasoning or "LLM call",
+            session_id=self.session,
+        ))
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type="llm_call_end",
+            agent_id=self.agent,
+            action="llm_response",
+            input_data={},
+            output_data={"response": output},
+            reasoning="LLM response",
+            session_id=self.session,
+        ))
+    def error(self, action: str, *, output: dict = None, reasoning: str = "") -> str:
+        """Record an error/incident."""
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type="error",
+            agent_id=self.agent,
+            action=action,
+            input_data={},
+            output_data=output or {},
+            reasoning=reasoning or f"Error occurred: {action}",
+            session_id=self.session,
+        ))
+    def finish(self, output: str = "", *, reasoning: str = "") -> str:
+        """Record the agent's final result."""
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type="final_decision",
+            agent_id=self.agent,
+            action="agent_finish",
+            input_data={},
+            output_data={"response": output},
+            reasoning=reasoning or "Agent determined final answer",
+            session_id=self.session,
+        ))
+    def record(self, event_type: str, action: str, *, input: dict = None, output: dict = None, reasoning: str = "") -> str:
+        """Record a generic event."""
+        return self.store.save(Event(
+            timestamp=now(),
+            event_type=event_type,
+            agent_id=self.agent,
+            action=action,
+            input_data=input or {},
+            output_data=output or {},
+            reasoning=reasoning,
+            session_id=self.session,
+        ))
+    # -- Report API --
+    def report(self) -> str:
+        """Return the Markdown forensics report as a string."""
+        return generate_report(self.store, self.session)
+    def save_markdown(self, path: str = None) -> str:
+        """Save the Markdown report to a file."""
+        return save_report(self.store, self.session, output_dir=path or ".")
+    def save_pdf(self, path: str = None) -> str:
+        """Save the PDF report to a file."""
+        return save_pdf(self.store, self.session, output_dir=path or ".")
+    def events(self) -> list[Event]:
+        """Return all events for the current session."""
+        return self.store.get_session_events(self.session)
+    def sessions(self) -> list[str]:
+        """Return a list of all sessions."""
+        return self.store.get_all_sessions()
+    # -- Framework Integrations --
+    def langchain(self):
+        """Return a LangChain callback handler. agent.invoke(..., config={"callbacks": [f.langchain()]})"""
+        from .integrations.langchain import ForensicsCollector
+        return ForensicsCollector(
+            store=self.store,
+            session_id=self.session,
+            agent_id=self.agent,
+        )
+    def openai_agents(self):
+        """Return OpenAI Agents SDK hooks. Agent(hooks=f.openai_agents())"""
+        from .integrations.openai_agents import ForensicsAgentHooks
+        return ForensicsAgentHooks(
+            store=self.store,
+            session_id=self.session,
+            agent_id=self.agent,
+        )
+    def crewai(self):
+        """Return CrewAI callback collection. Agent(step_callback=hooks.step_callback)"""
+        from .integrations.crewai import ForensicsCrewAIHooks
+        return ForensicsCrewAIHooks(
+            store=self.store,
+            session_id=self.session,
+            agent_id=self.agent,
+        )
+    # -- Dashboard --
+    def dashboard(self, port: int = 8080):
+        """Launch the web dashboard."""
+        from .dashboard import run_dashboard
+        run_dashboard(self.store, port=port)

agent_forensics/dashboard.py ADDED Viewed

@@ -0,0 +1,267 @@
+"""
+Forensics Dashboard — Visually inspect agent forensics data in the browser.
+Runs at http://localhost:8080.
+Uses only Python's built-in http.server with no external dependencies.
+"""
+import json
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from urllib.parse import urlparse, parse_qs
+from .store import EventStore
+STORE = None  # Module-level EventStore read by the page builder and request handler; assigned by run_dashboard() before serving.
+def get_dashboard_html():
+    """Main dashboard HTML."""
+    sessions = STORE.get_all_sessions()
+    return f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<title>Agent Forensics Dashboard</title>
+<style>
+* {{ margin: 0; padding: 0; box-sizing: border-box; }}
+body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #0a0a0a; color: #e0e0e0; }}
+.header {{ background: #111; border-bottom: 1px solid #333; padding: 20px 40px; display: flex; justify-content: space-between; align-items: center; }}
+.header h1 {{ font-size: 20px; color: #fff; }}
+.header .badge {{ background: #1a73e8; color: #fff; padding: 4px 12px; border-radius: 12px; font-size: 12px; }}
+.container {{ max-width: 1200px; margin: 0 auto; padding: 30px 40px; }}
+.session-list {{ display: flex; gap: 12px; margin-bottom: 30px; flex-wrap: wrap; }}
+.session-btn {{ padding: 10px 20px; border: 1px solid #333; background: #1a1a1a; color: #ccc; border-radius: 8px; cursor: pointer; font-size: 14px; transition: all 0.2s; }}
+.session-btn:hover {{ border-color: #1a73e8; color: #fff; }}
+.session-btn.active {{ background: #1a73e8; border-color: #1a73e8; color: #fff; }}
+.session-btn.incident {{ border-color: #d93025; }}
+.session-btn.incident.active {{ background: #d93025; }}
+.summary {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 30px; }}
+.stat {{ background: #1a1a1a; border: 1px solid #333; border-radius: 10px; padding: 20px; }}
+.stat .label {{ font-size: 12px; color: #888; text-transform: uppercase; letter-spacing: 1px; }}
+.stat .value {{ font-size: 28px; font-weight: 700; margin-top: 8px; color: #fff; }}
+.stat.error .value {{ color: #d93025; }}
+.stat.ok .value {{ color: #34a853; }}
+.section {{ margin-bottom: 30px; }}
+.section h2 {{ font-size: 16px; color: #888; margin-bottom: 16px; text-transform: uppercase; letter-spacing: 1px; }}
+.timeline {{ position: relative; }}
+.event {{ display: flex; gap: 16px; margin-bottom: 2px; padding: 12px 16px; background: #1a1a1a; border-left: 3px solid #333; transition: background 0.2s; }}
+.event:hover {{ background: #222; }}
+.event.decision {{ border-left-color: #1a73e8; }}
+.event.error {{ border-left-color: #d93025; background: #1a0a0a; }}
+.event.tool_call_start {{ border-left-color: #f9ab00; }}
+.event.tool_call_end {{ border-left-color: #34a853; }}
+.event.final_decision {{ border-left-color: #a142f4; }}
+.event.llm_call_start {{ border-left-color: #555; }}
+.event.llm_call_end {{ border-left-color: #555; }}
+.event .time {{ font-size: 11px; color: #666; font-family: monospace; min-width: 90px; }}
+.event .type {{ font-size: 11px; font-weight: 600; min-width: 100px; padding: 2px 8px; border-radius: 4px; text-align: center; }}
+.type-decision {{ background: #1a3a5c; color: #5b9bd5; }}
+.type-error {{ background: #3a1a1a; color: #e06666; }}
+.type-tool_call_start {{ background: #3a3010; color: #f9ab00; }}
+.type-tool_call_end {{ background: #1a3a1a; color: #6aa84f; }}
+.type-final_decision {{ background: #2a1a3a; color: #b48fe0; }}
+.type-llm {{ background: #2a2a2a; color: #888; }}
+.event .detail {{ font-size: 13px; flex: 1; }}
+.event .detail .action {{ font-weight: 600; color: #ccc; }}
+.event .detail .reasoning {{ color: #888; margin-top: 4px; font-size: 12px; }}
+.causal {{ background: #111; border: 1px solid #333; border-radius: 10px; padding: 24px; font-family: monospace; font-size: 13px; line-height: 1.8; overflow-x: auto; }}
+.causal .node {{ margin: 4px 0; }}
+.causal .decision-node {{ color: #5b9bd5; font-weight: bold; }}
+.causal .tool-node {{ color: #f9ab00; padding-left: 24px; }}
+.causal .result-ok {{ color: #34a853; padding-left: 48px; }}
+.causal .result-error {{ color: #d93025; padding-left: 48px; font-weight: bold; }}
+.causal .final-node {{ color: #b48fe0; font-weight: bold; margin-top: 8px; }}
+.causal .error-node {{ color: #d93025; padding-left: 24px; font-weight: bold; }}
+#session-content {{ min-height: 400px; }}
+.compliance {{ background: #1a1a1a; border: 1px solid #333; border-radius: 10px; padding: 20px; font-size: 13px; color: #888; }}
+.compliance strong {{ color: #ccc; }}
+</style>
+</head>
+<body>
+<div class="header">
+    <h1>Agent Forensics Dashboard</h1>
+    <span class="badge">PoC v0.1</span>
+</div>
+<div class="container">
+    <div class="session-list" id="session-list">
+        {''.join(f'<button class="session-btn" onclick="loadSession(this, \'{s}\')">{s}</button>' for s in sessions)}
+    </div>
+    <div id="session-content">
+        <p style="color:#666; text-align:center; padding:60px;">Select a session</p>
+    </div>
+</div>
+<script>
+async function loadSession(btn, sessionId) {{
+    document.querySelectorAll('.session-btn').forEach(b => b.classList.remove('active'));
+    btn.classList.add('active');
+    const res = await fetch('/api/session?id=' + sessionId);
+    const data = await res.json();
+    renderSession(data);
+}}
+function renderSession(data) {{
+    const events = data.events;
+    const decisions = events.filter(e => e.event_type === 'decision');
+    const errors = events.filter(e => e.event_type === 'error');
+    const hasIncident = errors.length > 0 || events.some(e =>
+        JSON.stringify(e.output_data).toLowerCase().includes('error') ||
+        JSON.stringify(e.output_data).toLowerCase().includes('fail')
+    );
+    let html = '';
+    // Summary
+    html += '<div class="summary">';
+    html += `<div class="stat"><div class="label">Total Events</div><div class="value">${{events.length}}</div></div>`;
+    html += `<div class="stat"><div class="label">Decisions</div><div class="value">${{decisions.length}}</div></div>`;
+    html += `<div class="stat ${{errors.length > 0 ? 'error' : 'ok'}}"><div class="label">Errors</div><div class="value">${{errors.length}}</div></div>`;
+    html += `<div class="stat ${{hasIncident ? 'error' : 'ok'}}"><div class="label">Status</div><div class="value">${{hasIncident ? 'INCIDENT' : 'OK'}}</div></div>`;
+    html += '</div>';
+    // Timeline
+    html += '<div class="section"><h2>Timeline</h2><div class="timeline">';
+    events.forEach((e, i) => {{
+        const time = e.timestamp.split('T')[1].split('+')[0].substring(0, 12);
+        const typeClass = e.event_type.startsWith('llm') ? 'llm' : e.event_type;
+        const typeLabel = {{
+            'llm_call_start': 'LLM REQ',
+            'llm_call_end': 'LLM RES',
+            'tool_call_start': 'TOOL REQ',
+            'tool_call_end': 'TOOL RES',
+            'decision': 'DECISION',
+            'final_decision': 'FINAL',
+            'error': 'ERROR'
+        }}[e.event_type] || e.event_type;
+        const detail = extractDetail(e);
+        html += `<div class="event ${{e.event_type}}">`;
+        html += `<span class="time">${{time}}</span>`;
+        html += `<span class="type type-${{typeClass}}">${{typeLabel}}</span>`;
+        html += `<div class="detail"><div class="action">${{e.action}}</div><div class="reasoning">${{detail}}</div></div>`;
+        html += '</div>';
+    }});
+    html += '</div></div>';
+    // Causal Chain
+    if (hasIncident) {{
+        html += '<div class="section"><h2>Causal Chain (Root Cause Analysis)</h2><div class="causal">';
+        events.forEach(e => {{
+            if (e.event_type === 'decision') {{
+                html += `<div class="node decision-node">[DECISION] ${{e.action}}</div>`;
+                html += `<div class="node" style="padding-left:24px;color:#666">${{truncate(e.reasoning, 150)}}</div>`;
+            }} else if (e.event_type === 'tool_call_start') {{
+                html += `<div class="node tool-node">→ [TOOL] ${{e.action}}</div>`;
+            }} else if (e.event_type === 'tool_call_end') {{
+                const result = JSON.stringify(e.output_data);
+                const isErr = result.toLowerCase().includes('error') || result.toLowerCase().includes('fail');
+                html += `<div class="node ${{isErr ? 'result-error' : 'result-ok'}}">${{isErr ? '✗' : '✓'}} ${{truncate(result, 120)}}</div>`;
+            }} else if (e.event_type === 'error') {{
+                html += `<div class="node error-node">✗ ERROR: ${{truncate(JSON.stringify(e.output_data), 150)}}</div>`;
+            }} else if (e.event_type === 'final_decision') {{
+                html += `<div class="node final-node">[FINAL] ${{truncate(e.output_data.response || JSON.stringify(e.output_data), 150)}}</div>`;
+            }}
+        }});
+        html += '</div></div>';
+    }}
+    // Compliance
+    html += '<div class="section"><h2>Compliance</h2><div class="compliance">';
+    html += '<p><strong>EU AI Act Article 14</strong> — Human Oversight requirement supported.</p>';
+    html += `<p>All ${{decisions.length}} decision points recorded. ${{errors.length}} errors captured.</p>`;
+    html += '</div></div>';
+    document.getElementById('session-content').innerHTML = html;
+    // Update button style
+    document.querySelectorAll('.session-btn.active').forEach(btn => {{
+        if (hasIncident) btn.classList.add('incident');
+        else btn.classList.remove('incident');
+    }});
+}}
+function extractDetail(e) {{
+    if (e.event_type === 'llm_call_start') {{
+        const msgs = e.input_data.messages || [];
+        if (msgs.length > 0) {{
+            const last = msgs[msgs.length - 1];
+            return truncate(`[${{last.role}}] ${{last.content}}`, 150);
+        }}
+        return '';
+    }}
+    if (e.event_type === 'decision') return truncate(e.reasoning, 150);
+    if (e.event_type === 'final_decision') return truncate(e.output_data.response || e.reasoning, 150);
+    if (e.event_type === 'tool_call_start') return truncate(JSON.stringify(e.input_data), 150);
+    if (e.event_type === 'tool_call_end') return truncate(JSON.stringify(e.output_data), 150);
+    if (e.event_type === 'error') return truncate(JSON.stringify(e.output_data), 150);
+    return truncate(e.reasoning, 150);
+}}
+function truncate(text, max) {{
+    if (!text) return '';
+    return text.length > max ? text.substring(0, max) + '...' : text;
+}}
+</script>
+</body>
+</html>"""
+class DashboardHandler(BaseHTTPRequestHandler):
+    def do_GET(self):
+        parsed = urlparse(self.path)
+        if parsed.path == "/" or parsed.path == "":
+            self.send_response(200)
+            self.send_header("Content-Type", "text/html; charset=utf-8")
+            self.end_headers()
+            self.wfile.write(get_dashboard_html().encode("utf-8"))
+        elif parsed.path == "/api/session":
+            params = parse_qs(parsed.query)
+            session_id = params.get("id", [""])[0]
+            events = STORE.get_session_events(session_id)
+            event_dicts = []
+            for e in events:
+                event_dicts.append({
+                    "event_id": e.event_id,
+                    "timestamp": e.timestamp,
+                    "event_type": e.event_type,
+                    "agent_id": e.agent_id,
+                    "action": e.action,
+                    "input_data": e.input_data,
+                    "output_data": e.output_data,
+                    "reasoning": e.reasoning,
+                })
+            self.send_response(200)
+            self.send_header("Content-Type", "application/json; charset=utf-8")
+            self.end_headers()
+            self.wfile.write(json.dumps({"events": event_dicts}, ensure_ascii=False).encode("utf-8"))
+        else:
+            self.send_response(404)
+            self.end_headers()
+    def log_message(self, format, *args):
+        """Suppress log output."""
+        pass
+def run_dashboard(store: EventStore, port: int = 8080):
+    """Start the dashboard server."""
+    global STORE
+    STORE = store
+    server = HTTPServer(("0.0.0.0", port), DashboardHandler)
+    print(f"\n  Agent Forensics Dashboard")
+    print(f"  http://localhost:{port}")
+    print(f"  Press Ctrl+C to stop\n")
+    server.serve_forever()
+if __name__ == "__main__":
+    run_dashboard(EventStore("forensics.db"))

agent_forensics/integrations/__init__.py ADDED Viewed

File without changes

agent_forensics/integrations/crewai.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""
+CrewAI integration — Captures all actions via step_callback and task_callback.
+Usage:
+    from agent_forensics import Forensics
+    from crewai import Agent, Task, Crew
+    f = Forensics(session="order-123")
+    hooks = f.crewai()
+    agent = Agent(
+        role="shopper",
+        goal="...",
+        step_callback=hooks.step_callback,  # Capture every step
+    )
+    task = Task(
+        description="...",
+        agent=agent,
+        callback=hooks.task_callback,       # Capture on task completion
+    )
+    crew = Crew(
+        agents=[agent],
+        tasks=[task],
+        step_callback=hooks.step_callback,  # Also available at Crew level
+    )
+"""
+from ..store import EventStore, Event, now
+class ForensicsCrewAIHooks:
+    """Forensics callback collection for CrewAI."""
+    def __init__(self, store: EventStore, session_id: str, agent_id: str = "crewai-agent"):
+        self.store = store
+        self.session_id = session_id
+        self.agent_id = agent_id
+    def step_callback(self, step_output) -> None:
+        """
+        Called on every agent step.
+        step_output is an AgentAction, ToolResult, or other intermediate result.
+        """
+        output_str = str(step_output)[:1000]
+        # AgentAction case (tool call decision)
+        if hasattr(step_output, "tool") and hasattr(step_output, "tool_input"):
+            self.store.save(Event(
+                timestamp=now(),
+                event_type="decision",
+                agent_id=self.agent_id,
+                action=f"agent_decision:{step_output.tool}",
+                input_data={"tool_input": str(step_output.tool_input)[:500]},
+                output_data={},
+                reasoning=getattr(step_output, "log", "")[:500] or f"Agent decided to use tool {step_output.tool}",
+                session_id=self.session_id,
+            ))
+            return
+        # ToolResult case
+        if hasattr(step_output, "result"):
+            result = str(step_output.result)[:1000]
+            is_error = "error" in result.lower() or "fail" in result.lower()
+            self.store.save(Event(
+                timestamp=now(),
+                event_type="error" if is_error else "tool_call_end",
+                agent_id=self.agent_id,
+                action="tool_result",
+                input_data={},
+                output_data={"result": result},
+                reasoning="Tool execution failed" if is_error else "Tool execution completed",
+                session_id=self.session_id,
+            ))
+            return
+        # Other (LLM response, etc.)
+        self.store.save(Event(
+            timestamp=now(),
+            event_type="llm_call_end",
+            agent_id=self.agent_id,
+            action="step_output",
+            input_data={},
+            output_data={"output": output_str},
+            reasoning="Agent step completed",
+            session_id=self.session_id,
+        ))
+    def task_callback(self, task_output) -> None:
+        """
+        Called on task completion.
+        task_output is a TaskOutput object.
+        """
+        description = getattr(task_output, "description", "unknown task")
+        raw = getattr(task_output, "raw", str(task_output))
+        key = getattr(task_output, "key", "")
+        self.store.save(Event(
+            timestamp=now(),
+            event_type="final_decision",
+            agent_id=self.agent_id,
+            action=f"task_complete:{key}" if key else "task_complete",
+            input_data={"task_description": str(description)[:500]},
+            output_data={"result": str(raw)[:1000]},
+            reasoning=f"Task completed: {str(description)[:200]}",
+            session_id=self.session_id,
+        ))