PyPI - agentdebugx - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentdebugx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

agentdebug/__init__.py +65 -0
agentdebug/adapters/__init__.py +10 -0
agentdebug/adapters/base.py +22 -0
agentdebug/adapters/langgraph.py +261 -0
agentdebug/adapters/otel.py +151 -0
agentdebug/adapters/raw.py +134 -0
agentdebug/analyzers.py +152 -0
agentdebug/attribution.py +230 -0
agentdebug/cli.py +272 -0
agentdebug/events.py +114 -0
agentdebug/instrumentation.py +57 -0
agentdebug/judges.py +258 -0
agentdebug/llm.py +165 -0
agentdebug/models.py +169 -0
agentdebug/recorder.py +183 -0
agentdebug/recovery.py +113 -0
agentdebug/storage.py +167 -0
agentdebug/taxonomy.py +271 -0
agentdebug/ui/__init__.py +14 -0
agentdebug/ui/server.py +260 -0
agentdebugx-0.1.0.dist-info/METADATA +217 -0
agentdebugx-0.1.0.dist-info/RECORD +25 -0
agentdebugx-0.1.0.dist-info/WHEEL +4 -0
agentdebugx-0.1.0.dist-info/entry_points.txt +3 -0
agentdebugx-0.1.0.dist-info/licenses/LICENSE +21 -0

agentdebug/taxonomy.py ADDED Viewed

@@ -0,0 +1,271 @@
+"""Seed failure taxonomy for AgentDebugX.
+This module starts with research-grounded failure modes and is designed to be
+extended by generated, project-specific taxonomy nodes.
+"""
+from __future__ import annotations
+from typing import Dict, List, Optional
+from agentdebug.models import FailureMode
+def _mode(
+    mode_id: str,
+    name: str,
+    family: str,
+    description: str,
+    signals: List[str],
+    suggestions: List[str],
+    source: str,
+) -> FailureMode:
+    return FailureMode(
+        mode_id=mode_id,
+        name=name,
+        family=family,
+        description=description,
+        signals=signals,
+        suggestion_templates=suggestions,
+        source=source,
+    )
+# Registry of built-in failure modes, keyed by mode id.
+# Each key repeats the ``mode_id`` passed as the first ``_mode`` argument and
+# has the form ``<family>.<slug>``. Positional arguments follow ``_mode``:
+# mode_id, name, family, description, signals, suggestions, source.
+# Keep key and mode_id in sync when adding entries.
+SEED_FAILURE_MODES: Dict[str, FailureMode] = {
+    # --- family: memory ---
+    'memory.retrieval_failure': _mode(
+        'memory.retrieval_failure',
+        'Memory retrieval failure',
+        'memory',
+        'The agent failed to retrieve relevant prior state, observation, or task context.',
+        ['missing context', 'forgot', 'stale context', 'retrieval miss'],
+        [
+            'Persist the missing state as structured memory and attach it to the next planning step.',
+            'Add a retrieval quality check before acting on retrieved context.',
+        ],
+        'AgentDebug',
+    ),
+    'memory.hallucination': _mode(
+        'memory.hallucination',
+        'Memory hallucination',
+        'memory',
+        'The agent treats unobserved or invented state as remembered fact.',
+        ['invented memory', 'unsupported recall', 'false prior state'],
+        [
+            'Require memory reads to cite the source event or artifact before use.',
+            'Separate observed state from inferred state in the agent prompt.',
+        ],
+        'AgentDebug',
+    ),
+    # --- family: reflection ---
+    'reflection.progress_misjudge': _mode(
+        'reflection.progress_misjudge',
+        'Progress misjudge',
+        'reflection',
+        'The agent incorrectly judges whether the task is solved or whether progress was made.',
+        ['premature success', 'incorrect self evaluation', 'progress misjudge'],
+        [
+            'Add an external task verifier before termination.',
+            'Record explicit success criteria and compare the current state against each criterion.',
+        ],
+        'AgentDebug',
+    ),
+    'reflection.causal_misattribution': _mode(
+        'reflection.causal_misattribution',
+        'Causal misattribution',
+        'reflection',
+        'The agent explains failure using the wrong cause and then optimizes the wrong behavior.',
+        ['wrong root cause', 'misattribution', 'incorrect blame'],
+        [
+            'Use counterfactual replay or ablation to test whether the suspected step caused the failure.',
+            'Preserve evidence links for each causal claim in the reflection.',
+        ],
+        'AgentDebug',
+    ),
+    # --- family: planning ---
+    'planning.constraint_ignorance': _mode(
+        'planning.constraint_ignorance',
+        'Constraint ignorance',
+        'planning',
+        'The plan ignores task, environment, policy, schema, or safety constraints.',
+        ['constraint ignored', 'policy violation', 'cannot satisfy requirement'],
+        [
+            'Compile task and tool constraints into pre-action checks.',
+            'Fail closed when required constraints cannot be verified.',
+        ],
+        'AgentDebug / MAST',
+    ),
+    'planning.impossible_action': _mode(
+        'planning.impossible_action',
+        'Impossible action',
+        'planning',
+        'The agent plans an action that cannot be executed in the current environment.',
+        ['impossible action', 'unavailable operation', 'invalid transition'],
+        [
+            'Expose environment affordances as a machine-readable action space.',
+            'Ask the planner to replan from the last valid state after an impossible action is detected.',
+        ],
+        'AgentDebug',
+    ),
+    'planning.inefficient_plan': _mode(
+        'planning.inefficient_plan',
+        'Inefficient plan',
+        'planning',
+        'The agent loops, over-decomposes, or burns steps without increasing task completion probability.',
+        ['loop', 'step explosion', 'no progress', 'repeated action'],
+        [
+            'Add loop detection over tool calls and state deltas.',
+            'Trigger a replan when the same state-action pattern repeats.',
+        ],
+        'AgentDebug / MAST',
+    ),
+    # --- family: action ---
+    'action.wrong_tool': _mode(
+        'action.wrong_tool',
+        'Wrong tool selection',
+        'action',
+        'The agent selects a tool that does not match the task intent or current state.',
+        ['wrong tool', 'unknown tool', 'tool mismatch'],
+        [
+            'Add tool routing examples and negative examples for similar tools.',
+            'Use a tool-selection verifier before executing high-impact actions.',
+        ],
+        'AgentDebug / AgentRx',
+    ),
+    'action.invalid_action': _mode(
+        'action.invalid_action',
+        'Invalid action',
+        'action',
+        'The selected action is syntactically or semantically invalid for the environment.',
+        ['invalid action', 'bad command', 'action rejected'],
+        [
+            'Validate action schemas before execution and return actionable repair hints.',
+            'Record rejected actions to fine-tune tool descriptions and examples.',
+        ],
+        'AgentDebug',
+    ),
+    'action.format_error': _mode(
+        'action.format_error',
+        'Format error',
+        'action',
+        'The agent produced malformed JSON, arguments, commands, or protocol messages.',
+        ['json parse', 'schema validation', 'malformed', 'format error'],
+        [
+            'Enforce structured output with schema validation and retry using validation errors.',
+            'Prefer typed tool APIs over free-form command strings where possible.',
+        ],
+        'AgentDebug / AgentRx',
+    ),
+    'action.parameter_error': _mode(
+        'action.parameter_error',
+        'Parameter error',
+        'action',
+        'The agent called the right tool with missing, hallucinated, or invalid parameters.',
+        ['missing parameter', 'invalid parameter', 'hallucinated argument'],
+        [
+            'Validate parameters against tool schemas and ask for missing user/context fields.',
+            'Log parameter provenance so hallucinated arguments are visible in traces.',
+        ],
+        'AgentDebug / AgentRx',
+    ),
+    # --- family: system ---
+    'system.tool_execution_error': _mode(
+        'system.tool_execution_error',
+        'Tool execution error',
+        'system',
+        'A tool, API, browser, environment, or dependency failed during execution.',
+        ['timeout', 'exception', 'http error', 'tool failed', 'api error'],
+        [
+            'Capture tool stderr/status/latency and classify retryable versus non-retryable failures.',
+            'Add idempotency keys and rollback logic for side-effecting tools.',
+        ],
+        'AgentDebug / AgentRx',
+    ),
+    'system.llm_limit': _mode(
+        'system.llm_limit',
+        'LLM limit',
+        'system',
+        'The model hit context, rate, content, or capability limits.',
+        ['context length', 'rate limit', 'token limit', 'model refusal'],
+        [
+            'Summarize or shard long context with explicit loss accounting.',
+            'Route limit failures to a fallback model or smaller prompt plan.',
+        ],
+        'AgentDebug',
+    ),
+    'system.environment_error': _mode(
+        'system.environment_error',
+        'Environment error',
+        'system',
+        'The external environment changed or returned noisy observations that misled the agent.',
+        ['environment changed', 'noisy observation', 'browser state', 'missing element'],
+        [
+            'Log environment snapshots and compare state before and after actions.',
+            'Use stable APIs or accessibility trees when visual/browser observations are brittle.',
+        ],
+        'AgentDebug / AgentSight',
+    ),
+    # --- family: multiagent ---
+    'multiagent.handoff_loss': _mode(
+        'multiagent.handoff_loss',
+        'Handoff context loss',
+        'multiagent',
+        'A receiving agent loses task constraints, rejected alternatives, or decision rationale.',
+        ['handoff missing context', 'lost rationale', 'context handoff'],
+        [
+            'Make handoff payloads typed and include goal, constraints, evidence, confidence, and open questions.',
+            'Validate that the receiver can restate critical constraints before proceeding.',
+        ],
+        'MAST / Who&When',
+    ),
+    'multiagent.role_drift': _mode(
+        'multiagent.role_drift',
+        'Role drift',
+        'multiagent',
+        'An agent acts outside its assigned responsibility or duplicates another agent role.',
+        ['role drift', 'responsibility overlap', 'agent conflict'],
+        [
+            'Define ownership boundaries and permitted actions per agent.',
+            'Add supervisor checks for role violations and duplicate work.',
+        ],
+        'MAST / Who&When',
+    ),
+    # --- family: verification ---
+    'verification.missing_task_validation': _mode(
+        'verification.missing_task_validation',
+        'Missing task validation',
+        'verification',
+        'The system accepts task completion without an independent final-state check.',
+        ['no verifier', 'unchecked final answer', 'missing validation'],
+        [
+            'Add final-state validation that is independent of the acting agent.',
+            'Store task-specific success criteria in the trace before execution begins.',
+        ],
+        'MAST',
+    ),
+    'verification.premature_stop': _mode(
+        'verification.premature_stop',
+        'Premature stop',
+        'verification',
+        'The agent stops before satisfying the task or exits after hitting a hidden stop condition.',
+        ['premature stop', 'early termination', 'max steps'],
+        [
+            'Differentiate success, abandonment, and max-step termination in the run schema.',
+            'Trigger recovery planning when termination reason is not verified success.',
+        ],
+        'MAST / AgentDebug',
+    ),
+    # --- family: multimodal ---
+    'multimodal.perception_error': _mode(
+        'multimodal.perception_error',
+        'Multimodal perception error',
+        'multimodal',
+        'The agent misreads an image, UI, audio, video, or file artifact and acts on bad perception.',
+        ['visual mismatch', 'ocr error', 'audio transcription error', 'ui element mismatch'],
+        [
+            'Store raw multimodal artifacts alongside extracted text and confidence scores.',
+            'Use redundant perception channels such as accessibility trees plus screenshots for UI tasks.',
+        ],
+        'AgentDebugX proposed extension',
+    ),
+}
+def get_failure_mode(mode_id: str) -> Optional[FailureMode]:
+    return SEED_FAILURE_MODES.get(mode_id)
+def list_failure_modes() -> List[FailureMode]:
+    return list(SEED_FAILURE_MODES.values())

agentdebug/ui/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Optional dashboard for AgentDebugX.
+Run with::
+    pip install agentdebugx[ui]
+    agentdebug serve --store-sqlite .agentdebug/errors.sqlite
+The server is a single-file FastAPI app + a no-build HTML/JS frontend served
+from ``GET /``. Everything is local-only (loopback) by default.
+"""
+from agentdebug.ui.server import build_app, serve
+__all__ = ['build_app', 'serve']  # public names re-exported from agentdebug.ui.server

agentdebug/ui/server.py ADDED Viewed

@@ -0,0 +1,260 @@
+"""Minimal FastAPI dashboard for AgentDebugX.
+Endpoints:
+* ``GET  /``                       — single-page HTML console.
+* ``GET  /api/v1/traces``          — list trace IDs in the store.
+* ``GET  /api/v1/traces/{tid}``    — fetch a trajectory + freshly analyzed report.
+* ``GET  /api/v1/traces/{tid}/raw``— raw trajectory JSON.
+* ``GET  /api/v1/taxonomy``        — list seed failure modes.
+* ``GET  /healthz``                — liveness.
+The server is intentionally tiny and built on a no-build (vanilla JS) frontend
+so it ships with the wheel and runs without `npm`.
+"""
+from __future__ import annotations
+import json
+import logging
+from typing import Any, Dict, List, Optional, cast
+from agentdebug.analyzers import HeuristicAnalyzer
+from agentdebug.models import AgentTrajectory, DiagnosticReport, model_to_json
+from agentdebug.storage import JsonlTraceStore, SQLiteTraceStore, TraceStore
+from agentdebug.taxonomy import SEED_FAILURE_MODES
+LOG = logging.getLogger('agentdebug.ui')  # module logger; serve() uses it to announce the listen URL
+def _to_dict(model: Any) -> Dict[str, Any]:
+    """Pydantic v1/v2 compatible serialization to dict."""
+    if hasattr(model, 'model_dump'):
+        return cast(Dict[str, Any], model.model_dump(mode='json'))
+    return cast(Dict[str, Any], json.loads(model.json()))
+def build_app(store: TraceStore) -> Any:
+    try:
+        from fastapi import FastAPI, HTTPException
+        from fastapi.responses import HTMLResponse, JSONResponse
+    except ImportError as exc:  # pragma: no cover - exercised in docs
+        raise ImportError(
+            'AgentDebugX UI requires `fastapi` and `uvicorn`. '
+            'Install with `pip install agentdebugx[ui]`.'
+        ) from exc
+    app = FastAPI(
+        title='AgentDebugX',
+        description='Local debug console for agent trajectories.',
+        version='0.1.0',
+    )
+    @app.get('/healthz')
+    def healthz() -> Dict[str, str]:
+        return {'status': 'ok'}
+    @app.get('/api/v1/traces')
+    def list_traces() -> Dict[str, List[str]]:
+        return {'traces': store.list_traces()}
+    @app.get('/api/v1/traces/{trace_id}')
+    def get_trace(trace_id: str) -> Dict[str, Any]:
+        trajectory = store.load_trajectory(trace_id)
+        if trajectory is None:
+            raise HTTPException(status_code=404, detail=f'unknown trace_id: {trace_id}')
+        report = HeuristicAnalyzer().analyze(trajectory)
+        return {
+            'trajectory': _to_dict(trajectory),
+            'report': _to_dict(report),
+        }
+    @app.get('/api/v1/traces/{trace_id}/raw')
+    def get_trace_raw(trace_id: str) -> JSONResponse:
+        trajectory = store.load_trajectory(trace_id)
+        if trajectory is None:
+            raise HTTPException(status_code=404, detail=f'unknown trace_id: {trace_id}')
+        return JSONResponse(content=_to_dict(trajectory))
+    @app.get('/api/v1/taxonomy')
+    def get_taxonomy() -> Dict[str, Any]:
+        return {
+            'modes': [_to_dict(m) for m in SEED_FAILURE_MODES.values()],
+        }
+    @app.get('/', response_class=HTMLResponse)
+    def index() -> str:
+        return _INDEX_HTML
+    return app
+def serve(
+    store: TraceStore,
+    *,
+    host: str = '127.0.0.1',
+    port: int = 7777,
+) -> None:
+    try:
+        import uvicorn
+    except ImportError as exc:  # pragma: no cover
+        raise ImportError(
+            'AgentDebugX UI requires `uvicorn`. '
+            'Install with `pip install agentdebugx[ui]`.'
+        ) from exc
+    app = build_app(store)
+    LOG.info('Serving AgentDebugX console at http://%s:%s', host, port)
+    uvicorn.run(app, host=host, port=port, log_level='warning')
+def store_from_path(path: str) -> TraceStore:
+    """Heuristic: ``.sqlite`` → SQLiteTraceStore; everything else → JSONL."""
+    if path.endswith(('.sqlite', '.db')):
+        return SQLiteTraceStore(path)
+    return JsonlTraceStore(path)
+# Single-file HTML console. Plain DOM + fetch — no build step required.
+_INDEX_HTML = """<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<title>AgentDebugX Console</title>
+<style>
+  :root { color-scheme: dark; --bg:#0e1117; --bg2:#161b22; --fg:#c9d1d9; --muted:#8b949e;
+          --acc:#58a6ff; --bad:#f85149; --warn:#d29922; --good:#3fb950; }
+  * { box-sizing: border-box; }
+  html, body { margin:0; padding:0; height:100%; background:var(--bg); color:var(--fg);
+               font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", sans-serif; }
+  header { padding:10px 14px; border-bottom:1px solid #21262d; display:flex; align-items:center; gap:14px;}
+  header h1 { font-size:14px; margin:0; font-weight:600; letter-spacing:0.4px; }
+  .pill { font-size:11px; color:var(--muted); border:1px solid #30363d; padding:1px 6px; border-radius:10px; }
+  main { display:grid; grid-template-columns: 280px 1fr; height: calc(100% - 44px); }
+  aside { background:var(--bg2); border-right:1px solid #21262d; overflow:auto; }
+  aside ul { list-style:none; margin:0; padding:0; }
+  aside li { padding:8px 12px; border-bottom:1px solid #21262d; cursor:pointer; font-size:12px; }
+  aside li:hover { background:#1f2630; }
+  aside li.active { background:#1c2530; color:var(--acc); }
+  section { padding:14px 18px; overflow:auto; }
+  h2 { font-size:15px; margin:8px 0 6px; }
+  h3 { font-size:13px; margin:14px 0 6px; color:var(--muted); text-transform:uppercase; letter-spacing:0.7px; }
+  table { width:100%; border-collapse: collapse; font-size:12px; }
+  th, td { text-align:left; padding:5px 8px; border-bottom:1px solid #21262d; vertical-align: top; }
+  th { font-weight:600; color:var(--muted); font-size:11px; }
+  .step { font-family: ui-monospace, SFMono-Regular, Consolas, monospace; }
+  .err { color: var(--bad); }
+  .warn { color: var(--warn); }
+  .good { color: var(--good); }
+  .badge { display:inline-block; padding:1px 6px; border-radius:8px; font-size:11px; background:#1f2630; color:var(--acc); margin-right:6px; }
+  .empty { color:var(--muted); padding:20px; text-align:center; }
+  pre { background:#1c2128; padding:10px; border-radius:6px; overflow:auto; font-size:11px; }
+  .summary { background:#1c2128; padding:10px 12px; border-left:3px solid var(--acc);
+             border-radius:0 4px 4px 0; font-size:13px; margin-bottom:8px; }
+  .root { background:#1c2128; padding:10px 12px; border-left:3px solid var(--warn);
+          border-radius:0 4px 4px 0; font-size:12px; margin-bottom:12px; }
+  .ev-event_id { color:var(--muted); font-size:10px; font-family:ui-monospace, monospace; }
+</style>
+</head>
+<body>
+<header>
+  <h1>AgentDebugX</h1>
+  <span class="pill" id="trace-count">…</span>
+  <span class="pill">v0.1</span>
+</header>
+<main>
+  <aside><ul id="trace-list"></ul></aside>
+  <section id="detail"><div class="empty">Select a trace from the left.</div></section>
+</main>
+<script>
+async function api(path) {
+  const r = await fetch(path);
+  if (!r.ok) throw new Error('HTTP ' + r.status);
+  return r.json();
+}
+function fmt(v) {
+  if (v === null || v === undefined) return '';
+  if (typeof v === 'object') return JSON.stringify(v);
+  return String(v);
+}
+function truncate(s, n) { s = fmt(s); return s.length > n ? s.slice(0, n) + '…' : s; }
+function escapeHtml(s) {
+  return String(s).replace(/[&<>'"]/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;',"'":'&#39;','"':'&quot;'})[c]);
+}
+async function loadTraceList() {
+  const data = await api('/api/v1/traces');
+  const ul = document.getElementById('trace-list');
+  ul.innerHTML = '';
+  document.getElementById('trace-count').textContent = data.traces.length + ' trace' + (data.traces.length === 1 ? '' : 's');
+  data.traces.forEach(tid => {
+    const li = document.createElement('li');
+    li.textContent = tid;
+    li.dataset.tid = tid;
+    li.onclick = () => { selectTrace(tid, li); };
+    ul.appendChild(li);
+  });
+  if (data.traces.length === 0) {
+    document.getElementById('detail').innerHTML = '<div class="empty">No traces in store.</div>';
+  }
+}
+async function selectTrace(tid, li) {
+  document.querySelectorAll('aside li').forEach(el => el.classList.remove('active'));
+  li.classList.add('active');
+  document.getElementById('detail').innerHTML = '<div class="empty">Loading…</div>';
+  try {
+    const data = await api('/api/v1/traces/' + encodeURIComponent(tid));
+    renderTrace(data.trajectory, data.report);
+  } catch (e) {
+    document.getElementById('detail').innerHTML = '<div class="empty err">' + e + '</div>';
+  }
+}
+function renderTrace(traj, report) {
+  let html = '';
+  html += '<h2>' + escapeHtml(traj.trace_id) + '</h2>';
+  html += '<div><span class="badge">' + escapeHtml(traj.framework || 'unknown') + '</span>';
+  html += '<span class="badge">' + (traj.events?.length || 0) + ' events</span></div>';
+  if (traj.goal) html += '<p style="color:var(--muted);margin-top:4px;font-size:12px;">' + escapeHtml(traj.goal) + '</p>';
+  html += '<div class="summary"><b>Summary:</b> ' + escapeHtml(report.summary || '—') + '</div>';
+  if (report.root_cause_event_id) {
+    html += '<div class="root"><b>Root cause:</b> agent=<code>' + escapeHtml(report.root_cause_agent || '') + '</code>';
+    html += ', step=<code>' + escapeHtml(String(report.root_cause_step_index)) + '</code>';
+    html += ', event=<span class="ev-event_id">' + escapeHtml(report.root_cause_event_id) + '</span></div>';
+  }
+  html += '<h3>Events</h3><table><thead><tr><th>#</th><th>Agent</th><th>Type</th><th>Module</th><th>Input</th><th>Output</th><th>Error</th></tr></thead><tbody>';
+  for (const ev of traj.events || []) {
+    html += '<tr>';
+    html += '<td class="step">' + (ev.step_index ?? '') + '</td>';
+    html += '<td>' + escapeHtml(ev.agent_name || '') + '</td>';
+    html += '<td>' + escapeHtml(ev.event_type || '') + '</td>';
+    html += '<td>' + escapeHtml(ev.module || '') + '</td>';
+    html += '<td>' + escapeHtml(truncate(ev.input, 80)) + '</td>';
+    html += '<td>' + escapeHtml(truncate(ev.output, 80)) + '</td>';
+    const err = ev.error ? '<span class="err">' + escapeHtml(truncate(ev.error, 80)) + '</span>' : '';
+    html += '<td>' + err + '</td>';
+    html += '</tr>';
+  }
+  html += '</tbody></table>';
+  html += '<h3>Findings</h3>';
+  if ((report.findings || []).length === 0) {
+    html += '<div class="empty">No findings.</div>';
+  } else {
+    html += '<table><thead><tr><th>Mode</th><th>Family</th><th>Step</th><th>Agent</th><th>Confidence</th><th>Evidence</th><th>Suggestion</th></tr></thead><tbody>';
+    for (const f of report.findings) {
+      html += '<tr>';
+      html += '<td><code>' + escapeHtml(f.failure_mode?.mode_id || '') + '</code></td>';
+      html += '<td>' + escapeHtml(f.failure_mode?.family || '') + '</td>';
+      html += '<td class="step">' + (f.step_index ?? '') + '</td>';
+      html += '<td>' + escapeHtml(f.agent_name || '') + '</td>';
+      html += '<td>' + (typeof f.confidence === 'number' ? f.confidence.toFixed(2) : '') + '</td>';
+      html += '<td>' + escapeHtml((f.evidence || []).join('; ')) + '</td>';
+      html += '<td>' + escapeHtml(f.suggestion || '') + '</td>';
+      html += '</tr>';
+    }
+    html += '</tbody></table>';
+  }
+  document.getElementById('detail').innerHTML = html;
+}
+loadTraceList();
+</script>
+</body>
+</html>
+"""