agent-sleuth 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ """Agent Sleuth — prevents untrusted data from triggering consequential actions in your agent.
2
+
3
+ In-process information-flow-control for LLM agents. Value-level provenance lineage tracked
4
+ at the tool-I/O boundary: deterministic, classifier-free, zero extra LLM calls on the
5
+ common path. See AGENT_SLEUTH_ARCHITECTURE.MD.
6
+ """
7
+
8
+ from .core.errors import TaintViolationError
9
+ from .core.policy import IFCPolicy
10
+ from .core.values import TaintedValue, Trust
11
+ from .engine import Engine
12
+ from .runtime import Sleuth
13
+
14
+ __all__ = [
15
+ "Sleuth",
16
+ "Engine",
17
+ "Trust",
18
+ "TaintedValue",
19
+ "IFCPolicy",
20
+ "TaintViolationError",
21
+ "tracked_tool",
22
+ ]
23
+
24
+ try:
25
+ from importlib.metadata import version as _version
26
+ __version__ = _version("agent_sleuth")
27
+ except Exception:
28
+ __version__ = "0.0.1"
29
+
30
+
31
def tracked_tool(engine, name=None):
    """Return a decorator that binds a raw tool function to ``engine``.

    Package-level convenience wrapper around ``adapters.decorator.tracked_tool``.
    The adapter module is imported when this function is called rather than when
    the package is imported.

    Args:
        engine: the Engine that receives the ingress/egress notifications.
        name: optional tool name; forwarded unchanged to the adapter.
    """
    from .adapters.decorator import tracked_tool as _decorator_factory

    make_decorator = _decorator_factory
    return make_decorator(engine, name=name)
@@ -0,0 +1,5 @@
1
+ """Framework adapters. Translate framework callbacks into core Engine calls."""
2
+
3
+ from .decorator import tracked_tool
4
+
5
+ __all__ = ["tracked_tool"]
@@ -0,0 +1,56 @@
1
+ """adapters/decorator.py — @tracked_tool for raw/custom agents (§4.9, v0 step 1).
2
+
3
+ Wraps a plain tool function so that, with no agent framework, every call runs the ingress
4
+ lineage check (raise in enforce, log in audit) and every return is fingerprinted + labeled.
5
+
6
+ Usage::
7
+
8
+ sleuth = Sleuth(agent=None, ...) # owns the engine
9
+ fetch_url = sleuth.track(fetch_url) # or: @sleuth.tracked_tool
10
+ send_email = sleuth.track(send_email)
11
+
12
+ The standalone ``tracked_tool(engine)`` decorator factory is also exported for users who
13
+ construct an Engine directly.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import functools
19
+ import inspect
20
+ from typing import Any, Callable
21
+
22
+ from ..engine import Engine
23
+
24
+
25
+ def _call_args(fn: Callable, args: tuple, kwargs: dict) -> dict[str, Any]:
26
+ """Best-effort bind positional+keyword args into a name->value dict for the checker."""
27
+ try:
28
+ bound = inspect.signature(fn).bind_partial(*args, **kwargs)
29
+ bound.apply_defaults()
30
+ return dict(bound.arguments)
31
+ except (TypeError, ValueError):
32
+ # Fall back to kwargs plus positional-by-index.
33
+ merged = dict(kwargs)
34
+ for i, a in enumerate(args):
35
+ merged[f"arg{i}"] = a
36
+ return merged
37
+
38
+
39
def tracked_tool(engine: Engine, name: str | None = None) -> Callable[[Callable], Callable]:
    """Decorator factory: bind a tool function to an Engine for ingress/egress tracking.

    Args:
        engine: receives ``on_tool_call(name, args)`` before the tool runs and
            ``on_tool_result(name, result)`` after it returns.
        name: tool name reported to the engine. Defaults to the function's
            ``__name__`` (or ``"tool"`` when it has none).

    Returns:
        A decorator. The wrapped function keeps the original's metadata and gains a
        ``__sleuth_tool_name__`` attribute. Coroutine functions are wrapped by a
        coroutine function so the *awaited* result is what gets labeled.

    Raises:
        Whatever ``engine.on_tool_call`` raises (the ingress check may reject the
        call); the tool itself is then never invoked.
    """

    def decorate(fn: Callable) -> Callable:
        tool_name = name or getattr(fn, "__name__", "tool")

        if inspect.iscoroutinefunction(fn):
            # An async tool returns a coroutine object when called. Labeling that
            # object would track its repr instead of the real output, so await it
            # first and label the actual result.
            @functools.wraps(fn)
            async def wrapper(*args: Any, **kwargs: Any) -> Any:
                engine.on_tool_call(tool_name, _call_args(fn, args, kwargs))
                result = await fn(*args, **kwargs)
                engine.on_tool_result(tool_name, result)
                return result

        else:

            @functools.wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                call_args = _call_args(fn, args, kwargs)
                engine.on_tool_call(tool_name, call_args)  # may raise in enforce mode
                result = fn(*args, **kwargs)
                engine.on_tool_result(tool_name, result)
                return result

        wrapper.__sleuth_tool_name__ = tool_name  # type: ignore[attr-defined]
        return wrapper

    return decorate
@@ -0,0 +1,121 @@
1
+ """adapters/langchain.py — the LangChain interception layer (§4.6).
2
+
3
+ Integration is a ``BaseCallbackHandler`` the developer passes in — zero changes to their
4
+ agent. ``on_tool_end`` fingerprints + labels output (egress); ``on_tool_start`` runs the
5
+ ingress lineage check (enforce raises, audit logs).
6
+
7
+ LangChain is imported lazily so ``core/`` and the rest of the library stay dependency-free
8
+ (§11.3, Friction 3 §6). If langchain-core is not installed, importing the handler raises a
9
+ clear error; the rest of agent_sleuth still works.
10
+
11
+ Stability hazard (§4.6, §6): pin to the stable callback interface; the core engine is
12
+ framework-agnostic so adding CrewAI / ADK / raw agents never touches the engine.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ from typing import Any
19
+ from uuid import UUID
20
+
21
+ from ..engine import Engine
22
+
23
+ try: # pragma: no cover - import shim
24
+ from langchain_core.callbacks import BaseCallbackHandler
25
+
26
+ _HAS_LANGCHAIN = True
27
+ except Exception: # pragma: no cover
28
+ _HAS_LANGCHAIN = False
29
+
30
+ class BaseCallbackHandler: # type: ignore[no-redef]
31
+ """Fallback base so the module imports without langchain-core installed."""
32
+
33
+
34
+ def _parse_input(input_str: str, inputs: dict[str, Any] | None) -> dict[str, Any]:
35
+ """Resolve a tool's call args from LangChain's on_tool_start payload.
36
+
37
+ Newer LangChain passes structured ``inputs``; older versions pass an ``input_str``
38
+ (often a JSON object, sometimes a bare string). Handle both.
39
+ """
40
+ if inputs:
41
+ return dict(inputs)
42
+ s = (input_str or "").strip()
43
+ if s[:1] in ("{", "["):
44
+ try:
45
+ parsed = json.loads(s)
46
+ if isinstance(parsed, dict):
47
+ return parsed
48
+ except (ValueError, TypeError):
49
+ pass
50
+ return {"input": input_str}
51
+
52
+
53
+ def _extract_content(output: Any) -> Any:
54
+ """LangChain may wrap output in a ToolMessage; pull the content if present."""
55
+ return getattr(output, "content", output)
56
+
57
+
58
class IFCCallbackHandler(BaseCallbackHandler):
    """Sync LangChain callback handler driving the Sleuth engine at the tool boundary.

    ``on_tool_start`` runs the ingress lineage check, ``on_tool_end`` labels the
    tool output, and ``on_tool_error`` releases per-run bookkeeping.
    """

    # LangChain's callback manager logs and swallows exceptions raised inside a
    # handler unless the handler sets ``raise_error``. Without this flag a
    # TaintViolationError raised by the ingress check would only produce a warning
    # and the tool call would still run.
    raise_error = True

    def __init__(self, engine: Engine):
        """Bind the handler to ``engine``.

        Raises:
            ImportError: if langchain-core is not installed.
        """
        if not _HAS_LANGCHAIN:
            raise ImportError(
                "IFCCallbackHandler requires langchain-core. "
                "Install with: pip install 'agent_sleuth[langchain]'"
            )
        self.engine = engine
        # Map LangChain run_id -> tool name so on_tool_end knows which tool produced output.
        self._run_tools: dict[UUID, str] = {}

    def _start(self, serialized, input_str, run_id, inputs):
        """Shared ingress logic: check the pending call, then remember the run."""
        name = (serialized or {}).get("name", "tool")
        self.engine.on_tool_call(name, _parse_input(input_str, inputs))
        # Register only after the check passed: a rejected call never reaches
        # on_tool_end, so registering first would leave a stale entry behind.
        if run_id is not None:
            self._run_tools[run_id] = name

    def _end(self, output, run_id, kwargs):
        """Shared egress logic: resolve the tool name and label its output."""
        name = self._run_tools.pop(run_id, None) if run_id is not None else None
        name = name or kwargs.get("name") or "tool"
        self.engine.on_tool_result(name, _extract_content(output))

    # --- ingress -----------------------------------------------------------------
    def on_tool_start(
        self,
        serialized: dict[str, Any],
        input_str: str,
        *,
        run_id: UUID | None = None,
        inputs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Run the ingress check for a tool call that is about to execute."""
        # Raises TaintViolationError in enforce mode to halt the call.
        self._start(serialized, input_str, run_id, inputs)

    # --- egress ------------------------------------------------------------------
    def on_tool_end(self, output: Any, *, run_id: UUID | None = None, **kwargs: Any) -> None:
        """Fingerprint and label the output of a finished tool call."""
        self._end(output, run_id, kwargs)

    # --- failure -----------------------------------------------------------------
    def on_tool_error(
        self, error: BaseException, *, run_id: UUID | None = None, **kwargs: Any
    ) -> None:
        """Forget a run whose tool raised; on_tool_end will not fire for it."""
        if run_id is not None:
            self._run_tools.pop(run_id, None)
98
+
99
+
100
class AsyncIFCCallbackHandler(IFCCallbackHandler):
    """Coroutine flavour of :class:`IFCCallbackHandler`.

    Both callbacks are ``async def`` but simply run the parent's synchronous
    ``_start`` / ``_end`` helpers; nothing is awaited inside them.
    """

    async def on_tool_start(  # type: ignore[override]
        self,
        serialized: dict[str, Any],
        input_str: str,
        *,
        run_id: UUID | None = None,
        inputs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Ingress check for a pending tool call (may raise in enforce mode)."""
        self._start(
            serialized=serialized,
            input_str=input_str,
            run_id=run_id,
            inputs=inputs,
        )

    async def on_tool_end(  # type: ignore[override]
        self, output: Any, *, run_id: UUID | None = None, **kwargs: Any
    ) -> None:
        """Label the output of a finished tool call."""
        self._end(output=output, run_id=run_id, kwargs=kwargs)
agent_sleuth/config.py ADDED
@@ -0,0 +1,51 @@
1
+ """config.py — optional YAML config loading (§4, §12).
2
+
3
+ Produces an IFCPolicy from a YAML file; falls back to name-based defaults when absent.
4
+ PyYAML is an optional dependency: ``pip install 'agent_sleuth[config]'``.
5
+
6
+ Example config::
7
+
8
+ mode: audit
9
+ untrusted_sources: [read_email, fetch_url, search_web]
10
+ consequential_actions: [send_email, write_file, post_slack]
11
+ destination_allowlist: [me@myco.com]
12
+ destination_fields:
13
+ post_slack: channel
14
+ strict: false
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ from .core.policy import IFCPolicy
23
+
24
+
25
def policy_from_dict(data: dict[str, Any]) -> IFCPolicy:
    """Build an IFCPolicy from a plain config mapping (e.g. parsed YAML).

    Missing or empty keys fall back to empty lists / ``audit`` mode / non-strict /
    name heuristics enabled.

    Args:
        data: config mapping; ``None`` is treated as empty.

    Raises:
        ValueError: if ``mode`` is not one of ``audit``, ``enforce``, ``confirm``
            (compared case-insensitively). A misspelled mode would otherwise be
            accepted and silently never enforce.
    """
    data = data or {}

    def as_list(key: str) -> list:
        raw = data.get(key)
        if not raw:
            return []
        # A bare scalar such as `untrusted_sources: read_email` must stay one
        # entry; list("read_email") would split it into single characters.
        if isinstance(raw, str):
            return [raw]
        return list(raw)

    mode = str(data.get("mode") or "audit").strip().lower()
    if mode not in ("audit", "enforce", "confirm"):
        raise ValueError(
            f"invalid mode {data.get('mode')!r}: expected 'audit', 'enforce' or 'confirm'"
        )

    return IFCPolicy(
        untrusted_sources=as_list("untrusted_sources"),
        consequential_actions=as_list("consequential_actions"),
        destination_allowlist=as_list("destination_allowlist"),
        destination_fields=dict(data.get("destination_fields", {}) or {}),
        mode=mode,
        strict=bool(data.get("strict", False)),
        use_name_heuristics=bool(data.get("use_name_heuristics", True)),
    )
35
+
36
+
37
def load_policy(path: str | Path) -> IFCPolicy:
    """Load an IFCPolicy from a YAML file.

    Args:
        path: location of the YAML config.

    Returns:
        The policy described by the file, or ``IFCPolicy.from_defaults()`` when the
        path does not exist. An empty file yields a policy built from an empty
        mapping.

    Raises:
        ImportError: if PyYAML is not installed and a real file is present.
        ValueError: if the document's top level is not a mapping.
    """
    p = Path(path)
    if not p.exists():
        return IFCPolicy.from_defaults()
    try:
        import yaml
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            "Loading a config file requires PyYAML. "
            "Install with: pip install 'agent_sleuth[config]'"
        ) from e
    # Read as UTF-8 explicitly; the platform default encoding would corrupt
    # non-ASCII allowlist entries on some systems.
    data = yaml.safe_load(p.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        raise ValueError(
            f"{p}: expected a mapping at the top level, got {type(data).__name__}"
        )
    return policy_from_dict(data)
@@ -0,0 +1,23 @@
1
+ """Framework-agnostic core engine. Never imports an agent framework (§11.3)."""
2
+
3
+ from .errors import TaintViolationError
4
+ from .fingerprint import extract_values, fingerprint, normalize
5
+ from .lineage import Violation, check
6
+ from .policy import IFCPolicy
7
+ from .store import TaintStore
8
+ from .trace import render
9
+ from .values import TaintedValue, Trust
10
+
11
+ __all__ = [
12
+ "TaintViolationError",
13
+ "extract_values",
14
+ "fingerprint",
15
+ "normalize",
16
+ "Violation",
17
+ "check",
18
+ "IFCPolicy",
19
+ "TaintStore",
20
+ "render",
21
+ "TaintedValue",
22
+ "Trust",
23
+ ]
@@ -0,0 +1,15 @@
1
+ """core/errors.py — exceptions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .lineage import Violation
6
+
7
+
8
class TaintViolationError(Exception):
    """Signals that a tool call was rejected by the lineage check.

    Attributes:
        violation: the Violation describing the offending flow.
        rendered: the human-readable trace; it is also the exception message.
    """

    def __init__(self, violation: Violation, rendered: str):
        # The rendered trace doubles as the message, so str(err) prints it.
        super().__init__(rendered)
        self.rendered = rendered
        self.violation = violation
@@ -0,0 +1,145 @@
1
+ """core/fingerprint.py — turning tool outputs into trackable values (§4.2).
2
+
3
+ This is the heart of the value-level approach. On every tool return we extract the
4
+ *specific values* worth tracking rather than labeling the whole blob:
5
+
6
+ - Structured returns (dict / list / JSON-string) are tracked per-field.
7
+ - Free text is indexed by high-value extractables (emails, URLs, tokens, phones, IDs)
8
+ pulled with regex, plus the whole normalized blob as a coarse substring candidate.
9
+
10
+ Matching is deterministic and classifier-free (design principle §11.1): no model is ever
11
+ asked "is this an injection?". The only question is "did this exact untrusted-origin value
12
+ appear in a sink argument?".
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import hashlib
18
+ import json
19
+ import re
20
+ import time
21
+ from typing import Any
22
+
23
+ from .values import TaintedValue, Trust
24
+
25
+ # Minimum length (in characters) for a substring/extractable to count as a trackable
26
+ # "value". Too short → spurious matches; this is the §12 open decision, resolved at 6.
27
+ MIN_VALUE_LEN = 6
28
+
29
+ # Ordered extractable inventory (§4.2, §12). Order matters: more specific patterns first
30
+ # so e.g. a token inside a URL is captured by the URL rule. Each entry is (kind, regex).
31
+ _EXTRACTABLE_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
32
+ ("url", re.compile(r"https?://[^\s<>\"')]+", re.IGNORECASE)),
33
+ ("email", re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")),
34
+ ("token", re.compile(r"\b(?:sk|pk|ghp|gho|ghs|xox[baprs])[-_][A-Za-z0-9_-]{8,}\b")),
35
+ ("uuid", re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
36
+ r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")),
37
+ # Long hex / base64-ish secret blobs (API keys, hashes) — >= 20 chars.
38
+ ("secret", re.compile(r"\b[A-Za-z0-9+/=_-]{20,}\b")),
39
+ ("phone", re.compile(r"\+?\d[\d\s().-]{7,}\d")),
40
+ ]
41
+
42
+ # Kinds whose values are case-insensitive identifiers and may be casefolded when
43
+ # normalized. Free text keeps its original case to avoid over-matching.
44
+ _CASEFOLD_KINDS = {"email", "url", "uuid"}
45
+
46
+ _WS = re.compile(r"\s+")
47
+
48
+
49
def normalize(s: str, *, casefold: bool = False) -> str:
    """Canonicalize a string for content-addressing.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space. With ``casefold=True`` the result is also
    casefolded.
    """
    # split() with no separator discards edge whitespace and splits on runs,
    # so re-joining with one space performs trim + collapse in a single step.
    collapsed = " ".join(s.split())
    if casefold:
        return collapsed.casefold()
    return collapsed
53
+
54
+
55
def fingerprint(s: str, *, casefold: bool = False) -> str:
    """Return the hex SHA-256 digest of the normalized, UTF-8 encoded value."""
    canonical = normalize(s, casefold=casefold)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
58
+
59
+
60
+ def _flatten(obj: Any, prefix: str = "") -> list[tuple[str, Any]]:
61
+ """Yield (field_path, leaf_value) for a nested dict/list structure."""
62
+ leaves: list[tuple[str, Any]] = []
63
+ if isinstance(obj, dict):
64
+ for k, v in obj.items():
65
+ path = f"{prefix}.{k}" if prefix else str(k)
66
+ leaves.extend(_flatten(v, path))
67
+ elif isinstance(obj, (list, tuple)):
68
+ for i, v in enumerate(obj):
69
+ leaves.extend(_flatten(v, f"{prefix}[{i}]"))
70
+ else:
71
+ leaves.append((prefix, obj))
72
+ return leaves
73
+
74
+
75
+ def _coerce_structured(output: Any) -> Any | None:
76
+ """Return a dict/list if output is one (or a JSON string encoding one), else None."""
77
+ if isinstance(output, (dict, list, tuple)):
78
+ return output
79
+ if isinstance(output, str):
80
+ text = output.strip()
81
+ if text[:1] in ("{", "["):
82
+ try:
83
+ return json.loads(text)
84
+ except (ValueError, TypeError):
85
+ return None
86
+ return None
87
+
88
+
89
def extract_values(
    output: Any,
    *,
    source: str,
    trust: Trust,
    trace_id: str,
    step: int | None = None,
) -> list[TaintedValue]:
    """Turn one tool output into the list of values worth tracking.

    Structured output (dict / list / tuple, or a JSON string encoding one) is
    walked leaf by leaf: every leaf is a candidate, and string leaves are also
    scanned with the extractable patterns. Any other output is stringified,
    scanned with the same patterns, and finally offered as one whole blob.

    Candidates that are ``None``, shorter than ``MIN_VALUE_LEN`` once normalized,
    or a repeat of an earlier candidate (same fingerprint) are dropped. All
    returned values share one ``created_at`` timestamp.
    """
    recorded_at = time.time()
    collected: list[TaintedValue] = []
    known: set[str] = set()

    def track(candidate: Any, field_path: str | None) -> None:
        # Filter, de-duplicate and record a single candidate value.
        if candidate is None:
            return
        as_text = candidate if isinstance(candidate, str) else str(candidate)
        if len(normalize(as_text)) < MIN_VALUE_LEN:
            return
        key = fingerprint(as_text)
        if key in known:
            return
        known.add(key)
        collected.append(
            TaintedValue(
                value=as_text,
                trust=trust,
                source=source,
                trace_id=trace_id,
                created_at=recorded_at,
                field_path=field_path,
                step=step,
            )
        )

    def track_extractables(text: str, field_path: str | None) -> None:
        # Patterns are tried in inventory order; every match is a candidate.
        for _kind, pattern in _EXTRACTABLE_PATTERNS:
            for hit in pattern.findall(text):
                track(hit, field_path)

    parsed = _coerce_structured(output)
    if parsed is None:
        blob = output if isinstance(output, str) else str(output)
        track_extractables(blob, None)
        # Whole blob last, as the coarse candidate for verbatim substring lineage.
        track(blob, None)
        return collected

    for path, leaf in _flatten(parsed):
        leaf_path = path or None
        track(leaf, leaf_path)
        # A string leaf (e.g. a body field) may itself embed extractables.
        if isinstance(leaf, str):
            track_extractables(leaf, leaf_path)
    return collected
@@ -0,0 +1,128 @@
1
+ """core/lineage.py — the matching engine (§4.5).
2
+
3
+ Given a pending sink call (tool name + arguments) and the provenance store, decide whether
4
+ the call carries untrusted-origin values to a non-allowlisted destination. The check is
5
+ deterministic: verbatim substring match or structured-field equality against untrusted
6
+ fingerprints — never an LLM judging intent (§11.1).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+ from typing import Any
13
+
14
+ from .fingerprint import MIN_VALUE_LEN, fingerprint, normalize
15
+ from .policy import IFCPolicy
16
+ from .store import TaintStore
17
+
18
+
19
@dataclass
class Violation:
    """One detected flow of an untrusted-origin value into a sink call.

    The fields describe both ends of the flow: which sink tool/argument received
    the value, and which source tool (step, field path) it was recorded from.
    """

    # --- sink side ---
    sink_tool: str
    sink_field: str | None
    sink_arg_value: str
    # --- what matched, and where it came from ---
    matched_value: str
    source_tool: str
    source_step: int | None
    source_field_path: str | None
    # --- context ---
    destination: str | None
    mode: str
    reason: str = "untrusted-origin value reached a consequential sink"
    blocked: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the violation as a plain dict, keys in field-declaration order."""
        names = (
            "sink_tool",
            "sink_field",
            "sink_arg_value",
            "matched_value",
            "source_tool",
            "source_step",
            "source_field_path",
            "destination",
            "mode",
            "reason",
            "blocked",
        )
        return {name: getattr(self, name) for name in names}
49
+
50
+
51
+ def _iter_arg_values(args: dict[str, Any]) -> list[tuple[str, str]]:
52
+ """Flatten sink args into (field_name, text) pairs for matching."""
53
+ out: list[tuple[str, str]] = []
54
+
55
+ def rec(field_name: str, val: Any) -> None:
56
+ if isinstance(val, dict):
57
+ for k, v in val.items():
58
+ rec(f"{field_name}.{k}" if field_name else str(k), v)
59
+ elif isinstance(val, (list, tuple)):
60
+ for i, v in enumerate(val):
61
+ rec(f"{field_name}[{i}]", v)
62
+ elif val is not None:
63
+ out.append((field_name, val if isinstance(val, str) else str(val)))
64
+
65
+ for k, v in args.items():
66
+ rec(k, v)
67
+ return out
68
+
69
+
70
def check(
    tool_name: str,
    args: dict[str, Any],
    store: TaintStore,
    policy: IFCPolicy,
    query: str | None = None,
) -> Violation | None:
    """Run the v0 lineage algorithm. Returns a Violation or None (allow).

    Steps:

    1. A tool the policy does not class as consequential is allowed.
    2. A call whose resolved destination the policy allows is allowed.
    3. Otherwise every untrusted value in the store is compared against every
       flattened sink argument; the first one found verbatim (after whitespace
       normalization) inside an argument produces a Violation.
    4. If nothing matched but the policy is strict and the run is tainted, a
       run-level Violation is returned.

    Args:
        tool_name: name of the tool about to be called.
        args: the call's arguments, keyed by field name.
        store: provenance store holding the values seen so far in this run.
        policy: classification and allowlist rules.
        query: the trusted user query, forwarded to the destination check.
    """
    # 1. Not consequential → allow.
    if not policy.is_consequential(tool_name):
        return None

    # 2. Destination allowlisted (config) or in trusted query → allow.
    destination = policy.resolve_destination(tool_name, args)
    if policy.is_allowed_destination(destination, query):
        return None

    # Normalize each sink argument once, not once per untrusted value.
    normalized_args = [
        (field_name, arg_text, normalize(arg_text))
        for field_name, arg_text in _iter_arg_values(args)
    ]

    # 3. Test each untrusted value for verbatim presence in any sink argument.
    for tv in store.untrusted_values():
        src_text = tv.value if isinstance(tv.value, str) else str(tv.value)
        src_norm = normalize(src_text)
        if len(src_norm) < MIN_VALUE_LEN:
            continue
        for field_name, arg_text, arg_norm in normalized_args:
            # Containment also covers exact equality: two values share a
            # fingerprint precisely when their normalized forms are equal.
            if src_norm in arg_norm:
                return Violation(
                    sink_tool=tool_name,
                    sink_field=field_name,
                    sink_arg_value=arg_text,
                    matched_value=src_text,
                    source_tool=tv.source,
                    source_step=tv.step,
                    source_field_path=tv.field_path,
                    destination=destination,
                    mode=policy.mode,
                )

    # 4. Strict / run-level mode: no untrusted value present but run is tainted → violation.
    if policy.strict and store.is_run_tainted():
        return Violation(
            sink_tool=tool_name,
            sink_field=None,
            sink_arg_value="",
            matched_value="(run-level taint)",
            source_tool="(run)",
            source_step=None,
            source_field_path=None,
            destination=destination,
            mode=policy.mode,
            reason="strict mode: consequential sink fired in a tainted run",
        )

    return None
@@ -0,0 +1,106 @@
1
+ """core/policy.py — the policy (§4.4).
2
+
3
+ Classifies tools as untrusted sources / consequential sinks, resolves a sink's destination
4
+ field, and decides whether a destination is allowed (configurable allowlist + the §13
5
+ implicit "appears verbatim in the trusted query" notion).
6
+
7
+ Defaults from tool-name conventions make config trivial (Friction 1, §6): most developers
8
+ never touch the lists.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ from .fingerprint import normalize
17
+
18
+ # Name-based heuristics (§4.4). A tool counts as the category if its name contains any keyword.
19
+ UNTRUSTED_KEYWORDS = ("read", "fetch", "search", "get", "browse", "retrieve", "load")
20
+ CONSEQUENTIAL_KEYWORDS = ("send", "write", "delete", "post", "update", "create", "execute", "run")
21
+
22
+ # Default destination-field map (tool name substring -> arg field holding the destination).
23
+ DEFAULT_DESTINATION_FIELDS: dict[str, str] = {
24
+ "send_email": "to",
25
+ "email": "to",
26
+ "http_post": "url",
27
+ "post": "url",
28
+ "write_file": "path",
29
+ "write": "path",
30
+ "slack": "channel",
31
+ }
32
+
33
+ # Field-name heuristics tried (in order) when no explicit map entry matches.
34
+ DESTINATION_FIELD_HEURISTICS = ("to", "recipient", "recipients", "url", "endpoint",
35
+ "path", "channel", "destination", "dest", "address")
36
+
37
+
38
+ def _matches_keyword(tool_name: str, keywords: tuple[str, ...]) -> bool:
39
+ name = tool_name.lower()
40
+ return any(k in name for k in keywords)
41
+
42
+
43
@dataclass
class IFCPolicy:
    """Classification and allowlist rules consulted by the lineage check.

    Explicit lists are matched by exact tool name; when ``use_name_heuristics`` is
    on, keyword-in-name matching is applied as well.
    """

    untrusted_sources: list[str] = field(default_factory=list)
    consequential_actions: list[str] = field(default_factory=list)
    destination_allowlist: list[str] = field(default_factory=list)
    destination_fields: dict[str, str] = field(default_factory=dict)
    mode: str = "audit"  # "audit" | "enforce" | "confirm"
    strict: bool = False  # run-level taint enforcement (coarse, §4.3)
    use_name_heuristics: bool = True

    def is_untrusted_source(self, tool_name: str) -> bool:
        """True if the tool is listed as, or heuristically named like, an untrusted source."""
        if tool_name in self.untrusted_sources:
            return True
        if self.use_name_heuristics and _matches_keyword(tool_name, UNTRUSTED_KEYWORDS):
            return True
        return False

    def is_consequential(self, tool_name: str) -> bool:
        """True if the tool is listed as, or heuristically named like, a consequential sink."""
        if tool_name in self.consequential_actions:
            return True
        if self.use_name_heuristics and _matches_keyword(tool_name, CONSEQUENTIAL_KEYWORDS):
            return True
        return False

    def resolve_destination(self, tool_name: str, args: dict[str, Any]) -> str | None:
        """Identify the destination value in a sink call's args.

        Lookup order: exact tool-name entry, developer ``destination_fields`` keys
        contained in the tool name, default map keys contained in the tool name,
        then the generic field-name heuristics. The first candidate whose field is
        present in ``args`` wins. Returns ``None`` when nothing matches.
        """
        merged = {**DEFAULT_DESTINATION_FIELDS, **self.destination_fields}
        lowered = tool_name.lower()
        # Developer-supplied keys are tried before the built-in defaults so an
        # override cannot be shadowed by a broader default such as "post"; they
        # are compared case-insensitively, like the defaults.
        candidates = [tool_name]
        candidates.extend(k for k in self.destination_fields if k.lower() in lowered)
        candidates.extend(k for k in DEFAULT_DESTINATION_FIELDS if k in lowered)
        for key in candidates:
            fieldname = merged.get(key)
            if fieldname and fieldname in args:
                return _as_text(args[fieldname])
        for fieldname in DESTINATION_FIELD_HEURISTICS:
            if fieldname in args:
                return _as_text(args[fieldname])
        return None

    def is_allowed_destination(self, dest: str | None, query: str | None = None) -> bool:
        """A destination is allowed if it is on the configurable allowlist (§4.4) or
        appears verbatim in the trusted query (§13 implicit notion).

        "Appears verbatim" means as a whole token: the occurrence must not be
        glued to further address/path characters. Otherwise ``e@example.com``
        would be accepted just because the query mentions ``alice@example.com``.
        Comparison is whitespace-normalized and case-insensitive.
        """
        import re  # local import: only this check needs it

        if not dest:
            return False
        norm_dest = normalize(dest, casefold=True)
        for allowed in self.destination_allowlist:
            if normalize(allowed, casefold=True) == norm_dest:
                return True
        if not (query and norm_dest):
            return False
        # Left boundary: not preceded by a word char or . @ % + / -.
        # Right boundary: not followed by a word char or @ % + / -, nor by a dot
        # that continues into more characters (a sentence-ending dot is fine).
        token = r"(?<![\w.@%+/-])" + re.escape(norm_dest) + r"(?![\w@%+/-]|\.\w)"
        return re.search(token, normalize(query, casefold=True)) is not None

    @classmethod
    def from_defaults(cls, mode: str = "audit") -> "IFCPolicy":
        """Policy driven purely by name heuristics + empty allowlist."""
        return cls(mode=mode, use_name_heuristics=True)
101
+
102
+
103
+ def _as_text(v: Any) -> str:
104
+ if isinstance(v, (list, tuple)):
105
+ return ", ".join(str(x) for x in v)
106
+ return str(v)
@@ -0,0 +1,85 @@
1
+ """core/store.py — the provenance store (§4.3).
2
+
3
+ Holds labels and lineage across tool calls *within a single agent run*. The LLM's context
4
+ window is stateful, so the label store must match that statefulness. Two levels of
5
+ granularity, both implemented:
6
+
7
+ 1. Value-level lineage (primary, the wedge): content-addressed fingerprint -> TaintedValue.
8
+ 2. Run-level taint (coarse fallback / strict mode): once any untrusted data enters the run,
9
+ the whole run is considered tainted.
10
+
11
+ ``reset()`` is called at the start of every run — taint does not bleed across independent
12
+ agent invocations.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+
19
+ from .fingerprint import fingerprint
20
+ from .values import TaintedValue, Trust
21
+
22
+
23
@dataclass
class StoreEvent:
    """One entry of the store's ordered event log: a tool's labeled output."""

    step: int  # tool-call step index the output belongs to
    source: str  # name of the tool that produced the output
    trust: Trust  # trust label the output was recorded with
    values: list[TaintedValue]  # values extracted from that output
31
+
32
+
33
class TaintStore:
    """Per-run provenance state.

    Keeps a fingerprint-keyed map of tracked values, a run-level trust flag, an
    ordered event log, and a step counter.
    """

    def __init__(self) -> None:
        self._step: int = 0
        self._events: list[StoreEvent] = []
        self._run_taint_level: Trust = Trust.TRUSTED
        self._store: dict[str, TaintedValue] = {}

    def next_step(self) -> int:
        """Increment the step counter and return the new value (first call gives 1)."""
        self._step = self._step + 1
        return self._step

    @property
    def step(self) -> int:
        """Current step counter value."""
        return self._step

    def label(self, values: list[TaintedValue], *, source: str, trust: Trust) -> None:
        """Record one tool's extracted values and append an event for them.

        A fingerprint already present keeps its first value, except that a trusted
        entry is replaced when the same content later arrives tainted. An
        ``UNTRUSTED`` ``trust`` argument marks the whole run as tainted.
        """
        for candidate in values:
            raw = candidate.value
            key = fingerprint(raw if isinstance(raw, str) else str(raw))
            current = self._store.get(key)
            # Untrusted dominates: content once seen as untrusted stays untrusted.
            upgrade = current is None or (
                current.trust == Trust.TRUSTED and candidate.is_tainted()
            )
            if upgrade:
                self._store[key] = candidate
        if trust == Trust.UNTRUSTED:
            self._run_taint_level = Trust.UNTRUSTED
        event_step = values[0].step if values else self._step
        self._events.append(
            StoreEvent(step=event_step, source=source, trust=trust, values=values)
        )

    def get(self, fp: str) -> TaintedValue | None:
        """Look up a tracked value by fingerprint; ``None`` when unknown."""
        return self._store.get(fp)

    def untrusted_values(self) -> list[TaintedValue]:
        """All tracked values currently labeled as tainted."""
        return [entry for entry in self._store.values() if entry.is_tainted()]

    def get_run_trust(self) -> Trust:
        """Run-level trust label."""
        return self._run_taint_level

    def is_run_tainted(self) -> bool:
        """True once untrusted output has been labeled in this run."""
        return self._run_taint_level == Trust.UNTRUSTED

    def events(self) -> list[StoreEvent]:
        """Shallow copy of the event log, oldest first."""
        return list(self._events)

    def reset(self) -> None:
        """Drop all values, events and taint; restart the step counter at 0."""
        self._step = 0
        self._events.clear()
        self._run_taint_level = Trust.TRUSTED
        self._store.clear()
@@ -0,0 +1,49 @@
1
+ """core/trace.py — the "why blocked" provenance trace (§4.7).
2
+
3
+ This is not a nice-to-have, it is the marketing (§8). On every violation we render the
4
+ lineage chain from source to sink in a form that is genuinely readable and shareable. A
5
+ screenshot of a caught attack is the entire early growth strategy.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from .lineage import Violation
11
+
12
+
13
+ def _short(s: str, limit: int = 80) -> str:
14
+ s = s.replace("\n", " ").strip()
15
+ return s if len(s) <= limit else s[: limit - 1] + "…"
16
+
17
+
18
def render(violation: Violation) -> str:
    """Format a violation as a multi-line, human-readable source→sink report.

    The headline says BLOCKED or WOULD BLOCK depending on ``violation.blocked``;
    the following lines give the taint source, the offending argument, the lineage
    chain, the destination (only when one is set), the reason and the action.
    """
    v = violation
    has_step = v.source_step is not None
    field_label = v.sink_field or "?"

    # "Taint source" line: tool, optional step, optional field path.
    if has_step:
        origin = f"{v.source_tool} (step {v.source_step}, untrusted)"
    else:
        origin = f"{v.source_tool} (untrusted)"
    if v.source_field_path:
        origin = f"{origin} field {v.source_field_path}"

    # "Lineage" line: source → matched value → sink argument.
    step_note = f" (step {v.source_step})" if has_step else ""
    chain = (
        f"{v.source_tool}{step_note}"
        f" → value \"{_short(v.matched_value)}\""
        f" → {v.sink_tool}.{field_label}"
    )

    if v.blocked:
        headline, outcome = "BLOCKED", "blocked, call halted"
    else:
        headline, outcome = "WOULD BLOCK", "logged (audit mode), call allowed"

    report = [
        f"{headline}: {v.sink_tool}() called with tainted inputs",
        f" Taint source: {origin}",
        f" Injected value detected in argument: {field_label}=\"{_short(v.sink_arg_value)}\"",
        f" Lineage: {chain}",
    ]
    if v.destination:
        report.append(f" Destination: {_short(v.destination)} (not allowlisted)")
    report.extend([f" Reason: {v.reason}", f" Action: {outcome}"])
    return "\n".join(report)
@@ -0,0 +1,43 @@
1
+ """core/values.py — the atom.
2
+
3
+ Every piece of data the system tracks is a labeled value (§4.1).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import Any
11
+
12
+
13
class Trust(Enum):
    """Provenance label attached to every tracked value."""

    # Value recorded with the trusted label.
    TRUSTED = "trusted"
    # Value recorded with the untrusted label.
    UNTRUSTED = "untrusted"
16
+
17
+
18
@dataclass
class TaintedValue:
    """A tracked value together with its provenance label."""

    value: Any  # raw extracted value (a string extractable or a structured leaf)
    trust: Trust  # TRUSTED or UNTRUSTED
    source: str  # tool that produced the value
    trace_id: str  # id tying the value to a lineage across hops
    created_at: float  # wall-clock time the value was recorded
    field_path: str | None = None  # leaf path for structured returns, e.g. results[0].email
    step: int | None = None  # tool-call step index within the run

    def is_tainted(self) -> bool:
        """True when the value carries the UNTRUSTED label."""
        return self.trust is Trust.UNTRUSTED
agent_sleuth/engine.py ADDED
@@ -0,0 +1,84 @@
1
+ """engine.py — framework-agnostic ingress/egress glue shared by all adapters.
2
+
3
+ The adapters (decorator, LangChain callback, future MCP proxy) are thin: they translate
4
+ framework events into two calls on the Engine:
5
+
6
+ - ``on_tool_call(name, args)`` — ingress: lineage-check a pending sink call. Raises in
7
+ enforce mode (and in confirm mode when the callback denies), records a (non-blocking) violation in audit mode.
8
+ - ``on_tool_result(name, output)`` — egress: fingerprint + label the tool output.
9
+
10
+ The Engine owns no framework imports, keeping ``core/`` and this glue dependency-free.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from typing import Any, Callable
17
+
18
+ from .core.errors import TaintViolationError
19
+ from .core.fingerprint import extract_values
20
+ from .core.lineage import Violation, check
21
+ from .core.policy import IFCPolicy
22
+ from .core.store import TaintStore
23
+ from .core.trace import render
24
+ from .core.values import Trust
25
+
26
+ logger = logging.getLogger("agent_sleuth")
27
+
28
+
29
class Engine:
    """Ingress/egress glue that adapters call around every tool invocation.

    Attributes:
        policy: the ``IFCPolicy`` consulted for the mode and for source trust.
        store: the ``TaintStore`` that holds labeled values and the step counter.
        violations: every ``Violation`` recorded so far, in order of detection.
        query: the trusted user query passed to the lineage check, or ``None``.
        confirm_callback: optional ``(violation, rendered) -> bool`` hook used in
            ``confirm`` mode; a truthy return allows the call.
    """

    def __init__(self, policy: IFCPolicy, store: TaintStore):
        self.policy = policy
        self.store = store
        self.violations: list[Violation] = []
        self.query: str | None = None
        self.confirm_callback: Callable[[Violation, str], bool] | None = None

    def set_query(self, query: str | None) -> None:
        """Remember the trusted query handed to subsequent lineage checks."""
        self.query = query

    def on_tool_call(self, name: str, args: dict[str, Any]) -> Violation | None:
        """Ingress check for a pending tool call.

        Args:
            name: name of the tool about to be called.
            args: the arguments of the pending call.

        Returns:
            ``None`` when the lineage check finds nothing; otherwise the recorded
            ``Violation`` (audit mode, or confirm mode when the call is allowed).

        Raises:
            TaintViolationError: in ``enforce`` mode, or in ``confirm`` mode when
                the callback denies the call.
        """
        violation = check(name, args, self.store, self.policy, self.query)
        if violation is None:
            return None

        if self.policy.mode == "enforce":
            violation.blocked = True
            rendered = render(violation)
            self.violations.append(violation)
            logger.warning(rendered)
            raise TaintViolationError(violation, rendered)

        if self.policy.mode == "confirm":
            # The callback has to see a trace before it can decide, so this first
            # render necessarily happens while the decision is still unknown.
            preview = render(violation)
            # With no callback configured the call is allowed (violation still recorded).
            allow = True
            if self.confirm_callback is not None:
                allow = bool(self.confirm_callback(violation, preview))
            violation.blocked = not allow
            # Render again now that ``blocked`` is final, so the logged trace and the
            # one carried by the exception report the actual outcome, as the enforce
            # and audit branches already do by setting ``blocked`` before rendering.
            rendered = render(violation)
            self.violations.append(violation)
            logger.warning(rendered)
            if not allow:
                raise TaintViolationError(violation, rendered)
            return violation

        # audit (default, and any other mode value): log + record, never block.
        violation.blocked = False
        rendered = render(violation)
        self.violations.append(violation)
        logger.info(rendered)
        return violation

    def on_tool_result(self, name: str, output: Any, *, trace_id: str | None = None) -> None:
        """Egress: fingerprint + label a tool's output.

        Args:
            name: name of the tool that produced ``output``.
            output: the tool's return value, passed to ``extract_values``.
            trace_id: optional lineage id; defaults to ``"<name>:<step>"``.
        """
        trust = Trust.UNTRUSTED if self.policy.is_untrusted_source(name) else Trust.TRUSTED
        # A step is consumed for every result, even when nothing is extracted.
        step = self.store.next_step()
        values = extract_values(
            output,
            source=name,
            trust=trust,
            trace_id=trace_id or f"{name}:{step}",
            step=step,
        )
        if values:
            self.store.label(values, source=name, trust=trust)
@@ -0,0 +1,107 @@
1
+ """runtime.py — Sleuth, the public developer-facing API (§4.8).
2
+
3
+ The single thing the developer imports. Constructs the policy (from explicit lists or
4
+ name-based defaults), owns the store and engine, resets per run, and exposes ``violations``
5
+ and ``report()``.
6
+
7
+ Three-line integration (§0)::
8
+
9
+ from agent_sleuth import Sleuth
10
+
11
+ agent = Sleuth(agent=your_agent, untrusted=[...], consequential=[...], mode="audit")
12
+ result = agent.run("summarize my emails and send a report to my boss")
13
+ print(agent.report())
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any, Callable
19
+
20
+ from .core.errors import TaintViolationError
21
+ from .core.lineage import Violation
22
+ from .core.policy import IFCPolicy
23
+ from .core.store import TaintStore
24
+ from .core.trace import render
25
+ from .engine import Engine
26
+
27
+
28
class Sleuth:
    """Public developer-facing entry point: owns the policy, store and engine."""

    def __init__(
        self,
        agent: Any = None,
        untrusted: list[str] | None = None,
        consequential: list[str] | None = None,
        destinations: list[str] | None = None,
        mode: str = "audit",
        policy: IFCPolicy | None = None,
        strict: bool = False,
        confirm_callback: Callable[[Violation, str], bool] | None = None,
    ):
        """Build the policy, then the store and engine bound to it.

        Args:
            agent: the wrapped agent used by ``run()``; ``None`` for raw tools.
            untrusted: tool names whose outputs are untrusted.
            consequential: tool names treated as consequential sinks.
            destinations: destination allowlist.
            mode: policy mode string, ``"audit"`` by default.
            policy: a ready-made ``IFCPolicy``. When given it is used as-is and
                ``untrusted``, ``consequential``, ``destinations``, ``mode`` and
                ``strict`` are not applied.
            strict: forwarded to the policy's ``strict`` setting.
            confirm_callback: installed on the engine as its confirm hook.
        """
        if policy is not None:
            self.policy = policy
        elif untrusted is None and consequential is None:
            # Pure name-based defaults (§4.4): most developers never touch the lists.
            self.policy = IFCPolicy.from_defaults(mode=mode)
            self.policy.destination_allowlist = destinations or []
            self.policy.strict = strict
        else:
            self.policy = IFCPolicy(
                untrusted_sources=untrusted or [],
                consequential_actions=consequential or [],
                destination_allowlist=destinations or [],
                mode=mode,
                strict=strict,
            )
        self.store = TaintStore()
        self.engine = Engine(self.policy, self.store)
        self.engine.confirm_callback = confirm_callback
        self.agent = agent

    # --- adapter wiring ----------------------------------------------------------
    @property
    def handler(self):
        """A fresh LangChain callback handler bound to this Sleuth's engine."""
        # Imported lazily so the LangChain adapter is only loaded on use.
        from .adapters.langchain import IFCCallbackHandler

        return IFCCallbackHandler(self.engine)

    def track(self, fn: Callable, name: str | None = None) -> Callable:
        """Wrap a raw tool function with @tracked_tool bound to this engine."""
        from .adapters.decorator import tracked_tool

        return tracked_tool(self.engine, name=name)(fn)

    # --- run ---------------------------------------------------------------------
    def run(self, query: str, **kwargs: Any) -> Any:
        """Run the wrapped agent on ``query`` under the callback handler.

        Taint state is reset and the trusted query stashed before the agent
        starts. Any ``callbacks`` passed in ``kwargs`` are kept and this Sleuth's
        handler is appended to them.

        Returns:
            Whatever ``agent.run`` returns, or the rendered trace string when a
            ``TaintViolationError`` is raised during the run.

        Raises:
            ValueError: if no agent was supplied at construction time.
        """
        # Validate before resetting: a mistaken run() on a raw-tool Sleuth must not
        # wipe the taint state and violations that were already recorded.
        if self.agent is None:
            raise ValueError("Sleuth.run requires an agent; for raw tools use .track().")
        self.reset(query)
        try:
            callbacks = kwargs.pop("callbacks", []) or []
            callbacks = [*callbacks, self.handler]
            return self.agent.run(query, callbacks=callbacks, **kwargs)
        except TaintViolationError as e:
            return e.rendered

    def reset(self, query: str | None = None) -> None:
        """Fresh taint state per run (§4.3). Optionally set the trusted query."""
        self.store.reset()
        self.engine.violations = []
        self.engine.set_query(query)

    # --- reporting ---------------------------------------------------------------
    @property
    def violations(self) -> list[dict]:
        """The engine's recorded violations, each converted with ``to_dict()``."""
        return [v.to_dict() for v in self.engine.violations]

    def report(self) -> str:
        """Human-readable summary: '✓ none' or enumerated rendered traces."""
        vs = self.engine.violations
        if not vs:
            return "Agent Sleuth: ✓ no violations detected."
        header = f"Agent Sleuth: {len(vs)} violation(s) detected\n"
        body = "\n\n".join(render(v) for v in vs)
        return header + "\n" + body
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent_sleuth
3
+ Version: 0.0.1
4
+ Summary: Prevents untrusted data from triggering consequential actions in your agent.
5
+ Project-URL: Homepage, https://github.com/Behuve-Labs/agent-sleuth
6
+ Project-URL: Source, https://github.com/Behuve-Labs/agent-sleuth
7
+ Project-URL: Issues, https://github.com/Behuve-Labs/agent-sleuth/issues
8
+ Author: Arnav Tripathy, Noah Wong
9
+ License: Copyright 2026 Behuve
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16
+ License-File: LICENSE.md
17
+ Keywords: agent,ifc,llm,prompt-injection,security,taint
18
+ Classifier: Development Status :: 3 - Alpha
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: License :: OSI Approved :: MIT License
21
+ Classifier: Programming Language :: Python :: 3
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Topic :: Security
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Requires-Python: >=3.10
28
+ Provides-Extra: agentdojo
29
+ Requires-Dist: agentdojo; extra == 'agentdojo'
30
+ Provides-Extra: config
31
+ Requires-Dist: pyyaml>=6.0; extra == 'config'
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0; extra == 'dev'
34
+ Requires-Dist: pyyaml>=6.0; extra == 'dev'
35
+ Requires-Dist: ruff>=0.1; extra == 'dev'
36
+ Provides-Extra: langchain
37
+ Requires-Dist: langchain-core>=0.1; extra == 'langchain'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # Agent Sleuth
41
+
42
+ > **Prevents untrusted data from triggering consequential actions in your agent.**
43
+
44
+ Agent Sleuth is an in-process **information-flow-control (IFC)** library for LLM agents. It
45
+ stops untrusted data (web pages, email bodies, tool outputs, retrieved documents) from
46
+ driving **consequential actions** (sending email, writing files, posting to external
47
+ services).
48
+
49
+ The mechanism is **value-level provenance lineage tracked at the tool-I/O boundary** — *not*
50
+ taint-tracking through the model's forward pass. When an untrusted tool returns data, we
51
+ fingerprint the specific values in it. When a later consequential ("sink") call's arguments
52
+ carry those fingerprinted values — verbatim or via structured-field tracking — that is a
53
+ **deterministic, classifier-free provenance edge**. A small policy fires: untrusted-origin
54
+ value reaching a non-allowlisted external sink → **block or confirm**.
55
+
56
+ - **Deterministic, not a classifier.** The guarantee is a value-lineage match, never an LLM judging intent.
57
+ - **Zero extra LLM calls** on the common path.
58
+ - **Drop-in.** Three lines, zero changes to your agent.
59
+ - **Audit-mode first.** Observe for a week, then switch to enforce.
60
+
61
+ ## Install
62
+
63
+ ```bash
64
+ pip install agent_sleuth # core, zero agent-framework deps
65
+ pip install 'agent_sleuth[langchain]' # + LangChain callback handler
66
+ pip install 'agent_sleuth[config]' # + YAML config loading
67
+ pip install 'agent_sleuth[dev]' # + pytest/ruff
68
+ ```
69
+
70
+ ## Three-line integration (raw / custom agent)
71
+
72
+ ```python
73
+ from agent_sleuth import Sleuth
74
+
75
+ sleuth = Sleuth(
76
+ untrusted=["read_email", "fetch_url", "search_web"],
77
+ consequential=["send_email", "write_file", "post_slack"],
78
+ destinations=["me@myco.com"], # your own channels = trusted egress
79
+ mode="audit", # → "enforce" once you trust it
80
+ )
81
+ sleuth.reset(query="summarize my emails and send a report to my boss")
82
+
83
+ # wrap your tools (or pass sleuth.handler to a LangChain agent — see below)
84
+ fetch_url = sleuth.track(fetch_url)
85
+ send_email = sleuth.track(send_email)
86
+
87
+ # ... run your agent ...
88
+ print(sleuth.report())
89
+ ```
90
+
91
+ You can also skip the explicit lists entirely — `Sleuth()` uses **name-based defaults**
92
+ (tools containing `read/fetch/search/get/...` are untrusted; `send/write/post/delete/...`
93
+ are consequential).
94
+
95
+ ## LangChain (zero changes to your agent)
96
+
97
+ ```python
98
+ from agent_sleuth import Sleuth
99
+
100
+ sleuth = Sleuth(agent=your_langchain_agent, mode="audit")
101
+ result = sleuth.run("summarize my emails and send a report to my boss")
102
+ print(sleuth.report())
103
+ ```
104
+
105
+ `Sleuth.run()` resets taint state, stashes the trusted query, and attaches the
106
+ `IFCCallbackHandler` to your agent — no edits to your chain.
107
+
108
+ ## What a caught attack looks like
109
+
110
+ ```
111
+ BLOCKED: send_email() called with tainted inputs
112
+ Taint source: fetch_url (step 2, untrusted)
113
+ Injected value detected in argument: to="attacker@evil.com"
114
+ Lineage: fetch_url (step 2) → value "attacker@evil.com" → send_email.to
115
+ Destination: attacker@evil.com (not allowlisted)
116
+ Reason: untrusted-origin value reached a consequential sink
117
+ Action: blocked, call halted
118
+ ```
119
+
120
+ ## Modes
121
+
122
+ - `audit` (default): detect + log + render the trace; **never block**.
123
+ - `enforce`: raise `TaintViolationError` and halt the offending sink call.
124
+ - `confirm`: surface the violation to a `confirm_callback` for an allow/deny decision before dispatch; if no callback is configured, the call is allowed and the violation is only recorded.
125
+
126
+ ## Honest coverage envelope (v0)
127
+
128
+ > Sound on the verbatim/structured-exfil class. Zero extra LLM calls on the common path.
129
+ > Drop-in. **Laundering** (base64/paraphrase of a secret) and **pure control-flow hijack**
130
+ > (a sink call whose arguments carry no untrusted bytes) are explicitly **out of scope for
131
+ > v0** — documented non-goals, not bugs. Control-flow integrity (the plan-allowlist) and a
132
+ > configurable allow/denylist with deny-over-allow precedence land in v1.
133
+
134
+ | Attack class | v0 |
135
+ |---|---|
136
+ | Verbatim exfiltration (untrusted value appears literally in sink arg) | ✅ deterministic |
137
+ | Structured exfiltration (untrusted field → sink field) | ✅ deterministic |
138
+ | Legit egress to your own channel (destination allowlist) | ✅ allowed (no false positive) |
139
+ | Control-flow hijack (out-of-plan sink, no untrusted bytes) | ❌ v1 (plan-allowlist) |
140
+ | Laundering (base64 / paraphrase / transform) | ❌ v2+ (opt-in quarantine) |
141
+
142
+ ## Benchmark
143
+
144
+ ```bash
145
+ PYTHONPATH=. python benchmarks/agentdojo/run.py
146
+ ```
147
+
148
+ A self-contained reproduction of AgentDojo-style indirect-injection tasks (real AgentDojo
149
+ needs a live LLM + API keys; see the harness docstring for the thin real-AgentDojo wiring).
150
+ Reports ASR (attack success rate) and utility per mode.
151
+
152
+ ## Develop
153
+
154
+ ```bash
155
+ pip install -e '.[dev,langchain,config]'
156
+ pytest
157
+ ```
158
+
159
+ See `AGENT_SLEUTH_ARCHITECTURE.MD` for the full design.
@@ -0,0 +1,19 @@
1
+ agent_sleuth/__init__.py,sha256=ZQUbSwTbc96_-wzW4qPOE7sb5-NJMoGYdnN-oy29j64,1006
2
+ agent_sleuth/config.py,sha256=S_icJkR2rIfCWmentRiJKFTc_jMHH77y1Kaf70MBS7o,1802
3
+ agent_sleuth/engine.py,sha256=AbW_xt2z-A3oSY8N5HpuaGcplSDIzNtfkj9Acdagd0s,3133
4
+ agent_sleuth/runtime.py,sha256=f3A69_IkFvZn-Rpo6GdpFTkFtVl4Uiwrmy-asKOq-bs,4230
5
+ agent_sleuth/adapters/__init__.py,sha256=LsR9CMsfGuTwZei_94AU75631HPTim3lS_K9BI7EJ7c,145
6
+ agent_sleuth/adapters/decorator.py,sha256=mbPZFi5rYMV-dNuSs-jNnLs5cT8a-JiKQycsbSGCJ0U,1994
7
+ agent_sleuth/adapters/langchain.py,sha256=VmUpzwvF0cEkDtGUWIivdfy5SxRVykB87y3y87fvbYM,4477
8
+ agent_sleuth/core/__init__.py,sha256=XQvlguubplmW9nZAP1cGYno-GCMmxRr3CctK_Pt7RGg,562
9
+ agent_sleuth/core/errors.py,sha256=1Nz0gWSpUbAvbxMAwYyf9aOS7mVItfDeIkDVKBz3o9k,492
10
+ agent_sleuth/core/fingerprint.py,sha256=xxHn8EApmIpNtHdQU3jE2ZnhHvisk-YdV-YcL5i4p_E,5362
11
+ agent_sleuth/core/lineage.py,sha256=3jWBBBbSkEdbbUdlO5eIfHW58r9Bb-VSxpTLOE5xd78,4494
12
+ agent_sleuth/core/policy.py,sha256=1SSzsBPqHOApmuKtwHUFpJzUNL8DWQ0EnGryMkERTzY,4277
13
+ agent_sleuth/core/store.py,sha256=ANwViiuf1uPrim7K8riHUKgkmO72wGgrQu3UyzhNjuc,2915
14
+ agent_sleuth/core/trace.py,sha256=E0vj_y6DEwFfV4cBlCKt0kt2zEWd_rdSTHjWUPI5hXQ,1757
15
+ agent_sleuth/core/values.py,sha256=9AsinGk489IdAjQlfwh0s1z9kveDO1NN9pmWI6iR2SU,1228
16
+ agent_sleuth-0.0.1.dist-info/METADATA,sha256=LsP9cZ1_njB8Qc_604QzJLRe7CWbfiuu00RI63OFdg4,7236
17
+ agent_sleuth-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
18
+ agent_sleuth-0.0.1.dist-info/licenses/LICENSE.md,sha256=EW1yJhY8iaGX_uf56XtYlaL_iRqdBqlMVcO8SEPDzlE,1053
19
+ agent_sleuth-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,7 @@
1
+ Copyright 2026 Behuve
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.