PyPI - federated-agent-audit - Versions diffs - 0.2.0__py3-none-any.whl - Mend

federated-agent-audit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

federated_agent_audit/__init__.py +158 -0
federated_agent_audit/access_control.py +199 -0
federated_agent_audit/blame.py +179 -0
federated_agent_audit/cascade_detector.py +281 -0
federated_agent_audit/channel_auditor.py +286 -0
federated_agent_audit/cli.py +287 -0
federated_agent_audit/commit_reveal.py +97 -0
federated_agent_audit/compositional_leak.py +296 -0
federated_agent_audit/compound_attack.py +380 -0
federated_agent_audit/config.py +166 -0
federated_agent_audit/cross_container.py +301 -0
federated_agent_audit/cross_platform_denanon.py +310 -0
federated_agent_audit/desensitizer.py +469 -0
federated_agent_audit/dp_mechanism.py +180 -0
federated_agent_audit/embeddings.py +107 -0
federated_agent_audit/epoch_chain.py +531 -0
federated_agent_audit/injection_detector.py +428 -0
federated_agent_audit/integrity.py +114 -0
federated_agent_audit/lifecycle.py +262 -0
federated_agent_audit/llm_judge.py +527 -0
federated_agent_audit/local_auditor.py +350 -0
federated_agent_audit/memory_audit.py +351 -0
federated_agent_audit/merkle.py +70 -0
federated_agent_audit/negative_inference.py +246 -0
federated_agent_audit/network_auditor.py +489 -0
federated_agent_audit/privacy_gate.py +171 -0
federated_agent_audit/privacy_loss.py +362 -0
federated_agent_audit/py.typed +0 -0
federated_agent_audit/regulatory_compliance.py +520 -0
federated_agent_audit/reporting/__init__.py +5 -0
federated_agent_audit/reporting/html_report.py +1130 -0
federated_agent_audit/risk_aggregator.py +358 -0
federated_agent_audit/scenario_classifier.py +134 -0
federated_agent_audit/schemas.py +261 -0
federated_agent_audit/sdk/__init__.py +54 -0
federated_agent_audit/sdk/_entry_builder.py +131 -0
federated_agent_audit/sdk/_facade.py +185 -0
federated_agent_audit/sdk/crewai.py +246 -0
federated_agent_audit/sdk/generic.py +93 -0
federated_agent_audit/sdk/intercept.py +547 -0
federated_agent_audit/sdk/langchain.py +261 -0
federated_agent_audit/sdk/multiagent.py +269 -0
federated_agent_audit/semantic_detector.py +405 -0
federated_agent_audit/session_identity.py +277 -0
federated_agent_audit/taint_tracker.py +154 -0
federated_agent_audit/topology.py +283 -0
federated_agent_audit/transport/__init__.py +7 -0
federated_agent_audit/transport/client.py +163 -0
federated_agent_audit/transport/server.py +144 -0
federated_agent_audit/transport/wire.py +62 -0
federated_agent_audit-0.2.0.dist-info/METADATA +359 -0
federated_agent_audit-0.2.0.dist-info/RECORD +56 -0
federated_agent_audit-0.2.0.dist-info/WHEEL +5 -0
federated_agent_audit-0.2.0.dist-info/entry_points.txt +2 -0
federated_agent_audit-0.2.0.dist-info/licenses/LICENSE +189 -0
federated_agent_audit-0.2.0.dist-info/top_level.txt +1 -0

federated_agent_audit/__init__.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""Privacy-preserving audit for multi-agent AI systems.
+Quick start — scan text in one line:
+    from federated_agent_audit import scan
+    result = scan("Zhang Wei's SSN is 123-45-6789 and salary is $185,000")
+    print(result)  # shows what was detected and redacted
+Or protect your OpenAI calls:
+    from federated_agent_audit import firewall
+    fw = firewall(["salary", "SSN"])
+    fw.patch_openai()  # every LLM response is now auto-checked
+"""
+from __future__ import annotations
+# Keep in sync with the released distribution: this module ships inside the
+# federated_agent_audit 0.2.0 wheel, so it must not keep reporting 0.1.0.
+__version__ = "0.2.0"
+from .schemas import (
+    ActionType,
+    AuditEntry,
+    CompositionalRisk,
+    DesensitizedEdge,
+    LocalAuditReport,
+    NetworkAuditResult,
+    PrivacyPolicy,
+    TaintLabel,
+)
+from .sdk import FederatedAudit, LLMFirewall, MultiAgentTracer, audited
+from .local_auditor import LocalAuditor
+from .network_auditor import NetworkAuditor
+from .risk_aggregator import RiskAggregator
+from .reporting import generate_html_report
+from .config import load_policy, load_policies_dir, validate_policy
+from .llm_judge import LLMJudge, JudgeResult, create_judge
+from .compositional_leak import CompositionalLeakDetector, CompositionSignal
+from .memory_audit import MemoryAuditor, MemoryAnomaly
+from .cross_platform_denanon import CrossPlatformDetector, DeanonRisk
+from .cascade_detector import CascadeDetector, CascadeEvent
+from .regulatory_compliance import ComplianceEngine, ComplianceReport, ComplianceStatus
+# Public API of the package. Every name listed here is either imported above
+# or (for "scan" and "firewall") defined further down in this module.
+__all__ = [
+    # Core facade
+    "FederatedAudit",
+    "LLMFirewall",
+    "MultiAgentTracer",
+    "audited",
+    # LLM-as-Judge
+    "LLMJudge",
+    "JudgeResult",
+    "create_judge",
+    # Schemas
+    "PrivacyPolicy",
+    "AuditEntry",
+    "ActionType",
+    "TaintLabel",
+    "DesensitizedEdge",
+    "LocalAuditReport",
+    "NetworkAuditResult",
+    "CompositionalRisk",
+    # Auditors
+    "LocalAuditor",
+    "NetworkAuditor",
+    "RiskAggregator",
+    # Reporting
+    "generate_html_report",
+    # Config
+    "load_policy",
+    "load_policies_dir",
+    "validate_policy",
+    # Five Structural Threat Detectors
+    "CompositionalLeakDetector",
+    "CompositionSignal",
+    "MemoryAuditor",
+    "MemoryAnomaly",
+    "CrossPlatformDetector",
+    "DeanonRisk",
+    "CascadeDetector",
+    "CascadeEvent",
+    "ComplianceEngine",
+    "ComplianceReport",
+    "ComplianceStatus",
+    # Quick-start shortcuts
+    "scan",
+    "firewall",
+]
+# ── Quick-start shortcuts ────────────────────────────────────────
+def scan(
+    text: str,
+    protect: list[str] | None = None,
+    mode: str = "redact",
+) -> dict:
+    """Run a one-off privacy check on ``text``. Zero setup required.
+    Builds a throwaway ``PrivacyPolicy`` for the pseudo-agent ``"_scan"``
+    and runs the text through ``LLMFirewall.check``.
+    Args:
+        text: Text to check for sensitive content.
+        protect: Sensitive terms to watch for (e.g. ["salary", "SSN"]).
+                 When None, a built-in list of common PII and sensitive
+                 categories is used instead.
+        mode: Forwarded to ``LLMFirewall``: "redact" (replace sensitive
+              content) or "block" (reject entirely).
+    Returns:
+        dict with keys: clean (True only when the text was neither blocked
+        nor redacted), text (final text reported by the firewall),
+        detected (list of matched rules), original (original text as
+        reported by the firewall), blocked (whether the text was blocked).
+    Example:
+        >>> from federated_agent_audit import scan
+        >>> r = scan("Her salary is $185,000")
+        >>> r["clean"]
+        False
+        >>> r["text"]
+        'Her [REDACTED] is [REDACTED]'
+    """
+    watch_terms = protect
+    if watch_terms is None:
+        # Nothing specified by the caller: fall back to common PII and
+        # sensitive categories.
+        watch_terms = [
+            "SSN", "email", "phone", "credit card", "salary",
+            "password", "address", "passport", "bank account",
+            "diagnosis", "medical record", "prescription",
+            "date of birth", "driver's license",
+        ]
+    outcome = LLMFirewall(
+        PrivacyPolicy(agent_id="_scan", must_not_share=watch_terms),
+        mode=mode,
+    ).check(text)
+    return {
+        "clean": not (outcome.was_blocked or outcome.was_redacted),
+        "text": outcome.final_text,
+        "detected": outcome.matched_rules,
+        "original": outcome.original_text,
+        "blocked": outcome.was_blocked,
+    }
+def firewall(
+    protect: list[str],
+    mode: str = "redact",
+    **kwargs,
+) -> LLMFirewall:
+    """Build an ``LLMFirewall`` from a list of sensitive terms.
+    The terms become the ``must_not_share`` list of a ``PrivacyPolicy``
+    created for the pseudo-agent ``"_firewall"``.
+    Args:
+        protect: Sensitive terms to watch for (e.g. ["salary", "SSN"]).
+        mode: "redact" or "block".
+        **kwargs: Forwarded unchanged to the ``LLMFirewall`` constructor.
+    Returns:
+        LLMFirewall instance. Call .patch_openai() or .patch_anthropic() to activate.
+    Example:
+        >>> from federated_agent_audit import firewall
+        >>> fw = firewall(["salary", "SSN", "diagnosis"])
+        >>> fw.patch_openai()  # done — every OpenAI response is now checked
+    """
+    return LLMFirewall(
+        PrivacyPolicy(agent_id="_firewall", must_not_share=protect),
+        mode=mode,
+        **kwargs,
+    )

federated_agent_audit/access_control.py ADDED Viewed

@@ -0,0 +1,199 @@
+"""Mandatory Access Control (MAC) for agent privilege escalation detection.
+Implements a role-based + mandatory access control framework for
+multi-agent systems. Detects and prevents privilege escalation where
+an agent attempts actions beyond its authorized scope.
+Models three types of escalation (from "Taming Privilege Escalation
+in LLM-Based Agent Systems", arXiv 2601.11893):
+1. Vertical: agent gains higher-privilege capabilities
+2. Horizontal: agent accesses another user's resources
+3. Delegation: agent passes capabilities it shouldn't to sub-agents
+Design:
+- Each agent has a set of allowed capabilities (tools, domains, actions)
+- Each resource has a security label (sensitivity level + domain)
+- Access is granted only if agent's clearance dominates resource's label
+  (Bell-LaPadula: no-read-up, no-write-down)
+References:
+- Bell-LaPadula 1973: mandatory access control model
+- arXiv 2601.11893: privilege escalation in LLM agent systems
+- TrustAgent Survey §tool_module: manipulation, abuse
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from datetime import datetime, timezone
+class AccessDecision(str, Enum):
+    """Outcome of an access control check."""
+    # Request passed every check in AccessController.check_access.
+    ALLOW = "allow"
+    # Used by AccessController.check_access when the requesting agent is not registered.
+    DENY = "deny"
+    # Used for every denial issued through AccessController._deny.
+    ESCALATION_BLOCKED = "escalation_blocked"
+class EscalationType(str, Enum):
+    """Kind of privilege escalation attributed to a denied request."""
+    NONE = "none"               # no escalation recorded (default on AccessResult)
+    VERTICAL = "vertical"       # gaining higher privilege
+    HORIZONTAL = "horizontal"   # accessing another user's scope
+    DELEGATION = "delegation"   # passing unauthorized caps to sub-agent
+@dataclass
+class SecurityLabel:
+    """Security classification for a resource or action."""
+    level: int = 0                # sensitivity: 0 (public) to 5 (top secret)
+    domains: set[str] = field(default_factory=set)  # domains required to access it, e.g. {"health", "finance"}
+    owner_id: str = ""            # which user owns this resource; "" means no owner is recorded
+@dataclass
+class AgentClearance:
+    """What an agent is allowed to access."""
+    agent_id: str                 # key under which AccessController stores this clearance
+    user_id: str                  # user the agent acts for; compared with SecurityLabel.owner_id
+    max_level: int = 3            # clearance level; compared with the resource level on read and write
+    allowed_domains: set[str] = field(default_factory=set)  # must cover every domain on the resource label
+    allowed_tools: set[str] = field(default_factory=set)  # an empty set places no restriction on "execute" requests
+    # NOTE(review): nothing in this module reads allowed_actions; confirm where it is enforced.
+    allowed_actions: set[str] = field(default_factory=set)
+    can_delegate: bool = False    # can this agent delegate to sub-agents?
+    delegatable_tools: set[str] = field(default_factory=set)  # tools that may be named in a "delegate" request
+@dataclass
+class AccessRequest:
+    """An agent's request to access a resource or perform an action."""
+    agent_id: str             # requesting agent; must be registered with the controller
+    action: str               # "read", "write", "execute", "delegate"
+    resource_label: SecurityLabel  # classification of the resource being accessed
+    tool_name: str = ""       # tool involved; checked for "execute" and "delegate" requests
+    target_agent_id: str = ""  # for delegation
+@dataclass
+class AccessResult:
+    """Result of an access control check."""
+    request: AccessRequest    # the request this decision answers
+    decision: AccessDecision
+    escalation_type: EscalationType = EscalationType.NONE  # stays NONE unless an escalation was blocked
+    reason: str = ""          # human-readable explanation; empty when access was allowed
+    # Creation time of the result, timezone-aware in UTC.
+    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+class AccessController:
+    """Mandatory Access Control engine for agent systems.
+    Enforces Bell-LaPadula properties:
+    - Simple security (no-read-up): agent can't read above its clearance
+    - Star property (no-write-down): agent can't write below its clearance
+      (prevents leaking high-sensitivity data to low-sensitivity channels)
+    Every decision, allowed or denied, is appended to an internal audit log.
+    NOTE(review): ``AgentClearance.allowed_actions`` is not consulted by any
+    check in this class; confirm whether it is meant to be enforced here.
+    """
+    def __init__(self) -> None:
+        # agent_id -> registered clearance
+        self._clearances: dict[str, AgentClearance] = {}
+        # chronological record of every check_access() outcome
+        self._audit_log: list[AccessResult] = []
+    def register_agent(self, clearance: AgentClearance) -> None:
+        """Register (or replace) the clearance stored under ``clearance.agent_id``."""
+        self._clearances[clearance.agent_id] = clearance
+    def check_access(self, request: AccessRequest) -> AccessResult:
+        """Check if an access request is permitted.
+        Applies Bell-LaPadula + domain-based + tool-based checks, in this
+        order: registration, read/write level, domains, resource owner,
+        tool (for "execute"), delegation rules (for "delegate"). The first
+        failing check decides the result.
+        Args:
+            request: The access request to evaluate.
+        Returns:
+            An AccessResult whose decision is DENY for an unregistered
+            requesting agent, ESCALATION_BLOCKED (with the escalation type
+            and a reason) for any other failed check, and ALLOW otherwise.
+            The result is also appended to the audit log.
+        """
+        clearance = self._clearances.get(request.agent_id)
+        if clearance is None:
+            result = AccessResult(
+                request=request,
+                decision=AccessDecision.DENY,
+                reason=f"agent {request.agent_id} not registered",
+            )
+            self._audit_log.append(result)
+            return result
+        # --- Vertical Escalation: level check ---
+        if request.action == "read":
+            # no-read-up: can't read above clearance
+            if request.resource_label.level > clearance.max_level:
+                return self._deny(request, EscalationType.VERTICAL,
+                    f"read level {request.resource_label.level} > clearance {clearance.max_level}")
+        if request.action == "write":
+            # no-write-down: can't write to lower level (prevents data leaking down)
+            if request.resource_label.level < clearance.max_level:
+                return self._deny(request, EscalationType.VERTICAL,
+                    f"write to level {request.resource_label.level} < clearance {clearance.max_level} (no-write-down)")
+        # --- Domain check ---
+        required_domains = request.resource_label.domains
+        if required_domains and not required_domains.issubset(clearance.allowed_domains):
+            missing = required_domains - clearance.allowed_domains
+            return self._deny(request, EscalationType.VERTICAL,
+                f"missing domain access: {missing}")
+        # --- Horizontal Escalation: user boundary ---
+        if request.resource_label.owner_id:
+            if request.resource_label.owner_id != clearance.user_id:
+                return self._deny(request, EscalationType.HORIZONTAL,
+                    f"cross-user access: agent user={clearance.user_id}, "
+                    f"resource owner={request.resource_label.owner_id}")
+        # --- Tool check ---
+        if request.action == "execute" and request.tool_name:
+            # An empty allowed_tools set places no restriction on tools.
+            if clearance.allowed_tools and request.tool_name not in clearance.allowed_tools:
+                return self._deny(request, EscalationType.VERTICAL,
+                    f"tool {request.tool_name} not in allowed tools")
+        # --- Delegation Escalation ---
+        if request.action == "delegate":
+            if not clearance.can_delegate:
+                return self._deny(request, EscalationType.DELEGATION,
+                    f"agent {request.agent_id} not authorized to delegate")
+            if request.tool_name and request.tool_name not in clearance.delegatable_tools:
+                return self._deny(request, EscalationType.DELEGATION,
+                    f"tool {request.tool_name} not in delegatable tools")
+            # check that target agent exists and has <= clearance
+            target = self._clearances.get(request.target_agent_id)
+            if target is None:
+                # A named but unregistered target has no clearance to compare
+                # against the delegator, so fail closed rather than let the
+                # delegation through unchecked. A request that names no target
+                # at all keeps the previous behaviour: only the delegator's
+                # own rights are validated.
+                if request.target_agent_id:
+                    return self._deny(request, EscalationType.DELEGATION,
+                        f"delegation target {request.target_agent_id} not registered")
+            elif target.max_level > clearance.max_level:
+                return self._deny(request, EscalationType.DELEGATION,
+                    f"delegating to agent with higher clearance: "
+                    f"{target.max_level} > {clearance.max_level}")
+        # --- Allowed ---
+        result = AccessResult(
+            request=request,
+            decision=AccessDecision.ALLOW,
+        )
+        self._audit_log.append(result)
+        return result
+    def _deny(self, request: AccessRequest, esc_type: EscalationType, reason: str) -> AccessResult:
+        """Build an ESCALATION_BLOCKED result, append it to the audit log and return it."""
+        result = AccessResult(
+            request=request,
+            decision=AccessDecision.ESCALATION_BLOCKED,
+            escalation_type=esc_type,
+            reason=reason,
+        )
+        self._audit_log.append(result)
+        return result
+    @property
+    def audit_log(self) -> list[AccessResult]:
+        """Shallow copy of the audit log, so callers cannot mutate the internal list."""
+        return self._audit_log[:]
+    def escalation_summary(self) -> dict[str, int]:
+        """Count escalation attempts by type.
+        Returns:
+            Mapping of escalation type value (e.g. "vertical") to the number
+            of logged results with that type; results whose type is NONE are
+            not counted.
+        """
+        counts: dict[str, int] = {}
+        for r in self._audit_log:
+            if r.escalation_type != EscalationType.NONE:
+                key = r.escalation_type.value
+                counts[key] = counts.get(key, 0) + 1
+        return counts

federated_agent_audit/blame.py ADDED Viewed

@@ -0,0 +1,179 @@
+"""Causal blame attribution on desensitized data.
+Re-implements the blame algorithm from multi-agent-tracing
+within the federated privacy model. The central auditor never
+sees raw text — blame is determined from structural signals:
+1. local_violation flag on edges (agent's own auditor flagged it)
+2. Sensitivity amplification (outgoing sensitivity > incoming)
+3. Domain expansion (new sensitive domains appeared)
+Inspired by multi-agent-tracing's causal_graph.blame() but
+adapted to work on DesensitizedEdge metadata only.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import networkx as nx
+from .schemas import CompositionalRisk
+@dataclass
+class BlameResult:
+    """Attribution result for a single compositional risk."""
+    risk_id: str             # id of the CompositionalRisk this result belongs to
+    blame_agent: str         # agent most responsible
+    blame_hop: int           # position of blame_agent in the chain (0-indexed)
+    blame_reason: str        # why this agent was blamed
+    chain: list[str]         # agent chain that was examined, source first
+    confidence: float        # 0-1, how confident the attribution is
+def blame_risk(
+    risk: CompositionalRisk,
+    graph: nx.DiGraph,
+) -> BlameResult | None:
+    """Pick the agent most responsible for a compositional risk.
+    The agent chain is resolved with ``_find_chain`` and its hops are
+    inspected from the sink end toward the source. The sender of the first
+    hop (in that backward order) showing one of these signals is blamed:
+    a. ``local_violation`` is set on the hop's edge (confidence 0.9)
+    b. the edge's ``sensitivity_level`` exceeds the sender's
+       highest-sensitivity incoming edge and is at least 3 (confidence 0.7)
+    c. the edge carries a health/finance/legal/identity domain that is
+       absent from that incoming edge (confidence 0.6)
+    Hops that have no forward edge in ``graph`` are skipped. When no hop
+    matches, the first agent of the chain is blamed with confidence 0.3.
+    Returns None when the risk lists fewer than two agents or the resolved
+    chain holds fewer than two agents.
+    """
+    involved = risk.involved_agents
+    if len(involved) < 2:
+        return None
+    chain = _find_chain(involved, graph)
+    if len(chain) < 2:
+        return None
+    # Default verdict: (agent, hop, reason, confidence) for the chain's source.
+    verdict = (chain[0], 0, "source of data flow", 0.3)
+    # Visit hops sink-first: hop index runs from len(chain) - 2 down to 0.
+    for hop in reversed(range(len(chain) - 1)):
+        sender, receiver = chain[hop], chain[hop + 1]
+        if not graph.has_edge(sender, receiver):
+            continue
+        outgoing = graph.edges[sender, receiver]
+        incoming = _get_best_incoming(graph, sender)
+        # Signal a: the sender's own auditor already flagged this edge.
+        if outgoing.get("local_violation", False):
+            verdict = (
+                sender,
+                hop,
+                "local auditor flagged violation on outgoing edge",
+                0.9,
+            )
+            break
+        # Signal b: what leaves the sender is more sensitive than what came in.
+        out_level = outgoing.get("sensitivity_level", 0)
+        in_level = incoming.get("sensitivity_level", 0)
+        if out_level > in_level and out_level >= 3:
+            verdict = (
+                sender,
+                hop,
+                f"sensitivity amplified from {in_level} to {out_level}",
+                0.7,
+            )
+            break
+        # Signal c: a sensitive domain appears that was not on the incoming edge.
+        out_domains = set(outgoing.get("domains", []))
+        in_domains = set(incoming.get("domains", []))
+        sensitive_new = (out_domains - in_domains) & {
+            "health", "finance", "legal", "identity",
+        }
+        if sensitive_new:
+            verdict = (
+                sender,
+                hop,
+                f"introduced sensitive domains: {sensitive_new}",
+                0.6,
+            )
+            break
+    agent, hop_index, reason, confidence = verdict
+    return BlameResult(
+        risk_id=risk.risk_id,
+        blame_agent=agent,
+        blame_hop=hop_index,
+        blame_reason=reason,
+        chain=chain,
+        confidence=confidence,
+    )
+def blame_all(
+    risks: list[CompositionalRisk],
+    graph: nx.DiGraph,
+) -> dict[str, BlameResult]:
+    """Run ``blame_risk`` over every risk and collect the attributions.
+    Risks for which ``blame_risk`` returns None are left untouched and are
+    absent from the result. For every other risk the ``blame_agent``,
+    ``blame_hop`` and ``blame_reason`` fields are overwritten in-place.
+    Returns:
+        Mapping of risk_id -> BlameResult.
+    """
+    attributed: dict[str, BlameResult] = {}
+    for risk in risks:
+        verdict = blame_risk(risk, graph)
+        if verdict is None:
+            continue
+        # Stamp the attribution onto the risk object itself.
+        risk.blame_agent = verdict.blame_agent
+        risk.blame_hop = verdict.blame_hop
+        risk.blame_reason = verdict.blame_reason
+        attributed[risk.risk_id] = verdict
+    return attributed
+def _find_chain(agents: list[str], graph: nx.DiGraph) -> list[str]:
+    """Resolve the involved agents into a chain of graph nodes.
+    Three strategies are tried in order:
+    1. If every consecutive pair of agents exists in the graph and is joined
+       by an edge in either direction, the list is returned as given.
+    2. Otherwise, if the first and last agents are graph nodes, the shortest
+       directed path between them is returned.
+    3. Otherwise (or when no such path exists), the agents that are graph
+       nodes are returned in their original order.
+    """
+    # Strategy 1: the list already walks the graph hop by hop.
+    if all(
+        graph.has_node(left)
+        and graph.has_node(right)
+        and (graph.has_edge(left, right) or graph.has_edge(right, left))
+        for left, right in zip(agents, agents[1:])
+    ):
+        return agents
+    # Strategy 2: connect the two endpoints through the graph.
+    endpoints_known = (
+        len(agents) >= 2
+        and graph.has_node(agents[0])
+        and graph.has_node(agents[-1])
+    )
+    if endpoints_known:
+        try:
+            return list(nx.shortest_path(graph, agents[0], agents[-1]))
+        except nx.NetworkXNoPath:
+            pass
+    # Strategy 3: keep whatever agents the graph knows about.
+    return list(filter(graph.has_node, agents))
+def _get_best_incoming(graph: nx.DiGraph, agent: str) -> dict:
+    """Return the data dict of the agent's most sensitive incoming edge.
+    Edges without a ``sensitivity_level`` count as level 0. On a tie the
+    edge seen first wins. An empty dict is returned when the agent has no
+    incoming edge.
+    """
+    top_data: dict = {}
+    top_level = -1
+    for _src, _dst, attrs in graph.in_edges(agent, data=True):
+        level = attrs.get("sensitivity_level", 0)
+        if not level > top_level:
+            continue
+        top_level, top_data = level, attrs
+    return top_data