PyPI - agentegrity - Versions diffs - 0.2.0__py3-none-any.whl - Mend

agentegrity 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

agentegrity/__init__.py +27 -0
agentegrity/adapters/__init__.py +10 -0
agentegrity/adapters/base.py +96 -0
agentegrity/adapters/claude.py +460 -0
agentegrity/core/__init__.py +16 -0
agentegrity/core/attestation.py +244 -0
agentegrity/core/evaluator.py +418 -0
agentegrity/core/monitor.py +251 -0
agentegrity/core/profile.py +196 -0
agentegrity/layers/__init__.py +5 -0
agentegrity/layers/adversarial.py +305 -0
agentegrity/layers/cortical.py +440 -0
agentegrity/layers/cortical_llm.py +304 -0
agentegrity/layers/governance.py +392 -0
agentegrity/layers/recovery.py +272 -0
agentegrity/sdk/__init__.py +3 -0
agentegrity/sdk/client.py +237 -0
agentegrity-0.2.0.dist-info/METADATA +328 -0
agentegrity-0.2.0.dist-info/RECORD +21 -0
agentegrity-0.2.0.dist-info/WHEEL +4 -0
agentegrity-0.2.0.dist-info/licenses/LICENSE +189 -0

agentegrity/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Agentegrity Framework - The open standard for AI agent integrity.
+Agentegrity defines what it means for an autonomous AI agent to be whole:
+adversarially coherent, environmentally portable, and verifiably assured.
+"""
+__version__ = "0.2.0"
+from agentegrity.core.attestation import AttestationChain, AttestationRecord
+from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
+from agentegrity.core.monitor import IntegrityMonitor
+from agentegrity.core.profile import AgentProfile, AgentType, DeploymentContext, RiskTier
+from agentegrity.sdk.client import AgentegrityClient
+# Public API of the top-level package: the names bound by
+# `from agentegrity import *`. Every entry is imported above.
+__all__ = [
+    # from agentegrity.core.profile
+    "AgentProfile",
+    "AgentType",
+    "DeploymentContext",
+    "RiskTier",
+    # from agentegrity.core.evaluator
+    "IntegrityEvaluator",
+    "IntegrityScore",
+    # from agentegrity.core.attestation
+    "AttestationRecord",
+    "AttestationChain",
+    # from agentegrity.core.monitor
+    "IntegrityMonitor",
+    # from agentegrity.sdk.client
+    "AgentegrityClient",
+]

agentegrity/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Framework adapters for integrating agentegrity with agent SDKs."""
+from agentegrity.adapters.base import FrameworkAdapter, FrameworkEvent
+from agentegrity.adapters.claude import ClaudeAdapter
+# Names bound by `from agentegrity.adapters import *`; each one is
+# imported above from the base or claude adapter module.
+__all__ = [
+    "ClaudeAdapter",
+    "FrameworkAdapter",
+    "FrameworkEvent",
+]

agentegrity/adapters/base.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""
+Base adapter protocol for framework integrations.
+All framework adapters (Claude Agent SDK, LangGraph, OpenAI Agents SDK,
+CrewAI) implement this Protocol so they can be used interchangeably
+with the agentegrity evaluation pipeline.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Protocol
+from agentegrity.core.evaluator import IntegrityScore
+from agentegrity.core.profile import AgentProfile
+class FrameworkAdapter(Protocol):
+    """Protocol that all framework adapters must implement.
+    An adapter instruments a specific agent framework by:
+    1. Registering hooks/callbacks at framework extension points
+    2. Collecting runtime context from those hooks
+    3. Triggering integrity evaluations with the collected context
+    4. Emitting structured FrameworkEvents for audit trails
+    """
+    @property
+    def name(self) -> str:
+        """Unique name for this adapter (e.g. 'claude', 'langgraph')."""
+        ...
+    @property
+    def profile(self) -> AgentProfile:
+        """The agent profile being monitored."""
+        ...
+    @property
+    def events(self) -> list[FrameworkEvent]:
+        """All events emitted by this adapter during the session."""
+        ...
+    def get_collected_context(self) -> dict[str, Any]:
+        """Return the accumulated runtime context for evaluation."""
+        ...
+    async def on_event(
+        self, event_type: str, event_data: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Handle a framework event and return hook output.
+        Parameters
+        ----------
+        event_type : str
+            The type of event (e.g. "pre_tool_use", "post_tool_use").
+        event_data : dict
+            Framework-specific event data.
+        Returns
+        -------
+        dict
+            Hook output that the framework will process (e.g. permission
+            decisions, additional context). Return {} to allow without
+            modification.
+        """
+        ...
+@dataclass
+class FrameworkEvent:
+    """A structured event emitted by a framework adapter.
+    Every adapter interaction produces a FrameworkEvent for the audit
+    trail. Events include the raw framework data plus any evaluation
+    result that was triggered.
+    """
+    event_type: str
+    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+    adapter_name: str = ""
+    data: dict[str, Any] = field(default_factory=dict)
+    evaluation_result: IntegrityScore | None = None
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "event_type": self.event_type,
+            "timestamp": self.timestamp.isoformat(),
+            "adapter_name": self.adapter_name,
+            "data": self.data,
+            "evaluation_result": (
+                self.evaluation_result.to_dict()
+                if self.evaluation_result
+                else None
+            ),
+        }

agentegrity/adapters/claude.py ADDED Viewed

@@ -0,0 +1,460 @@
+"""
+Claude Agent SDK adapter for agentegrity.
+Instruments agents built on the Claude Agent SDK by registering hooks
+at five integration points: Harness (Stop), Tools (PreToolUse/PostToolUse),
+Session (UserPromptSubmit/PreCompact), Sandbox (file/command boundary
+enforcement), and Orchestration (SubagentStart/SubagentStop).
+Usage:
+    from agentegrity.adapters.claude import ClaudeAdapter
+    from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions
+    adapter = ClaudeAdapter(profile=my_profile)
+    options = ClaudeAgentOptions(hooks=adapter.create_hooks())
+    async with ClaudeSDKClient(options=options) as client:
+        ...
+"""
+from __future__ import annotations
+import logging
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any
+from agentegrity.adapters.base import FrameworkEvent
+from agentegrity.core.attestation import AttestationChain, AttestationRecord, Evidence
+from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
+from agentegrity.core.profile import AgentProfile
+logger = logging.getLogger("agentegrity.adapters.claude")
+@dataclass
+class _ContextBuffer:
+    """Internal buffer that accumulates runtime context from SDK hooks."""
+    inputs: list[str] = field(default_factory=list)
+    tool_calls: list[dict[str, Any]] = field(default_factory=list)
+    tool_outputs: list[dict[str, Any]] = field(default_factory=list)
+    tool_failures: list[dict[str, Any]] = field(default_factory=list)
+    tool_usage: dict[str, int] = field(default_factory=lambda: defaultdict(int))
+    action_distribution: dict[str, int] = field(default_factory=lambda: defaultdict(int))
+    reasoning_chain: list[str] = field(default_factory=list)
+    subagents: list[dict[str, Any]] = field(default_factory=list)
+    def to_evaluation_context(self) -> dict[str, Any]:
+        """Convert accumulated data to the context dict the layers expect."""
+        return {
+            "input": self.inputs[-1] if self.inputs else "",
+            "tool_outputs": self.tool_outputs,
+            "reasoning_chain": self.reasoning_chain,
+            "goals": [],
+            "instructions": [],
+            "memory_reads": [],
+            "action_distribution": dict(self.action_distribution),
+            "tool_usage": dict(self.tool_usage),
+            "action": (
+                self.tool_calls[-1]
+                if self.tool_calls
+                else {"type": "respond"}
+            ),
+        }
+class ClaudeAdapter:
+    """Instruments a Claude Agent SDK agent with agentegrity evaluation.
+    Parameters
+    ----------
+    profile : AgentProfile
+        The agent being monitored.
+    evaluator : IntegrityEvaluator, optional
+        Custom evaluator. If not provided, a default three-layer evaluator
+        is created.
+    enforce : bool
+        If True, governance "block" results cause PreToolUse hooks to
+        return permissionDecision="deny". If False (default), hooks only
+        measure and log — they never block tool execution.
+    api_key : str, optional
+        Anthropic API key for LLM-backed checks. Falls back to
+        ANTHROPIC_API_KEY environment variable.
+    """
+    def __init__(
+        self,
+        profile: AgentProfile,
+        evaluator: IntegrityEvaluator | None = None,
+        enforce: bool = False,
+        api_key: str | None = None,
+    ) -> None:
+        self._profile = profile
+        self._enforce = enforce
+        self._api_key = api_key
+        self._buffer = _ContextBuffer()
+        self._events: list[FrameworkEvent] = []
+        self._chain = AttestationChain()
+        self._evaluation_count = 0
+        if evaluator is not None:
+            self._evaluator = evaluator
+        else:
+            from agentegrity.layers.adversarial import AdversarialLayer
+            from agentegrity.layers.cortical import CorticalLayer
+            from agentegrity.layers.governance import GovernanceLayer
+            self._evaluator = IntegrityEvaluator(
+                layers=[
+                    AdversarialLayer(),
+                    CorticalLayer(),
+                    GovernanceLayer(),
+                ]
+            )
+    @property
+    def name(self) -> str:
+        return "claude"
+    @property
+    def profile(self) -> AgentProfile:
+        return self._profile
+    @property
+    def events(self) -> list[FrameworkEvent]:
+        return list(self._events)
+    @property
+    def attestation_chain(self) -> AttestationChain:
+        return self._chain
+    @property
+    def evaluation_count(self) -> int:
+        return self._evaluation_count
+    def get_collected_context(self) -> dict[str, Any]:
+        return self._buffer.to_evaluation_context()
+    def _emit_event(
+        self,
+        event_type: str,
+        data: dict[str, Any],
+        score: IntegrityScore | None = None,
+    ) -> FrameworkEvent:
+        event = FrameworkEvent(
+            event_type=event_type,
+            adapter_name=self.name,
+            data=data,
+            evaluation_result=score,
+        )
+        self._events.append(event)
+        return event
+    def _run_evaluation(self, context: dict[str, Any] | None = None) -> IntegrityScore:
+        ctx = context or self._buffer.to_evaluation_context()
+        score = self._evaluator.evaluate(self._profile, ctx)
+        self._evaluation_count += 1
+        record = AttestationRecord(
+            agent_id=self._profile.agent_id,
+            integrity_score=score.to_dict(),
+            layer_states={r.layer_name: r.to_dict() for r in score.layer_results},
+            evidence=[
+                Evidence(
+                    evidence_type="layer_result",
+                    source=r.layer_name,
+                    content_hash=str(hash(str(r.to_dict()))),
+                    summary=f"{r.layer_name}: {r.score:.3f} ({r.action})",
+                )
+                for r in score.layer_results
+            ],
+        )
+        self._chain.append(record)
+        return score
+    async def on_event(
+        self, event_type: str, event_data: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Route a framework event to the appropriate handler."""
+        handlers = {
+            "pre_tool_use": self._handle_pre_tool_use,
+            "post_tool_use": self._handle_post_tool_use,
+            "post_tool_use_failure": self._handle_post_tool_use_failure,
+            "user_prompt_submit": self._handle_user_prompt_submit,
+            "stop": self._handle_stop,
+            "subagent_start": self._handle_subagent_start,
+            "subagent_stop": self._handle_subagent_stop,
+            "pre_compact": self._handle_pre_compact,
+        }
+        handler = handlers.get(event_type)
+        if handler:
+            try:
+                return await handler(event_data)
+            except Exception as exc:
+                logger.warning(
+                    "Hook handler %s failed: %s", event_type, exc, exc_info=True
+                )
+        return {}
+    # --- Hook callbacks for create_hooks() ---
+    async def _hook_pre_tool_use(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("pre_tool_use", input_data)
+        except Exception as exc:
+            logger.warning("PreToolUse hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_post_tool_use(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("post_tool_use", input_data)
+        except Exception as exc:
+            logger.warning("PostToolUse hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_post_tool_use_failure(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("post_tool_use_failure", input_data)
+        except Exception as exc:
+            logger.warning("PostToolUseFailure hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_user_prompt_submit(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("user_prompt_submit", input_data)
+        except Exception as exc:
+            logger.warning("UserPromptSubmit hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_stop(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("stop", input_data)
+        except Exception as exc:
+            logger.warning("Stop hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_subagent_start(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("subagent_start", input_data)
+        except Exception as exc:
+            logger.warning("SubagentStart hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_subagent_stop(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("subagent_stop", input_data)
+        except Exception as exc:
+            logger.warning("SubagentStop hook failed: %s", exc, exc_info=True)
+            return {}
+    async def _hook_pre_compact(
+        self,
+        input_data: dict[str, Any],
+        tool_use_id: str | None,
+        context: Any,
+    ) -> dict[str, Any]:
+        try:
+            return await self.on_event("pre_compact", input_data)
+        except Exception as exc:
+            logger.warning("PreCompact hook failed: %s", exc, exc_info=True)
+            return {}
+    # --- Event handlers ---
+    async def _handle_pre_tool_use(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        tool_name = data.get("tool_name", "")
+        tool_input = data.get("tool_input", {})
+        self._buffer.tool_calls.append(
+            {"tool": tool_name, "type": "tool_call", **tool_input}
+        )
+        self._buffer.tool_usage[tool_name] += 1
+        self._buffer.action_distribution["tool_call"] += 1
+        score = self._run_evaluation()
+        self._emit_event("pre_tool_use", data, score)
+        if self._enforce and score.action == "block":
+            return {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "deny",
+                    "permissionDecisionReason": (
+                        f"Agentegrity integrity score {score.composite:.3f} "
+                        f"triggered block action"
+                    ),
+                }
+            }
+        return {}
+    async def _handle_post_tool_use(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        tool_response = data.get("tool_response", "")
+        self._buffer.tool_outputs.append(
+            {"tool": data.get("tool_name", ""), "output": tool_response}
+        )
+        score = self._run_evaluation()
+        self._emit_event("post_tool_use", data, score)
+        return {}
+    async def _handle_post_tool_use_failure(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        self._buffer.tool_failures.append({
+            "tool": data.get("tool_name", ""),
+            "error": data.get("error", ""),
+        })
+        self._emit_event("post_tool_use_failure", data)
+        return {}
+    async def _handle_user_prompt_submit(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        prompt = data.get("prompt", data.get("user_message", ""))
+        if isinstance(prompt, str):
+            self._buffer.inputs.append(prompt)
+        self._buffer.action_distribution["user_prompt"] += 1
+        score = self._run_evaluation()
+        self._emit_event("user_prompt_submit", data, score)
+        return {}
+    async def _handle_stop(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        score = self._run_evaluation()
+        self._emit_event("stop", data, score)
+        return {}
+    async def _handle_subagent_start(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        self._buffer.subagents.append({
+            "agent_id": data.get("agent_id", ""),
+            "started": datetime.now(timezone.utc).isoformat(),
+        })
+        self._emit_event("subagent_start", data)
+        return {}
+    async def _handle_subagent_stop(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        self._buffer.subagents.append({
+            "agent_id": data.get("agent_id", ""),
+            "stopped": datetime.now(timezone.utc).isoformat(),
+            "transcript_path": data.get("agent_transcript_path", ""),
+        })
+        self._emit_event("subagent_stop", data)
+        return {}
+    async def _handle_pre_compact(
+        self, data: dict[str, Any]
+    ) -> dict[str, Any]:
+        # Archive the current reasoning chain before compaction
+        self._emit_event(
+            "pre_compact",
+            {
+                "reasoning_chain_length": len(self._buffer.reasoning_chain),
+                "archived_chain": list(self._buffer.reasoning_chain),
+            },
+        )
+        return {}
+    # --- Public API ---
+    def create_hooks(self) -> dict[str, list[Any]]:
+        """Create Claude Agent SDK hook configuration.
+        Returns a dict suitable for passing to ClaudeAgentOptions(hooks=...).
+        Import HookMatcher at call time to avoid hard dependency on
+        claude-agent-sdk when the adapter module is just imported.
+        """
+        try:
+            from claude_agent_sdk import HookMatcher  # type: ignore[import-not-found]
+        except ImportError:
+            raise ImportError(
+                "claude-agent-sdk is required for the Claude adapter. "
+                "Install it with: pip install agentegrity[claude]"
+            ) from None
+        return {
+            "PreToolUse": [
+                HookMatcher(hooks=[self._hook_pre_tool_use]),
+            ],
+            "PostToolUse": [
+                HookMatcher(hooks=[self._hook_post_tool_use]),
+            ],
+            "PostToolUseFailure": [
+                HookMatcher(hooks=[self._hook_post_tool_use_failure]),
+            ],
+            "UserPromptSubmit": [
+                HookMatcher(hooks=[self._hook_user_prompt_submit]),
+            ],
+            "Stop": [
+                HookMatcher(hooks=[self._hook_stop]),
+            ],
+            "SubagentStart": [
+                HookMatcher(hooks=[self._hook_subagent_start]),
+            ],
+            "SubagentStop": [
+                HookMatcher(hooks=[self._hook_subagent_stop]),
+            ],
+            "PreCompact": [
+                HookMatcher(hooks=[self._hook_pre_compact]),
+            ],
+        }
+    def get_summary(self) -> dict[str, Any]:
+        """Return a summary of the adapter's session."""
+        return {
+            "adapter": self.name,
+            "agent_id": self._profile.agent_id,
+            "evaluations": self._evaluation_count,
+            "events": len(self._events),
+            "attestation_records": len(self._chain.records),
+            "chain_valid": self._chain.verify_chain(),
+            "enforce_mode": self._enforce,
+        }

agentegrity/core/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+from agentegrity.core.attestation import AttestationChain, AttestationRecord
+from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
+from agentegrity.core.monitor import IntegrityMonitor
+from agentegrity.core.profile import AgentProfile, AgentType, DeploymentContext, RiskTier
+# Names bound by `from agentegrity.core import *`; each one is imported
+# above from the attestation, evaluator, monitor or profile module.
+__all__ = [
+    # from agentegrity.core.profile
+    "AgentProfile",
+    "AgentType",
+    "DeploymentContext",
+    "RiskTier",
+    # from agentegrity.core.evaluator
+    "IntegrityEvaluator",
+    "IntegrityScore",
+    # from agentegrity.core.attestation
+    "AttestationRecord",
+    "AttestationChain",
+    # from agentegrity.core.monitor
+    "IntegrityMonitor",
+]