agentegrity 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ """
2
+ Agentegrity Framework - The open standard for AI agent integrity.
3
+
4
+ Agentegrity defines what it means for an autonomous AI agent to be whole:
5
+ adversarially coherent, environmentally portable, and verifiably assured.
6
+ """
7
+
8
+ __version__ = "0.2.0"
9
+
10
+ from agentegrity.core.attestation import AttestationChain, AttestationRecord
11
+ from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
12
+ from agentegrity.core.monitor import IntegrityMonitor
13
+ from agentegrity.core.profile import AgentProfile, AgentType, DeploymentContext, RiskTier
14
+ from agentegrity.sdk.client import AgentegrityClient
15
+
16
+ __all__ = [
17
+ "AgentProfile",
18
+ "AgentType",
19
+ "DeploymentContext",
20
+ "RiskTier",
21
+ "IntegrityEvaluator",
22
+ "IntegrityScore",
23
+ "AttestationRecord",
24
+ "AttestationChain",
25
+ "IntegrityMonitor",
26
+ "AgentegrityClient",
27
+ ]
@@ -0,0 +1,10 @@
1
+ """Framework adapters for integrating agentegrity with agent SDKs."""
2
+
3
+ from agentegrity.adapters.base import FrameworkAdapter, FrameworkEvent
4
+ from agentegrity.adapters.claude import ClaudeAdapter
5
+
6
+ __all__ = [
7
+ "ClaudeAdapter",
8
+ "FrameworkAdapter",
9
+ "FrameworkEvent",
10
+ ]
@@ -0,0 +1,96 @@
1
+ """
2
+ Base adapter protocol for framework integrations.
3
+
4
+ All framework adapters (Claude Agent SDK, LangGraph, OpenAI Agents SDK,
5
+ CrewAI) implement this Protocol so they can be used interchangeably
6
+ with the agentegrity evaluation pipeline.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from typing import Any, Protocol
14
+
15
+ from agentegrity.core.evaluator import IntegrityScore
16
+ from agentegrity.core.profile import AgentProfile
17
+
18
+
19
class FrameworkAdapter(Protocol):
    """Structural interface shared by every framework adapter.

    Any object exposing the members below can be used wherever a
    ``FrameworkAdapter`` is expected; no inheritance is required.
    An adapter is expected to:

    1. Register hooks/callbacks at the framework's extension points
    2. Collect runtime context from those hooks
    3. Trigger integrity evaluations with the collected context
    4. Emit structured FrameworkEvents for audit trails
    """

    @property
    def name(self) -> str:
        """Unique adapter identifier, e.g. ``'claude'`` or ``'langgraph'``."""
        ...

    @property
    def profile(self) -> AgentProfile:
        """Profile of the agent this adapter monitors."""
        ...

    @property
    def events(self) -> list[FrameworkEvent]:
        """Events the adapter has emitted so far in this session."""
        ...

    def get_collected_context(self) -> dict[str, Any]:
        """Return the runtime context accumulated for evaluation."""
        ...

    async def on_event(
        self, event_type: str, event_data: dict[str, Any]
    ) -> dict[str, Any]:
        """Process one framework event and produce the hook output.

        Parameters
        ----------
        event_type : str
            Kind of event, e.g. "pre_tool_use" or "post_tool_use".
        event_data : dict
            Payload supplied by the framework; its shape is
            framework-specific.

        Returns
        -------
        dict
            Output handed back to the framework (e.g. permission
            decisions, additional context). An empty dict means
            "allow without modification".
        """
        ...
68
+
69
+
70
@dataclass
class FrameworkEvent:
    """A structured event emitted by a framework adapter.

    Every adapter interaction produces a FrameworkEvent for the audit
    trail. An event carries the framework data it was created with plus
    the evaluation result, if an evaluation was triggered.
    """

    # Event kind, e.g. "pre_tool_use".
    event_type: str
    # Creation time; defaults to the current time as a timezone-aware UTC value.
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    # Name of the adapter that emitted the event.
    adapter_name: str = ""
    # Event payload as supplied by the emitter.
    data: dict[str, Any] = field(default_factory=dict)
    # Score attached to the event, or None when no evaluation ran.
    evaluation_result: IntegrityScore | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the event.

        The timestamp is rendered with ``datetime.isoformat()`` and the
        evaluation result with its own ``to_dict()``. ``data`` is returned
        by reference, not copied.

        Returns
        -------
        dict
            Keys: "event_type", "timestamp", "adapter_name", "data",
            "evaluation_result" (None when no evaluation is attached).
        """
        result = self.evaluation_result
        return {
            "event_type": self.event_type,
            "timestamp": self.timestamp.isoformat(),
            "adapter_name": self.adapter_name,
            "data": self.data,
            # Compare against None explicitly: a score object that happens
            # to be falsy must still be serialised, not dropped.
            "evaluation_result": result.to_dict() if result is not None else None,
        }
@@ -0,0 +1,460 @@
1
+ """
2
+ Claude Agent SDK adapter for agentegrity.
3
+
4
+ Instruments agents built on the Claude Agent SDK by registering hooks
5
+ at five integration points: Harness (Stop), Tools (PreToolUse/PostToolUse),
6
+ Session (UserPromptSubmit/PreCompact), Sandbox (file/command boundary
7
+ enforcement), and Orchestration (SubagentStart/SubagentStop).
8
+
9
+ Usage:
10
+ from agentegrity.adapters.claude import ClaudeAdapter
11
+ from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions
12
+
13
+ adapter = ClaudeAdapter(profile=my_profile)
14
+ options = ClaudeAgentOptions(hooks=adapter.create_hooks())
15
+ async with ClaudeSDKClient(options=options) as client:
16
+ ...
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import logging
22
+ from collections import defaultdict
23
+ from dataclasses import dataclass, field
24
+ from datetime import datetime, timezone
25
+ from typing import Any
26
+
27
+ from agentegrity.adapters.base import FrameworkEvent
28
+ from agentegrity.core.attestation import AttestationChain, AttestationRecord, Evidence
29
+ from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
30
+ from agentegrity.core.profile import AgentProfile
31
+
32
+ logger = logging.getLogger("agentegrity.adapters.claude")
33
+
34
+
35
@dataclass
class _ContextBuffer:
    """Scratch space holding the runtime context gathered from SDK hooks.

    All fields start empty; the two counters are ``defaultdict(int)`` so
    they can be incremented without initialising keys first.
    """

    # Submitted prompts, oldest first.
    inputs: list[str] = field(default_factory=list)
    # One dict per tool invocation seen.
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    # One dict per tool result seen.
    tool_outputs: list[dict[str, Any]] = field(default_factory=list)
    # One dict per failed tool invocation.
    tool_failures: list[dict[str, Any]] = field(default_factory=list)
    # Invocation count keyed by tool name.
    tool_usage: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # Count keyed by action kind.
    action_distribution: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    reasoning_chain: list[str] = field(default_factory=list)
    subagents: list[dict[str, Any]] = field(default_factory=list)

    def to_evaluation_context(self) -> dict[str, Any]:
        """Build the context dict handed to the evaluation layers.

        "input" is the most recent prompt ("" when none), "action" is the
        most recent tool call (``{"type": "respond"}`` when none). The two
        counters are copied into plain dicts; "tool_outputs" and
        "reasoning_chain" are the buffer's own lists, not copies.
        """
        latest_input = self.inputs[-1] if self.inputs else ""

        if self.tool_calls:
            current_action = self.tool_calls[-1]
        else:
            current_action = {"type": "respond"}

        context: dict[str, Any] = {
            "input": latest_input,
            "tool_outputs": self.tool_outputs,
            "reasoning_chain": self.reasoning_chain,
            # Not tracked by this buffer; always emitted empty.
            "goals": [],
            "instructions": [],
            "memory_reads": [],
            "action_distribution": dict(self.action_distribution),
            "tool_usage": dict(self.tool_usage),
            "action": current_action,
        }
        return context
65
+
66
+
67
class ClaudeAdapter:
    """Instrument a Claude Agent SDK agent with agentegrity evaluation.

    The adapter accumulates runtime context from SDK hooks in an internal
    buffer, runs the evaluator at selected hooks, appends one attestation
    record per evaluation, and keeps a log of FrameworkEvents.

    Parameters
    ----------
    profile : AgentProfile
        The agent being monitored.
    evaluator : IntegrityEvaluator, optional
        Custom evaluator. If not provided, an evaluator with the
        adversarial, cortical and governance layers is created.
    enforce : bool
        If True, a "block" action on the evaluated score causes the
        PreToolUse hook to return permissionDecision="deny". If False
        (default), hooks only measure and log; they never block tool
        execution.
    api_key : str, optional
        Anthropic API key for LLM-backed checks. This class only stores
        it. NOTE(review): no ANTHROPIC_API_KEY environment fallback is
        implemented here; confirm where the key is consumed.
    """

    def __init__(
        self,
        profile: AgentProfile,
        evaluator: IntegrityEvaluator | None = None,
        enforce: bool = False,
        api_key: str | None = None,
    ) -> None:
        self._profile = profile
        self._enforce = enforce
        self._api_key = api_key
        self._buffer = _ContextBuffer()
        self._events: list[FrameworkEvent] = []
        self._chain = AttestationChain()
        self._evaluation_count = 0

        if evaluator is not None:
            self._evaluator = evaluator
        else:
            # The layer modules are imported only on this default path.
            from agentegrity.layers.adversarial import AdversarialLayer
            from agentegrity.layers.cortical import CorticalLayer
            from agentegrity.layers.governance import GovernanceLayer

            self._evaluator = IntegrityEvaluator(
                layers=[
                    AdversarialLayer(),
                    CorticalLayer(),
                    GovernanceLayer(),
                ]
            )

    @property
    def name(self) -> str:
        """Adapter identifier; always ``"claude"``."""
        return "claude"

    @property
    def profile(self) -> AgentProfile:
        """The agent profile passed at construction."""
        return self._profile

    @property
    def events(self) -> list[FrameworkEvent]:
        """A copy of the event log, so callers cannot mutate the original."""
        return list(self._events)

    @property
    def attestation_chain(self) -> AttestationChain:
        """The chain that receives one record per evaluation."""
        return self._chain

    @property
    def evaluation_count(self) -> int:
        """Number of evaluations run so far."""
        return self._evaluation_count

    def get_collected_context(self) -> dict[str, Any]:
        """Return the buffered runtime context in evaluation-context form."""
        return self._buffer.to_evaluation_context()

    def _emit_event(
        self,
        event_type: str,
        data: dict[str, Any],
        score: IntegrityScore | None = None,
    ) -> FrameworkEvent:
        """Create a FrameworkEvent, append it to the log and return it."""
        event = FrameworkEvent(
            event_type=event_type,
            adapter_name=self.name,
            data=data,
            evaluation_result=score,
        )
        self._events.append(event)
        return event

    @staticmethod
    def _content_hash(payload: Any) -> str:
        """Return a reproducible SHA-256 hex digest of *payload*.

        The built-in ``hash()`` of a string is salted per interpreter
        process, so it cannot identify content across runs. The payload is
        instead serialised to key-sorted JSON (non-JSON values rendered
        with ``str``) and digested. If JSON serialisation is impossible,
        ``str(payload)`` is digested instead so hashing never raises.
        """
        # Local imports keep this block self-contained in the module.
        import hashlib
        import json

        try:
            canonical = json.dumps(payload, sort_keys=True, default=str)
        except (TypeError, ValueError):
            canonical = str(payload)
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    def _run_evaluation(self, context: dict[str, Any] | None = None) -> IntegrityScore:
        """Evaluate the agent and append an attestation record.

        Parameters
        ----------
        context : dict, optional
            Evaluation context. When None, the buffered context is used.
            An explicitly passed empty dict is honoured as-is.

        Returns
        -------
        IntegrityScore
            The score produced by the evaluator.
        """
        ctx = self._buffer.to_evaluation_context() if context is None else context
        score = self._evaluator.evaluate(self._profile, ctx)
        self._evaluation_count += 1

        layer_states: dict[str, Any] = {}
        evidence: list[Evidence] = []
        for result in score.layer_results:
            payload = result.to_dict()
            layer_states[result.layer_name] = payload
            evidence.append(
                Evidence(
                    evidence_type="layer_result",
                    source=result.layer_name,
                    content_hash=self._content_hash(payload),
                    summary=f"{result.layer_name}: {result.score:.3f} ({result.action})",
                )
            )

        record = AttestationRecord(
            agent_id=self._profile.agent_id,
            integrity_score=score.to_dict(),
            layer_states=layer_states,
            evidence=evidence,
        )
        self._chain.append(record)

        return score

    async def on_event(
        self, event_type: str, event_data: dict[str, Any]
    ) -> dict[str, Any]:
        """Route a framework event to the appropriate handler.

        Unknown event types return ``{}``. Any exception raised by a
        handler is logged at WARNING level and ``{}`` is returned.
        NOTE(review): this means a failing handler allows the action even
        when ``enforce`` is True; confirm fail-open is intended.
        """
        handlers = {
            "pre_tool_use": self._handle_pre_tool_use,
            "post_tool_use": self._handle_post_tool_use,
            "post_tool_use_failure": self._handle_post_tool_use_failure,
            "user_prompt_submit": self._handle_user_prompt_submit,
            "stop": self._handle_stop,
            "subagent_start": self._handle_subagent_start,
            "subagent_stop": self._handle_subagent_stop,
            "pre_compact": self._handle_pre_compact,
        }
        handler = handlers.get(event_type)
        if handler is None:
            return {}
        try:
            return await handler(event_data)
        except Exception as exc:
            logger.warning(
                "Hook handler %s failed: %s", event_type, exc, exc_info=True
            )
            return {}

    # --- Hook callbacks for create_hooks() ---
    #
    # Each callback takes the (input_data, tool_use_id, context) triple and
    # forwards only input_data; tool_use_id and context are unused.

    async def _dispatch_hook(
        self, event_type: str, hook_name: str, input_data: dict[str, Any]
    ) -> dict[str, Any]:
        """Forward a hook call to ``on_event``, never letting it raise."""
        try:
            return await self.on_event(event_type, input_data)
        except Exception as exc:
            logger.warning("%s hook failed: %s", hook_name, exc, exc_info=True)
            return {}

    async def _hook_pre_tool_use(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("pre_tool_use", "PreToolUse", input_data)

    async def _hook_post_tool_use(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("post_tool_use", "PostToolUse", input_data)

    async def _hook_post_tool_use_failure(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook(
            "post_tool_use_failure", "PostToolUseFailure", input_data
        )

    async def _hook_user_prompt_submit(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook(
            "user_prompt_submit", "UserPromptSubmit", input_data
        )

    async def _hook_stop(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("stop", "Stop", input_data)

    async def _hook_subagent_start(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("subagent_start", "SubagentStart", input_data)

    async def _hook_subagent_stop(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("subagent_stop", "SubagentStop", input_data)

    async def _hook_pre_compact(
        self,
        input_data: dict[str, Any],
        tool_use_id: str | None,
        context: Any,
    ) -> dict[str, Any]:
        return await self._dispatch_hook("pre_compact", "PreCompact", input_data)

    # --- Event handlers ---

    async def _handle_pre_tool_use(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record the pending tool call, evaluate, and optionally deny it."""
        tool_name = data.get("tool_name", "")
        raw_input = data.get("tool_input")
        # A missing, None or non-dict tool_input is treated as "no
        # arguments" so the call is still recorded and evaluated.
        tool_input = raw_input if isinstance(raw_input, dict) else {}

        # Reserved keys are written last so that tool arguments named
        # "tool" or "type" cannot overwrite the recorded action metadata.
        self._buffer.tool_calls.append(
            {**tool_input, "tool": tool_name, "type": "tool_call"}
        )
        self._buffer.tool_usage[tool_name] += 1
        self._buffer.action_distribution["tool_call"] += 1

        score = self._run_evaluation()
        self._emit_event("pre_tool_use", data, score)

        if self._enforce and score.action == "block":
            return {
                "hookSpecificOutput": {
                    "hookEventName": "PreToolUse",
                    "permissionDecision": "deny",
                    "permissionDecisionReason": (
                        f"Agentegrity integrity score {score.composite:.3f} "
                        f"triggered block action"
                    ),
                }
            }

        return {}

    async def _handle_post_tool_use(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record the tool output and re-evaluate."""
        tool_response = data.get("tool_response", "")
        self._buffer.tool_outputs.append(
            {"tool": data.get("tool_name", ""), "output": tool_response}
        )

        score = self._run_evaluation()
        self._emit_event("post_tool_use", data, score)

        return {}

    async def _handle_post_tool_use_failure(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record a failed tool call; no evaluation is run."""
        self._buffer.tool_failures.append({
            "tool": data.get("tool_name", ""),
            "error": data.get("error", ""),
        })
        self._emit_event("post_tool_use_failure", data)
        return {}

    async def _handle_user_prompt_submit(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record the submitted prompt (string prompts only) and evaluate."""
        prompt = data.get("prompt", data.get("user_message", ""))
        if isinstance(prompt, str):
            self._buffer.inputs.append(prompt)
        self._buffer.action_distribution["user_prompt"] += 1

        score = self._run_evaluation()
        self._emit_event("user_prompt_submit", data, score)
        return {}

    async def _handle_stop(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Run an evaluation when the Stop hook fires."""
        score = self._run_evaluation()
        self._emit_event("stop", data, score)
        return {}

    async def _handle_subagent_start(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record a subagent start entry; no evaluation is run."""
        self._buffer.subagents.append({
            "agent_id": data.get("agent_id", ""),
            "started": datetime.now(timezone.utc).isoformat(),
        })
        self._emit_event("subagent_start", data)
        return {}

    async def _handle_subagent_stop(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Record a subagent stop entry; no evaluation is run.

        The stop is appended as a separate entry; the matching start
        entry is not updated.
        """
        self._buffer.subagents.append({
            "agent_id": data.get("agent_id", ""),
            "stopped": datetime.now(timezone.utc).isoformat(),
            "transcript_path": data.get("agent_transcript_path", ""),
        })
        self._emit_event("subagent_stop", data)
        return {}

    async def _handle_pre_compact(
        self, data: dict[str, Any]
    ) -> dict[str, Any]:
        """Emit an event carrying a snapshot of the reasoning chain.

        The event data is the snapshot, not the incoming hook payload.
        """
        # Archive the current reasoning chain before compaction
        self._emit_event(
            "pre_compact",
            {
                "reasoning_chain_length": len(self._buffer.reasoning_chain),
                "archived_chain": list(self._buffer.reasoning_chain),
            },
        )
        return {}

    # --- Public API ---

    def create_hooks(self) -> dict[str, list[Any]]:
        """Create Claude Agent SDK hook configuration.

        Returns a dict suitable for passing to ClaudeAgentOptions(hooks=...),
        mapping each SDK hook event name to a single HookMatcher wrapping
        this adapter's callback. HookMatcher is imported at call time to
        avoid a hard dependency on claude-agent-sdk when the adapter module
        is merely imported.

        Raises
        ------
        ImportError
            If claude-agent-sdk is not installed.
        """
        try:
            from claude_agent_sdk import HookMatcher  # type: ignore[import-not-found]
        except ImportError:
            raise ImportError(
                "claude-agent-sdk is required for the Claude adapter. "
                "Install it with: pip install agentegrity[claude]"
            ) from None

        callbacks = {
            "PreToolUse": self._hook_pre_tool_use,
            "PostToolUse": self._hook_post_tool_use,
            "PostToolUseFailure": self._hook_post_tool_use_failure,
            "UserPromptSubmit": self._hook_user_prompt_submit,
            "Stop": self._hook_stop,
            "SubagentStart": self._hook_subagent_start,
            "SubagentStop": self._hook_subagent_stop,
            "PreCompact": self._hook_pre_compact,
        }
        return {
            event_name: [HookMatcher(hooks=[callback])]
            for event_name, callback in callbacks.items()
        }

    def get_summary(self) -> dict[str, Any]:
        """Return a summary of the adapter's session.

        Includes the counts of evaluations, events and attestation
        records, the result of ``verify_chain()``, and the enforce flag.
        """
        return {
            "adapter": self.name,
            "agent_id": self._profile.agent_id,
            "evaluations": self._evaluation_count,
            "events": len(self._events),
            "attestation_records": len(self._chain.records),
            "chain_valid": self._chain.verify_chain(),
            "enforce_mode": self._enforce,
        }
@@ -0,0 +1,16 @@
1
+ from agentegrity.core.attestation import AttestationChain, AttestationRecord
2
+ from agentegrity.core.evaluator import IntegrityEvaluator, IntegrityScore
3
+ from agentegrity.core.monitor import IntegrityMonitor
4
+ from agentegrity.core.profile import AgentProfile, AgentType, DeploymentContext, RiskTier
5
+
6
+ __all__ = [
7
+ "AgentProfile",
8
+ "AgentType",
9
+ "DeploymentContext",
10
+ "RiskTier",
11
+ "IntegrityEvaluator",
12
+ "IntegrityScore",
13
+ "AttestationRecord",
14
+ "AttestationChain",
15
+ "IntegrityMonitor",
16
+ ]