antz_audit-0.3.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial/adapters/__init__.py +0 -0
- adversarial/adapters/http_agent.py +139 -0
- adversarial/adapters/langgraph.py +265 -0
- adversarial/attack_engine.py +337 -0
- adversarial/attacks/base.py +53 -0
- adversarial/attacks/constraint_bypass.py +320 -0
- adversarial/attacks/goal_hijacking.py +265 -0
- adversarial/attacks/indirect_injection.py +200 -0
- adversarial/attacks/prompt_injection.py +300 -0
- adversarial/attacks/threshold_probing.py +677 -0
- adversarial/cli/progress.py +251 -0
- adversarial/utils/retry.py +257 -0
- antz_audit-0.3.2.dist-info/METADATA +442 -0
- antz_audit-0.3.2.dist-info/RECORD +27 -0
- antz_audit-0.3.2.dist-info/WHEEL +5 -0
- antz_audit-0.3.2.dist-info/entry_points.txt +3 -0
- antz_audit-0.3.2.dist-info/top_level.txt +4 -0
- constitution/__init__.py +0 -0
- constitution/builder.py +432 -0
- constitution/schema.py +349 -0
- defense/__init__.py +0 -0
- defense/constitution_hardener.py +138 -0
- reporting/__init__.py +0 -0
- reporting/audit_report.py +565 -0
- reporting/pdf_renderer.py +461 -0
- reporting/server.py +498 -0
- reporting/templates/report.md.j2 +246 -0
File without changes
adversarial/adapters/http_agent.py
@@ -0,0 +1,139 @@
"""
HTTP Agent Adapter — wraps any external agent REST API as a LangChain
BaseChatModel so the attack engine can audit it without code access.

Usage:
    from adversarial.adapters.http_agent import HTTPAgentAdapter

    adapter = HTTPAgentAdapter(
        url="https://my-agent.company.com/api/chat",
        headers={"Authorization": "Bearer sk-..."},
        payload_template={"session_id": "audit-001"},
        message_field="message",
        response_field="response",
    )
    attack = ConstraintBypassAttack(adapter, judge, constitution)

Probe before attacking:
    ok, info = await adapter.probe()
    if not ok:
        print(info)
"""

from __future__ import annotations

import asyncio
from typing import Any

import httpx
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class HTTPAgentAdapter(BaseChatModel):
    """
    Adapts any HTTP/REST agent endpoint to the LangChain BaseChatModel interface.

    The adapter sends the last HumanMessage content to the configured URL and
    extracts the response using a dot-path field selector.
    """

    url: str
    headers: dict[str, str] = {}
    payload_template: dict[str, Any] = {}
    message_field: str = "message"
    response_field: str = "response"
    timeout: float = 30.0
    max_retries: int = 2

    class Config:
        arbitrary_types_allowed = True

    @property
    def _llm_type(self) -> str:
        return "http_agent_adapter"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        return asyncio.get_event_loop().run_until_complete(
            self._agenerate(messages, stop, **kwargs)
        )

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        user_text = next(
            (m.content for m in reversed(messages) if m.type == "human"),
            str(messages[-1].content) if messages else "",
        )

        payload = {**self.payload_template, self.message_field: user_text}

        last_exc: Exception | None = None
        for attempt in range(self.max_retries + 1):
            try:
                async with httpx.AsyncClient(timeout=self.timeout) as client:
                    resp = await client.post(
                        self.url,
                        json=payload,
                        headers=self.headers,
                    )
                    resp.raise_for_status()
                    data = resp.json()
                    text = self._extract(data, self.response_field)
                    msg = AIMessage(content=text)
                    return ChatResult(generations=[ChatGeneration(message=msg)])
            except Exception as exc:
                last_exc = exc
                if attempt < self.max_retries:
                    await asyncio.sleep(2 ** attempt)

        error_text = f"[HTTP_AGENT_ERROR after {self.max_retries + 1} attempts: {last_exc}]"
        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=error_text))])

    @staticmethod
    def _extract(data: Any, field_path: str) -> str:
        """Dot-path extractor: 'choices.0.message.content' style."""
        parts = field_path.split(".")
        current = data
        for part in parts:
            if isinstance(current, dict):
                current = current.get(part, "")
            elif isinstance(current, list) and part.isdigit():
                current = current[int(part)]
            else:
                return str(current)
        return str(current) if current is not None else ""

    async def probe(self) -> tuple[bool, str]:
        """
        Connectivity check before running the full attack campaign.
        Returns (reachable, info_message).
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                payload = {**self.payload_template, self.message_field: "ping"}
                resp = await client.post(self.url, json=payload, headers=self.headers)
                if resp.status_code < 500:
                    return True, f"Agent reachable — HTTP {resp.status_code}"
                return False, f"Agent returned HTTP {resp.status_code}"
        except httpx.ConnectError:
            return False, f"Cannot connect to {self.url}"
        except httpx.TimeoutException:
            return False, f"Timeout connecting to {self.url}"
        except Exception as exc:
            return False, f"Probe failed: {exc}"
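The dot-path extractor above treats numeric segments as list indices, which is what lets response_field reach into list-shaped JSON such as OpenAI-style choices arrays. The sketch below shows that resolution; the endpoint URL, model name, and sample payload are illustrative assumptions, not values shipped in the package.

# Sketch: exercising HTTPAgentAdapter._extract and a dot-path response_field.
# The sample response body and endpoint are hypothetical.
from adversarial.adapters.http_agent import HTTPAgentAdapter

sample = {"choices": [{"message": {"content": "Hello from the agent"}}]}
assert HTTPAgentAdapter._extract(sample, "choices.0.message.content") == "Hello from the agent"

adapter = HTTPAgentAdapter(
    url="https://example.internal/v1/chat",           # hypothetical endpoint
    payload_template={"model": "agent-under-test"},    # merged into every request payload
    message_field="prompt",
    response_field="choices.0.message.content",
)

Because _extract is a staticmethod, the field-path configuration can be checked offline like this before the adapter is pointed at a live agent or handed to probe().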
adversarial/adapters/langgraph.py
@@ -0,0 +1,265 @@
"""
LangGraph and CrewAI Adapters — wrap existing agent graphs and crews
as LangChain BaseChatModel so the attack engine can audit them directly.

Usage (LangGraph):
    from adversarial.adapters.langgraph import LangGraphAdapter

    graph = builder.compile()
    adapter = LangGraphAdapter(graph, input_key="messages", output_key="messages")
    attack = ConstraintBypassAttack(adapter, judge, constitution)

Usage (CrewAI):
    from adversarial.adapters.langgraph import CrewAIAdapter

    crew = Crew(agents=[...], tasks=[...])
    adapter = CrewAIAdapter(crew, input_variable="customer_request")
    attack = ConstraintBypassAttack(adapter, judge, constitution)
"""

from __future__ import annotations

import asyncio
from typing import Any

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.outputs import ChatGeneration, ChatResult

# ---------------------------------------------------------------------------
# LangGraph Adapter
# ---------------------------------------------------------------------------

class LangGraphAdapter(BaseChatModel):
    """
    Wraps a compiled LangGraph (StateGraph) as a BaseChatModel.

    The adapter invokes the graph with the last HumanMessage and extracts
    the final AI response from the graph state.

    Args:
        graph: A compiled LangGraph (result of builder.compile()).
        input_key: State key used to pass messages into the graph.
        output_key: State key from which to extract the response.
            If the value is a list of messages, the last AIMessage
            content is returned.
        config: Optional LangGraph config dict (thread_id, etc.).
    """

    graph: Any
    input_key: str = "messages"
    output_key: str = "messages"
    config: dict[str, Any] = {}

    class Config:
        arbitrary_types_allowed = True

    @property
    def _llm_type(self) -> str:
        return "langgraph_adapter"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        return asyncio.get_event_loop().run_until_complete(
            self._agenerate(messages, stop, **kwargs)
        )

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        user_text = next(
            (m.content for m in reversed(messages) if m.type == "human"),
            str(messages[-1].content) if messages else "",
        )

        graph_input = {self.input_key: [HumanMessage(content=user_text)]}

        try:
            if hasattr(self.graph, "ainvoke"):
                state = await self.graph.ainvoke(graph_input, config=self.config)
            else:
                state = self.graph.invoke(graph_input, config=self.config)

            response_text = self._extract_response(state)
        except Exception as exc:
            response_text = f"[LANGGRAPH_ERROR: {exc}]"

        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=response_text))]
        )

    def _extract_response(self, state: Any) -> str:
        value = state.get(self.output_key, "") if isinstance(state, dict) else state

        if isinstance(value, list):
            for msg in reversed(value):
                if hasattr(msg, "content") and getattr(msg, "type", "") in ("ai", "assistant"):
                    return str(msg.content)
                if hasattr(msg, "content"):
                    return str(msg.content)
            return str(value[-1]) if value else ""

        if isinstance(value, str):
            return value

        if hasattr(value, "content"):
            return str(value.content)

        return str(value)


# ---------------------------------------------------------------------------
# CrewAI Adapter
# ---------------------------------------------------------------------------

class CrewAIAdapter(BaseChatModel):
    """
    Wraps a CrewAI Crew as a BaseChatModel.

    The adapter calls crew.kickoff() with the last HumanMessage injected
    as the value of `input_variable`.

    Args:
        crew: A CrewAI Crew instance.
        input_variable: The input variable name the crew expects (default: "input").
        extra_inputs: Additional inputs to pass alongside the user message.
    """

    crew: Any
    input_variable: str = "input"
    extra_inputs: dict[str, Any] = {}

    class Config:
        arbitrary_types_allowed = True

    @property
    def _llm_type(self) -> str:
        return "crewai_adapter"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        return asyncio.get_event_loop().run_until_complete(
            self._agenerate(messages, stop, **kwargs)
        )

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        user_text = next(
            (m.content for m in reversed(messages) if m.type == "human"),
            str(messages[-1].content) if messages else "",
        )

        inputs = {**self.extra_inputs, self.input_variable: user_text}

        try:
            if hasattr(self.crew, "kickoff_async"):
                result = await self.crew.kickoff_async(inputs=inputs)
            else:
                loop = asyncio.get_running_loop()
                result = await loop.run_in_executor(
                    None, lambda: self.crew.kickoff(inputs=inputs)
                )

            response_text = (
                result.raw
                if hasattr(result, "raw")
                else str(result)
            )
        except Exception as exc:
            response_text = f"[CREWAI_ERROR: {exc}]"

        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=response_text))]
        )


# ---------------------------------------------------------------------------
# AutoGen Adapter (basic)
# ---------------------------------------------------------------------------

class AutoGenAdapter(BaseChatModel):
    """
    Wraps an AutoGen ConversableAgent as a BaseChatModel.

    Sends a single message and returns the last reply.

    Args:
        agent: An AutoGen ConversableAgent or AssistantAgent.
        initiator_name: Name of the human proxy sending the message.
    """

    agent: Any
    initiator_name: str = "adversarial_tester"

    class Config:
        arbitrary_types_allowed = True

    @property
    def _llm_type(self) -> str:
        return "autogen_adapter"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        return asyncio.get_event_loop().run_until_complete(
            self._agenerate(messages, stop, **kwargs)
        )

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        user_text = next(
            (m.content for m in reversed(messages) if m.type == "human"),
            str(messages[-1].content) if messages else "",
        )

        try:
            loop = asyncio.get_running_loop()
            chat_result = await loop.run_in_executor(
                None,
                lambda: self.agent.initiate_chat(
                    self.agent,
                    message=user_text,
                    max_turns=1,
                    silent=True,
                ),
            )
            history = chat_result.chat_history if hasattr(chat_result, "chat_history") else []
            response = history[-1].get("content", "") if history else str(chat_result)
        except Exception as exc:
            response = f"[AUTOGEN_ERROR: {exc}]"

        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=response))]
        )
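The LangGraphAdapter docstring shows the intended wiring; the sketch below fills it in with a self-contained one-node graph so the round trip through _agenerate and _extract_response is visible end to end. The echo node, its canned reply, and the use of langgraph's prebuilt MessagesState are illustrative assumptions rather than code from the package.

# Sketch: LangGraphAdapter wrapped around a minimal one-node graph.
import asyncio

from langchain_core.messages import AIMessage
from langgraph.graph import StateGraph, MessagesState, START, END

from adversarial.adapters.langgraph import LangGraphAdapter


def echo(state: MessagesState) -> dict:
    # Answer the latest message with a fixed, recognizable reply.
    return {"messages": [AIMessage(content="echo: " + state["messages"][-1].content)]}


builder = StateGraph(MessagesState)
builder.add_node("echo", echo)
builder.add_edge(START, "echo")
builder.add_edge("echo", END)
graph = builder.compile()

# input_key / output_key both default to "messages", matching MessagesState.
adapter = LangGraphAdapter(graph=graph)
reply = asyncio.run(adapter.ainvoke("hello"))
print(reply.content)  # -> "echo: hello"

CrewAIAdapter and AutoGenAdapter, defined in this same module, follow the same pattern: the last HumanMessage is routed into crew.kickoff(inputs=...) or agent.initiate_chat(...), and whatever comes back is wrapped in an AIMessage so the attack engine only ever talks to a BaseChatModel.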