evalguard-python 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
evalguard/__init__.py ADDED
@@ -0,0 +1,42 @@
1
+ """EvalGuard Python SDK -- evaluate, red-team, and guard LLM applications."""
2
+
3
+ from .client import EvalGuardClient, EvalGuardError
4
+ from .guardrails import GuardrailClient, GuardrailViolation
5
+ from .types import (
6
+ BenchmarkResult,
7
+ CaseResult,
8
+ ComplianceReport,
9
+ DriftReport,
10
+ EvalCase,
11
+ EvalResult,
12
+ EvalRun,
13
+ FirewallResult,
14
+ FirewallRule,
15
+ SecurityFinding,
16
+ SecurityScanResult,
17
+ TokenUsage,
18
+ )
19
+
20
+ __version__ = "1.1.0"
21
+
22
+ __all__ = [
23
+ # Core client
24
+ "EvalGuardClient",
25
+ "EvalGuardError",
26
+ # Guardrails
27
+ "GuardrailClient",
28
+ "GuardrailViolation",
29
+ # Types
30
+ "BenchmarkResult",
31
+ "CaseResult",
32
+ "ComplianceReport",
33
+ "DriftReport",
34
+ "EvalCase",
35
+ "EvalResult",
36
+ "EvalRun",
37
+ "FirewallResult",
38
+ "FirewallRule",
39
+ "SecurityFinding",
40
+ "SecurityScanResult",
41
+ "TokenUsage",
42
+ ]
evalguard/anthropic.py ADDED
@@ -0,0 +1,182 @@
1
+ """Drop-in Anthropic wrapper with EvalGuard guardrails.
2
+
3
+ Usage::
4
+
5
+ from evalguard.anthropic import wrap
6
+ from anthropic import Anthropic
7
+
8
+ client = wrap(Anthropic(), api_key="eg_...", project_id="proj_...")
9
+ response = client.messages.create(
10
+ model="claude-sonnet-4-20250514",
11
+ max_tokens=1024,
12
+ messages=[{"role": "user", "content": "Hello"}],
13
+ )
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import time
19
+ from typing import Any, List, Optional
20
+
21
+ from .guardrails import GuardrailClient, GuardrailViolation
22
+
23
+
24
def wrap(
    client: Any,
    *,
    api_key: str,
    project_id: Optional[str] = None,
    base_url: str = "https://api.evalguard.ai",
    rules: Optional[List[str]] = None,
    block_on_violation: bool = True,
    timeout: float = 5.0,
) -> "_AnthropicProxy":
    """Attach EvalGuard guardrails to an ``anthropic.Anthropic`` client.

    Parameters
    ----------
    client:
        An instantiated ``anthropic.Anthropic`` or ``anthropic.AsyncAnthropic``.
    api_key:
        EvalGuard API key.
    project_id:
        Optional EvalGuard project ID.
    rules:
        Guardrail rules. Defaults to prompt-injection + PII.
    block_on_violation:
        Raise on violation if *True*, log-only if *False*.
    """
    # Build the guardrail client once; the proxy reuses it for every call.
    guardrail_client = GuardrailClient(
        api_key=api_key,
        base_url=base_url,
        project_id=project_id,
        timeout=timeout,
    )
    return _AnthropicProxy(client, guardrail_client, rules, block_on_violation)
56
+
57
+
58
class _AnthropicProxy:
    """Thin wrapper around an Anthropic client; routes ``.messages`` through guardrails."""

    __slots__ = ("_client", "_guard", "_rules", "_block")

    def __init__(self, client: Any, guard: GuardrailClient, rules: Optional[List[str]], block: bool) -> None:
        self._client = client
        self._guard = guard
        self._rules = rules
        self._block = block

    def __getattr__(self, name: str) -> Any:
        # Only ``messages`` is intercepted; every other attribute passes
        # straight through to the wrapped client.
        target = getattr(self._client, name)
        if name != "messages":
            return target
        return _MessagesProxy(target, self._guard, self._rules, self._block)
74
+
75
+
76
class _MessagesProxy:
    """Wraps ``client.messages`` so that ``create`` runs guardrail checks."""

    __slots__ = ("_messages", "_guard", "_rules", "_block")

    def __init__(self, messages: Any, guard: GuardrailClient, rules: Optional[List[str]], block: bool) -> None:
        self._messages = messages
        self._guard = guard
        self._rules = rules
        self._block = block

    def __getattr__(self, name: str) -> Any:
        # Everything except ``create`` is forwarded untouched.
        return self._guarded_create if name == "create" else getattr(self._messages, name)

    def _guarded_create(self, **kwargs: Any) -> Any:
        """Check the prompt, call Anthropic, then log a trace of the round trip."""
        prompt_text = _extract_prompt(kwargs.get("messages", []), kwargs.get("system", ""))
        model = kwargs.get("model", "unknown")

        # Guardrail check on the combined prompt before hitting the API.
        t0 = time.monotonic()
        verdict = self._guard.check_input(
            prompt_text,
            rules=self._rules,
            metadata={"model": model, "framework": "anthropic"},
        )
        guard_ms = (time.monotonic() - t0) * 1000

        # Block mode raises; log-only mode continues and records the violations.
        if self._block and not verdict.get("allowed", True):
            raise GuardrailViolation(verdict.get("violations", []))

        # Forward to the real Anthropic client and time the call.
        t0 = time.monotonic()
        response = self._messages.create(**kwargs)
        llm_ms = (time.monotonic() - t0) * 1000

        # Record the full round trip (prompt, output, latency, token usage).
        self._guard.log_trace(
            {
                "provider": "anthropic",
                "model": model,
                "input": prompt_text,
                "output": _extract_response(response),
                "guard_latency_ms": round(guard_ms, 2),
                "llm_latency_ms": round(llm_ms, 2),
                "violations": verdict.get("violations", []),
                "token_usage": _extract_usage(response),
            }
        )

        return response
130
+
131
+
132
def _extract_prompt(messages: list, system: Any = "") -> str:
    """Flatten Anthropic-style *messages* plus a system prompt into one string."""
    collected: list[str] = []

    def _take_text(blocks: Any) -> None:
        # Pull the ``text`` field out of every text-type content block.
        for blk in blocks:
            if isinstance(blk, dict) and blk.get("type") == "text":
                collected.append(blk.get("text", ""))

    if isinstance(system, str):
        if system:
            collected.append(system)
    elif isinstance(system, list):
        _take_text(system)

    for msg in messages:
        content = msg.get("content", "") if isinstance(msg, dict) else getattr(msg, "content", "")
        if isinstance(content, str):
            collected.append(content)
        elif isinstance(content, list):
            _take_text(content)
    return "\n".join(collected)
151
+
152
+
153
def _extract_response(response: Any) -> str:
    """Pull all text content out of an Anthropic ``Message`` response."""
    try:
        if hasattr(response, "content"):
            blocks = response.content
        else:
            blocks = response.get("content", [])
        pieces: list[str] = []
        for item in blocks:
            # SDK objects expose ``.text``; raw dicts carry a text-type block.
            if hasattr(item, "text"):
                pieces.append(item.text)
                continue
            if isinstance(item, dict) and item.get("type") == "text":
                pieces.append(item.get("text", ""))
        return "\n".join(pieces)
    except Exception:
        # Unknown response shapes degrade to an empty transcript rather than raising.
        return ""
166
+
167
+
168
def _extract_usage(response: Any) -> Optional[dict]:
    """Extract token usage from an Anthropic response.

    Handles both SDK objects (``usage.input_tokens``) and plain dicts.
    Returns ``None`` when no usage information is present or the response
    has an unexpected shape.
    """
    try:
        usage = response.usage if hasattr(response, "usage") else response.get("usage")
        if not usage:
            return None
        if isinstance(usage, dict):
            input_tokens = usage.get("input_tokens")
            output_tokens = usage.get("output_tokens")
        else:
            # BUG FIX: the previous ``getattr(...) or usage.get(...)`` chain
            # collapsed a legitimate count of 0 into None (0 is falsy).
            input_tokens = getattr(usage, "input_tokens", None)
            output_tokens = getattr(usage, "output_tokens", None)
        return {
            "prompt_tokens": input_tokens,
            "completion_tokens": output_tokens,
            "total_tokens": (input_tokens or 0) + (output_tokens or 0),
        }
    except Exception:
        return None
evalguard/bedrock.py ADDED
@@ -0,0 +1,280 @@
1
+ """AWS Bedrock wrapper with EvalGuard guardrails.
2
+
3
+ Usage::
4
+
5
+ from evalguard.bedrock import wrap
6
+ import boto3
7
+
8
+ bedrock = boto3.client("bedrock-runtime")
9
+ client = wrap(bedrock, api_key="eg_...", project_id="proj_...")
10
+
11
+ response = client.invoke_model(
12
+ modelId="anthropic.claude-3-sonnet-20240229-v1:0",
13
+ body='{"messages":[{"role":"user","content":"Hello"}],"max_tokens":256}',
14
+ )
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import time
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ from .guardrails import GuardrailClient, GuardrailViolation
24
+
25
+
26
def wrap(
    client: Any,
    *,
    api_key: str,
    project_id: Optional[str] = None,
    base_url: str = "https://api.evalguard.ai",
    rules: Optional[List[str]] = None,
    block_on_violation: bool = True,
    timeout: float = 5.0,
) -> "_BedrockProxy":
    """Attach EvalGuard guardrails to a ``boto3.client('bedrock-runtime')``.

    Parameters
    ----------
    client:
        A boto3 Bedrock Runtime client.
    api_key:
        EvalGuard API key.
    project_id:
        Optional project ID.
    rules:
        Guardrail rules. Defaults to prompt-injection + PII.
    block_on_violation:
        Raise on violation if *True*.
    """
    # One guardrail client is shared across all intercepted calls.
    guardrail_client = GuardrailClient(
        api_key=api_key,
        base_url=base_url,
        project_id=project_id,
        timeout=timeout,
    )
    return _BedrockProxy(client, guardrail_client, rules, block_on_violation)
58
+
59
+
60
class _BedrockProxy:
    """Transparent proxy that intercepts ``invoke_model`` and ``converse``."""

    __slots__ = ("_client", "_guard", "_rules", "_block")

    def __init__(self, client: Any, guard: GuardrailClient, rules: Optional[List[str]], block: bool) -> None:
        self._client = client
        self._guard = guard
        self._rules = rules
        self._block = block

    def __getattr__(self, name: str) -> Any:
        # Route the two generation entry points through guardrails; every
        # other attribute is the boto3 client's own.
        if name == "invoke_model":
            return self._guarded_invoke_model
        if name == "converse":
            return self._guarded_converse
        return getattr(self._client, name)

    def _precheck(self, prompt_text: str, model_id: str) -> tuple:
        """Run the guardrail input check; return ``(verdict, guard_latency_ms)``.

        Raises :class:`GuardrailViolation` when the check fails and the
        proxy is in blocking mode; log-only mode just returns the verdict.
        """
        t0 = time.monotonic()
        verdict = self._guard.check_input(
            prompt_text,
            rules=self._rules,
            metadata={"model": model_id, "framework": "bedrock"},
        )
        elapsed_ms = (time.monotonic() - t0) * 1000
        if self._block and not verdict.get("allowed", True):
            raise GuardrailViolation(verdict.get("violations", []))
        return verdict, elapsed_ms

    def _guarded_invoke_model(self, **kwargs: Any) -> Any:
        """Guarded replacement for ``bedrock-runtime.invoke_model``."""
        model_id = kwargs.get("modelId", kwargs.get("ModelId", "unknown"))
        body_raw = kwargs.get("body", kwargs.get("Body", "{}"))

        # The request body is JSON whose schema varies per model provider.
        parsed = json.loads(body_raw) if isinstance(body_raw, (str, bytes)) else body_raw
        prompt_text = _extract_invoke_prompt(parsed, model_id)

        verdict, guard_ms = self._precheck(prompt_text, model_id)

        t0 = time.monotonic()
        response = self._client.invoke_model(**kwargs)
        llm_ms = (time.monotonic() - t0) * 1000

        self._guard.log_trace(
            {
                "provider": "bedrock",
                "model": model_id,
                "input": prompt_text,
                "output": _extract_invoke_response(response, model_id),
                "guard_latency_ms": round(guard_ms, 2),
                "llm_latency_ms": round(llm_ms, 2),
                "violations": verdict.get("violations", []),
            }
        )

        return response

    def _guarded_converse(self, **kwargs: Any) -> Any:
        """Guarded replacement for ``bedrock-runtime.converse``."""
        model_id = kwargs.get("modelId", kwargs.get("ModelId", "unknown"))
        prompt_text = _extract_converse_prompt(kwargs.get("messages", []), kwargs.get("system", []))

        verdict, guard_ms = self._precheck(prompt_text, model_id)

        t0 = time.monotonic()
        response = self._client.converse(**kwargs)
        llm_ms = (time.monotonic() - t0) * 1000

        usage = response.get("usage", {})
        token_usage = (
            {
                "prompt_tokens": usage.get("inputTokens"),
                "completion_tokens": usage.get("outputTokens"),
                "total_tokens": usage.get("totalTokens"),
            }
            if usage
            else None
        )
        self._guard.log_trace(
            {
                "provider": "bedrock",
                "model": model_id,
                "input": prompt_text,
                "output": _extract_converse_response(response),
                "guard_latency_ms": round(guard_ms, 2),
                "llm_latency_ms": round(llm_ms, 2),
                "violations": verdict.get("violations", []),
                "token_usage": token_usage,
            }
        )

        return response
166
+
167
+
168
def _extract_invoke_prompt(body: dict, model_id: str) -> str:
    """Extract prompt text from a Bedrock ``invoke_model`` request body.

    The body schema differs per model provider, so dispatch on the model id.
    """
    mid = model_id.lower()

    # Anthropic Claude via Bedrock: messages + optional system prompt.
    if "anthropic" in mid:
        pieces: list[str] = []
        system = body.get("system", "")
        if system:
            pieces.append(system if isinstance(system, str) else str(system))
        for msg in body.get("messages", []):
            content = msg.get("content", "")
            if isinstance(content, str):
                pieces.append(content)
            elif isinstance(content, list):
                pieces.extend(
                    blk.get("text", "")
                    for blk in content
                    if isinstance(blk, dict) and blk.get("type") == "text"
                )
        return "\n".join(pieces)

    # Amazon Titan: a single ``inputText`` string.
    if "titan" in mid:
        text = body.get("inputText", "")
        return text if isinstance(text, str) else str(text)

    # Meta Llama: plain ``prompt``.
    if "meta" in mid or "llama" in mid:
        return body.get("prompt", str(body))

    # Cohere: ``prompt`` (generate) or ``message`` (chat).
    if "cohere" in mid:
        return body.get("prompt", body.get("message", str(body)))

    # AI21 and Mistral both use a plain ``prompt``.
    if "ai21" in mid or "mistral" in mid:
        return body.get("prompt", str(body))

    # Unknown provider: best-effort fallback.
    return body.get("prompt", body.get("inputText", str(body)))
210
+
211
+
212
def _extract_invoke_response(response: dict, model_id: str) -> str:
    """Extract output text from a Bedrock ``invoke_model`` response.

    BUG FIX: ``response["body"]`` is a one-shot botocore ``StreamingBody``;
    reading it here used to exhaust it, so callers received a response whose
    body could no longer be read. After reading, an equivalent readable
    ``io.BytesIO`` is put back into ``response["body"]``.

    Returns ``""`` when the body is missing or has an unexpected shape.
    """
    import io  # local import keeps the module's top-level dependencies unchanged

    try:
        body = response.get("body")
        if body is None:
            return ""
        if hasattr(body, "read"):
            raw = body.read()
            # Restore a readable body for downstream consumers.
            response["body"] = io.BytesIO(raw if isinstance(raw, bytes) else str(raw).encode("utf-8"))
            data = json.loads(raw)
        else:
            data = json.loads(body) if isinstance(body, (str, bytes)) else body

        mid = model_id.lower()

        # Anthropic: list of typed content blocks.
        if "anthropic" in mid:
            return "\n".join(
                blk.get("text", "")
                for blk in data.get("content", [])
                if isinstance(blk, dict) and blk.get("type") == "text"
            )

        # Titan: results[0].outputText.
        if "titan" in mid:
            results = data.get("results", [{}])
            return results[0].get("outputText", "") if results else ""

        # Llama: a single ``generation`` string.
        if "meta" in mid or "llama" in mid:
            return data.get("generation", "")

        # Cohere: chat responses carry ``text``; generate responses carry
        # ``generations``. Guard against an empty generations list, which
        # previously raised IndexError (silently swallowed by the except).
        if "cohere" in mid:
            if "text" in data:
                return data["text"]
            generations = data.get("generations") or [{}]
            first = generations[0]
            return first.get("text", "") if isinstance(first, dict) else ""

        # Unknown provider: best-effort fallback.
        return data.get("completion", data.get("generation", str(data)))
    except Exception:
        return ""
248
+
249
+
250
def _extract_converse_prompt(messages: list, system: Any = None) -> str:
    """Flatten Bedrock Converse API *messages* (plus system blocks) into one string."""
    pieces: list[str] = []

    def _append(entry: Any) -> None:
        # Converse content blocks are dicts with a ``text`` key; bare strings pass through.
        if isinstance(entry, dict):
            pieces.append(entry.get("text", ""))
        elif isinstance(entry, str):
            pieces.append(entry)

    if system:
        for item in (system if isinstance(system, list) else [system]):
            _append(item)

    for msg in messages:
        content = msg.get("content", [])
        for item in (content if isinstance(content, list) else [content]):
            _append(item)
    return "\n".join(pieces)
268
+
269
+
270
def _extract_converse_response(response: dict) -> str:
    """Extract the assistant text from a Bedrock Converse API response."""
    try:
        blocks = response.get("output", {}).get("message", {}).get("content", [])
        return "\n".join(entry.get("text", "") for entry in blocks if isinstance(entry, dict))
    except Exception:
        # Malformed responses degrade to an empty transcript rather than raising.
        return ""