ai-lib-python 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_lib_python/guardrails/base.py +336 -336
- ai_lib_python/guardrails/filters.py +561 -561
- ai_lib_python/guardrails/validators.py +475 -475
- ai_lib_python/protocol/v2/capabilities.py +224 -224
- {ai_lib_python-0.8.2.dist-info → ai_lib_python-0.8.3.dist-info}/METADATA +1 -1
- {ai_lib_python-0.8.2.dist-info → ai_lib_python-0.8.3.dist-info}/RECORD +9 -9
- {ai_lib_python-0.8.2.dist-info → ai_lib_python-0.8.3.dist-info}/WHEEL +0 -0
- {ai_lib_python-0.8.2.dist-info → ai_lib_python-0.8.3.dist-info}/licenses/LICENSE-APACHE +0 -0
- {ai_lib_python-0.8.2.dist-info → ai_lib_python-0.8.3.dist-info}/licenses/LICENSE-MIT +0 -0
ai_lib_python/guardrails/base.py
CHANGED
|
@@ -1,336 +1,336 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Base classes for guardrail filtering and validation.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from __future__ import annotations
|
|
6
|
-
|
|
7
|
-
from dataclasses import dataclass, field
|
|
8
|
-
from enum import Enum
|
|
9
|
-
from typing import TYPE_CHECKING, Any
|
|
10
|
-
|
|
11
|
-
if TYPE_CHECKING:
|
|
12
|
-
from collections.abc import Callable
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class GuardrailSeverity(Enum):
|
|
16
|
-
"""Severity levels for guardrail violations."""
|
|
17
|
-
|
|
18
|
-
INFO = "info"
|
|
19
|
-
WARNING = "warning"
|
|
20
|
-
ERROR = "error"
|
|
21
|
-
CRITICAL = "critical"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@dataclass
|
|
25
|
-
class GuardrailViolation:
|
|
26
|
-
"""Represents a single guardrail rule violation."""
|
|
27
|
-
|
|
28
|
-
rule_id: str
|
|
29
|
-
message: str
|
|
30
|
-
severity: GuardrailSeverity
|
|
31
|
-
matched_text: str | None = None
|
|
32
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
33
|
-
|
|
34
|
-
def __post_init__(self) -> None:
|
|
35
|
-
"""Validate severity."""
|
|
36
|
-
if not isinstance(self.severity, GuardrailSeverity):
|
|
37
|
-
self.severity = GuardrailSeverity(self.severity)
|
|
38
|
-
|
|
39
|
-
def to_dict(self) -> dict[str, Any]:
|
|
40
|
-
"""Convert violation to dictionary."""
|
|
41
|
-
return {
|
|
42
|
-
"rule_id": self.rule_id,
|
|
43
|
-
"message": self.message,
|
|
44
|
-
"severity": self.severity.value,
|
|
45
|
-
"matched_text": self.matched_text,
|
|
46
|
-
"metadata": self.metadata,
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
@dataclass
|
|
51
|
-
class GuardrailResult:
|
|
52
|
-
"""Result of applying a guardrail to content."""
|
|
53
|
-
|
|
54
|
-
is_safe: bool
|
|
55
|
-
violations: list[GuardrailViolation] = field(default_factory=list)
|
|
56
|
-
filtered_content: str | None = None
|
|
57
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
58
|
-
|
|
59
|
-
@classmethod
|
|
60
|
-
def safe(cls, content: str | None = None) -> GuardrailResult:
|
|
61
|
-
"""Create a safe result."""
|
|
62
|
-
return cls(is_safe=True, violations=[], filtered_content=content)
|
|
63
|
-
|
|
64
|
-
@classmethod
|
|
65
|
-
def violated(
|
|
66
|
-
cls,
|
|
67
|
-
violations: list[GuardrailViolation],
|
|
68
|
-
filtered_content: str | None = None,
|
|
69
|
-
) -> GuardrailResult:
|
|
70
|
-
"""Create a violated result."""
|
|
71
|
-
return cls(
|
|
72
|
-
is_safe=False,
|
|
73
|
-
violations=violations,
|
|
74
|
-
filtered_content=filtered_content,
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
def to_dict(self) -> dict[str, Any]:
|
|
78
|
-
"""Convert result to dictionary."""
|
|
79
|
-
return {
|
|
80
|
-
"is_safe": self.is_safe,
|
|
81
|
-
"violations": [v.to_dict() for v in self.violations],
|
|
82
|
-
"filtered_content": self.filtered_content,
|
|
83
|
-
"metadata": self.metadata,
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
class Guardrail:
|
|
88
|
-
"""Base class for all guardrail filters and validators.
|
|
89
|
-
|
|
90
|
-
Guardrails check content for safety, compliance, or policy violations.
|
|
91
|
-
They can be applied to both user inputs and AI model outputs.
|
|
92
|
-
|
|
93
|
-
Example:
|
|
94
|
-
>>> from ai_lib_python.guardrails import KeywordFilter
|
|
95
|
-
>>>
|
|
96
|
-
>>> filter = KeywordFilter(
|
|
97
|
-
... rule_id="no-api-keys",
|
|
98
|
-
... keywords=["sk-", "Bearer"],
|
|
99
|
-
... severity=GuardrailSeverity.CRITICAL,
|
|
100
|
-
... )
|
|
101
|
-
>>>
|
|
102
|
-
>>> result = filter.check("Here is my key: sk-12345")
|
|
103
|
-
>>> if not result.is_safe:
|
|
104
|
-
... print(f"Blocked: {result.violations[0].message}")
|
|
105
|
-
"""
|
|
106
|
-
|
|
107
|
-
def __init__(
|
|
108
|
-
self,
|
|
109
|
-
rule_id: str,
|
|
110
|
-
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
111
|
-
enabled: bool = True,
|
|
112
|
-
) -> None:
|
|
113
|
-
"""Initialize the guardrail.
|
|
114
|
-
|
|
115
|
-
Args:
|
|
116
|
-
rule_id: Unique identifier for this guardrail rule
|
|
117
|
-
severity: Severity level for violations
|
|
118
|
-
enabled: Whether this guardrail is active
|
|
119
|
-
"""
|
|
120
|
-
if not rule_id:
|
|
121
|
-
raise ValueError("rule_id must be non-empty")
|
|
122
|
-
|
|
123
|
-
if not isinstance(severity, GuardrailSeverity):
|
|
124
|
-
severity = GuardrailSeverity(severity)
|
|
125
|
-
|
|
126
|
-
self._rule_id = rule_id
|
|
127
|
-
self._severity = severity
|
|
128
|
-
self._enabled = enabled
|
|
129
|
-
|
|
130
|
-
@property
|
|
131
|
-
def rule_id(self) -> str:
|
|
132
|
-
"""Get the rule ID."""
|
|
133
|
-
return self._rule_id
|
|
134
|
-
|
|
135
|
-
@property
|
|
136
|
-
def severity(self) -> GuardrailSeverity:
|
|
137
|
-
"""Get the violation severity."""
|
|
138
|
-
return self._severity
|
|
139
|
-
|
|
140
|
-
@property
|
|
141
|
-
def enabled(self) -> bool:
|
|
142
|
-
"""Check if this guardrail is enabled."""
|
|
143
|
-
return self._enabled
|
|
144
|
-
|
|
145
|
-
def enable(self) -> None:
|
|
146
|
-
"""Enable this guardrail."""
|
|
147
|
-
self._enabled = True
|
|
148
|
-
|
|
149
|
-
def disable(self) -> None:
|
|
150
|
-
"""Disable this guardrail."""
|
|
151
|
-
self._enabled = False
|
|
152
|
-
|
|
153
|
-
def check(self, content: str) -> GuardrailResult:
|
|
154
|
-
"""Check if content violates this guardrail.
|
|
155
|
-
|
|
156
|
-
Args:
|
|
157
|
-
content: Text content to check
|
|
158
|
-
|
|
159
|
-
Returns:
|
|
160
|
-
GuardrailResult with violation details if unsafe
|
|
161
|
-
"""
|
|
162
|
-
if not self._enabled:
|
|
163
|
-
return GuardrailResult.safe(content=content)
|
|
164
|
-
|
|
165
|
-
if not content:
|
|
166
|
-
return GuardrailResult.safe(content=content)
|
|
167
|
-
|
|
168
|
-
return self._check_impl(content)
|
|
169
|
-
|
|
170
|
-
def filter(self, content: str) -> str:
|
|
171
|
-
"""Filter content by removing or replacing violations.
|
|
172
|
-
|
|
173
|
-
Args:
|
|
174
|
-
content: Text content to filter
|
|
175
|
-
|
|
176
|
-
Returns:
|
|
177
|
-
Filtered content with violations removed/replaced
|
|
178
|
-
"""
|
|
179
|
-
result = self.check(content)
|
|
180
|
-
return result.filtered_content if result.filtered_content is not None else content
|
|
181
|
-
|
|
182
|
-
def _check_impl(self, content: str) -> GuardrailResult:
|
|
183
|
-
"""Implementation of the guardrail check logic.
|
|
184
|
-
|
|
185
|
-
Subclasses must override this method.
|
|
186
|
-
|
|
187
|
-
Args:
|
|
188
|
-
content: Text content to check
|
|
189
|
-
|
|
190
|
-
Returns:
|
|
191
|
-
GuardrailResult with violations if content is unsafe
|
|
192
|
-
"""
|
|
193
|
-
raise NotImplementedError("Subclasses must implement _check_impl")
|
|
194
|
-
|
|
195
|
-
def _filter_impl(self, content: str) -> str:
|
|
196
|
-
"""Implementation of the content filter logic.
|
|
197
|
-
|
|
198
|
-
Subclasses can override this method to provide filtering capabilities.
|
|
199
|
-
|
|
200
|
-
Args:
|
|
201
|
-
content: Text content to filter
|
|
202
|
-
|
|
203
|
-
Returns:
|
|
204
|
-
Filtered content
|
|
205
|
-
"""
|
|
206
|
-
# Default: return original content (no filtering)
|
|
207
|
-
return content
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
class CompositeGuardrail(Guardrail):
|
|
211
|
-
"""Combines multiple guardrails into a single check.
|
|
212
|
-
|
|
213
|
-
All guardrails are evaluated and all violations are collected.
|
|
214
|
-
|
|
215
|
-
Example:
|
|
216
|
-
>>> from ai_lib_python.guardrails import KeywordFilter, LengthFilter
|
|
217
|
-
>>>
|
|
218
|
-
>>> composite = CompositeGuardrail(
|
|
219
|
-
... rule_id="composite-check",
|
|
220
|
-
... guardrails=[
|
|
221
|
-
... KeywordFilter("no-api-keys", ["sk-"], GuardrailSeverity.CRITICAL),
|
|
222
|
-
... LengthFilter("max-length", max_length=1000),
|
|
223
|
-
... ],
|
|
224
|
-
... )
|
|
225
|
-
"""
|
|
226
|
-
|
|
227
|
-
def __init__(
|
|
228
|
-
self,
|
|
229
|
-
rule_id: str,
|
|
230
|
-
guardrails: list[Guardrail],
|
|
231
|
-
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
232
|
-
stop_on_first: bool = False,
|
|
233
|
-
) -> None:
|
|
234
|
-
"""Initialize composite guardrail.
|
|
235
|
-
|
|
236
|
-
Args:
|
|
237
|
-
rule_id: Unique identifier
|
|
238
|
-
guardrails: List of guardrails to compose
|
|
239
|
-
severity: Default severity for composite violations
|
|
240
|
-
stop_on_first: Stop after first violation or continue checking all
|
|
241
|
-
"""
|
|
242
|
-
super().__init__(rule_id, severity)
|
|
243
|
-
self._guardrails = guardrails
|
|
244
|
-
self._stop_on_first = stop_on_first
|
|
245
|
-
|
|
246
|
-
@property
|
|
247
|
-
def guardrails(self) -> list[Guardrail]:
|
|
248
|
-
"""Get the list of guardrails."""
|
|
249
|
-
return self._guardrails
|
|
250
|
-
|
|
251
|
-
def add_guardrail(self, guardrail: Guardrail) -> None:
|
|
252
|
-
"""Add a guardrail to the composite."""
|
|
253
|
-
self._guardrails.append(guardrail)
|
|
254
|
-
|
|
255
|
-
def remove_guardrail(self, rule_id: str) -> bool:
|
|
256
|
-
"""Remove a guardrail by rule ID."""
|
|
257
|
-
for i, g in enumerate(self._guardrails):
|
|
258
|
-
if g._rule_id == rule_id:
|
|
259
|
-
self._guardrails.pop(i)
|
|
260
|
-
return True
|
|
261
|
-
return False
|
|
262
|
-
|
|
263
|
-
def _check_impl(self, content: str) -> GuardrailResult:
|
|
264
|
-
"""Check all guardrails and collect violations."""
|
|
265
|
-
all_violations: list[GuardrailViolation] = []
|
|
266
|
-
|
|
267
|
-
for guardrail in self._guardrails:
|
|
268
|
-
result = guardrail.check(content)
|
|
269
|
-
|
|
270
|
-
if not result.is_safe:
|
|
271
|
-
all_violations.extend(result.violations)
|
|
272
|
-
|
|
273
|
-
if self._stop_on_first:
|
|
274
|
-
return GuardrailResult.violated(all_violations, content)
|
|
275
|
-
|
|
276
|
-
if all_violations:
|
|
277
|
-
return GuardrailResult.violated(all_violations, content)
|
|
278
|
-
|
|
279
|
-
return GuardrailResult.safe(content=content)
|
|
280
|
-
|
|
281
|
-
def _filter_impl(self, content: str) -> str:
|
|
282
|
-
"""Apply all filters in sequence."""
|
|
283
|
-
filtered = content
|
|
284
|
-
for guardrail in self._guardrails:
|
|
285
|
-
filtered = guardrail.filter(filtered)
|
|
286
|
-
return filtered
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
class ConditionalGuardrail(Guardrail):
|
|
290
|
-
"""Applies a guardrail only when a condition is met.
|
|
291
|
-
|
|
292
|
-
Useful for context-aware filtering.
|
|
293
|
-
|
|
294
|
-
Example:
|
|
295
|
-
>>> filter = ConditionalGuardrail(
|
|
296
|
-
... rule_id="filter-in-chat",
|
|
297
|
-
... guardrail=KeywordFilter("no-api-keys", ["sk-"]),
|
|
298
|
-
... condition=lambda ctx: ctx.get("mode") == "chat",
|
|
299
|
-
... )
|
|
300
|
-
"""
|
|
301
|
-
|
|
302
|
-
def __init__(
|
|
303
|
-
self,
|
|
304
|
-
rule_id: str,
|
|
305
|
-
guardrail: Guardrail,
|
|
306
|
-
condition: Callable[[dict[str, Any]], bool],
|
|
307
|
-
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
308
|
-
) -> None:
|
|
309
|
-
"""Initialize conditional guardrail.
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
rule_id: Unique identifier
|
|
313
|
-
guardrail: Guardrail to apply conditionally
|
|
314
|
-
condition: Function that takes context dict and returns bool
|
|
315
|
-
severity: Severity for violations
|
|
316
|
-
"""
|
|
317
|
-
super().__init__(rule_id, severity)
|
|
318
|
-
self._guardrail = guardrail
|
|
319
|
-
self._condition = condition
|
|
320
|
-
self._context: dict[str, Any] = {}
|
|
321
|
-
|
|
322
|
-
def set_context(self, context: dict[str, Any]) -> None:
|
|
323
|
-
"""Set the context for condition evaluation."""
|
|
324
|
-
self._context = context
|
|
325
|
-
|
|
326
|
-
def _check_impl(self, content: str) -> GuardrailResult:
|
|
327
|
-
"""Check only if condition is met."""
|
|
328
|
-
if self._condition(self._context):
|
|
329
|
-
return self._guardrail.check(content)
|
|
330
|
-
return GuardrailResult.safe(content=content)
|
|
331
|
-
|
|
332
|
-
def _filter_impl(self, content: str) -> str:
|
|
333
|
-
"""Filter only if condition is met."""
|
|
334
|
-
if self._condition(self._context):
|
|
335
|
-
return self._guardrail.filter(content)
|
|
336
|
-
return content
|
|
1
|
+
"""
|
|
2
|
+
Base classes for guardrail filtering and validation.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GuardrailSeverity(Enum):
|
|
16
|
+
"""Severity levels for guardrail violations."""
|
|
17
|
+
|
|
18
|
+
INFO = "info"
|
|
19
|
+
WARNING = "warning"
|
|
20
|
+
ERROR = "error"
|
|
21
|
+
CRITICAL = "critical"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class GuardrailViolation:
|
|
26
|
+
"""Represents a single guardrail rule violation."""
|
|
27
|
+
|
|
28
|
+
rule_id: str
|
|
29
|
+
message: str
|
|
30
|
+
severity: GuardrailSeverity
|
|
31
|
+
matched_text: str | None = None
|
|
32
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
33
|
+
|
|
34
|
+
def __post_init__(self) -> None:
|
|
35
|
+
"""Validate severity."""
|
|
36
|
+
if not isinstance(self.severity, GuardrailSeverity):
|
|
37
|
+
self.severity = GuardrailSeverity(self.severity)
|
|
38
|
+
|
|
39
|
+
def to_dict(self) -> dict[str, Any]:
|
|
40
|
+
"""Convert violation to dictionary."""
|
|
41
|
+
return {
|
|
42
|
+
"rule_id": self.rule_id,
|
|
43
|
+
"message": self.message,
|
|
44
|
+
"severity": self.severity.value,
|
|
45
|
+
"matched_text": self.matched_text,
|
|
46
|
+
"metadata": self.metadata,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
|
|
51
|
+
class GuardrailResult:
|
|
52
|
+
"""Result of applying a guardrail to content."""
|
|
53
|
+
|
|
54
|
+
is_safe: bool
|
|
55
|
+
violations: list[GuardrailViolation] = field(default_factory=list)
|
|
56
|
+
filtered_content: str | None = None
|
|
57
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def safe(cls, content: str | None = None) -> GuardrailResult:
|
|
61
|
+
"""Create a safe result."""
|
|
62
|
+
return cls(is_safe=True, violations=[], filtered_content=content)
|
|
63
|
+
|
|
64
|
+
@classmethod
|
|
65
|
+
def violated(
|
|
66
|
+
cls,
|
|
67
|
+
violations: list[GuardrailViolation],
|
|
68
|
+
filtered_content: str | None = None,
|
|
69
|
+
) -> GuardrailResult:
|
|
70
|
+
"""Create a violated result."""
|
|
71
|
+
return cls(
|
|
72
|
+
is_safe=False,
|
|
73
|
+
violations=violations,
|
|
74
|
+
filtered_content=filtered_content,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
def to_dict(self) -> dict[str, Any]:
|
|
78
|
+
"""Convert result to dictionary."""
|
|
79
|
+
return {
|
|
80
|
+
"is_safe": self.is_safe,
|
|
81
|
+
"violations": [v.to_dict() for v in self.violations],
|
|
82
|
+
"filtered_content": self.filtered_content,
|
|
83
|
+
"metadata": self.metadata,
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Guardrail:
|
|
88
|
+
"""Base class for all guardrail filters and validators.
|
|
89
|
+
|
|
90
|
+
Guardrails check content for safety, compliance, or policy violations.
|
|
91
|
+
They can be applied to both user inputs and AI model outputs.
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
>>> from ai_lib_python.guardrails import KeywordFilter
|
|
95
|
+
>>>
|
|
96
|
+
>>> filter = KeywordFilter(
|
|
97
|
+
... rule_id="no-api-keys",
|
|
98
|
+
... keywords=["sk-", "Bearer"],
|
|
99
|
+
... severity=GuardrailSeverity.CRITICAL,
|
|
100
|
+
... )
|
|
101
|
+
>>>
|
|
102
|
+
>>> result = filter.check("Here is my key: sk-12345")
|
|
103
|
+
>>> if not result.is_safe:
|
|
104
|
+
... print(f"Blocked: {result.violations[0].message}")
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
def __init__(
|
|
108
|
+
self,
|
|
109
|
+
rule_id: str,
|
|
110
|
+
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
111
|
+
enabled: bool = True,
|
|
112
|
+
) -> None:
|
|
113
|
+
"""Initialize the guardrail.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
rule_id: Unique identifier for this guardrail rule
|
|
117
|
+
severity: Severity level for violations
|
|
118
|
+
enabled: Whether this guardrail is active
|
|
119
|
+
"""
|
|
120
|
+
if not rule_id:
|
|
121
|
+
raise ValueError("rule_id must be non-empty")
|
|
122
|
+
|
|
123
|
+
if not isinstance(severity, GuardrailSeverity):
|
|
124
|
+
severity = GuardrailSeverity(severity)
|
|
125
|
+
|
|
126
|
+
self._rule_id = rule_id
|
|
127
|
+
self._severity = severity
|
|
128
|
+
self._enabled = enabled
|
|
129
|
+
|
|
130
|
+
@property
|
|
131
|
+
def rule_id(self) -> str:
|
|
132
|
+
"""Get the rule ID."""
|
|
133
|
+
return self._rule_id
|
|
134
|
+
|
|
135
|
+
@property
|
|
136
|
+
def severity(self) -> GuardrailSeverity:
|
|
137
|
+
"""Get the violation severity."""
|
|
138
|
+
return self._severity
|
|
139
|
+
|
|
140
|
+
@property
|
|
141
|
+
def enabled(self) -> bool:
|
|
142
|
+
"""Check if this guardrail is enabled."""
|
|
143
|
+
return self._enabled
|
|
144
|
+
|
|
145
|
+
def enable(self) -> None:
|
|
146
|
+
"""Enable this guardrail."""
|
|
147
|
+
self._enabled = True
|
|
148
|
+
|
|
149
|
+
def disable(self) -> None:
|
|
150
|
+
"""Disable this guardrail."""
|
|
151
|
+
self._enabled = False
|
|
152
|
+
|
|
153
|
+
def check(self, content: str) -> GuardrailResult:
|
|
154
|
+
"""Check if content violates this guardrail.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
content: Text content to check
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
GuardrailResult with violation details if unsafe
|
|
161
|
+
"""
|
|
162
|
+
if not self._enabled:
|
|
163
|
+
return GuardrailResult.safe(content=content)
|
|
164
|
+
|
|
165
|
+
if not content:
|
|
166
|
+
return GuardrailResult.safe(content=content)
|
|
167
|
+
|
|
168
|
+
return self._check_impl(content)
|
|
169
|
+
|
|
170
|
+
def filter(self, content: str) -> str:
|
|
171
|
+
"""Filter content by removing or replacing violations.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
content: Text content to filter
|
|
175
|
+
|
|
176
|
+
Returns:
|
|
177
|
+
Filtered content with violations removed/replaced
|
|
178
|
+
"""
|
|
179
|
+
result = self.check(content)
|
|
180
|
+
return result.filtered_content if result.filtered_content is not None else content
|
|
181
|
+
|
|
182
|
+
def _check_impl(self, content: str) -> GuardrailResult:
|
|
183
|
+
"""Implementation of the guardrail check logic.
|
|
184
|
+
|
|
185
|
+
Subclasses must override this method.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
content: Text content to check
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
GuardrailResult with violations if content is unsafe
|
|
192
|
+
"""
|
|
193
|
+
raise NotImplementedError("Subclasses must implement _check_impl")
|
|
194
|
+
|
|
195
|
+
def _filter_impl(self, content: str) -> str:
|
|
196
|
+
"""Implementation of the content filter logic.
|
|
197
|
+
|
|
198
|
+
Subclasses can override this method to provide filtering capabilities.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
content: Text content to filter
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Filtered content
|
|
205
|
+
"""
|
|
206
|
+
# Default: return original content (no filtering)
|
|
207
|
+
return content
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class CompositeGuardrail(Guardrail):
|
|
211
|
+
"""Combines multiple guardrails into a single check.
|
|
212
|
+
|
|
213
|
+
All guardrails are evaluated and all violations are collected.
|
|
214
|
+
|
|
215
|
+
Example:
|
|
216
|
+
>>> from ai_lib_python.guardrails import KeywordFilter, LengthFilter
|
|
217
|
+
>>>
|
|
218
|
+
>>> composite = CompositeGuardrail(
|
|
219
|
+
... rule_id="composite-check",
|
|
220
|
+
... guardrails=[
|
|
221
|
+
... KeywordFilter("no-api-keys", ["sk-"], GuardrailSeverity.CRITICAL),
|
|
222
|
+
... LengthFilter("max-length", max_length=1000),
|
|
223
|
+
... ],
|
|
224
|
+
... )
|
|
225
|
+
"""
|
|
226
|
+
|
|
227
|
+
def __init__(
|
|
228
|
+
self,
|
|
229
|
+
rule_id: str,
|
|
230
|
+
guardrails: list[Guardrail],
|
|
231
|
+
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
232
|
+
stop_on_first: bool = False,
|
|
233
|
+
) -> None:
|
|
234
|
+
"""Initialize composite guardrail.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
rule_id: Unique identifier
|
|
238
|
+
guardrails: List of guardrails to compose
|
|
239
|
+
severity: Default severity for composite violations
|
|
240
|
+
stop_on_first: Stop after first violation or continue checking all
|
|
241
|
+
"""
|
|
242
|
+
super().__init__(rule_id, severity)
|
|
243
|
+
self._guardrails = guardrails
|
|
244
|
+
self._stop_on_first = stop_on_first
|
|
245
|
+
|
|
246
|
+
@property
|
|
247
|
+
def guardrails(self) -> list[Guardrail]:
|
|
248
|
+
"""Get the list of guardrails."""
|
|
249
|
+
return self._guardrails
|
|
250
|
+
|
|
251
|
+
def add_guardrail(self, guardrail: Guardrail) -> None:
|
|
252
|
+
"""Add a guardrail to the composite."""
|
|
253
|
+
self._guardrails.append(guardrail)
|
|
254
|
+
|
|
255
|
+
def remove_guardrail(self, rule_id: str) -> bool:
|
|
256
|
+
"""Remove a guardrail by rule ID."""
|
|
257
|
+
for i, g in enumerate(self._guardrails):
|
|
258
|
+
if g._rule_id == rule_id:
|
|
259
|
+
self._guardrails.pop(i)
|
|
260
|
+
return True
|
|
261
|
+
return False
|
|
262
|
+
|
|
263
|
+
def _check_impl(self, content: str) -> GuardrailResult:
|
|
264
|
+
"""Check all guardrails and collect violations."""
|
|
265
|
+
all_violations: list[GuardrailViolation] = []
|
|
266
|
+
|
|
267
|
+
for guardrail in self._guardrails:
|
|
268
|
+
result = guardrail.check(content)
|
|
269
|
+
|
|
270
|
+
if not result.is_safe:
|
|
271
|
+
all_violations.extend(result.violations)
|
|
272
|
+
|
|
273
|
+
if self._stop_on_first:
|
|
274
|
+
return GuardrailResult.violated(all_violations, content)
|
|
275
|
+
|
|
276
|
+
if all_violations:
|
|
277
|
+
return GuardrailResult.violated(all_violations, content)
|
|
278
|
+
|
|
279
|
+
return GuardrailResult.safe(content=content)
|
|
280
|
+
|
|
281
|
+
def _filter_impl(self, content: str) -> str:
|
|
282
|
+
"""Apply all filters in sequence."""
|
|
283
|
+
filtered = content
|
|
284
|
+
for guardrail in self._guardrails:
|
|
285
|
+
filtered = guardrail.filter(filtered)
|
|
286
|
+
return filtered
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class ConditionalGuardrail(Guardrail):
|
|
290
|
+
"""Applies a guardrail only when a condition is met.
|
|
291
|
+
|
|
292
|
+
Useful for context-aware filtering.
|
|
293
|
+
|
|
294
|
+
Example:
|
|
295
|
+
>>> filter = ConditionalGuardrail(
|
|
296
|
+
... rule_id="filter-in-chat",
|
|
297
|
+
... guardrail=KeywordFilter("no-api-keys", ["sk-"]),
|
|
298
|
+
... condition=lambda ctx: ctx.get("mode") == "chat",
|
|
299
|
+
... )
|
|
300
|
+
"""
|
|
301
|
+
|
|
302
|
+
def __init__(
|
|
303
|
+
self,
|
|
304
|
+
rule_id: str,
|
|
305
|
+
guardrail: Guardrail,
|
|
306
|
+
condition: Callable[[dict[str, Any]], bool],
|
|
307
|
+
severity: GuardrailSeverity = GuardrailSeverity.WARNING,
|
|
308
|
+
) -> None:
|
|
309
|
+
"""Initialize conditional guardrail.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
rule_id: Unique identifier
|
|
313
|
+
guardrail: Guardrail to apply conditionally
|
|
314
|
+
condition: Function that takes context dict and returns bool
|
|
315
|
+
severity: Severity for violations
|
|
316
|
+
"""
|
|
317
|
+
super().__init__(rule_id, severity)
|
|
318
|
+
self._guardrail = guardrail
|
|
319
|
+
self._condition = condition
|
|
320
|
+
self._context: dict[str, Any] = {}
|
|
321
|
+
|
|
322
|
+
def set_context(self, context: dict[str, Any]) -> None:
|
|
323
|
+
"""Set the context for condition evaluation."""
|
|
324
|
+
self._context = context
|
|
325
|
+
|
|
326
|
+
def _check_impl(self, content: str) -> GuardrailResult:
|
|
327
|
+
"""Check only if condition is met."""
|
|
328
|
+
if self._condition(self._context):
|
|
329
|
+
return self._guardrail.check(content)
|
|
330
|
+
return GuardrailResult.safe(content=content)
|
|
331
|
+
|
|
332
|
+
def _filter_impl(self, content: str) -> str:
|
|
333
|
+
"""Filter only if condition is met."""
|
|
334
|
+
if self._condition(self._context):
|
|
335
|
+
return self._guardrail.filter(content)
|
|
336
|
+
return content
|