ai-lib-python 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_lib_python/__init__.py +43 -0
- ai_lib_python/batch/__init__.py +15 -0
- ai_lib_python/batch/collector.py +244 -0
- ai_lib_python/batch/executor.py +224 -0
- ai_lib_python/cache/__init__.py +26 -0
- ai_lib_python/cache/backends.py +380 -0
- ai_lib_python/cache/key.py +237 -0
- ai_lib_python/cache/manager.py +332 -0
- ai_lib_python/client/__init__.py +37 -0
- ai_lib_python/client/builder.py +528 -0
- ai_lib_python/client/cancel.py +368 -0
- ai_lib_python/client/core.py +433 -0
- ai_lib_python/client/response.py +134 -0
- ai_lib_python/embeddings/__init__.py +36 -0
- ai_lib_python/embeddings/client.py +339 -0
- ai_lib_python/embeddings/types.py +234 -0
- ai_lib_python/embeddings/vectors.py +246 -0
- ai_lib_python/errors/__init__.py +41 -0
- ai_lib_python/errors/base.py +316 -0
- ai_lib_python/errors/classification.py +210 -0
- ai_lib_python/guardrails/__init__.py +35 -0
- ai_lib_python/guardrails/base.py +336 -0
- ai_lib_python/guardrails/filters.py +583 -0
- ai_lib_python/guardrails/validators.py +475 -0
- ai_lib_python/pipeline/__init__.py +55 -0
- ai_lib_python/pipeline/accumulate.py +248 -0
- ai_lib_python/pipeline/base.py +240 -0
- ai_lib_python/pipeline/decode.py +281 -0
- ai_lib_python/pipeline/event_map.py +506 -0
- ai_lib_python/pipeline/fan_out.py +284 -0
- ai_lib_python/pipeline/select.py +297 -0
- ai_lib_python/plugins/__init__.py +32 -0
- ai_lib_python/plugins/base.py +294 -0
- ai_lib_python/plugins/hooks.py +296 -0
- ai_lib_python/plugins/middleware.py +285 -0
- ai_lib_python/plugins/registry.py +294 -0
- ai_lib_python/protocol/__init__.py +71 -0
- ai_lib_python/protocol/loader.py +317 -0
- ai_lib_python/protocol/manifest.py +385 -0
- ai_lib_python/protocol/validator.py +460 -0
- ai_lib_python/py.typed +1 -0
- ai_lib_python/resilience/__init__.py +102 -0
- ai_lib_python/resilience/backpressure.py +225 -0
- ai_lib_python/resilience/circuit_breaker.py +318 -0
- ai_lib_python/resilience/executor.py +343 -0
- ai_lib_python/resilience/fallback.py +341 -0
- ai_lib_python/resilience/preflight.py +413 -0
- ai_lib_python/resilience/rate_limiter.py +291 -0
- ai_lib_python/resilience/retry.py +299 -0
- ai_lib_python/resilience/signals.py +283 -0
- ai_lib_python/routing/__init__.py +118 -0
- ai_lib_python/routing/manager.py +593 -0
- ai_lib_python/routing/strategy.py +345 -0
- ai_lib_python/routing/types.py +397 -0
- ai_lib_python/structured/__init__.py +33 -0
- ai_lib_python/structured/json_mode.py +281 -0
- ai_lib_python/structured/schema.py +316 -0
- ai_lib_python/structured/validator.py +334 -0
- ai_lib_python/telemetry/__init__.py +127 -0
- ai_lib_python/telemetry/exporters/__init__.py +9 -0
- ai_lib_python/telemetry/exporters/prometheus.py +111 -0
- ai_lib_python/telemetry/feedback.py +446 -0
- ai_lib_python/telemetry/health.py +409 -0
- ai_lib_python/telemetry/logger.py +389 -0
- ai_lib_python/telemetry/metrics.py +496 -0
- ai_lib_python/telemetry/tracer.py +473 -0
- ai_lib_python/tokens/__init__.py +25 -0
- ai_lib_python/tokens/counter.py +282 -0
- ai_lib_python/tokens/estimator.py +286 -0
- ai_lib_python/transport/__init__.py +34 -0
- ai_lib_python/transport/auth.py +141 -0
- ai_lib_python/transport/http.py +364 -0
- ai_lib_python/transport/pool.py +425 -0
- ai_lib_python/types/__init__.py +41 -0
- ai_lib_python/types/events.py +343 -0
- ai_lib_python/types/message.py +332 -0
- ai_lib_python/types/tool.py +191 -0
- ai_lib_python/utils/__init__.py +21 -0
- ai_lib_python/utils/tool_call_assembler.py +317 -0
- ai_lib_python-0.5.0.dist-info/METADATA +837 -0
- ai_lib_python-0.5.0.dist-info/RECORD +84 -0
- ai_lib_python-0.5.0.dist-info/WHEEL +4 -0
- ai_lib_python-0.5.0.dist-info/licenses/LICENSE-APACHE +201 -0
- ai_lib_python-0.5.0.dist-info/licenses/LICENSE-MIT +21 -0
|
@@ -0,0 +1,583 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Concrete filter implementations for common guardrail use cases.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import string
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from ai_lib_python.guardrails.base import (
|
|
12
|
+
CompositeGuardrail,
|
|
13
|
+
Guardrail,
|
|
14
|
+
GuardrailResult,
|
|
15
|
+
GuardrailSeverity,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Callable
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class KeywordFilter(Guardrail):
    """Filters content based on keyword matches.

    Useful for blocking specific words, phrases, or patterns.

    Example:
        >>> filter = KeywordFilter(
        ...     rule_id="no-api-keys",
        ...     keywords=["sk-", "Bearer", "password"],
        ...     severity=GuardrailSeverity.CRITICAL,
        ...     case_sensitive=False,
        ... )
        >>>
        >>> result = filter.check("Here's my API key: sk-12345")
        >>> assert not result.is_safe
        >>> print(result.violations[0].matched_text)  # "sk-"
    """

    def __init__(
        self,
        rule_id: str,
        keywords: list[str],
        severity: GuardrailSeverity = GuardrailSeverity.WARNING,
        case_sensitive: bool = False,
        match_substring: bool = True,
        replacement: str | None = None,
    ) -> None:
        """Initialize keyword filter.

        Args:
            rule_id: Unique identifier
            keywords: List of keywords to filter
            severity: Severity for violations
            case_sensitive: Whether matching is case sensitive
            match_substring: Match keywords anywhere or whole words
            replacement: String to replace matches with (for filtering)
        """
        super().__init__(rule_id, severity)
        # Keywords are normalized to lowercase up front when matching is
        # case-insensitive, so _check_impl can compare against lowered text.
        self._keywords = keywords if case_sensitive else [k.lower() for k in keywords]
        self._case_sensitive = case_sensitive
        self._match_substring = match_substring
        self._replacement = replacement or "[REDACTED]"

    @property
    def keywords(self) -> list[str]:
        """Get the list of keywords."""
        return self._keywords

    def _check_impl(self, content: str) -> GuardrailResult:
        """Check for keyword matches."""
        violations = []
        text_to_check = content if self._case_sensitive else content.lower()

        for keyword in self._keywords:
            if self._match_substring:
                matched = keyword in text_to_check
            else:
                # Match whole words only.  Keywords and text were normalized
                # together in __init__, so no case flags are needed here.
                pattern = r"\b" + re.escape(keyword) + r"\b"
                matched = re.search(pattern, text_to_check) is not None

            if matched:
                violations.append(
                    self._create_violation(
                        f"Found forbidden keyword: {keyword}",
                        keyword,
                    )
                )

        if violations:
            return GuardrailResult.violated(violations, content)

        return GuardrailResult.safe(content=content)

    def _filter_impl(self, content: str) -> str:
        """Replace matched keywords with the configured replacement.

        A single regex substitution per keyword covers both the
        case-sensitive and case-insensitive configurations; the previous
        implementation performed a redundant literal ``str.replace`` before
        the case-insensitive regex pass.
        """
        filtered = content
        flags = 0 if self._case_sensitive else re.IGNORECASE
        for keyword in self._keywords:
            pattern = re.escape(keyword)
            if not self._match_substring:
                pattern = r"\b" + pattern + r"\b"
            filtered = re.sub(pattern, self._replacement, filtered, flags=flags)
        return filtered

    def _create_violation(
        self,
        message: str,
        matched_text: str,
    ) -> "GuardrailViolation":
        """Create a single violation record.

        Bug fix: this previously returned a full ``GuardrailResult`` (via
        ``GuardrailResult.violated``), which ``_check_impl`` then appended
        into its *violations* list and wrapped in ``GuardrailResult.violated``
        again — nesting results inside results.  It now returns the
        ``GuardrailViolation`` itself, matching what ``LengthFilter`` and
        ``GuardrailResult.violated`` expect.
        """
        from ai_lib_python.guardrails.base import GuardrailViolation

        return GuardrailViolation(
            rule_id=self._rule_id,
            message=message,
            severity=self._severity,
            matched_text=matched_text,
        )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class RegexFilter(Guardrail):
    """Guardrail that flags content matching a regular expression.

    More flexible than keyword filtering for complex patterns.

    Example:
        >>> # Match API key patterns
        >>> filter = RegexFilter(
        ...     rule_id="api-keys",
        ...     pattern=r"sk-[a-zA-Z0-9]{32}",
        ...     severity=GuardrailSeverity.CRITICAL,
        ... )
    """

    def __init__(
        self,
        rule_id: str,
        pattern: str,
        severity: GuardrailSeverity = GuardrailSeverity.WARNING,
        flags: int = 0,
        replacement: str | None = None,
        message: str | None = None,
    ) -> None:
        """Initialize regex filter.

        Args:
            rule_id: Unique identifier
            pattern: Regular expression pattern
            severity: Severity for violations
            flags: Regex flags (re.IGNORECASE, etc.)
            replacement: String to replace matches with
            message: Custom violation message (pattern used if None)
        """
        super().__init__(rule_id, severity)
        # Compile once at construction; _check_impl/_filter_impl may be hot.
        self._pattern = re.compile(pattern, flags)
        self._replacement = replacement or "[REDACTED]"
        self._message = message

    @property
    def pattern(self) -> re.Pattern:
        """Get the compiled pattern."""
        return self._pattern

    def _check_impl(self, content: str) -> GuardrailResult:
        """Search the content for the forbidden pattern."""
        found = self._pattern.search(content)
        if found is None:
            return GuardrailResult.safe(content=content)

        from ai_lib_python.guardrails.base import GuardrailViolation

        violation = GuardrailViolation(
            rule_id=self._rule_id,
            message=self._message or f"Content matches forbidden pattern: {self._pattern.pattern}",
            severity=self._severity,
            matched_text=found.group(0),
        )
        return GuardrailResult.violated([violation], content)

    def _filter_impl(self, content: str) -> str:
        """Replace every occurrence of the pattern with the replacement."""
        return self._pattern.sub(self._replacement, content)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class LengthFilter(Guardrail):
    """Enforces minimum/maximum length constraints on content.

    Useful for preventing overly long or short inputs/outputs.

    Example:
        >>> filter = LengthFilter(
        ...     rule_id="length-limits",
        ...     min_length=10,
        ...     max_length=1000,
        ...     severity=GuardrailSeverity.ERROR,
        ... )
    """

    def __init__(
        self,
        rule_id: str,
        min_length: int | None = None,
        max_length: int | None = None,
        severity: GuardrailSeverity = GuardrailSeverity.WARNING,
        count_mode: str = "chars",
    ) -> None:
        """Initialize length filter.

        Args:
            rule_id: Unique identifier
            min_length: Minimum allowed length
            max_length: Maximum allowed length
            severity: Severity for violations
            count_mode: How to count (chars, words, tokens_chars, tokens_words)
        """
        super().__init__(rule_id, severity)

        if min_length is not None and min_length < 0:
            raise ValueError("min_length must be non-negative")

        if max_length is not None and max_length < 0:
            raise ValueError("max_length must be non-negative")

        if min_length is not None and max_length is not None and min_length > max_length:
            raise ValueError("min_length cannot be greater than max_length")

        self._min_length = min_length
        self._max_length = max_length
        self._count_mode = count_mode

        # Dispatch table instead of an if/elif chain; both token modes are
        # rough heuristics, not real tokenizer counts.
        counters = {
            "chars": len,
            "words": lambda text: len(text.split()),
            # Heuristic: roughly 4 characters per token.
            "tokens_chars": lambda text: len(text) // 4,
            # Heuristic: roughly 0.75 tokens per word.
            "tokens_words": lambda text: int(len(text.split()) * 0.75),
        }
        if count_mode not in counters:
            raise ValueError(f"Invalid count_mode: {count_mode}")
        self._counter = counters[count_mode]

    def _check_impl(self, content: str) -> GuardrailResult:
        """Measure the content and validate it against the configured bounds."""
        from ai_lib_python.guardrails.base import GuardrailViolation

        length = self._counter(content)
        violations = []

        too_short = self._min_length is not None and length < self._min_length
        if too_short:
            violations.append(
                GuardrailViolation(
                    rule_id=self._rule_id,
                    message=f"Content too short: {length} {self._count_mode}, minimum {self._min_length}",
                    severity=self._severity,
                )
            )

        too_long = self._max_length is not None and length > self._max_length
        if too_long:
            violations.append(
                GuardrailViolation(
                    rule_id=self._rule_id,
                    message=f"Content too long: {length} {self._count_mode}, maximum {self._max_length}",
                    severity=self._severity,
                )
            )

        if not violations:
            return GuardrailResult.safe(content=content)

        return GuardrailResult.violated(violations, content)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class ProfanityFilter(Guardrail):
    """Filters profanity and inappropriate language.

    Uses a built-in list of common profanity words.

    Example:
        >>> filter = ProfanityFilter(
        ...     rule_id="no-profanity",
        ...     severity=GuardrailSeverity.WARNING,
        ...     replacement="[CENSORED]",
        ... )
    """

    _DEFAULT_PROFANITY_LIST = [
        # Common profanity words (can be extended)
        "damn", "hell", "crap",
        # Additional words can be added as needed
    ]

    def __init__(
        self,
        rule_id: str,
        severity: GuardrailSeverity = GuardrailSeverity.WARNING,
        profanity_list: list[str] | None = None,
        case_sensitive: bool = False,
        replacement: str | None = None,
    ) -> None:
        """Initialize profanity filter.

        Args:
            rule_id: Unique identifier
            severity: Severity for violations
            profanity_list: Custom list of profanity words
            case_sensitive: Whether matching is case sensitive
            replacement: String to replace profanity with
        """
        super().__init__(rule_id, severity)
        # Copy the default list so per-instance mutation cannot leak into
        # the shared class attribute.
        self._keywords = profanity_list or self._DEFAULT_PROFANITY_LIST[:]
        self._case_sensitive = case_sensitive
        self._replacement = replacement or "***"

    def _check_impl(self, content: str) -> GuardrailResult:
        """Check for profanity.

        Bug fix: the pattern is now always compiled.  Previously it was only
        compiled in the case-insensitive branch, so with
        ``case_sensitive=True`` the subsequent ``pattern.search(content)``
        crashed with ``AttributeError`` ('str' object has no 'search').
        """
        from ai_lib_python.guardrails.base import GuardrailViolation

        violations = []
        flags = 0 if self._case_sensitive else re.IGNORECASE

        for keyword in self._keywords:
            pattern = re.compile(re.escape(keyword), flags)
            # Search the original content so matched_text preserves case.
            match = pattern.search(content)
            if match:
                violations.append(
                    GuardrailViolation(
                        rule_id=self._rule_id,
                        message=f"Profanity detected: {keyword}",
                        severity=self._severity,
                        matched_text=match.group(0),
                    )
                )

        if violations:
            return GuardrailResult.violated(violations, content)

        return GuardrailResult.safe(content=content)

    def _filter_impl(self, content: str) -> str:
        """Replace profanity with the configured replacement string."""
        filtered = content

        for keyword in self._keywords:
            pattern = re.escape(keyword)
            flags = re.IGNORECASE if not self._case_sensitive else 0
            filtered = re.sub(pattern, self._replacement, filtered, flags=flags)

        return filtered
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
class UrlFilter(Guardrail):
    """Filters or extracts URLs from content.

    Useful for preventing malicious links or extracting URLs.

    Example:
        >>> filter = UrlFilter(
        ...     rule_id="block-urls",
        ...     action="block",
        ...     severity=GuardrailSeverity.INFO,
        ... )
    """

    # Bug fix: the path character class previously contained a space
    # ("[/\\w .-]*"), which made a URL swallow the prose following it
    # ("http://a.com see this" matched as one URL).
    _URL_PATTERN = re.compile(
        r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[/\w.-]*(?:\?[^\s]*)?",
    )

    def __init__(
        self,
        rule_id: str,
        action: str = "block",
        severity: GuardrailSeverity = GuardrailSeverity.INFO,
        allowed_domains: list[str] | None = None,
        blocked_domains: list[str] | None = None,
    ) -> None:
        """Initialize URL filter.

        Args:
            rule_id: Unique identifier
            action: "block" (deny all), "allow" (allow only allowed_domains),
                "deny" (deny blocked_domains)
            severity: Severity for violations
            allowed_domains: List of allowed domains (for "allow" action)
            blocked_domains: List of blocked domains (for "deny" action)
        """
        super().__init__(rule_id, severity)

        if action not in ("block", "allow", "deny"):
            raise ValueError(f"Invalid action: {action}")

        self._action = action
        self._allowed_domains = [d.lower() for d in (allowed_domains or [])]
        self._blocked_domains = [d.lower() for d in (blocked_domains or [])]

    def _check_impl(self, content: str) -> GuardrailResult:
        """Check content for URLs and apply the configured action."""
        matches = list(self._URL_PATTERN.finditer(content))

        if not matches:
            return GuardrailResult.safe(content=content)

        violations = []

        # NOTE(review): domain matching below is a plain substring test on the
        # whole URL, so "evil.com" also matches "notevil.com" and matches in
        # the path/query.  Consider urllib.parse hostname extraction — kept
        # as-is here to preserve existing matching behavior.
        for match in matches:
            url = match.group(0)

            if self._action == "block":
                violations.append(self._create_url_violation(f"URL detected: {url}", url))
            elif self._action == "deny":
                # Check if URL is in blocked domains
                for domain in self._blocked_domains:
                    if domain.lower() in url.lower():
                        violations.append(
                            self._create_url_violation(f"Blocked domain URL: {url}", url)
                        )
                        break
            elif self._action == "allow":
                # Check if URL is in allowed domains
                allowed = any(d.lower() in url.lower() for d in self._allowed_domains)
                if not allowed:
                    violations.append(
                        self._create_url_violation(f"URL from non-allowed domain: {url}", url)
                    )

        if violations:
            return GuardrailResult.violated(violations, content)

        return GuardrailResult.safe(content=content)

    def _create_url_violation(self, message: str, url: str) -> "GuardrailViolation":
        """Create a single URL violation record.

        Bug fix: this previously returned a full ``GuardrailResult``, which
        ``_check_impl`` then appended into its *violations* list — nesting a
        result inside the result.  It now returns the ``GuardrailViolation``
        itself, as ``GuardrailResult.violated`` expects.
        """
        from ai_lib_python.guardrails.base import GuardrailViolation

        return GuardrailViolation(
            rule_id=self._rule_id,
            message=message,
            severity=self._severity,
            matched_text=url,
        )
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class EmailFilter(Guardrail):
    """Filters or detects email addresses in content.

    Useful for preventing PII leakage or logging email detection.

    Example:
        >>> filter = EmailFilter(
        ...     rule_id="no-emails",
        ...     action="block",
        ...     severity=GuardrailSeverity.WARNING,
        ... )
    """

    # Bug fix: the TLD class was "[A-Z|a-z]{2,}", which also matched a
    # literal "|" character inside the top-level domain.
    _EMAIL_PATTERN = re.compile(
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
    )

    def __init__(
        self,
        rule_id: str,
        action: str = "block",
        severity: GuardrailSeverity = GuardrailSeverity.INFO,
        allowed_domains: list[str] | None = None,
        blocked_domains: list[str] | None = None,
        replacement: str | None = None,
    ) -> None:
        """Initialize email filter.

        Args:
            rule_id: Unique identifier
            action: "block" (deny all), "allow" (allow only allowed_domains),
                "deny" (deny blocked_domains)
            severity: Severity for violations
            allowed_domains: List of allowed email domains
            blocked_domains: List of blocked email domains
            replacement: String to replace emails with
        """
        super().__init__(rule_id, severity)

        if action not in ("block", "allow", "deny"):
            raise ValueError(f"Invalid action: {action}")

        self._action = action
        self._allowed_domains = [d.lower() for d in (allowed_domains or [])]
        self._blocked_domains = [d.lower() for d in (blocked_domains or [])]
        self._replacement = replacement or "[REDACTED]"

    @staticmethod
    def _email_domain(email: str) -> str:
        """Return the lower-cased domain part (text after the last '@')."""
        return email.rpartition("@")[2].lower()

    @classmethod
    def _domain_matches(cls, email: str, domain: str) -> bool:
        """True if the email's domain is *domain* or a subdomain of it.

        Bug fix: the previous check was ``email.endswith(domain)`` with no
        '@' or '.' boundary, so blocked domain "example.com" also matched
        "user@notexample.com".
        """
        email_domain = cls._email_domain(email)
        domain = domain.lower()
        return email_domain == domain or email_domain.endswith("." + domain)

    def _check_impl(self, content: str) -> GuardrailResult:
        """Check content for email addresses and apply the configured action."""
        matches = list(self._EMAIL_PATTERN.finditer(content))

        if not matches:
            return GuardrailResult.safe(content=content)

        violations = []

        for match in matches:
            email = match.group(0)

            if self._action == "block":
                violations.append(self._create_email_violation(f"Email detected: {email}", email))
            elif self._action == "deny":
                # Check if email is in blocked domains
                for domain in self._blocked_domains:
                    if self._domain_matches(email, domain):
                        violations.append(
                            self._create_email_violation(f"Blocked domain email: {email}", email)
                        )
                        break
            elif self._action == "allow":
                # Check if email is in allowed domains
                allowed = any(self._domain_matches(email, d) for d in self._allowed_domains)
                if not allowed:
                    violations.append(
                        self._create_email_violation(
                            f"Email from non-allowed domain: {email}", email
                        )
                    )

        if violations:
            return GuardrailResult.violated(violations, content)

        return GuardrailResult.safe(content=content)

    def _filter_impl(self, content: str) -> str:
        """Replace email addresses with the configured replacement string."""
        return self._EMAIL_PATTERN.sub(self._replacement, content)

    def _create_email_violation(self, message: str, email: str) -> "GuardrailViolation":
        """Create a single email violation record.

        Bug fix: this previously returned a full ``GuardrailResult``, which
        ``_check_impl`` then appended into its *violations* list — nesting a
        result inside the result.  It now returns the ``GuardrailViolation``
        itself, as ``GuardrailResult.violated`` expects.
        """
        from ai_lib_python.guardrails.base import GuardrailViolation

        return GuardrailViolation(
            rule_id=self._rule_id,
            message=message,
            severity=self._severity,
            matched_text=email,
        )
|