genai-otel-instrument 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""PII Detection using Microsoft Presidio.
|
|
2
|
+
|
|
3
|
+
This module provides PII (Personally Identifiable Information) detection and redaction
|
|
4
|
+
capabilities using Microsoft Presidio library.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from .config import PIIConfig, PIIEntityType, PIIMode
|
|
13
|
+
|
|
14
|
+
# Module-level logger, named after this module's import path (__name__).
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class PIIDetectionResult:
    """Outcome of scanning a single text for PII.

    Only ``has_pii`` is required; every other field has a default so that
    "nothing found" results can be built with a single argument.

    Attributes:
        has_pii: True when at least one PII entity was found.
        entities: One dict per detected entity.
        entity_counts: Number of detected entities, keyed by entity type.
        redacted_text: The text with PII masked, or None when no redaction
            was performed.
        original_text: The text that was scanned.
        score: Overall confidence score for the detection.
        blocked: True when the text was blocked because it contained PII.
    """

    has_pii: bool
    # Mutable defaults go through default_factory so instances never share them.
    entities: List[Dict[str, Any]] = field(default_factory=list)
    entity_counts: Dict[str, int] = field(default_factory=dict)
    redacted_text: Optional[str] = field(default=None)
    original_text: Optional[str] = field(default=None)
    score: float = field(default=0.0)
    blocked: bool = field(default=False)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class PIIDetector:
    """Detect and optionally redact PII, preferring Microsoft Presidio.

    When the Presidio packages import and initialize successfully, detection
    is delegated to Presidio's analyzer and redaction to its anonymizer.
    Otherwise the detector falls back to a small set of regular expressions
    (see ``_fallback_detection``).

    Requirements:
        pip install presidio-analyzer presidio-anonymizer spacy
        python -m spacy download en_core_web_lg
    """

    # Regex fallbacks used when Presidio is unavailable, as
    # (entity type name, compiled pattern, fixed confidence score).
    # The order determines the order of entities in fallback results.
    _FALLBACK_PATTERNS = (
        (
            "EMAIL_ADDRESS",
            # The TLD class is [A-Za-z]; a "|" inside a character class is a
            # literal pipe, not alternation, and must not be part of the TLD.
            re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
            0.9,
        ),
        # US-style 3-3-4 digit phone numbers.
        ("PHONE_NUMBER", re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b"), 0.8),
        # Four groups of four digits, optionally separated by "-" or whitespace.
        (
            "CREDIT_CARD",
            re.compile(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b"),
            0.85,
        ),
        ("US_SSN", re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), 0.95),
        # Dotted quad; octet ranges are not validated.
        ("IP_ADDRESS", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"), 0.9),
    )

    def __init__(self, config: PIIConfig):
        """Initialize PII detector.

        Args:
            config: PII detection configuration
        """
        self.config = config
        self._analyzer = None
        self._anonymizer = None
        self._presidio_available = False
        self._check_availability()

    def _check_availability(self):
        """Try to import and instantiate the Presidio engines.

        Sets ``_presidio_available`` to True only when both engines were
        created. Import and initialization failures are logged, never raised,
        so the detector can still be constructed and use the regex fallback.
        """
        try:
            # Imported lazily so the module loads without Presidio installed.
            from presidio_analyzer import AnalyzerEngine
            from presidio_anonymizer import AnonymizerEngine

            self._analyzer = AnalyzerEngine()
            self._anonymizer = AnonymizerEngine()
            self._presidio_available = True
            logger.info("Presidio PII detection initialized successfully")
        except ImportError as e:
            logger.warning("Presidio not available, PII detection will be limited: %s", e)
            logger.info(
                "Install with: pip install presidio-analyzer presidio-anonymizer spacy && python -m spacy download en_core_web_lg"
            )
            self._presidio_available = False
        except Exception as e:
            logger.error("Failed to initialize Presidio: %s", e)
            self._presidio_available = False

    def is_available(self) -> bool:
        """Check if PII detector is available.

        Returns:
            bool: True if Presidio is available
        """
        return self._presidio_available

    def detect(self, text: str, language: str = "en") -> PIIDetectionResult:
        """Detect PII in text.

        Args:
            text: Text to analyze
            language: Language code (default: "en")

        Returns:
            PIIDetectionResult: Detection results. When detection is disabled
            in the config, or the Presidio analysis raises, a result with
            ``has_pii=False`` is returned instead of propagating the error.
        """
        if not self.config.enabled:
            return PIIDetectionResult(has_pii=False, original_text=text)

        if not self._presidio_available:
            logger.warning("Presidio not available, using pattern-based detection")
            return self._fallback_detection(text)

        try:
            # Convert entity types to Presidio format
            entity_types = [entity.value for entity in self.config.entity_types]

            # Analyze text
            results = self._analyzer.analyze(
                text=text,
                language=language,
                entities=entity_types,
                score_threshold=self.config.threshold,
                allow_list=self.config.allow_list,
            )

            # Flatten analyzer results into plain dicts and tally them by type.
            entities = []
            entity_counts: Dict[str, int] = {}
            for result in results:
                entities.append(
                    {
                        "type": result.entity_type,
                        "start": result.start,
                        "end": result.end,
                        "score": result.score,
                        "text": text[result.start : result.end],
                    }
                )
                entity_counts[result.entity_type] = entity_counts.get(result.entity_type, 0) + 1

            has_pii = len(entities) > 0

            # Overall score is the highest per-entity confidence.
            score = max((e["score"] for e in entities), default=0.0)

            # Redact if mode is REDACT
            redacted_text = None
            if self.config.mode == PIIMode.REDACT and has_pii:
                redacted_text = self._redact_pii(text, results)

            # Block if mode is BLOCK
            blocked = self.config.mode == PIIMode.BLOCK and has_pii

            return PIIDetectionResult(
                has_pii=has_pii,
                entities=entities,
                entity_counts=entity_counts,
                redacted_text=redacted_text,
                original_text=text,
                score=score,
                blocked=blocked,
            )

        except Exception as e:
            logger.error("Error detecting PII: %s", e, exc_info=True)
            return PIIDetectionResult(has_pii=False, original_text=text)

    def _redact_pii(self, text: str, analyzer_results) -> str:
        """Redact PII from text using the Presidio anonymizer.

        Every configured entity type is replaced by eight repetitions of
        ``config.redaction_char``, regardless of the original span length.

        Args:
            text: Original text
            analyzer_results: Presidio analyzer results

        Returns:
            str: Text with PII redacted. NOTE(review): if anonymization
            raises, the ORIGINAL, unredacted text is returned after logging
            the error; callers treating the return value as safe should be
            aware of this.
        """
        try:
            from presidio_anonymizer.entities import OperatorConfig

            # Create anonymization config
            operators = {
                entity_type.value: OperatorConfig(
                    "replace", {"new_value": self.config.redaction_char * 8}
                )
                for entity_type in self.config.entity_types
            }

            # Anonymize
            anonymized = self._anonymizer.anonymize(
                text=text, analyzer_results=analyzer_results, operators=operators
            )

            return anonymized.text

        except Exception as e:
            logger.error("Error redacting PII: %s", e)
            return text

    def _fallback_detection(self, text: str) -> PIIDetectionResult:
        """Fallback pattern-based PII detection.

        This is used when Presidio is not available. It applies the regular
        expressions in ``_FALLBACK_PATTERNS`` for the entity types enabled in
        the config. Unlike the Presidio path, it does not consult
        ``config.threshold`` or ``config.allow_list``, and each match gets a
        fixed per-type score.

        Args:
            text: Text to analyze

        Returns:
            PIIDetectionResult: Detection results
        """
        if not text:
            # Empty or missing text cannot contain PII. Returning early also
            # keeps this path from raising on None, matching the Presidio
            # path, which reports "no PII" rather than propagating errors.
            return PIIDetectionResult(has_pii=False, original_text=text)

        entities = []
        entity_counts: Dict[str, int] = {}
        enabled_types = self.config.entity_types

        for type_name, pattern, confidence in self._FALLBACK_PATTERNS:
            if getattr(PIIEntityType, type_name) not in enabled_types:
                continue
            for match in pattern.finditer(text):
                entities.append(
                    {
                        "type": type_name,
                        "start": match.start(),
                        "end": match.end(),
                        "score": confidence,
                        "text": match.group(),
                    }
                )
                entity_counts[type_name] = entity_counts.get(type_name, 0) + 1

        has_pii = len(entities) > 0
        score = max((e["score"] for e in entities), default=0.0)

        # Simple redaction for fallback: mask each span with one redaction
        # string per original character. Spans are processed right-to-left so
        # earlier offsets remain valid while the string is rewritten.
        redacted_text = None
        if self.config.mode == PIIMode.REDACT and has_pii:
            redacted_text = text
            for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
                start, end = entity["start"], entity["end"]
                replacement = self.config.redaction_char * (end - start)
                redacted_text = redacted_text[:start] + replacement + redacted_text[end:]

        blocked = self.config.mode == PIIMode.BLOCK and has_pii

        return PIIDetectionResult(
            has_pii=has_pii,
            entities=entities,
            entity_counts=entity_counts,
            redacted_text=redacted_text,
            original_text=text,
            score=score,
            blocked=blocked,
        )

    def analyze_batch(self, texts: List[str], language: str = "en") -> List[PIIDetectionResult]:
        """Analyze multiple texts for PII.

        Args:
            texts: List of texts to analyze
            language: Language code (default: "en")

        Returns:
            List[PIIDetectionResult]: Detection results for each text, in the
            same order as ``texts``
        """
        return [self.detect(text, language) for text in texts]

    def get_statistics(self, results: List[PIIDetectionResult]) -> Dict[str, Any]:
        """Get statistics from multiple detection results.

        Args:
            results: List of detection results

        Returns:
            Dict[str, Any]: Statistics including total PII count, entity type
            distribution, average confidence (over results that contain PII)
            and detection rate. Rates and averages are 0.0 for empty input.
        """
        total_detections = sum(1 for r in results if r.has_pii)
        total_entities = sum(len(r.entities) for r in results)

        # Aggregate entity counts
        entity_type_counts: Dict[str, int] = {}
        for result in results:
            for entity_type, count in result.entity_counts.items():
                entity_type_counts[entity_type] = entity_type_counts.get(entity_type, 0) + count

        # Average only over results that actually contained PII.
        scores = [r.score for r in results if r.has_pii]
        avg_score = sum(scores) / len(scores) if scores else 0.0

        return {
            "total_texts_analyzed": len(results),
            "texts_with_pii": total_detections,
            "total_entities_detected": total_entities,
            "entity_type_distribution": entity_type_counts,
            "average_confidence_score": avg_score,
            "detection_rate": total_detections / len(results) if results else 0.0,
        }
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Prompt Injection Detection for GenAI applications.
|
|
2
|
+
|
|
3
|
+
This module provides prompt injection detection capabilities using pattern-based
|
|
4
|
+
approaches to identify attempts to manipulate or bypass LLM instructions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from .config import PromptInjectionConfig
|
|
13
|
+
|
|
14
|
+
# Module-level logger, named after this module's import path (__name__).
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class PromptInjectionResult:
    """Outcome of scanning a single text for prompt injection.

    Only ``is_injection`` is required; the remaining fields default to an
    empty / "nothing found" state.

    Attributes:
        is_injection: True when prompt injection was detected above threshold.
        injection_score: Overall injection risk score (0.0-1.0).
        injection_types: Names of the injection types that were detected.
        patterns_matched: The specific matches that triggered detection,
            keyed by injection type.
        original_text: The text that was scanned.
        blocked: True when the request was blocked.
    """

    is_injection: bool
    injection_score: float = field(default=0.0)
    # Mutable defaults go through default_factory so instances never share them.
    injection_types: List[str] = field(default_factory=list)
    patterns_matched: Dict[str, List[str]] = field(default_factory=dict)
    original_text: Optional[str] = field(default=None)
    blocked: bool = field(default=False)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PromptInjectionDetector:
    """Prompt injection detector using pattern-based approaches.

    This detector identifies various types of prompt injection attacks:
    - Instruction override attempts
    - Role-playing attacks
    - Jailbreak attempts
    - Context switching
    - Encoding/obfuscation attacks
    - System prompt extraction

    All detection is pattern-based and does not require external dependencies.
    """

    # Pattern definitions for different injection types.
    # Maps injection type name -> {"patterns": [regex source strings]}.
    # Every pattern is applied case-insensitively (see _check_injection_type).
    INJECTION_PATTERNS = {
        "instruction_override": {
            "patterns": [
                r"ignore\s+(?:all\s+)?(?:(?:previous|above|the|your)\s+)*(?:instructions?|prompts?|directions?|rules?)",
                r"disregard\s+(?:all\s+)?(?:(?:previous|above|the|your)\s+)*(?:instructions?|prompts?|directions?)",
                r"forget\s+(?:all\s+)?(?:(?:previous|above|the)\s+)*(?:instructions?|prompts?|directions?)",
                r"new\s+instructions?:\s*",
                r"instead,?\s+(?:you\s+)?(?:will|must|should)\s+",
                r"override\s+(?:your|the)\s+(?:instructions?|programming|rules)",
            ],
        },
        "role_playing": {
            "patterns": [
                r"(?:pretend|act|behave)\s+(?:like|as\s+if)\s+you\s+(?:are|were|have|can|could|must|should)",
                r"(?:pretend|act|behave)\s+(?:you\s+)?(?:are|were)",
                r"you\s+are\s+now\s+(?:a|an)\s+[\w\s]+",
                r"from\s+now\s+on,?\s+you\s+(?:are|will\s+be)",
                r"roleplay\s+as",
                r"imagine\s+you\s+are\s+(?:a|an)\s+\w+",
            ],
        },
        "jailbreak": {
            "patterns": [
                r"DAN\s+mode",
                r"developer\s+mode",
                r"evil\s+mode",
                r"opposite\s+mode",
                r"jailbreak",
                r"you\s+(?:now\s+)?have\s+no\s+(?:restrictions?|limitations?|ethics?|guidelines?)",  # More specific: "you have no restrictions"
                r"without\s+any\s+(?:restrictions?|limitations?|ethics?|guidelines?|filters?)",
                r"bypass\s+(?:your|the)\s+(?:safety|ethical|content)\s+(?:filters?|guidelines?)",
            ],
        },
        "context_switching": {
            "patterns": [
                r"---+\s*(?:end|stop|ignore)",
                r"```+\s*(?:end|stop)",
                r"</(?:system|instruction|prompt)>",
                r"\[(?:END|STOP|IGNORE)\s+(?:SYSTEM|INSTRUCTION|PROMPT)\]",
                r"<\|(?:end|stop)(?:of)?(?:system|instruction|prompt)\|>",
            ],
        },
        "system_extraction": {
            "patterns": [
                r"(?:show|reveal|display|print|output)\s+(?:me\s+)?(?:your|the)\s+(?:system\s+)?(?:prompt|instructions?)",
                r"what\s+(?:are|were)\s+your\s+(?:original\s+)?instructions?",
                r"repeat\s+(?:your|the)\s+(?:system\s+)?(?:prompt|instructions?)",
                r"tell\s+me\s+your\s+(?:system\s+)?(?:prompt|instructions?)",
            ],
        },
        "encoding_obfuscation": {
            "patterns": [
                r"(?:base64|rot13|hex|unicode)\s+(?:decode|encoded?|version)",
                r"\\x[0-9a-fA-F]{2}",  # Hex encoding
                r"&#x?[0-9a-fA-F]+;",  # HTML entities
                r"\{[0-9]+\}",  # Format string injection
            ],
        },
    }

    def __init__(self, config: PromptInjectionConfig):
        """Initialize prompt injection detector.

        Args:
            config: Prompt injection detection configuration
        """
        self.config = config

    def is_available(self) -> bool:
        """Check if prompt injection detector is available.

        Returns:
            bool: Always True (pattern-based detection always available)
        """
        return True

    def detect(self, text: str) -> PromptInjectionResult:
        """Detect prompt injection attempts in text.

        Args:
            text: Text to analyze (typically a user prompt)

        Returns:
            PromptInjectionResult: Detection results. When detection is
            disabled in the config, or any error occurs while matching, a
            result with ``is_injection=False`` is returned instead of raising.
        """
        if not self.config.enabled:
            return PromptInjectionResult(is_injection=False, original_text=text)

        try:
            # Score every injection type; remember the matches of those that hit.
            injection_scores = {}
            patterns_matched = {}

            for injection_type in self.INJECTION_PATTERNS:
                score, matched = self._check_injection_type(text, injection_type)
                injection_scores[injection_type] = score
                if matched:
                    patterns_matched[injection_type] = matched

            # Calculate overall injection score (max of all types)
            injection_score = max(injection_scores.values(), default=0.0)

            # Determine which injection types exceed threshold
            injection_types = [
                inj_type
                for inj_type, score in injection_scores.items()
                if score >= self.config.threshold
            ]

            is_injection = len(injection_types) > 0
            blocked = self.config.block_on_detection and is_injection

            return PromptInjectionResult(
                is_injection=is_injection,
                injection_score=injection_score,
                injection_types=injection_types,
                patterns_matched=patterns_matched,
                original_text=text,
                blocked=blocked,
            )

        except Exception as e:
            logger.error("Error detecting prompt injection: %s", e, exc_info=True)
            return PromptInjectionResult(is_injection=False, original_text=text)

    def _check_injection_type(self, text: str, injection_type: str) -> tuple[float, List[str]]:
        """Check for a specific type of prompt injection.

        Args:
            text: Text to analyze
            injection_type: Type of injection to check (a key of
                ``INJECTION_PATTERNS``; unknown keys yield no matches)

        Returns:
            tuple: (score, matched_patterns). ``matched_patterns`` holds the
            matched substrings, including repeats; the score is 0.0 when
            nothing matched.
        """
        patterns_config = self.INJECTION_PATTERNS.get(injection_type, {})
        patterns = patterns_config.get("patterns", [])

        # re.IGNORECASE makes matching case-insensitive, so the text does not
        # need to be lower-cased first.
        matched = [
            match.group()
            for pattern in patterns
            for match in re.finditer(pattern, text, re.IGNORECASE)
        ]

        if not matched:
            return 0.0, []

        # Score calculation:
        # - Base score of 0.5 for any match (injection attempts are high risk)
        # - Additional 0.1 per unique matched string, with that bonus capped
        #   at 0.5, so the total never exceeds 1.0
        base_score = 0.5
        match_score = min(len(set(matched)) * 0.1, 0.5)
        total_score = min(base_score + match_score, 1.0)

        return total_score, matched

    def analyze_batch(self, texts: List[str]) -> List[PromptInjectionResult]:
        """Analyze multiple texts for prompt injection.

        Args:
            texts: List of texts to analyze

        Returns:
            List[PromptInjectionResult]: Detection results for each text, in
            the same order as ``texts``
        """
        return [self.detect(text) for text in texts]

    def get_statistics(self, results: List[PromptInjectionResult]) -> Dict[str, Any]:
        """Get statistics from multiple detection results.

        Args:
            results: List of detection results

        Returns:
            Dict[str, Any]: Statistics including injection rates, type
            distribution, average and maximum score. Rates and scores are 0.0
            and ``most_common_injection`` is None for empty input.
        """
        total_injections = sum(1 for r in results if r.is_injection)

        # Aggregate injection type counts
        injection_type_counts: Dict[str, int] = {}
        for result in results:
            for inj_type in result.injection_types:
                injection_type_counts[inj_type] = injection_type_counts.get(inj_type, 0) + 1

        # Average over all results, including those without any injection.
        avg_score = sum(r.injection_score for r in results) / len(results) if results else 0.0

        # Calculate max score
        max_score = max((r.injection_score for r in results), default=0.0)

        return {
            "total_prompts_analyzed": len(results),
            "injection_attempts_count": total_injections,
            "injection_rate": total_injections / len(results) if results else 0.0,
            "injection_type_counts": injection_type_counts,
            "average_score": avg_score,
            "max_score": max_score,
            "most_common_injection": (
                max(injection_type_counts.items(), key=lambda x: x[1])[0]
                if injection_type_counts
                else None
            ),
        }
|