genai-otel-instrument 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
"""Restricted Topics Detection for GenAI applications.
|
|
2
|
+
|
|
3
|
+
This module provides topic classification and detection capabilities to identify
|
|
4
|
+
and optionally block sensitive or inappropriate topics in prompts and responses.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict, List, Optional, Set
|
|
11
|
+
|
|
12
|
+
from .config import RestrictedTopicsConfig
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class RestrictedTopicsResult:
    """Outcome of running restricted-topic detection on a single text.

    Only ``has_restricted_topic`` is required; every other field has a
    default so that a "nothing found" result can be built with one argument.

    Attributes:
        has_restricted_topic: True when at least one restricted topic was detected.
        detected_topics: Names of the restricted topics that were found.
        topic_scores: Confidence score for each topic, keyed by topic name.
        max_score: Highest confidence score across all topics.
        patterns_matched: Matched text fragments that triggered detection,
            keyed by topic name.
        original_text: The input text that was analyzed, if recorded.
        blocked: True when the request was blocked.
    """

    # Required verdict flag; must stay first so positional construction works.
    has_restricted_topic: bool
    # Mutable containers use factories so instances never share state.
    detected_topics: List[str] = field(default_factory=list)
    topic_scores: Dict[str, float] = field(default_factory=dict)
    max_score: float = field(default=0.0)
    patterns_matched: Dict[str, List[str]] = field(default_factory=dict)
    original_text: Optional[str] = field(default=None)
    blocked: bool = field(default=False)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RestrictedTopicsDetector:
    """Restricted topics detector using pattern-based classification.

    Each topic in ``TOPIC_PATTERNS`` is described by a list of regular
    expressions (applied case-insensitively) and a list of lower-case keywords
    (matched as substrings of the lower-cased text). The following topics are
    defined:
    - Medical advice
    - Legal advice
    - Financial advice
    - Violence
    - Self-harm
    - Illegal activities
    - Adult content
    - Personal information requests
    - Political manipulation

    Scoring: a topic with no match scores 0.0; otherwise it scores 0.4 plus
    0.1 per unique (case-insensitive) match, capped at 1.0. A topic counts as
    detected when its score is at least ``config.threshold``.

    All detection is pattern-based and uses only the standard library.
    """

    # Pattern definitions for different restricted topics
    TOPIC_PATTERNS = {
        "medical_advice": {
            "patterns": [
                r"should\s+I\s+(?:take|use|stop|avoid)\s+\w+",  # "should I take aspirin"
                r"(?:can|should)\s+I\s+(?:take|use)\s+(?:this|that|\w+)\s+for\s+(?:my|this|a)",  # "can I take aspirin for my headache"
                r"what\s+(?:drug|medicine|medication|treatment)\s+(?:should|can)\s+I\s+(?:take|use)",
                r"is\s+(?:it|this|\w+)\s+(?:safe|dangerous|ok|okay)\s+to\s+(?:take|use|consume)",
                r"(?:can|should)\s+you\s+(?:diagnose|recommend|prescribe)",
                r"(?:diagnose|treat)\s+my\s+(?:symptoms?|illness|condition|disease)",
            ],
            "keywords": [],  # Remove overly broad keywords
        },
        "legal_advice": {
            "patterns": [
                r"should\s+I\s+(?:sue|file\s+a\s+lawsuit|take\s+legal\s+action|hire\s+(?:a|an)\s+(?:lawyer|attorney))",
                r"(?:can|should)\s+I\s+(?:sue|take\s+legal\s+action|file\s+charges)",
                r"is\s+(?:it|this)\s+(?:legal|illegal)\s+(?:to|for|if|\?|or)",  # Added "or" and "?" for "legal or illegal?"
                r"what\s+(?:are\s+my\s+)?legal\s+(?:rights|options)",
                r"(?:can|should)\s+I\s+(?:file|take)\s+(?:a\s+)?(?:lawsuit|legal\s+action)",
                r"(?:need|want|seeking|looking\s+for)\s+legal\s+advice",  # "need legal advice"
                r"legal\s+advice\s+(?:needed|wanted|required)",  # "legal advice needed"
            ],
            "keywords": [],  # Removed broad keywords to avoid false positives
        },
        "financial_advice": {
            "patterns": [
                r"should\s+I\s+(?:invest|buy|sell|trade)\s+(?:in\s+)?(?:stocks?|crypto|bitcoin|forex)",
                r"(?:you|we)\s+should\s+(?:definitely|absolutely)?\s*(?:buy|sell|invest\s+in|trade)\s+(?:these\s+)?(?:stocks?|crypto|bitcoin)",  # "you should buy these stocks"
                r"(?:best|good|top)\s+(?:stocks?|investments?|crypto)\s+to\s+(?:buy|invest|purchase)",
                r"(?:best|good|top)\s+investment\s+strategies",  # "best investment strategies"
                r"(?:what|which)\s+(?:stocks?|crypto|investments?)\s+should\s+(?:I|we)\s+(?:buy|invest\s+in)",
                r"(?:should|can)\s+(?:I|we)\s+(?:invest\s+in|buy|sell|trade)",
                r"financial\s+advice\s+(?:on|for|about)",
            ],
            "keywords": [],  # Removed broad keywords to avoid false positives
        },
        "violence": {
            "patterns": [
                r"\b(?:kill|murder|assault|attack|weapon|bomb|explosive|terrorism)\b",
                r"how\s+to\s+(?:kill|murder|harm|hurt|attack)",
                r"(?:make|build|create)\s+(?:a\s+)?(?:bomb|weapon|explosive)",
                r"ways\s+to\s+(?:kill|murder|harm|hurt)",
            ],
            "keywords": ["violence", "harm", "weapon creation"],
        },
        "self_harm": {
            "patterns": [
                r"\b(?:suicide|self-harm|kill\s+myself|end\s+my\s+life)\b",
                r"how\s+to\s+(?:commit\s+suicide|kill\s+myself|end\s+my\s+life)",
                r"ways\s+to\s+(?:commit\s+suicide|kill\s+myself|die)",
                r"I\s+want\s+to\s+(?:die|kill\s+myself|end\s+it\s+all)",
            ],
            "keywords": ["suicide", "self-harm", "suicidal ideation"],
        },
        "illegal_activities": {
            "patterns": [
                r"how\s+to\s+(?:hack|crack|break\s+into|steal)",
                r"(?:make|create|produce)\s+(?:illegal\s+)?(?:drugs|narcotics)",
                r"how\s+to\s+(?:launder\s+money|evade\s+taxes|commit\s+fraud)",
                r"ways\s+to\s+(?:steal|rob|break\s+the\s+law)",
            ],
            "keywords": ["illegal activity", "crime", "hacking", "fraud"],
        },
        "adult_content": {
            "patterns": [
                r"\b(?:porn|pornography|xxx|nsfw|nude|naked)\b",
                r"(?:explicit|sexual)\s+(?:content|material|images?)",
                r"how\s+to\s+(?:find|access|watch)\s+(?:porn|adult\s+content)",
            ],
            "keywords": ["adult content", "pornography", "explicit material"],
        },
        "personal_information": {
            "patterns": [
                r"(?:give|provide|tell)\s+me\s+(?:your|the)\s+(?:password|credit\s+card|ssn)",
                r"what\s+is\s+(?:your|the)\s+(?:password|pin|code|key)",
                r"share\s+(?:your|the)\s+(?:login|credentials|password)",
            ],
            "keywords": ["password request", "credential theft", "phishing"],
        },
        "political_manipulation": {
            "patterns": [
                r"how\s+to\s+(?:manipulate|influence)\s+(?:voters|elections?|polls?)",
                r"create\s+(?:fake|misleading)\s+(?:news|information|propaganda)",
                r"spread\s+(?:misinformation|disinformation|propaganda)",
            ],
            "keywords": ["election manipulation", "propaganda", "misinformation"],
        },
    }

    def __init__(self, config: RestrictedTopicsConfig):
        """Initialize restricted topics detector.

        Args:
            config: Restricted topics detection configuration. The detector
                reads ``enabled``, ``restricted_topics``, ``threshold`` and
                ``block_on_detection`` from it.
        """
        self.config = config

    def is_available(self) -> bool:
        """Check if restricted topics detector is available.

        Returns:
            bool: Always True (pattern-based detection always available)
        """
        return True

    def detect(self, text: str) -> RestrictedTopicsResult:
        """Detect restricted topics in text.

        Args:
            text: Text to analyze

        Returns:
            RestrictedTopicsResult: Detection results. A negative result
            (``has_restricted_topic=False``) is returned when detection is
            disabled, when ``text`` is not a string, or when an unexpected
            error occurs during detection.
        """
        if not self.config.enabled:
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

        # Non-string input (e.g. None) is an expected condition: handle it
        # explicitly instead of letting it surface as an AttributeError with a
        # full traceback from the generic handler below.
        if not isinstance(text, str):
            logger.warning(
                "Restricted topics detection skipped: expected str, got %s",
                type(text).__name__,
            )
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

        try:
            topic_scores = {}
            patterns_matched = {}

            # Check the configured topics, or every known topic when none are configured
            topics_to_check = (
                self.config.restricted_topics
                if self.config.restricted_topics
                else list(self.TOPIC_PATTERNS.keys())
            )

            for topic in topics_to_check:
                if topic not in self.TOPIC_PATTERNS:
                    logger.warning("Unknown topic: %s", topic)
                    continue

                score, matched = self._check_topic(text, topic)
                topic_scores[topic] = score
                if matched:
                    patterns_matched[topic] = matched

            # A topic is detected when its score reaches the configured threshold
            detected_topics = [
                topic for topic, score in topic_scores.items() if score >= self.config.threshold
            ]

            has_restricted_topic = len(detected_topics) > 0
            max_score = max(topic_scores.values(), default=0.0)
            # bool() keeps ``blocked`` a real boolean even if the config flag is None
            blocked = bool(self.config.block_on_detection and has_restricted_topic)

            return RestrictedTopicsResult(
                has_restricted_topic=has_restricted_topic,
                detected_topics=detected_topics,
                topic_scores=topic_scores,
                max_score=max_score,
                patterns_matched=patterns_matched,
                original_text=text,
                blocked=blocked,
            )

        except Exception as e:
            # NOTE(review): detection deliberately fails open here (a negative,
            # unblocked result) so an internal error never breaks the caller.
            logger.exception("Error detecting restricted topics: %s", e)
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

    def _check_topic(self, text: str, topic: str) -> tuple[float, List[str]]:
        """Check for a specific restricted topic.

        Args:
            text: Text to analyze
            topic: Topic to check; an unknown topic yields no matches

        Returns:
            tuple: (score, matched_patterns). ``matched_patterns`` holds every
            regex match (in the input's original casing) followed by every
            keyword found; it may contain duplicates. ``score`` is 0.0 when
            nothing matched.
        """
        patterns_config = self.TOPIC_PATTERNS.get(topic, {})
        patterns = patterns_config.get("patterns", [])
        keywords = patterns_config.get("keywords", [])

        matched: List[str] = []
        text_lower = text.lower()

        # Regex patterns are applied case-insensitively to the original text
        for pattern in patterns:
            matched.extend(m.group() for m in re.finditer(pattern, text, re.IGNORECASE))

        # NOTE(review): keywords are plain substring checks, so "harm" also
        # matches inside unrelated words such as "pharmacy".
        matched.extend(keyword for keyword in keywords if keyword in text_lower)

        if not matched:
            return 0.0, []

        # Score calculation:
        # - Base score of 0.4 for any match
        # - Additional 0.1 per unique match, capped at 1.0
        # Uniqueness is case-insensitive: regex hits keep the input's casing
        # while keyword hits are lower-case, and the same phrase must not be
        # counted twice just because the input was capitalized.
        unique_matches = {item.lower() for item in matched}
        base_score = 0.4
        match_score = min(len(unique_matches) * 0.1, 0.6)
        total_score = min(base_score + match_score, 1.0)

        return total_score, matched

    def analyze_batch(self, texts: List[str]) -> List[RestrictedTopicsResult]:
        """Analyze multiple texts for restricted topics.

        Args:
            texts: List of texts to analyze

        Returns:
            List[RestrictedTopicsResult]: Detection results for each text,
            in the same order as ``texts``
        """
        return [self.detect(text) for text in texts]

    def get_statistics(self, results: List[RestrictedTopicsResult]) -> Dict[str, Any]:
        """Get statistics from multiple detection results.

        Args:
            results: List of detection results

        Returns:
            Dict[str, Any]: Statistics including topic distribution. For an
            empty ``results`` list all rates and scores are 0.0 and
            ``most_common_topic`` is None.
        """
        total = len(results)
        total_restricted = sum(1 for r in results if r.has_restricted_topic)

        # Count how many results each topic was detected in
        topic_counts: Dict[str, int] = {}
        for result in results:
            for topic in result.detected_topics:
                topic_counts[topic] = topic_counts.get(topic, 0) + 1

        # Average and maximum of the per-result maximum scores
        avg_score = sum(r.max_score for r in results) / total if total else 0.0
        max_score = max((r.max_score for r in results), default=0.0)

        return {
            "total_texts_analyzed": total,
            "restricted_topics_count": total_restricted,
            "restricted_rate": total_restricted / total if total else 0.0,
            "topic_counts": topic_counts,
            "average_score": avg_score,
            "max_score": max_score,
            # Ties resolve to the topic that was seen first
            "most_common_topic": (
                max(topic_counts.items(), key=lambda x: x[1])[0] if topic_counts else None
            ),
        }

    def get_available_topics(self) -> Set[str]:
        """Get the set of available topic classifications.

        Returns:
            Set[str]: Available topic classifications
        """
        return set(self.TOPIC_PATTERNS.keys())
|