genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,316 @@
1
+ """Restricted Topics Detection for GenAI applications.
2
+
3
+ This module provides topic classification and detection capabilities to identify
4
+ and optionally block sensitive or inappropriate topics in prompts and responses.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Dict, List, Optional, Set
11
+
12
+ from .config import RestrictedTopicsConfig
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@dataclass
class RestrictedTopicsResult:
    """Outcome of scanning a single text for restricted topics.

    Only ``has_restricted_topic`` is required; every other field falls back
    to an empty or neutral default, so a negative result can be built from
    the flag alone.
    """

    # True when at least one restricted topic was detected.
    has_restricted_topic: bool
    # Names of the restricted topics that were found.
    detected_topics: List[str] = field(default_factory=list)
    # Confidence score per topic, keyed by topic name.
    topic_scores: Dict[str, float] = field(default_factory=dict)
    # Highest confidence score across all topics.
    max_score: float = 0.0
    # Matched fragments that triggered detection, keyed by topic name.
    patterns_matched: Dict[str, List[str]] = field(default_factory=dict)
    # The input text that was analysed, if retained.
    original_text: Optional[str] = None
    # Whether the request was blocked as a consequence of the detection.
    blocked: bool = False
38
+
39
+
40
class RestrictedTopicsDetector:
    """Restricted topics detector using pattern-based classification.

    This detector identifies sensitive or inappropriate topics that may need
    to be monitored or blocked:
    - Medical advice
    - Legal advice
    - Financial advice
    - Violence and self-harm
    - Illegal activities
    - Adult content
    - Personal information requests
    - Political manipulation

    All detection is pattern-based and does not require external dependencies.
    Each topic in ``TOPIC_PATTERNS`` has a list of regular expressions
    (applied case-insensitively) and a list of lowercase keywords.
    """

    # Pattern definitions for different restricted topics
    TOPIC_PATTERNS = {
        "medical_advice": {
            "patterns": [
                r"should\s+I\s+(?:take|use|stop|avoid)\s+\w+",  # "should I take aspirin"
                r"(?:can|should)\s+I\s+(?:take|use)\s+(?:this|that|\w+)\s+for\s+(?:my|this|a)",  # "can I take aspirin for my headache"
                r"what\s+(?:drug|medicine|medication|treatment)\s+(?:should|can)\s+I\s+(?:take|use)",
                r"is\s+(?:it|this|\w+)\s+(?:safe|dangerous|ok|okay)\s+to\s+(?:take|use|consume)",
                r"(?:can|should)\s+you\s+(?:diagnose|recommend|prescribe)",
                r"(?:diagnose|treat)\s+my\s+(?:symptoms?|illness|condition|disease)",
            ],
            "keywords": [],  # Remove overly broad keywords
        },
        "legal_advice": {
            "patterns": [
                r"should\s+I\s+(?:sue|file\s+a\s+lawsuit|take\s+legal\s+action|hire\s+(?:a|an)\s+(?:lawyer|attorney))",
                r"(?:can|should)\s+I\s+(?:sue|take\s+legal\s+action|file\s+charges)",
                r"is\s+(?:it|this)\s+(?:legal|illegal)\s+(?:to|for|if|\?|or)",  # Added "or" and "?" for "legal or illegal?"
                r"what\s+(?:are\s+my\s+)?legal\s+(?:rights|options)",
                r"(?:can|should)\s+I\s+(?:file|take)\s+(?:a\s+)?(?:lawsuit|legal\s+action)",
                r"(?:need|want|seeking|looking\s+for)\s+legal\s+advice",  # "need legal advice"
                r"legal\s+advice\s+(?:needed|wanted|required)",  # "legal advice needed"
            ],
            "keywords": [],  # Removed broad keywords to avoid false positives
        },
        "financial_advice": {
            "patterns": [
                r"should\s+I\s+(?:invest|buy|sell|trade)\s+(?:in\s+)?(?:stocks?|crypto|bitcoin|forex)",
                r"(?:you|we)\s+should\s+(?:definitely|absolutely)?\s*(?:buy|sell|invest\s+in|trade)\s+(?:these\s+)?(?:stocks?|crypto|bitcoin)",  # "you should buy these stocks"
                r"(?:best|good|top)\s+(?:stocks?|investments?|crypto)\s+to\s+(?:buy|invest|purchase)",
                r"(?:best|good|top)\s+investment\s+strategies",  # "best investment strategies"
                r"(?:what|which)\s+(?:stocks?|crypto|investments?)\s+should\s+(?:I|we)\s+(?:buy|invest\s+in)",
                r"(?:should|can)\s+(?:I|we)\s+(?:invest\s+in|buy|sell|trade)",
                r"financial\s+advice\s+(?:on|for|about)",
            ],
            "keywords": [],  # Removed broad keywords to avoid false positives
        },
        "violence": {
            "patterns": [
                r"\b(?:kill|murder|assault|attack|weapon|bomb|explosive|terrorism)\b",
                r"how\s+to\s+(?:kill|murder|harm|hurt|attack)",
                r"(?:make|build|create)\s+(?:a\s+)?(?:bomb|weapon|explosive)",
                r"ways\s+to\s+(?:kill|murder|harm|hurt)",
            ],
            "keywords": ["violence", "harm", "weapon creation"],
        },
        "self_harm": {
            "patterns": [
                r"\b(?:suicide|self-harm|kill\s+myself|end\s+my\s+life)\b",
                r"how\s+to\s+(?:commit\s+suicide|kill\s+myself|end\s+my\s+life)",
                r"ways\s+to\s+(?:commit\s+suicide|kill\s+myself|die)",
                r"I\s+want\s+to\s+(?:die|kill\s+myself|end\s+it\s+all)",
            ],
            "keywords": ["suicide", "self-harm", "suicidal ideation"],
        },
        "illegal_activities": {
            "patterns": [
                r"how\s+to\s+(?:hack|crack|break\s+into|steal)",
                r"(?:make|create|produce)\s+(?:illegal\s+)?(?:drugs|narcotics)",
                r"how\s+to\s+(?:launder\s+money|evade\s+taxes|commit\s+fraud)",
                r"ways\s+to\s+(?:steal|rob|break\s+the\s+law)",
            ],
            "keywords": ["illegal activity", "crime", "hacking", "fraud"],
        },
        "adult_content": {
            "patterns": [
                r"\b(?:porn|pornography|xxx|nsfw|nude|naked)\b",
                r"(?:explicit|sexual)\s+(?:content|material|images?)",
                r"how\s+to\s+(?:find|access|watch)\s+(?:porn|adult\s+content)",
            ],
            "keywords": ["adult content", "pornography", "explicit material"],
        },
        "personal_information": {
            "patterns": [
                r"(?:give|provide|tell)\s+me\s+(?:your|the)\s+(?:password|credit\s+card|ssn)",
                r"what\s+is\s+(?:your|the)\s+(?:password|pin|code|key)",
                r"share\s+(?:your|the)\s+(?:login|credentials|password)",
            ],
            "keywords": ["password request", "credential theft", "phishing"],
        },
        "political_manipulation": {
            "patterns": [
                r"how\s+to\s+(?:manipulate|influence)\s+(?:voters|elections?|polls?)",
                r"create\s+(?:fake|misleading)\s+(?:news|information|propaganda)",
                r"spread\s+(?:misinformation|disinformation|propaganda)",
            ],
            "keywords": ["election manipulation", "propaganda", "misinformation"],
        },
    }

    def __init__(self, config: "RestrictedTopicsConfig"):
        """Initialize restricted topics detector.

        Args:
            config: Restricted topics detection configuration. The detector
                reads ``enabled``, ``restricted_topics``, ``threshold`` and
                ``block_on_detection`` from it.
        """
        self.config = config

    def is_available(self) -> bool:
        """Check if restricted topics detector is available.

        Returns:
            bool: Always True (pattern-based detection always available)
        """
        return True

    def detect(self, text: str) -> "RestrictedTopicsResult":
        """Detect restricted topics in text.

        Args:
            text: Text to analyze

        Returns:
            RestrictedTopicsResult: Detection results. A negative result
            (``has_restricted_topic=False``) is returned when detection is
            disabled, when ``text`` is not a string, or when an unexpected
            error occurs during detection.
        """
        if not self.config.enabled:
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

        # Non-string input (e.g. None for a missing prompt) has nothing to
        # scan. Return the negative result directly instead of failing on
        # text.lower() and logging a traceback for every such call.
        if not isinstance(text, str):
            logger.debug(
                "Skipping restricted topics detection for non-string input: %s",
                type(text).__name__,
            )
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

        try:
            # Perform pattern-based detection
            topic_scores = {}
            patterns_matched = {}

            # Get topics to check (either configured topics or all)
            topics_to_check = (
                self.config.restricted_topics
                if self.config.restricted_topics
                else list(self.TOPIC_PATTERNS.keys())
            )

            for topic in topics_to_check:
                if topic not in self.TOPIC_PATTERNS:
                    logger.warning("Unknown topic: %s", topic)
                    continue

                score, matched = self._check_topic(text, topic)
                topic_scores[topic] = score
                if matched:
                    patterns_matched[topic] = matched

            # Determine which topics exceed threshold
            detected_topics = [
                topic for topic, score in topic_scores.items() if score >= self.config.threshold
            ]

            has_restricted_topic = len(detected_topics) > 0
            max_score = max(topic_scores.values(), default=0.0)
            blocked = self.config.block_on_detection and has_restricted_topic

            return RestrictedTopicsResult(
                has_restricted_topic=has_restricted_topic,
                detected_topics=detected_topics,
                topic_scores=topic_scores,
                max_score=max_score,
                patterns_matched=patterns_matched,
                original_text=text,
                blocked=blocked,
            )

        except Exception as e:
            # Best-effort boundary: a detection failure is logged and reported
            # as "nothing detected" rather than propagated to the caller.
            logger.error("Error detecting restricted topics: %s", e, exc_info=True)
            return RestrictedTopicsResult(has_restricted_topic=False, original_text=text)

    def _check_topic(self, text: str, topic: str) -> tuple[float, List[str]]:
        """Check for a specific restricted topic.

        Args:
            text: Text to analyze
            topic: Topic to check

        Returns:
            tuple: (score, matched_patterns). ``score`` is 0.0 when nothing
            matched. ``matched_patterns`` lists regex matches as they appear
            in ``text``, followed by the keywords that were found.
        """
        patterns_config = self.TOPIC_PATTERNS.get(topic, {})
        patterns = patterns_config.get("patterns", [])
        keywords = patterns_config.get("keywords", [])

        text_lower = text.lower()

        # Check regex patterns
        matched = [
            match.group()
            for pattern in patterns
            for match in re.finditer(pattern, text, re.IGNORECASE)
        ]

        # Check keywords. A keyword must start at a word boundary so that
        # e.g. "harm" is not found inside "pharmacy"; suffixed forms such as
        # "harmful" still count.
        for keyword in keywords:
            if re.search(r"(?<!\w)" + re.escape(keyword), text_lower):
                matched.append(keyword)

        # Calculate score based on matches
        if not matched:
            return 0.0, []

        # Score calculation:
        # - Base score of 0.4 for any match
        # - Additional 0.1 per unique match, capped at 1.0
        # Uniqueness is case-insensitive: regex matches keep the input's
        # casing while keywords are lowercase, and the score must not depend
        # on how the input happens to be capitalised.
        unique_matches = {item.lower() for item in matched}
        base_score = 0.4
        match_score = min(len(unique_matches) * 0.1, 0.6)
        total_score = min(base_score + match_score, 1.0)

        return total_score, matched

    def analyze_batch(self, texts: List[str]) -> List["RestrictedTopicsResult"]:
        """Analyze multiple texts for restricted topics.

        Args:
            texts: List of texts to analyze

        Returns:
            List[RestrictedTopicsResult]: Detection results for each text,
            in the same order as ``texts``
        """
        return [self.detect(text) for text in texts]

    def get_statistics(self, results: List["RestrictedTopicsResult"]) -> Dict[str, Any]:
        """Get statistics from multiple detection results.

        Args:
            results: List of detection results

        Returns:
            Dict[str, Any]: Statistics including topic distribution. Rates
            and averages are 0.0 and ``most_common_topic`` is None when there
            is nothing to aggregate.
        """
        total_restricted = sum(1 for r in results if r.has_restricted_topic)

        # Aggregate topic counts
        topic_counts: Dict[str, int] = {}
        for result in results:
            for topic in result.detected_topics:
                topic_counts[topic] = topic_counts.get(topic, 0) + 1

        # Calculate average scores
        avg_score = sum(r.max_score for r in results) / len(results) if results else 0.0

        # Calculate max score
        max_score = max((r.max_score for r in results), default=0.0)

        return {
            "total_texts_analyzed": len(results),
            "restricted_topics_count": total_restricted,
            "restricted_rate": total_restricted / len(results) if results else 0.0,
            "topic_counts": topic_counts,
            "average_score": avg_score,
            "max_score": max_score,
            "most_common_topic": (
                max(topic_counts.items(), key=lambda x: x[1])[0] if topic_counts else None
            ),
        }

    def get_available_topics(self) -> Set[str]:
        """Get list of available topic classifications.

        Returns:
            Set[str]: Available topic classifications
        """
        return set(self.TOPIC_PATTERNS.keys())