genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,406 @@
+ """Toxicity Detection using Google Perspective API and Detoxify.
+
+ This module provides toxicity detection capabilities using:
+ 1. Google Perspective API (optional, requires API key)
+ 2. Detoxify local model (fallback/alternative)
+ """
+
+ import logging
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional
+
+ from .config import ToxicityConfig
+
+ logger = logging.getLogger(__name__)
+
+ # Try to import optional dependencies at module level for testability
+ try:
+     from detoxify import Detoxify
+ except ImportError:
+     Detoxify = None
+
+ try:
+     from googleapiclient import discovery
+ except ImportError:
+     discovery = None
+
+
+ @dataclass
+ class ToxicityDetectionResult:
+     """Result of toxicity detection.
+
+     Attributes:
+         is_toxic: Whether toxicity was detected above threshold
+         scores: Toxicity scores by category
+         max_score: Maximum toxicity score across all categories
+         toxic_categories: List of categories that exceeded threshold
+         original_text: Original input text
+         blocked: Whether the text was blocked due to toxicity
+     """
+
+     is_toxic: bool
+     scores: Dict[str, float] = field(default_factory=dict)
+     max_score: float = 0.0
+     toxic_categories: List[str] = field(default_factory=list)
+     original_text: Optional[str] = None
+     blocked: bool = False
+
+
+ class ToxicityDetector:
+     """Toxicity detector using Perspective API and/or Detoxify.
+
+     This detector can use:
+     - Google Perspective API for production-grade detection
+     - Detoxify local model for offline/fallback detection
+
+     Requirements:
+         Perspective API: pip install google-api-python-client
+         Detoxify: pip install detoxify
+     """
+
+     def __init__(self, config: ToxicityConfig):
+         """Initialize toxicity detector.
+
+         Args:
+             config: Toxicity detection configuration
+         """
+         self.config = config
+         self._perspective_client = None
+         self._detoxify_model = None
+         self._perspective_available = False
+         self._detoxify_available = False
+         self._check_availability()
+
+     def _check_availability(self):
+         """Check which toxicity detection methods are available."""
+         # Check Perspective API
+         if self.config.use_perspective_api and self.config.perspective_api_key:
+             try:
+                 if discovery is None:
+                     raise ImportError("googleapiclient not installed")
+
+                 self._perspective_client = discovery.build(
+                     "commentanalyzer",
+                     "v1alpha1",
+                     developerKey=self.config.perspective_api_key,
+                     discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
+                     static_discovery=False,
+                 )
+                 self._perspective_available = True
+                 logger.info("Google Perspective API initialized successfully")
+             except ImportError as e:
+                 logger.warning(
+                     "Perspective API not available: %s. Install with: pip install google-api-python-client",
+                     e,
+                 )
+                 self._perspective_available = False
+             except Exception as e:
+                 logger.error("Failed to initialize Perspective API: %s", e)
+                 self._perspective_available = False
+
+         # Check Detoxify
+         if self.config.use_local_model:
+             try:
+                 if Detoxify is None:
+                     raise ImportError("detoxify not installed")
+
+                 # Load the model (using "original" model by default)
+                 self._detoxify_model = Detoxify("original")
+                 self._detoxify_available = True
+                 logger.info("Detoxify model loaded successfully")
+             except ImportError as e:
+                 logger.warning(
+                     "Detoxify not available: %s. Install with: pip install detoxify",
+                     e,
+                 )
+                 self._detoxify_available = False
+             except Exception as e:
+                 logger.error("Failed to load Detoxify model: %s", e)
+                 self._detoxify_available = False
+
+         if not self._perspective_available and not self._detoxify_available:
+             logger.warning(
+                 "No toxicity detection method available. Install either:\n"
+                 " - Perspective API: pip install google-api-python-client\n"
+                 " - Detoxify: pip install detoxify"
+             )
+
+     def is_available(self) -> bool:
+         """Check if toxicity detector is available.
+
+         Returns:
+             bool: True if at least one detection method is available
+         """
+         return self._perspective_available or self._detoxify_available
+
+     def detect(self, text: str) -> ToxicityDetectionResult:
+         """Detect toxicity in text.
+
+         Args:
+             text: Text to analyze
+
+         Returns:
+             ToxicityDetectionResult: Detection results
+         """
+         if not self.config.enabled:
+             return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+         if not self.is_available():
+             logger.warning("No toxicity detection method available")
+             return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+         try:
+             # Try Perspective API first if configured and available
+             if self.config.use_perspective_api and self._perspective_available:
+                 return self._detect_with_perspective(text)
+             # Fall back to Detoxify
+             elif self._detoxify_available:
+                 return self._detect_with_detoxify(text)
+             else:
+                 return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+         except Exception as e:
+             logger.error("Error detecting toxicity: %s", e, exc_info=True)
+             return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+     def _detect_with_perspective(self, text: str) -> ToxicityDetectionResult:
+         """Detect toxicity using Google Perspective API.
+
+         Args:
+             text: Text to analyze
+
+         Returns:
+             ToxicityDetectionResult: Detection results
+         """
+         try:
+             # Build the request
+             analyze_request = {
+                 "comment": {"text": text},
+                 "requestedAttributes": {},
+             }
+
+             # Map our categories to Perspective API attributes
+             category_mapping = {
+                 "toxicity": "TOXICITY",
+                 "severe_toxicity": "SEVERE_TOXICITY",
+                 "identity_attack": "IDENTITY_ATTACK",
+                 "insult": "INSULT",
+                 "profanity": "PROFANITY",
+                 "threat": "THREAT",
+             }
+
+             # Add requested categories
+             for category in self.config.categories:
+                 if category in category_mapping:
+                     analyze_request["requestedAttributes"][category_mapping[category]] = {}
+
+             # Make the API call
+             response = self._perspective_client.comments().analyze(body=analyze_request).execute()
+
+             # Parse scores
+             scores = {}
+             for category in self.config.categories:
+                 if category in category_mapping:
+                     api_attr = category_mapping[category]
+                     if api_attr in response.get("attributeScores", {}):
+                         score_data = response["attributeScores"][api_attr]
+                         scores[category] = score_data["summaryScore"]["value"]
+
+             # Determine if toxic
+             max_score = max(scores.values(), default=0.0)
+             toxic_categories = [
+                 cat for cat, score in scores.items() if score >= self.config.threshold
+             ]
+             is_toxic = len(toxic_categories) > 0
+
+             # Check if should block
+             blocked = self.config.block_on_detection and is_toxic
+
+             return ToxicityDetectionResult(
+                 is_toxic=is_toxic,
+                 scores=scores,
+                 max_score=max_score,
+                 toxic_categories=toxic_categories,
+                 original_text=text,
+                 blocked=blocked,
+             )
+
+         except Exception as e:
+             logger.error("Perspective API error: %s", e, exc_info=True)
+             # Fall back to Detoxify if available
+             if self._detoxify_available:
+                 logger.info("Falling back to Detoxify")
+                 return self._detect_with_detoxify(text)
+             return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+     def _detect_with_detoxify(self, text: str) -> ToxicityDetectionResult:
+         """Detect toxicity using Detoxify local model.
+
+         Args:
+             text: Text to analyze
+
+         Returns:
+             ToxicityDetectionResult: Detection results
+         """
+         try:
+             # Get predictions
+             predictions = self._detoxify_model.predict(text)
+
+             # Map Detoxify outputs to our categories
+             # Detoxify outputs: toxicity, severe_toxicity, obscene, threat, insult, identity_attack
+             category_mapping = {
+                 "toxicity": "toxicity",
+                 "severe_toxicity": "severe_toxicity",
+                 "identity_attack": "identity_attack",
+                 "insult": "insult",
+                 "profanity": "obscene",  # Map obscene to profanity
+                 "threat": "threat",
+             }
+
+             scores = {}
+             for our_cat, detoxify_cat in category_mapping.items():
+                 if our_cat in self.config.categories and detoxify_cat in predictions:
+                     scores[our_cat] = float(predictions[detoxify_cat])
+
+             # Determine if toxic
+             max_score = max(scores.values(), default=0.0)
+             toxic_categories = [
+                 cat for cat, score in scores.items() if score >= self.config.threshold
+             ]
+             is_toxic = len(toxic_categories) > 0
+
+             # Check if should block
+             blocked = self.config.block_on_detection and is_toxic
+
+             return ToxicityDetectionResult(
+                 is_toxic=is_toxic,
+                 scores=scores,
+                 max_score=max_score,
+                 toxic_categories=toxic_categories,
+                 original_text=text,
+                 blocked=blocked,
+             )
+
+         except Exception as e:
+             logger.error("Detoxify error: %s", e, exc_info=True)
+             return ToxicityDetectionResult(is_toxic=False, original_text=text)
+
+     def analyze_batch(self, texts: List[str]) -> List[ToxicityDetectionResult]:
+         """Analyze multiple texts for toxicity.
+
+         Args:
+             texts: List of texts to analyze
+
+         Returns:
+             List[ToxicityDetectionResult]: Detection results for each text
+         """
+         # Detoxify supports batch processing
+         if self._detoxify_available and not self.config.use_perspective_api:
+             try:
+                 return self._batch_detect_with_detoxify(texts)
+             except Exception as e:
+                 logger.error("Batch detection error: %s", e)
+
+         # Fall back to sequential processing
+         results = []
+         for text in texts:
+             results.append(self.detect(text))
+         return results
+
+     def _batch_detect_with_detoxify(self, texts: List[str]) -> List[ToxicityDetectionResult]:
+         """Batch detect toxicity using Detoxify.
+
+         Args:
+             texts: List of texts to analyze
+
+         Returns:
+             List[ToxicityDetectionResult]: Detection results
+         """
+         try:
+             # Get batch predictions
+             predictions = self._detoxify_model.predict(texts)
+
+             results = []
+             for i, text in enumerate(texts):
+                 # Extract scores for this text
+                 category_mapping = {
+                     "toxicity": "toxicity",
+                     "severe_toxicity": "severe_toxicity",
+                     "identity_attack": "identity_attack",
+                     "insult": "insult",
+                     "profanity": "obscene",
+                     "threat": "threat",
+                 }
+
+                 scores = {}
+                 for our_cat, detoxify_cat in category_mapping.items():
+                     if our_cat in self.config.categories and detoxify_cat in predictions:
+                         scores[our_cat] = float(predictions[detoxify_cat][i])
+
+                 # Determine if toxic
+                 max_score = max(scores.values(), default=0.0)
+                 toxic_categories = [
+                     cat for cat, score in scores.items() if score >= self.config.threshold
+                 ]
+                 is_toxic = len(toxic_categories) > 0
+                 blocked = self.config.block_on_detection and is_toxic
+
+                 results.append(
+                     ToxicityDetectionResult(
+                         is_toxic=is_toxic,
+                         scores=scores,
+                         max_score=max_score,
+                         toxic_categories=toxic_categories,
+                         original_text=text,
+                         blocked=blocked,
+                     )
+                 )
+
+             return results
+
+         except Exception as e:
+             logger.error("Batch Detoxify error: %s", e, exc_info=True)
+             return [ToxicityDetectionResult(is_toxic=False, original_text=text) for text in texts]
+
+     def get_statistics(self, results: List[ToxicityDetectionResult]) -> Dict[str, Any]:
+         """Get statistics from multiple detection results.
+
+         Args:
+             results: List of detection results
+
+         Returns:
+             Dict[str, Any]: Statistics including toxicity rate, category distribution
+         """
+         total_toxic = sum(1 for r in results if r.is_toxic)
+
+         # Aggregate category counts
+         category_counts: Dict[str, int] = {}
+         for result in results:
+             for category in result.toxic_categories:
+                 category_counts[category] = category_counts.get(category, 0) + 1
+
+         # Calculate average scores
+         avg_scores: Dict[str, float] = {}
+         for category in self.config.categories:
+             scores = [r.scores.get(category, 0.0) for r in results if r.scores.get(category)]
+             avg_scores[category] = sum(scores) / len(scores) if scores else 0.0
+
+         # Calculate max scores seen
+         max_scores: Dict[str, float] = {}
+         for category in self.config.categories:
+             scores = [r.scores.get(category, 0.0) for r in results if r.scores.get(category)]
+             max_scores[category] = max(scores, default=0.0)
+
+         return {
+             "total_texts_analyzed": len(results),
+             "toxic_texts_count": total_toxic,
+             "toxicity_rate": total_toxic / len(results) if results else 0.0,
+             "category_counts": category_counts,
+             "average_scores": avg_scores,
+             "max_scores": max_scores,
+             "detection_method": (
+                 "perspective_api"
+                 if self._perspective_available and self.config.use_perspective_api
+                 else "detoxify"
+             ),
+         }
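
Note: the following usage sketch is not part of the wheel; it only illustrates how the module above might be driven. The import paths match the files listed in this release, but the assumption that ToxicityConfig accepts these fields (enabled, use_local_model, use_perspective_api, threshold, categories, block_on_detection) as keyword arguments is inferred from the attribute accesses in the code and is not confirmed here.

    # Hypothetical usage sketch; ToxicityConfig keyword arguments are assumed.
    from genai_otel.evaluation.config import ToxicityConfig
    from genai_otel.evaluation.toxicity_detector import ToxicityDetector

    config = ToxicityConfig(
        enabled=True,
        use_local_model=True,       # Detoxify fallback, no API key required
        use_perspective_api=False,
        threshold=0.7,
        categories=["toxicity", "insult", "threat"],
        block_on_detection=False,
    )
    detector = ToxicityDetector(config)
    result = detector.detect("example text to screen")
    if result.is_toxic:
        print(result.toxic_categories, result.max_score)

    # Batch mode plus aggregate statistics over the results
    stats = detector.get_statistics(detector.analyze_batch(["first text", "second text"]))
    print(stats["toxicity_rate"], stats["detection_method"])
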
@@ -0,0 +1,17 @@
1
+ """Custom exceptions for better error handling"""
2
+
3
+
4
+ class InstrumentationError(Exception):
5
+ """Base exception for instrumentation errors"""
6
+
7
+
8
+ class ProviderInstrumentationError(InstrumentationError):
9
+ """Error instrumenting a specific provider"""
10
+
11
+
12
+ class TelemetryExportError(InstrumentationError):
13
+ """Error exporting telemetry data"""
14
+
15
+
16
+ class ConfigurationError(InstrumentationError):
17
+ """Error in configuration"""