genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
genai_otel/config.py ADDED
@@ -0,0 +1,333 @@
1
+ """Configuration management for the GenAI OpenTelemetry instrumentation library.
2
+
3
+ This module defines the `OTelConfig` dataclass, which encapsulates all configurable
4
+ parameters for the OpenTelemetry setup, including service name, exporter endpoint,
5
+ enablement flags for various features (GPU metrics, cost tracking, MCP instrumentation),
6
+ and error handling behavior. Configuration values are primarily loaded from
7
+ environment variables, with sensible defaults provided.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import sys
13
+ from dataclasses import dataclass, field
14
+ from typing import Any, Callable, Dict, List, Optional, Tuple
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ # Default list of instrumentors to enable if not specified by the user.
20
+ # This maintains the "instrument everything available" behavior.
21
+ # Note: "mcp" is excluded by default because it requires the 'mcp' library (>= 1.6.0)
22
+ # which is a specialized dependency for Model Context Protocol servers/clients.
23
+ # Users can enable it by setting GENAI_ENABLED_INSTRUMENTORS="...,mcp" if needed.
24
+ #
25
+ # Note: "hyperbolic" is excluded by default because it wraps requests.post globally,
26
+ # which conflicts with OTLP HTTP exporters (same issue as RequestsInstrumentor).
27
+ # Users can enable it when using OTLP gRPC exporters by setting:
28
+ # OTEL_EXPORTER_OTLP_PROTOCOL=grpc and GENAI_ENABLED_INSTRUMENTORS="...,hyperbolic"
29
+ #
30
+ # Note: "smolagents" and "litellm" OpenInference instrumentors require Python >= 3.10.
31
+ # They are only added to the default list if the Python version is compatible.
32
DEFAULT_INSTRUMENTORS: List[str] = [
    "openai",
    "anthropic",
    "google.generativeai",
    "boto3",
    "azure.ai.openai",
    "cohere",
    "mistralai",
    "together",
    "groq",
    "ollama",
    "vertexai",
    "replicate",
    "anyscale",
    "sambanova",
    "langchain",
    "llama_index",
    "transformers",
]

# The OpenInference instrumentors are appended only on Python >= 3.10.
# IMPORTANT: the order matters and must stay in this sequence:
#   1. smolagents - instruments the agent framework
#   2. litellm - instruments the LLM calls made by agents
if sys.version_info >= (3, 10):
    DEFAULT_INSTRUMENTORS += ["smolagents", "litellm"]
58
+
59
+
60
def _get_enabled_instrumentors() -> List[str]:
    """Return the instrumentor names selected via GENAI_ENABLED_INSTRUMENTORS.

    The variable is read as a comma-separated list. Whitespace around each
    name is stripped and empty entries (for example from a trailing or doubled
    comma) are dropped. When the variable is unset, empty, or yields no names
    at all, the defaults from DEFAULT_INSTRUMENTORS are used.

    Returns:
        A new list on every call, so a caller may mutate the result without
        altering the module-level defaults or any other configuration object.
    """
    enabled_str = os.getenv("GENAI_ENABLED_INSTRUMENTORS")
    if enabled_str:
        names = [name for name in (part.strip() for part in enabled_str.split(",")) if name]
        if names:
            return names
    # Copy so that instances never share (and cannot mutate) the global default list.
    return list(DEFAULT_INSTRUMENTORS)
69
+
70
+
71
def _get_exporter_timeout() -> int:
    """Read the OTLP exporter timeout from OTEL_EXPORTER_OTLP_TIMEOUT.

    Returns:
        The integer value of the variable, or 60 when it is unset. A value
        that cannot be parsed as an integer is reported with a warning and
        replaced by 60 as well.
    """
    timeout_str = os.getenv("OTEL_EXPORTER_OTLP_TIMEOUT", "60")
    try:
        timeout = int(timeout_str)
    except ValueError:
        logger.warning(
            f"Invalid timeout value '{timeout_str}' in OTEL_EXPORTER_OTLP_TIMEOUT. "
            f"Using default of 60 seconds."
        )
        timeout = 60
    return timeout
85
+
86
+
87
def _bool_env(name: str, default: bool) -> Callable[[], bool]:
    """Build a zero-argument factory that reads a boolean flag from ``name``.

    Only the case-insensitive value "true" enables the flag; any other value,
    including an empty string, disables it. ``default`` is used only when the
    variable is not set at all.
    """

    def read() -> bool:
        raw = os.getenv(name)
        if raw is None:
            return default
        return raw.lower() == "true"

    return read


def _number_env(name: str, default: Any, cast: Callable[[str], Any]) -> Callable[[], Any]:
    """Build a zero-argument factory that reads a numeric value from ``name``.

    ``cast`` (``int`` or ``float``) converts the raw string. An unset variable
    yields ``default``; a value that ``cast`` rejects is logged as a warning
    and replaced by ``default`` instead of raising while the config is built.
    """

    def read() -> Any:
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return cast(raw)
        except ValueError:
            logger.warning("Invalid value '%s' in %s. Using default of %s.", raw, name, default)
            return default

    return read


def _str_env(name: str, default: Optional[str] = None) -> Callable[[], Optional[str]]:
    """Build a zero-argument factory that returns ``name`` from the environment."""

    def read() -> Optional[str]:
        return os.getenv(name, default)

    return read


@dataclass
class OTelConfig:
    """Configuration for OpenTelemetry instrumentation.

    Every field that is not passed explicitly is loaded from its environment
    variable when the instance is created, falling back to the default shown
    next to the field. Boolean variables are true only for the
    case-insensitive value "true". Numeric variables that cannot be parsed are
    reported with a warning and replaced by their default.
    """

    service_name: str = field(default_factory=_str_env("OTEL_SERVICE_NAME", "genai-app"))
    endpoint: str = field(
        default_factory=_str_env("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
    )
    enabled_instrumentors: List[str] = field(default_factory=_get_enabled_instrumentors)
    enable_gpu_metrics: bool = field(default_factory=_bool_env("GENAI_ENABLE_GPU_METRICS", True))
    enable_cost_tracking: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_COST_TRACKING", True)
    )
    enable_mcp_instrumentation: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_MCP_INSTRUMENTATION", True)
    )
    enable_http_instrumentation: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_HTTP_INSTRUMENTATION", False)
    )
    # Whether instrumentation errors should be raised rather than tolerated.
    fail_on_error: bool = field(default_factory=_bool_env("GENAI_FAIL_ON_ERROR", False))
    headers: Optional[Dict[str, str]] = None

    enable_co2_tracking: bool = field(default_factory=_bool_env("GENAI_ENABLE_CO2_TRACKING", False))
    exporter_timeout: int = field(default_factory=_get_exporter_timeout)
    carbon_intensity: float = field(
        default_factory=_number_env("GENAI_CARBON_INTENSITY", 475.0, float)
    )  # gCO2e/kWh

    power_cost_per_kwh: float = field(
        default_factory=_number_env("GENAI_POWER_COST_PER_KWH", 0.12, float)
    )  # USD per kWh - electricity cost for power consumption tracking

    gpu_collection_interval: int = field(
        default_factory=_number_env("GENAI_GPU_COLLECTION_INTERVAL", 5, int)
    )  # seconds - how often to collect GPU metrics and CO2 emissions

    # Codecarbon integration settings for more accurate CO2 tracking.
    # When enable_co2_tracking is True, codecarbon provides automatic region-based
    # carbon intensity lookup instead of using the manual carbon_intensity value.
    co2_country_iso_code: Optional[str] = field(
        default_factory=_str_env("GENAI_CO2_COUNTRY_ISO_CODE")
    )  # 3-letter ISO code (e.g., "USA", "GBR", "DEU", "FRA")

    co2_region: Optional[str] = field(
        default_factory=_str_env("GENAI_CO2_REGION")
    )  # Region/state within country (e.g., "california", "texas")

    co2_cloud_provider: Optional[str] = field(
        default_factory=_str_env("GENAI_CO2_CLOUD_PROVIDER")
    )  # Cloud provider name (e.g., "aws", "gcp", "azure")

    co2_cloud_region: Optional[str] = field(
        default_factory=_str_env("GENAI_CO2_CLOUD_REGION")
    )  # Cloud region (e.g., "us-east-1", "europe-west1")

    co2_offline_mode: bool = field(
        default_factory=_bool_env("GENAI_CO2_OFFLINE_MODE", True)
    )  # Run codecarbon in offline mode (no API calls) - defaults to True for privacy

    co2_tracking_mode: str = field(
        default_factory=_str_env("GENAI_CO2_TRACKING_MODE", "machine")
    )  # "machine" (all processes) or "process" (current process only)

    co2_use_manual: bool = field(
        default_factory=_bool_env("GENAI_CO2_USE_MANUAL", False)
    )  # Force manual CO2 calculation using carbon_intensity even when codecarbon is installed

    # OpenTelemetry semantic convention stability opt-in.
    # Supports "gen_ai" for new conventions, "gen_ai/dup" for dual emission.
    semconv_stability_opt_in: str = field(
        default_factory=_str_env("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai")
    )

    # Enable content capture as span events.
    # WARNING: May capture sensitive data. Use with caution.
    enable_content_capture: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_CONTENT_CAPTURE", False)
    )

    # Custom pricing configuration for models not in llm_pricing.json.
    # Format: JSON string with same structure as llm_pricing.json
    # Example: {"chat": {"custom-model": {"promptPrice": 0.001, "completionPrice": 0.002}}}
    custom_pricing_json: Optional[str] = field(
        default_factory=_str_env("GENAI_CUSTOM_PRICING_JSON")
    )

    # Evaluation and Safety Features (v0.2.0)
    # PII Detection
    enable_pii_detection: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_PII_DETECTION", False)
    )
    pii_mode: str = field(
        default_factory=_str_env("GENAI_PII_MODE", "detect")
    )  # detect, redact, or block
    pii_threshold: float = field(default_factory=_number_env("GENAI_PII_THRESHOLD", 0.7, float))
    pii_gdpr_mode: bool = field(default_factory=_bool_env("GENAI_PII_GDPR_MODE", False))
    pii_hipaa_mode: bool = field(default_factory=_bool_env("GENAI_PII_HIPAA_MODE", False))
    pii_pci_dss_mode: bool = field(default_factory=_bool_env("GENAI_PII_PCI_DSS_MODE", False))

    # Toxicity Detection
    enable_toxicity_detection: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_TOXICITY_DETECTION", False)
    )
    toxicity_threshold: float = field(
        default_factory=_number_env("GENAI_TOXICITY_THRESHOLD", 0.7, float)
    )
    toxicity_use_perspective_api: bool = field(
        default_factory=_bool_env("GENAI_TOXICITY_USE_PERSPECTIVE_API", False)
    )
    toxicity_perspective_api_key: Optional[str] = field(
        default_factory=_str_env("GENAI_TOXICITY_PERSPECTIVE_API_KEY")
    )

    # Bias Detection
    enable_bias_detection: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_BIAS_DETECTION", False)
    )
    bias_threshold: float = field(default_factory=_number_env("GENAI_BIAS_THRESHOLD", 0.6, float))

    # Prompt Injection Detection
    enable_prompt_injection_detection: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_PROMPT_INJECTION_DETECTION", False)
    )
    prompt_injection_threshold: float = field(
        default_factory=_number_env("GENAI_PROMPT_INJECTION_THRESHOLD", 0.8, float)
    )

    # Restricted Topics
    enable_restricted_topics: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_RESTRICTED_TOPICS", False)
    )
    restricted_topics_threshold: float = field(
        default_factory=_number_env("GENAI_RESTRICTED_TOPICS_THRESHOLD", 0.7, float)
    )

    # Hallucination Detection
    enable_hallucination_detection: bool = field(
        default_factory=_bool_env("GENAI_ENABLE_HALLUCINATION_DETECTION", False)
    )
    hallucination_threshold: float = field(
        default_factory=_number_env("GENAI_HALLUCINATION_THRESHOLD", 0.6, float)
    )

    # Session and user tracking (Phase 4.1)
    # Optional callable functions to extract session_id and user_id from requests.
    # Signature: (instance, args, kwargs) -> Optional[str]
    # Example: lambda instance, args, kwargs: kwargs.get("metadata", {}).get("session_id")
    session_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
    user_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
265
+
266
+
267
+ import os
268
+
269
+ from opentelemetry import trace
270
+ from opentelemetry.sdk.resources import ( # noqa: F401
271
+ DEPLOYMENT_ENVIRONMENT,
272
+ SERVICE_NAME,
273
+ TELEMETRY_SDK_NAME,
274
+ Resource,
275
+ )
276
+ from opentelemetry.sdk.trace import TracerProvider
277
+ from opentelemetry.sdk.trace.export import (
278
+ BatchSpanProcessor,
279
+ ConsoleSpanExporter,
280
+ SimpleSpanProcessor,
281
+ )
282
+
283
+ if os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL") == "grpc":
284
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
285
+ else:
286
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
287
+
288
+
289
def setup_tracing(
    config: "OTelConfig",  # Use OTelConfig from this module
    tracer_name: str,
    disable_batch: bool = False,
):
    """Initialize OpenTelemetry tracing and return a tracer.

    Sets HAYSTACK_AUTO_TRACE_ENABLED to "false" in the process environment,
    installs a TracerProvider whose resource carries the service name, the
    deployment environment (the ENVIRONMENT variable, "dev" if unset) and the
    SDK name, then attaches a span processor to the global tracer provider.

    Args:
        config: Supplies ``service_name``, ``endpoint`` and ``headers``. The
            endpoint is only tested for truthiness to choose between OTLP and
            console export; only ``headers`` is passed to the OTLP exporter.
        tracer_name: Name handed to ``trace.get_tracer``.
        disable_batch: When True, OTLP spans go through a SimpleSpanProcessor
            instead of a BatchSpanProcessor. Console export always uses a
            SimpleSpanProcessor.

    Returns:
        The tracer for ``tracer_name``, or None if any step raised; the
        failure is logged with its traceback rather than propagated.
    """
    try:
        # Disable Haystack Auto Tracing
        os.environ["HAYSTACK_AUTO_TRACE_ENABLED"] = "false"

        # Describe this service on every exported span.
        resource_attributes = {
            SERVICE_NAME: config.service_name,
            DEPLOYMENT_ENVIRONMENT: os.getenv("ENVIRONMENT", "dev"),
            TELEMETRY_SDK_NAME: "genai_otel_instrument",
        }
        provider = TracerProvider(resource=Resource.create(attributes=resource_attributes))
        trace.set_tracer_provider(provider)

        if not config.endpoint:
            # No endpoint configured: print spans to the console as they end.
            processor = SimpleSpanProcessor(ConsoleSpanExporter())
        else:
            exporter = OTLPSpanExporter(headers=config.headers)
            processor_cls = SimpleSpanProcessor if disable_batch else BatchSpanProcessor
            processor = processor_cls(exporter)

        # Attach to whichever provider is globally registered at this point.
        trace.get_tracer_provider().add_span_processor(processor)
        return trace.get_tracer(tracer_name)
    except Exception as exc:
        logger.error("Failed to initialize OpenTelemetry: %s", exc, exc_info=True)
        return None