genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,503 @@
1
+ """OpenTelemetry instrumentor for Haystack NLP framework.
2
+
3
+ This instrumentor automatically traces pipeline execution, component operations,
4
+ and document processing using the Haystack framework.
5
+
6
+ Haystack is a modular NLP framework for building search and question-answering
7
+ systems with support for various LLMs, retrievers, and document stores.
8
+
9
+ Requirements:
10
+ pip install haystack-ai
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from typing import Any, Dict, Optional
16
+
17
+ from ..config import OTelConfig
18
+ from .base import BaseInstrumentor
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class HaystackInstrumentor(BaseInstrumentor):
    """Instrumentor for Haystack NLP framework"""

    def __init__(self):
        """Initialize the instrumentor.

        Probes for the ``haystack`` package at construction time so that
        a later :meth:`instrument` call can silently no-op when Haystack
        is not installed.
        """
        super().__init__()
        # Default to "not available"; _check_availability flips this to
        # True only when `import haystack` succeeds.
        self._haystack_available = False
        self._check_availability()
31
+
32
+ def _check_availability(self):
33
+ """Check if Haystack library is available."""
34
+ try:
35
+ import haystack
36
+
37
+ self._haystack_available = True
38
+ logger.debug("Haystack library detected and available for instrumentation")
39
+ except ImportError:
40
+ logger.debug("Haystack library not installed, instrumentation will be skipped")
41
+ self._haystack_available = False
42
+
43
    def instrument(self, config: OTelConfig):
        """Instrument Haystack framework if available.

        Monkey-patches ``Pipeline.run`` / ``Pipeline.run_async`` plus the
        OpenAI generator and in-memory BM25 retriever components with
        ``wrapt.FunctionWrapper`` so each call is traced via the
        ``_wrap_*`` callbacks on this instance. Each component family is
        imported inside its own try/except so a missing optional piece
        only skips that family, not the whole instrumentation.

        NOTE(review): there is no guard against wrapping a class attribute
        twice, so calling this from two instrumentor instances would
        double-wrap — confirm callers only instrument once.

        Args:
            config (OTelConfig): The OpenTelemetry configuration object.
        """
        if not self._haystack_available:
            logger.debug("Skipping Haystack instrumentation - library not available")
            return

        self.config = config

        try:
            import wrapt

            # Try to import Pipeline (v2.x)
            try:
                from haystack import Pipeline

                # Instrument Pipeline.run (main execution method).
                # FunctionWrapper implements the descriptor protocol, so
                # assigning it to the class attribute still yields bound
                # calls with (wrapped, instance, args, kwargs) semantics.
                if hasattr(Pipeline, "run"):
                    original_run = Pipeline.run
                    Pipeline.run = wrapt.FunctionWrapper(original_run, self._wrap_pipeline_run)

                # Instrument Pipeline.run_async (async execution)
                if hasattr(Pipeline, "run_async"):
                    original_run_async = Pipeline.run_async
                    Pipeline.run_async = wrapt.FunctionWrapper(
                        original_run_async, self._wrap_pipeline_run_async
                    )

            except ImportError:
                logger.debug("Haystack Pipeline not available (v2.x API)")

            # Try to instrument individual components
            try:
                # Instrument Generator components (LLM interaction)
                from haystack.components.generators import OpenAIChatGenerator, OpenAIGenerator

                if hasattr(OpenAIGenerator, "run"):
                    original_gen_run = OpenAIGenerator.run
                    OpenAIGenerator.run = wrapt.FunctionWrapper(
                        original_gen_run, self._wrap_generator_run
                    )

                if hasattr(OpenAIChatGenerator, "run"):
                    original_chat_run = OpenAIChatGenerator.run
                    OpenAIChatGenerator.run = wrapt.FunctionWrapper(
                        original_chat_run, self._wrap_chat_generator_run
                    )

            except ImportError:
                logger.debug("Haystack generator components not available")

            # Try to instrument Retriever components
            try:
                from haystack.components.retrievers import InMemoryBM25Retriever

                if hasattr(InMemoryBM25Retriever, "run"):
                    original_retriever_run = InMemoryBM25Retriever.run
                    InMemoryBM25Retriever.run = wrapt.FunctionWrapper(
                        original_retriever_run, self._wrap_retriever_run
                    )

            except ImportError:
                logger.debug("Haystack retriever components not available")

            self._instrumented = True
            logger.info("Haystack instrumentation enabled")

        except Exception as e:
            # Instrumentation failures are non-fatal unless the config
            # explicitly opts into fail-fast behavior.
            logger.error("Failed to instrument Haystack: %s", e, exc_info=True)
            if config.fail_on_error:
                raise
117
+
118
+ def _wrap_pipeline_run(self, wrapped, instance, args, kwargs):
119
+ """Wrap Pipeline.run method with span.
120
+
121
+ Args:
122
+ wrapped: The original method.
123
+ instance: The Pipeline instance.
124
+ args: Positional arguments.
125
+ kwargs: Keyword arguments.
126
+ """
127
+ return self.create_span_wrapper(
128
+ span_name="haystack.pipeline.run",
129
+ extract_attributes=self._extract_pipeline_attributes,
130
+ )(wrapped)(instance, *args, **kwargs)
131
+
132
+ def _wrap_pipeline_run_async(self, wrapped, instance, args, kwargs):
133
+ """Wrap Pipeline.run_async method with span.
134
+
135
+ Args:
136
+ wrapped: The original method.
137
+ instance: The Pipeline instance.
138
+ args: Positional arguments.
139
+ kwargs: Keyword arguments.
140
+ """
141
+ return self.create_span_wrapper(
142
+ span_name="haystack.pipeline.run_async",
143
+ extract_attributes=self._extract_pipeline_attributes,
144
+ )(wrapped)(instance, *args, **kwargs)
145
+
146
+ def _wrap_generator_run(self, wrapped, instance, args, kwargs):
147
+ """Wrap Generator.run method with span.
148
+
149
+ Args:
150
+ wrapped: The original method.
151
+ instance: The Generator instance.
152
+ args: Positional arguments.
153
+ kwargs: Keyword arguments.
154
+ """
155
+ return self.create_span_wrapper(
156
+ span_name="haystack.generator.run",
157
+ extract_attributes=self._extract_generator_attributes,
158
+ )(wrapped)(instance, *args, **kwargs)
159
+
160
+ def _wrap_chat_generator_run(self, wrapped, instance, args, kwargs):
161
+ """Wrap ChatGenerator.run method with span.
162
+
163
+ Args:
164
+ wrapped: The original method.
165
+ instance: The ChatGenerator instance.
166
+ args: Positional arguments.
167
+ kwargs: Keyword arguments.
168
+ """
169
+ return self.create_span_wrapper(
170
+ span_name="haystack.chat_generator.run",
171
+ extract_attributes=self._extract_chat_generator_attributes,
172
+ )(wrapped)(instance, *args, **kwargs)
173
+
174
+ def _wrap_retriever_run(self, wrapped, instance, args, kwargs):
175
+ """Wrap Retriever.run method with span.
176
+
177
+ Args:
178
+ wrapped: The original method.
179
+ instance: The Retriever instance.
180
+ args: Positional arguments.
181
+ kwargs: Keyword arguments.
182
+ """
183
+ return self.create_span_wrapper(
184
+ span_name="haystack.retriever.run",
185
+ extract_attributes=self._extract_retriever_attributes,
186
+ )(wrapped)(instance, *args, **kwargs)
187
+
188
+ def _extract_pipeline_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
189
+ """Extract attributes from Pipeline.run call.
190
+
191
+ Args:
192
+ instance: The Pipeline instance.
193
+ args: Positional arguments.
194
+ kwargs: Keyword arguments.
195
+
196
+ Returns:
197
+ Dict[str, Any]: Dictionary of attributes to set on the span.
198
+ """
199
+ attrs = {}
200
+
201
+ # Core attributes
202
+ attrs["gen_ai.system"] = "haystack"
203
+ attrs["gen_ai.operation.name"] = "pipeline.run"
204
+
205
+ # Extract pipeline metadata
206
+ if hasattr(instance, "metadata") and instance.metadata:
207
+ try:
208
+ metadata = instance.metadata
209
+ if isinstance(metadata, dict):
210
+ for key, value in metadata.items():
211
+ attrs[f"haystack.pipeline.metadata.{key}"] = str(value)[:200]
212
+ except Exception as e:
213
+ logger.debug("Failed to extract pipeline metadata: %s", e)
214
+
215
+ # Extract graph information
216
+ if hasattr(instance, "graph"):
217
+ try:
218
+ graph = instance.graph
219
+ # Get nodes (components)
220
+ if hasattr(graph, "nodes"):
221
+ nodes = list(graph.nodes())
222
+ attrs["haystack.pipeline.components.count"] = len(nodes)
223
+ attrs["haystack.pipeline.components"] = [str(n) for n in nodes[:10]]
224
+
225
+ # Get edges (connections)
226
+ if hasattr(graph, "edges"):
227
+ edges = list(graph.edges())
228
+ attrs["haystack.pipeline.connections.count"] = len(edges)
229
+
230
+ except Exception as e:
231
+ logger.debug("Failed to extract pipeline graph: %s", e)
232
+
233
+ # Extract input data
234
+ if "data" in kwargs:
235
+ try:
236
+ data = kwargs["data"]
237
+ if isinstance(data, dict):
238
+ attrs["haystack.pipeline.input.keys"] = list(data.keys())[:10]
239
+ # Extract query if present
240
+ if "query" in data:
241
+ attrs["haystack.pipeline.input.query"] = str(data["query"])[:500]
242
+ except Exception as e:
243
+ logger.debug("Failed to extract pipeline input: %s", e)
244
+
245
+ # Extract include/exclude components
246
+ if "include_outputs_from" in kwargs:
247
+ try:
248
+ include = kwargs["include_outputs_from"]
249
+ if isinstance(include, (list, set)):
250
+ attrs["haystack.pipeline.include_outputs"] = list(include)[:10]
251
+ except Exception as e:
252
+ logger.debug("Failed to extract include_outputs: %s", e)
253
+
254
+ return attrs
255
+
256
+ def _extract_generator_attributes(
257
+ self, instance: Any, args: Any, kwargs: Any
258
+ ) -> Dict[str, Any]:
259
+ """Extract attributes from Generator.run call.
260
+
261
+ Args:
262
+ instance: The Generator instance.
263
+ args: Positional arguments.
264
+ kwargs: Keyword arguments.
265
+
266
+ Returns:
267
+ Dict[str, Any]: Dictionary of attributes to set on the span.
268
+ """
269
+ attrs = {}
270
+
271
+ # Core attributes
272
+ attrs["gen_ai.system"] = "haystack"
273
+ attrs["gen_ai.operation.name"] = "generator.run"
274
+ attrs["haystack.component.type"] = "generator"
275
+
276
+ # Extract model information
277
+ if hasattr(instance, "model") and instance.model:
278
+ attrs["gen_ai.request.model"] = instance.model
279
+ attrs["haystack.generator.model"] = instance.model
280
+
281
+ # Extract generation parameters
282
+ if hasattr(instance, "generation_kwargs"):
283
+ try:
284
+ gen_kwargs = instance.generation_kwargs
285
+ if isinstance(gen_kwargs, dict):
286
+ if "max_tokens" in gen_kwargs:
287
+ attrs["gen_ai.request.max_tokens"] = gen_kwargs["max_tokens"]
288
+ if "temperature" in gen_kwargs:
289
+ attrs["gen_ai.request.temperature"] = gen_kwargs["temperature"]
290
+ if "top_p" in gen_kwargs:
291
+ attrs["gen_ai.request.top_p"] = gen_kwargs["top_p"]
292
+ except Exception as e:
293
+ logger.debug("Failed to extract generation kwargs: %s", e)
294
+
295
+ # Extract prompt
296
+ if "prompt" in kwargs:
297
+ try:
298
+ prompt = kwargs["prompt"]
299
+ attrs["haystack.generator.prompt"] = str(prompt)[:500]
300
+ except Exception as e:
301
+ logger.debug("Failed to extract prompt: %s", e)
302
+
303
+ return attrs
304
+
305
+ def _extract_chat_generator_attributes(
306
+ self, instance: Any, args: Any, kwargs: Any
307
+ ) -> Dict[str, Any]:
308
+ """Extract attributes from ChatGenerator.run call.
309
+
310
+ Args:
311
+ instance: The ChatGenerator instance.
312
+ args: Positional arguments.
313
+ kwargs: Keyword arguments.
314
+
315
+ Returns:
316
+ Dict[str, Any]: Dictionary of attributes to set on the span.
317
+ """
318
+ attrs = {}
319
+
320
+ # Core attributes
321
+ attrs["gen_ai.system"] = "haystack"
322
+ attrs["gen_ai.operation.name"] = "chat_generator.run"
323
+ attrs["haystack.component.type"] = "chat_generator"
324
+
325
+ # Extract model information
326
+ if hasattr(instance, "model") and instance.model:
327
+ attrs["gen_ai.request.model"] = instance.model
328
+ attrs["haystack.chat_generator.model"] = instance.model
329
+
330
+ # Extract generation parameters
331
+ if hasattr(instance, "generation_kwargs"):
332
+ try:
333
+ gen_kwargs = instance.generation_kwargs
334
+ if isinstance(gen_kwargs, dict):
335
+ if "max_tokens" in gen_kwargs:
336
+ attrs["gen_ai.request.max_tokens"] = gen_kwargs["max_tokens"]
337
+ if "temperature" in gen_kwargs:
338
+ attrs["gen_ai.request.temperature"] = gen_kwargs["temperature"]
339
+ except Exception as e:
340
+ logger.debug("Failed to extract generation kwargs: %s", e)
341
+
342
+ # Extract messages
343
+ if "messages" in kwargs:
344
+ try:
345
+ messages = kwargs["messages"]
346
+ if isinstance(messages, list):
347
+ attrs["haystack.chat_generator.messages.count"] = len(messages)
348
+ # Extract last message
349
+ if messages:
350
+ last_msg = messages[-1]
351
+ if hasattr(last_msg, "content"):
352
+ attrs["haystack.chat_generator.last_message"] = str(last_msg.content)[
353
+ :500
354
+ ]
355
+ if hasattr(last_msg, "role"):
356
+ attrs["haystack.chat_generator.last_role"] = last_msg.role
357
+ except Exception as e:
358
+ logger.debug("Failed to extract messages: %s", e)
359
+
360
+ return attrs
361
+
362
+ def _extract_retriever_attributes(
363
+ self, instance: Any, args: Any, kwargs: Any
364
+ ) -> Dict[str, Any]:
365
+ """Extract attributes from Retriever.run call.
366
+
367
+ Args:
368
+ instance: The Retriever instance.
369
+ args: Positional arguments.
370
+ kwargs: Keyword arguments.
371
+
372
+ Returns:
373
+ Dict[str, Any]: Dictionary of attributes to set on the span.
374
+ """
375
+ attrs = {}
376
+
377
+ # Core attributes
378
+ attrs["gen_ai.system"] = "haystack"
379
+ attrs["gen_ai.operation.name"] = "retriever.run"
380
+ attrs["haystack.component.type"] = "retriever"
381
+
382
+ # Extract query
383
+ if "query" in kwargs:
384
+ try:
385
+ query = kwargs["query"]
386
+ attrs["haystack.retriever.query"] = str(query)[:500]
387
+ except Exception as e:
388
+ logger.debug("Failed to extract query: %s", e)
389
+
390
+ # Extract top_k
391
+ if "top_k" in kwargs:
392
+ try:
393
+ attrs["haystack.retriever.top_k"] = kwargs["top_k"]
394
+ except Exception as e:
395
+ logger.debug("Failed to extract top_k: %s", e)
396
+
397
+ # Extract filters if present
398
+ if "filters" in kwargs:
399
+ try:
400
+ filters = kwargs["filters"]
401
+ if filters:
402
+ attrs["haystack.retriever.filters"] = str(filters)[:200]
403
+ except Exception as e:
404
+ logger.debug("Failed to extract filters: %s", e)
405
+
406
+ return attrs
407
+
408
+ def _extract_usage(self, result) -> Optional[Dict[str, int]]:
409
+ """Extract token usage from pipeline result.
410
+
411
+ Args:
412
+ result: The pipeline execution result.
413
+
414
+ Returns:
415
+ Optional[Dict[str, int]]: Dictionary with token counts or None.
416
+ """
417
+ # Haystack results are typically dicts with component outputs
418
+ # Token usage is captured by underlying LLM provider instrumentors
419
+ # Try to extract if available in result metadata
420
+ try:
421
+ if isinstance(result, dict):
422
+ # Check for generator outputs
423
+ for key, value in result.items():
424
+ if isinstance(value, dict) and "meta" in value:
425
+ meta = value["meta"]
426
+ if isinstance(meta, list) and meta:
427
+ usage_info = meta[0].get("usage", {})
428
+ if usage_info:
429
+ return {
430
+ "prompt_tokens": usage_info.get("prompt_tokens", 0),
431
+ "completion_tokens": usage_info.get("completion_tokens", 0),
432
+ "total_tokens": usage_info.get("total_tokens", 0),
433
+ }
434
+ except Exception as e:
435
+ logger.debug("Failed to extract usage: %s", e)
436
+
437
+ return None
438
+
439
+ def _extract_response_attributes(self, result) -> Dict[str, Any]:
440
+ """Extract response attributes from pipeline result.
441
+
442
+ Args:
443
+ result: The pipeline execution result.
444
+
445
+ Returns:
446
+ Dict[str, Any]: Dictionary of response attributes.
447
+ """
448
+ attrs = {}
449
+
450
+ try:
451
+ if isinstance(result, dict):
452
+ # Extract output keys
453
+ attrs["haystack.pipeline.output.keys"] = list(result.keys())[:10]
454
+
455
+ # Try to extract replies from generator outputs
456
+ for key, value in result.items():
457
+ if isinstance(value, dict):
458
+ # Check for replies (generator output)
459
+ if "replies" in value:
460
+ replies = value["replies"]
461
+ if isinstance(replies, list) and replies:
462
+ attrs[f"haystack.output.{key}.replies.count"] = len(replies)
463
+ attrs[f"haystack.output.{key}.first_reply"] = str(replies[0])[:500]
464
+
465
+ # Check for documents (retriever output)
466
+ if "documents" in value:
467
+ documents = value["documents"]
468
+ if isinstance(documents, list):
469
+ attrs[f"haystack.output.{key}.documents.count"] = len(documents)
470
+
471
+ except Exception as e:
472
+ logger.debug("Failed to extract response attributes: %s", e)
473
+
474
+ return attrs
475
+
476
+ def _extract_finish_reason(self, result) -> Optional[str]:
477
+ """Extract finish reason from pipeline result.
478
+
479
+ Args:
480
+ result: The pipeline execution result.
481
+
482
+ Returns:
483
+ Optional[str]: The finish reason string or None if not available.
484
+ """
485
+ try:
486
+ if isinstance(result, dict):
487
+ # Check generator outputs for finish reason
488
+ for key, value in result.items():
489
+ if isinstance(value, dict) and "meta" in value:
490
+ meta = value["meta"]
491
+ if isinstance(meta, list) and meta:
492
+ finish_reason = meta[0].get("finish_reason")
493
+ if finish_reason:
494
+ return str(finish_reason)
495
+
496
+ # If we have result, assume completion
497
+ if result:
498
+ return "completed"
499
+
500
+ except Exception as e:
501
+ logger.debug("Failed to extract finish reason: %s", e)
502
+
503
+ return None