openlit 1.33.16__tar.gz → 1.33.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {openlit-1.33.16 → openlit-1.33.18}/PKG-INFO +1 -1
  2. {openlit-1.33.16 → openlit-1.33.18}/pyproject.toml +1 -1
  3. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/azure_ai_inference/__init__.py +5 -22
  4. openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +144 -0
  5. openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +144 -0
  6. openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/utils.py +225 -0
  7. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/langchain/langchain.py +2 -28
  8. openlit-1.33.16/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -585
  9. openlit-1.33.16/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -585
  10. {openlit-1.33.16 → openlit-1.33.18}/LICENSE +0 -0
  11. {openlit-1.33.16 → openlit-1.33.18}/README.md +0 -0
  12. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/__helpers.py +0 -0
  13. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/__init__.py +0 -0
  14. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/__init__.py +0 -0
  15. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/all.py +0 -0
  16. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/bias_detection.py +0 -0
  17. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/hallucination.py +0 -0
  18. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/toxicity.py +0 -0
  19. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/evals/utils.py +0 -0
  20. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/__init__.py +0 -0
  21. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/all.py +0 -0
  22. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/prompt_injection.py +0 -0
  23. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/restrict_topic.py +0 -0
  24. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/sensitive_topic.py +0 -0
  25. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/guard/utils.py +0 -0
  26. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ag2/__init__.py +0 -0
  27. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ag2/ag2.py +0 -0
  28. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ai21/__init__.py +0 -0
  29. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ai21/ai21.py +0 -0
  30. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
  31. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ai21/utils.py +0 -0
  32. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
  33. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
  34. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
  35. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/utils.py +0 -0
  36. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
  37. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
  38. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/astra/__init__.py +0 -0
  39. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/astra/astra.py +0 -0
  40. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/astra/async_astra.py +0 -0
  41. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/astra/utils.py +0 -0
  42. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
  43. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
  44. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/chroma/__init__.py +0 -0
  45. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/chroma/chroma.py +0 -0
  46. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/cohere/__init__.py +0 -0
  47. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
  48. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/cohere/cohere.py +0 -0
  49. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
  50. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
  51. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
  52. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
  53. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
  54. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/crewai/__init__.py +0 -0
  55. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/crewai/crewai.py +0 -0
  56. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
  57. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
  58. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
  59. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
  60. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
  61. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
  62. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
  63. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
  64. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
  65. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
  66. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -0
  67. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -0
  68. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
  69. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
  70. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/gpu/__init__.py +0 -0
  71. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/groq/__init__.py +0 -0
  72. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/groq/async_groq.py +0 -0
  73. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/groq/groq.py +0 -0
  74. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/haystack/__init__.py +0 -0
  75. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/haystack/haystack.py +0 -0
  76. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/julep/__init__.py +0 -0
  77. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/julep/async_julep.py +0 -0
  78. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/julep/julep.py +0 -0
  79. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/langchain/__init__.py +0 -0
  80. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
  81. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/letta/__init__.py +0 -0
  82. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/letta/letta.py +0 -0
  83. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/litellm/__init__.py +0 -0
  84. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
  85. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/litellm/litellm.py +0 -0
  86. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
  87. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
  88. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/mem0/__init__.py +0 -0
  89. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/mem0/mem0.py +0 -0
  90. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/milvus/__init__.py +0 -0
  91. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/milvus/milvus.py +0 -0
  92. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/mistral/__init__.py +0 -0
  93. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
  94. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/mistral/mistral.py +0 -0
  95. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/multion/__init__.py +0 -0
  96. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/multion/async_multion.py +0 -0
  97. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/multion/multion.py +0 -0
  98. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ollama/__init__.py +0 -0
  99. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
  100. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ollama/ollama.py +0 -0
  101. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/ollama/utils.py +0 -0
  102. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/openai/__init__.py +0 -0
  103. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/openai/async_openai.py +0 -0
  104. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/openai/openai.py +0 -0
  105. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
  106. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
  107. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/phidata/__init__.py +0 -0
  108. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/phidata/phidata.py +0 -0
  109. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
  110. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
  111. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/premai/__init__.py +0 -0
  112. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/premai/premai.py +0 -0
  113. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
  114. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
  115. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
  116. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/reka/__init__.py +0 -0
  117. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/reka/async_reka.py +0 -0
  118. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/reka/reka.py +0 -0
  119. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/together/__init__.py +0 -0
  120. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/together/async_together.py +0 -0
  121. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/together/together.py +0 -0
  122. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/transformers/__init__.py +0 -0
  123. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/transformers/transformers.py +0 -0
  124. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
  125. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
  126. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
  127. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/vllm/__init__.py +0 -0
  128. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/vllm/vllm.py +0 -0
  129. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/otel/events.py +0 -0
  130. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/otel/metrics.py +0 -0
  131. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/otel/tracing.py +0 -0
  132. {openlit-1.33.16 → openlit-1.33.18}/src/openlit/semcov/__init__.py +0 -0
{openlit-1.33.16 → openlit-1.33.18}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.16
+Version: 1.33.18
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.33.16 → openlit-1.33.18}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.33.16"
+version = "1.33.18"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
{openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/azure_ai_inference/__init__.py
@@ -4,13 +4,11 @@ from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
-
 from openlit.instrumentation.azure_ai_inference.azure_ai_inference import (
-    complete, embedding
+    complete
 )
-
 from openlit.instrumentation.azure_ai_inference.async_azure_ai_inference import (
-    async_complete, async_embedding
+    async_complete
 )
 
 _instruments = ('azure-ai-inference >= 1.0.0b4',)
@@ -27,6 +25,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
         application_name = kwargs.get('application_name', 'default')
         environment = kwargs.get('environment', 'default')
         tracer = kwargs.get('tracer')
+        event_provider = kwargs.get('event_provider')
         metrics = kwargs.get('metrics_dict')
         pricing_info = kwargs.get('pricing_info', {})
         capture_message_content = kwargs.get('capture_message_content', False)
@@ -38,15 +37,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference',
             'ChatCompletionsClient.complete',
             complete(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # sync embedding
-        wrap_function_wrapper(
-            'azure.ai.inference',
-            'EmbeddingsClient.embed',
-            embedding(version, environment, application_name,
-                      tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # async generate
@@ -54,15 +45,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference.aio',
             'ChatCompletionsClient.complete',
             async_complete(version, environment, application_name,
-                           tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # async embedding
-        wrap_function_wrapper(
-            'azure.ai.inference.aio',
-            'EmbeddingsClient.embed',
-            async_embedding(version, environment, application_name,
-                            tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                           tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
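Taken together, the __init__.py changes drop the EmbeddingsClient.embed wrappers and thread a new event_provider through both chat wrappers. A minimal sketch of how the updated instrumentation is picked up from application code (the collector address, endpoint, and key below are placeholders, not values from this diff):

    import os
    import openlit
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.inference import ChatCompletionsClient
    from azure.ai.inference.models import UserMessage

    # openlit.init() registers AzureAIInferenceInstrumentor, which wraps
    # ChatCompletionsClient.complete as shown in the hunks above.
    openlit.init(otlp_endpoint="http://127.0.0.1:4318")

    client = ChatCompletionsClient(
        endpoint="https://models.github.ai/inference",  # placeholder
        credential=AzureKeyCredential(os.environ["API_KEY"]),
    )

    # Traced by the `complete` wrapper; as of 1.33.18, embedding calls
    # (EmbeddingsClient.embed) are no longer instrumented.
    response = client.complete(
        model="gpt-4o",
        messages=[UserMessage(content="Hello")],
    )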
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py (new file)
@@ -0,0 +1,144 @@
+"""
+Module for monitoring Azure AI Inference API calls.
+"""
+
+import logging
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import (
+    handle_exception,
+    set_server_address_and_port,
+)
+from openlit.instrumentation.azure_ai_inference.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+)
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def async_complete(version, environment, application_name,
+                   tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    class TracedAsyncStream:
+        """
+        Wrapper for streaming responses to collect telemetry.
+        """
+
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopAsyncIteration:
+                try:
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
+                        )
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                raise
+
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+
+        else:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
+                response = await wrapped(*args, **kwargs)
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response
+
+    return wrapper
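TracedAsyncStream drives all of its telemetry from the caller's iteration. A minimal sketch of the async usage this wrapper intercepts, assuming the azure.ai.inference.aio client and the same placeholder credentials as above:

    import asyncio
    from azure.ai.inference.aio import ChatCompletionsClient as AsyncChatClient

    async def main():
        async with AsyncChatClient(
            endpoint="https://models.github.ai/inference",  # placeholder
            credential=AzureKeyCredential(os.environ["API_KEY"]),
        ) as client:
            stream = await client.complete(
                model="gpt-4o",
                messages=[UserMessage(content="Hello")],
                stream=True,
            )
            # Iteration goes through TracedAsyncStream.__anext__, which feeds
            # each chunk to process_chunk(); the span is created only when
            # StopAsyncIteration fires.
            async for chunk in stream:
                pass

    asyncio.run(main())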
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py (new file)
@@ -0,0 +1,144 @@
+"""
+Module for monitoring Azure AI Inference API calls.
+"""
+
+import logging
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import (
+    handle_exception,
+    set_server_address_and_port,
+)
+from openlit.instrumentation.azure_ai_inference.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+)
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def complete(version, environment, application_name,
+             tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect telemetry.
+        """
+
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopIteration:
+                try:
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
+                        )
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                raise
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+
+        else:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
+                response = wrapped(*args, **kwargs)
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response
+
+    return wrapper
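Both new modules use the same deferred-span pattern for streaming: chunks are observed as the caller iterates, and the span is created and finalized only once the stream is exhausted. A minimal sketch of the sync path, continuing the client setup above:

    # The wrapper returns TracedSyncStream in place of the SDK's stream
    # object; telemetry is finalized when StopIteration fires.
    stream = client.complete(
        model="gpt-4o",
        messages=[UserMessage(content="Say hi")],
        stream=True,
    )

    for chunk in stream:
        # each chunk passes through process_chunk(), which accumulates the
        # response text, token usage, and the TTFT/TBT timestamps
        pass

    # By this point the wrapper has opened a span (named from the chat
    # operation type and request model) and run
    # process_streaming_chat_response() against the accumulated state.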
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/utils.py (new file)
@@ -0,0 +1,225 @@
+"""
+Azure AI Inference OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    extract_and_format_input,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvetion
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    self._timestamps.append(end_time)
+
+    if len(self._timestamps) == 1:
+        # Calculate time to first chunk
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Collect message IDs and aggregated response from events
+    if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+            'content' in chunked.get('choices')[0].get('delta'))):
+
+        if content := chunked.get('choices')[0].get('delta').get('content'):
+            self._llmresponse += content
+
+    if chunked.get('choices')[0].get('finish_reason') is not None:
+        self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+
+    if chunked.get('usage') is not None:
+        self._input_tokens = chunked.get('usage').get('prompt_tokens')
+        self._response_id = chunked.get('id')
+        self._response_model = chunked.get('model')
+        self._output_tokens = chunked.get('usage').get('completion_tokens')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
+    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvetion.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K, scope._kwargs.get('top_k', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                              scope._kwargs.get("frequency_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                              scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.SERVER_ADDRESS, scope._server_address)
+
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                              'text' if isinstance(scope._llmresponse, str) else 'json')
+
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION, version)
+
+    # To be removed once the change to log events (from span events) is complete
+    prompt = concatenate_all_contents(formatted_messages)
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    choice_event_body = {
+        'finish_reason': scope._finish_reason,
+        'index': 0,
+        'message': {
+            **({'content': scope._llmresponse} if capture_message_content else {}),
+            'role': 'assistant'
+        }
+    }
+
+    # Emit events
+    for role in ['user', 'system', 'assistant', 'tool']:
+        if formatted_messages.get(role, {}).get('content', ''):
+            event = otel_event(
+                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={
+                    SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+                },
+                body={
+                    # pylint: disable=line-too-long
+                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
+                    'role': formatted_messages.get(role, {}).get('role', []),
+                    **({
+                        'tool_calls': {
+                            'function': {
+                                # pylint: disable=line-too-long
+                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
+                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
+                            },
+                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
+                            'type': 'function'
+                        }
+                    } if role == 'assistant' else {}),
+                    **({
+                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
+                    } if role == 'tool' else {})
+                }
+            )
+            event_provider.emit(event)
+
+    choice_event = otel_event(
+        name=SemanticConvetion.GEN_AI_CHOICE,
+        attributes={
+            SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+        },
+        body=choice_event_body
+    )
+    event_provider.emit(choice_event)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._response_model,
+        )
+
+        metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+                                    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, event_provider, start_time,
+                          span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._llmresponse = response_dict.get('choices', {})[0].get('message', '').get('content', '')
+    self._input_tokens = response_dict.get('usage').get('prompt_tokens')
+    self._output_tokens = response_dict.get('usage').get('completion_tokens')
+    self._response_model = response_dict.get('model', '')
+    self._finish_reason = response_dict.get('choices', {})[0].get('finish_reason', '')
+    self._response_id = response_dict.get('id', '')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
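The role loop in common_chat_logic() consumes formatted_messages as a role-keyed mapping. A sketch of the shape extract_and_format_input() is assumed to return, inferred from how the loop indexes it above (the helper itself lives in openlit.__helpers and is not part of this diff):

    # Assumed shape, illustrative values only:
    formatted_messages = {
        'user':   {'role': 'user', 'content': 'What is OpenTelemetry?'},
        'system': {'role': 'system', 'content': 'You are concise.'},
        # Roles with empty content are skipped by the loop; the assistant
        # and tool branches additionally read scope._tool_calls.
    }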
{openlit-1.33.16 → openlit-1.33.18}/src/openlit/instrumentation/langchain/langchain.py
@@ -14,6 +14,7 @@ from openlit.__helpers import (
     calculate_ttft,
     calculate_tbt,
     create_metrics_attributes,
+    extract_and_format_input,
 )
 from openlit.semcov import SemanticConvetion
 
@@ -249,34 +250,7 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         end_time = time.time()
 
         try:
-            # Format 'messages' into a single string
-            message_prompt = kwargs.get("messages", "") or args[0]
-            formatted_messages = []
-
-            for message in message_prompt:
-                # Handle the case where message is a tuple
-                if isinstance(message, tuple) and len(message) == 2:
-                    role, content = message
-                # Handle the case where message is a dictionary
-                elif isinstance(message, dict):
-                    role = message["role"]
-                    content = message["content"]
-                else:
-                    continue
-
-                # Check if the content is a list
-                if isinstance(content, list):
-                    content_str = ", ".join(
-                        f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                        if "type" in item else f'text: {item["text"]}'
-                        for item in content
-                    )
-                    formatted_messages.append(f"{role}: {content_str}")
-                else:
-                    formatted_messages.append(f"{role}: {content}")
-
-            # Join all formatted messages with newline
-            prompt = "\n".join(formatted_messages)
+            prompt = str(kwargs.get('messages', '') or args[0])
 
             input_tokens = general_tokens(str(prompt))
             output_tokens = general_tokens(str(response))
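The langchain change trades the per-role formatting loop for a plain str() of the raw messages, so the prompt fed to general_tokens() changes shape. An illustrative comparison:

    messages = [("user", "Hi")]
    # 1.33.16 loop produced:  "user: Hi"
    # 1.33.18 now produces:   str(messages), i.e. "[('user', 'Hi')]"
    prompt = str(messages)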