openlit 1.33.17.tar.gz → 1.33.18.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openlit-1.33.17 → openlit-1.33.18}/PKG-INFO +1 -1
- {openlit-1.33.17 → openlit-1.33.18}/pyproject.toml +1 -1
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/azure_ai_inference/__init__.py +5 -22
- openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +144 -0
- openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +144 -0
- openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/utils.py +225 -0
- openlit-1.33.17/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -585
- openlit-1.33.17/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -585
- {openlit-1.33.17 → openlit-1.33.18}/LICENSE +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/README.md +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/__helpers.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/all.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/bias_detection.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/hallucination.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/toxicity.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/evals/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/all.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/prompt_injection.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/restrict_topic.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/sensitive_topic.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/guard/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ag2/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ag2/ag2.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ai21/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ai21/ai21.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ai21/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/anthropic/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/astra/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/astra/astra.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/astra/async_astra.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/astra/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/chroma/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/chroma/chroma.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/cohere/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/cohere/cohere.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/crewai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/crewai/crewai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/gpu/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/groq/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/groq/async_groq.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/groq/groq.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/haystack/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/haystack/haystack.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/julep/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/julep/async_julep.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/julep/julep.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/langchain/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/langchain/langchain.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/letta/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/letta/letta.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/litellm/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/litellm/litellm.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/mem0/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/mem0/mem0.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/milvus/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/milvus/milvus.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/mistral/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/mistral/mistral.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/multion/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/multion/async_multion.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/multion/multion.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ollama/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ollama/ollama.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/ollama/utils.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/openai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/openai/async_openai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/openai/openai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/phidata/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/phidata/phidata.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/premai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/premai/premai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/reka/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/reka/async_reka.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/reka/reka.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/together/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/together/async_together.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/together/together.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/transformers/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/transformers/transformers.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/vllm/__init__.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/vllm/vllm.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/otel/events.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/otel/metrics.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/otel/tracing.py +0 -0
- {openlit-1.33.17 → openlit-1.33.18}/src/openlit/semcov/__init__.py +0 -0
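In substance, this release reworks the Azure AI Inference instrumentation: the chat wrappers gain an `event_provider` argument, span/metric/event construction moves into a new shared `utils.py`, and the `EmbeddingsClient.embed` hooks are removed. As a rough orientation, the instrumented sync chat path is exercised as sketched below — `openlit.init()` and `ChatCompletionsClient` are real APIs, but the endpoint, credential, and model values are placeholder assumptions:

import openlit
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Sets up tracing/metrics and auto-instruments supported SDKs,
# including azure-ai-inference.
openlit.init(application_name='demo-app', environment='dev')

client = ChatCompletionsClient(
    endpoint='https://models.github.ai',   # placeholder endpoint
    credential=AzureKeyCredential('...'),  # placeholder credential
)

# ChatCompletionsClient.complete is patched by the instrumentor, so this
# call emits a `chat {model}` span plus token and cost telemetry.
response = client.complete(
    model='gpt-4o',
    messages=[{'role': 'user', 'content': 'Hello'}],
)
print(response.choices[0].message.content)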
{openlit-1.33.17 → openlit-1.33.18}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.17
+Version: 1.33.18
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.33.17 → openlit-1.33.18}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.33.17"
+version = "1.33.18"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
{openlit-1.33.17 → openlit-1.33.18}/src/openlit/instrumentation/azure_ai_inference/__init__.py
RENAMED
@@ -4,13 +4,11 @@ from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
-
 from openlit.instrumentation.azure_ai_inference.azure_ai_inference import (
-    complete
+    complete
 )
-
 from openlit.instrumentation.azure_ai_inference.async_azure_ai_inference import (
-    async_complete
+    async_complete
 )
 
 _instruments = ('azure-ai-inference >= 1.0.0b4',)
@@ -27,6 +25,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
         application_name = kwargs.get('application_name', 'default')
         environment = kwargs.get('environment', 'default')
         tracer = kwargs.get('tracer')
+        event_provider = kwargs.get('event_provider')
         metrics = kwargs.get('metrics_dict')
         pricing_info = kwargs.get('pricing_info', {})
         capture_message_content = kwargs.get('capture_message_content', False)
@@ -38,15 +37,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference',
             'ChatCompletionsClient.complete',
             complete(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # sync embedding
-        wrap_function_wrapper(
-            'azure.ai.inference',
-            'EmbeddingsClient.embed',
-            embedding(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # async generate
@@ -54,15 +45,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference.aio',
             'ChatCompletionsClient.complete',
             async_complete(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # async embedding
-        wrap_function_wrapper(
-            'azure.ai.inference.aio',
-            'EmbeddingsClient.embed',
-            async_embedding(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
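The instrumentor above relies on wrapt's function-wrapper mechanism. A minimal, self-contained sketch of that pattern with a toy class follows; `Client` and `make_wrapper` are illustrative names, not openlit or Azure APIs:

import time
from wrapt import wrap_function_wrapper

class Client:
    def greet(self, name):
        return f'hello {name}'

def make_wrapper(version):
    # Factory mirroring the shape of complete(version, ..., disable_metrics):
    # it returns the actual wrapt wrapper.
    def wrapper(wrapped, instance, args, kwargs):
        start = time.time()
        try:
            return wrapped(*args, **kwargs)
        finally:
            print(f'traced v{version}: {time.time() - start:.6f}s')
    return wrapper

# Same call shape as wrap_function_wrapper('azure.ai.inference',
# 'ChatCompletionsClient.complete', complete(...)) above.
wrap_function_wrapper(__name__, 'Client.greet', make_wrapper('1.33.18'))
print(Client().greet('world'))  # timing line, then 'hello world'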
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py
ADDED
@@ -0,0 +1,144 @@
+"""
+Module for monitoring Azure AI Inference API calls.
+"""
+
+import logging
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import (
+    handle_exception,
+    set_server_address_and_port,
+)
+from openlit.instrumentation.azure_ai_inference.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+)
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def async_complete(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    class TracedAsyncStream:
+        """
+        Wrapper for streaming responses to collect telemetry.
+        """
+
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopAsyncIteration:
+                try:
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
+                        )
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                raise
+
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+
+        else:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
+                response = await wrapped(*args, **kwargs)
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response
+
+    return wrapper
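For orientation, a hedged sketch of how the TracedAsyncStream path above is reached from user code; the aio client is the real azure-ai-inference API, while the endpoint, credential, and model are placeholders:

import asyncio
from azure.ai.inference.aio import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

async def main():
    client = ChatCompletionsClient(
        endpoint='https://models.github.ai',   # placeholder
        credential=AzureKeyCredential('...'),  # placeholder
    )
    # With stream=True the wrapper above returns a TracedAsyncStream;
    # process_chunk() runs per chunk and the chat span is recorded when
    # iteration raises StopAsyncIteration.
    stream = await client.complete(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': 'Stream a haiku'}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end='')
    await client.close()

asyncio.run(main())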
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py
ADDED
@@ -0,0 +1,144 @@
+"""
+Module for monitoring Azure AI Inference API calls.
+"""
+
+import logging
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import (
+    handle_exception,
+    set_server_address_and_port,
+)
+from openlit.instrumentation.azure_ai_inference.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+)
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def complete(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect telemetry.
+        """
+
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopIteration:
+                try:
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
+                        )
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                raise
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+
+        else:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
+                response = wrapped(*args, **kwargs)
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response
+
+    return wrapper
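TracedSyncStream is a delegating proxy: it forwards iteration to the wrapped stream, observes each chunk, and finalizes telemetry once the stream is exhausted. A toy, dependency-free version of the same pattern (names here are illustrative, not openlit's):

class ObservedStream:
    def __init__(self, wrapped, on_done):
        self.__wrapped__ = wrapped
        self._chunks = 0
        self._on_done = on_done

    def __iter__(self):
        return self

    def __getattr__(self, name):
        # Anything not overridden falls through to the wrapped object.
        return getattr(self.__wrapped__, name)

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
            self._chunks += 1            # stand-in for process_chunk()
            return chunk
        except StopIteration:
            self._on_done(self._chunks)  # stand-in for span finalization
            raise                        # re-raise so for-loops still stop

stream = ObservedStream(iter(['a', 'b', 'c']), lambda n: print(f'{n} chunks'))
print(list(stream))  # prints '3 chunks', then ['a', 'b', 'c']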
openlit-1.33.18/src/openlit/instrumentation/azure_ai_inference/utils.py
ADDED
@@ -0,0 +1,225 @@
+"""
+Azure AI Inference OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    extract_and_format_input,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvetion
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    self._timestamps.append(end_time)
+
+    if len(self._timestamps) == 1:
+        # Calculate time to first chunk
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Collect message IDs and aggregated response from events
+    if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+        'content' in chunked.get('choices')[0].get('delta'))):
+
+        if content := chunked.get('choices')[0].get('delta').get('content'):
+            self._llmresponse += content
+
+    if chunked.get('choices')[0].get('finish_reason') is not None:
+        self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+
+    if chunked.get('usage') is not None:
+        self._input_tokens = chunked.get('usage').get('prompt_tokens')
+        self._response_id = chunked.get('id')
+        self._response_model = chunked.get('model')
+        self._output_tokens = chunked.get('usage').get('completion_tokens')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    event_provider, capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
+    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvetion.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K, scope._kwargs.get('top_k', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                              scope._kwargs.get("frequency_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                              scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.SERVER_ADDRESS, scope._server_address)
+
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                              'text' if isinstance(scope._llmresponse, str) else 'json')
+
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION, version)
+
+    # To be removed once the change to log events (from span events) is complete
+    prompt = concatenate_all_contents(formatted_messages)
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    choice_event_body = {
+        'finish_reason': scope._finish_reason,
+        'index': 0,
+        'message': {
+            **({'content': scope._llmresponse} if capture_message_content else {}),
+            'role': 'assistant'
+        }
+    }
+
+    # Emit events
+    for role in ['user', 'system', 'assistant', 'tool']:
+        if formatted_messages.get(role, {}).get('content', ''):
+            event = otel_event(
+                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={
+                    SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+                },
+                body = {
+                    # pylint: disable=line-too-long
+                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
+                    'role': formatted_messages.get(role, {}).get('role', []),
+                    **({
+                        'tool_calls': {
+                            'function': {
+                                # pylint: disable=line-too-long
+                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
+                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
+                            },
+                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
+                            'type': 'function'
+                        }
+                    } if role == 'assistant' else {}),
+                    **({
+                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
+                    } if role == 'tool' else {})
+                }
+            )
+            event_provider.emit(event)
+
+    choice_event = otel_event(
+        name=SemanticConvetion.GEN_AI_CHOICE,
+        attributes={
+            SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+        },
+        body=choice_event_body
+    )
+    event_provider.emit(choice_event)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._response_model,
+        )
+
+        metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, event_provider, start_time,
+    span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._llmresponse = response_dict.get('choices', {})[0].get('message', '').get('content', '')
+    self._input_tokens = response_dict.get('usage').get('prompt_tokens')
+    self._output_tokens = response_dict.get('usage').get('completion_tokens')
+    self._response_model = response_dict.get('model', '')
+    self._finish_reason = response_dict.get('choices', {})[0].get('finish_reason', '')
+    self._response_id = response_dict.get('id', '')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
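The `_ttft`/`_tbt` fields above come from `calculate_ttft` and `calculate_tbt` in `openlit.__helpers`, which are not part of this diff. As an assumption about their semantics only, the timing math is roughly:

# Assumed semantics of the helpers, for illustration; the real
# implementations live in openlit.__helpers and may differ.
def calculate_ttft(timestamps, start_time):
    # Time to first token: first chunk arrival minus request start.
    return timestamps[0] - start_time if timestamps else 0

def calculate_tbt(timestamps):
    # Mean gap between consecutive chunks after the first.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0

# Example: request at t=0.0, chunks arriving at 0.4, 0.5, 0.7
print(calculate_ttft([0.4, 0.5, 0.7], 0.0))  # 0.4
print(calculate_tbt([0.4, 0.5, 0.7]))        # ~0.15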