openlit 1.34.12__tar.gz → 1.34.14__tar.gz
This diff shows the contents of publicly released package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {openlit-1.34.12 → openlit-1.34.14}/PKG-INFO +1 -1
- {openlit-1.34.12 → openlit-1.34.14}/pyproject.toml +1 -1
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/transformers/__init__.py +12 -5
- openlit-1.34.14/src/openlit/instrumentation/transformers/transformers.py +53 -0
- openlit-1.34.14/src/openlit/instrumentation/transformers/utils.py +199 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/vllm/__init__.py +5 -7
- openlit-1.34.14/src/openlit/instrumentation/vllm/utils.py +143 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/vllm/vllm.py +3 -8
- openlit-1.34.12/src/openlit/instrumentation/transformers/transformers.py +0 -60
- openlit-1.34.12/src/openlit/instrumentation/transformers/utils.py +0 -183
- openlit-1.34.12/src/openlit/instrumentation/vllm/utils.py +0 -161
- {openlit-1.34.12 → openlit-1.34.14}/LICENSE +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/README.md +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/__helpers.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/bias_detection.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/hallucination.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/toxicity.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/evals/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/prompt_injection.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/restrict_topic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/sensitive_topic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/guard/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ag2/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ag2/ag2.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ai21/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ai21/ai21.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ai21/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/anthropic/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/assemblyai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/astra/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/astra/astra.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/astra/async_astra.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/astra/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/azure_ai_inference/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/azure_ai_inference/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/bedrock/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/chroma/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/chroma/chroma.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/cohere/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/cohere/cohere.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/crewai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/crewai/crewai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/elevenlabs/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/google_ai_studio/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/gpt4all/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/gpu/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/groq/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/groq/async_groq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/groq/groq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/groq/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/haystack/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/haystack/haystack.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/julep/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/julep/async_julep.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/julep/julep.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/langchain/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/langchain/langchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/letta/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/letta/letta.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/litellm/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/litellm/litellm.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/mem0/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/mem0/mem0.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/milvus/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/milvus/milvus.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/mistral/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/mistral/mistral.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/multion/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/multion/async_multion.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/multion/multion.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ollama/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ollama/ollama.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/ollama/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/openai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/openai/async_openai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/openai/openai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/phidata/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/phidata/phidata.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/premai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/premai/premai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/premai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/pydantic_ai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/pydantic_ai/pydantic_ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/pydantic_ai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/reka/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/reka/async_reka.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/reka/reka.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/reka/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/together/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/together/async_together.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/together/together.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/together/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/otel/events.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/otel/metrics.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/otel/tracing.py +0 -0
- {openlit-1.34.12 → openlit-1.34.14}/src/openlit/semcov/__init__.py +0 -0
--- openlit-1.34.12/PKG-INFO
+++ openlit-1.34.14/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.14
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
--- openlit-1.34.12/pyproject.toml
+++ openlit-1.34.14/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.34.12"
+version = "1.34.14"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
--- openlit-1.34.12/src/openlit/instrumentation/transformers/__init__.py
+++ openlit-1.34.14/src/openlit/instrumentation/transformers/__init__.py
@@ -30,12 +30,19 @@ class TransformersInstrumentor(BaseInstrumentor):
         version = importlib.metadata.version("transformers")
 
         wrap_function_wrapper(
-                "transformers",
-                "TextGenerationPipeline.__call__",
-                pipeline_wrapper(
-                    version, environment, application_name, tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            "transformers",
+            "TextGenerationPipeline.__call__",
+            pipeline_wrapper(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics
+            ),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
        pass
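The patching above goes through wrapt's wrap_function_wrapper, which replaces a dotted attribute on an imported module with a wrapper that receives the original callable, the bound instance, and the call arguments. A minimal standalone sketch of that mechanism (illustrative only, not part of the package):

import wrapt

def trace_call(wrapped, instance, args, kwargs):
    # wrapped: the original TextGenerationPipeline.__call__
    # instance: the pipeline object the method is bound to
    result = wrapped(*args, **kwargs)
    return result

wrapt.wrap_function_wrapper("transformers", "TextGenerationPipeline.__call__", trace_call)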
--- /dev/null
+++ openlit-1.34.14/src/openlit/instrumentation/transformers/transformers.py
@@ -0,0 +1,53 @@
+"""
+Module for monitoring HF Transformers API calls.
+"""
+
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import set_server_address_and_port
+from openlit.instrumentation.transformers.utils import process_chat_response
+from openlit.semcov import SemanticConvention
+
+
+def pipeline_wrapper(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = instance.model.config.name_or_path
+
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+
+            response = process_chat_response(
+                instance=instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+            )
+
+        return response
+
+    return wrapper
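As a usage sketch of how this wrapper fires (assuming OpenLIT's documented openlit.init() entry point, which registers the instrumentor; not shown in this diff):

import openlit
from transformers import pipeline

openlit.init(application_name="demo-app", environment="dev")

pipe = pipeline("text-generation", model="gpt2")
# The patched TextGenerationPipeline.__call__ records this call as a
# CLIENT span named "<chat operation> gpt2" and hands the result to
# process_chat_response before returning it unchanged
result = pipe("The quick brown fox", max_new_tokens=20)
print(result[0]["generated_text"])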
--- /dev/null
+++ openlit-1.34.14/src/openlit/instrumentation/transformers/utils.py
@@ -0,0 +1,199 @@
+"""
+HF Transformers OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    general_tokens,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
+)
+from openlit.semcov import SemanticConvention
+
+def format_content(content):
+    """
+    Format content to a consistent structure.
+    """
+    if isinstance(content, str):
+        return content
+    elif isinstance(content, list):
+        # Check if its a list of chat messages (like in the test case)
+        if (len(content) > 0 and isinstance(content[0], dict) and
+            "role" in content[0] and "content" in content[0]):
+            # Handle chat message format like Groq
+            formatted_messages = []
+            for message in content:
+                role = message["role"]
+                msg_content = message["content"]
+
+                if isinstance(msg_content, list):
+                    content_str = ", ".join(
+                        f'{item["type"]}: {item["text"] if "text" in item else item.get("image_url", str(item))}'
+                        if isinstance(item, dict) and "type" in item
+                        else str(item)
+                        for item in msg_content
+                    )
+                    formatted_messages.append(f"{role}: {content_str}")
+                else:
+                    formatted_messages.append(f"{role}: {msg_content}")
+            return "\n".join(formatted_messages)
+        else:
+            # Handle other list formats (transformers responses)
+            formatted_content = []
+            for item in content:
+                if isinstance(item, str):
+                    formatted_content.append(item)
+                elif isinstance(item, dict):
+                    # Handle dict format for transformers
+                    if "generated_text" in item:
+                        formatted_content.append(str(item["generated_text"]))
+                    else:
+                        formatted_content.append(str(item))
+                else:
+                    formatted_content.append(str(item))
+            return " ".join(formatted_content)
+    else:
+        return str(content)
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, args, kwargs, is_stream):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    forward_params = scope._instance._forward_params
+    request_model = scope._instance.model.config.name_or_path
+
+    input_tokens = general_tokens(scope._prompt)
+    output_tokens = general_tokens(scope._completion)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Set request parameters from forward_params
+    if forward_params.get("temperature") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, forward_params["temperature"])
+    if forward_params.get("top_k") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, forward_params["top_k"])
+    if forward_params.get("top_p") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, forward_params["top_p"])
+    if forward_params.get("max_length") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, forward_params["max_length"])
+
+    # Set token usage and cost attributes
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._completion)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: scope._prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._completion,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics using the standardized helper function
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE, scope._server_address, scope._server_port,
+            request_model, request_model, environment, application_name, scope._start_time, scope._end_time,
+            cost, input_tokens, output_tokens, scope._tbt, scope._ttft)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time,
+    span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope = type("GenericScope", (), {})()
+    scope._instance = instance
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._kwargs = kwargs
+    scope._args = args
+
+    # Extract prompt from args or kwargs
+    if args and len(args) > 0:
+        scope._prompt = args[0]
+    else:
+        scope._prompt = (
+            kwargs.get("text_inputs") or
+            (kwargs.get("image") and kwargs.get("question") and
+             ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            kwargs.get("fallback") or
+            ""
+        )
+    scope._prompt = format_content(scope._prompt)
+
+    # Process response based on task type
+    task = kwargs.get("task", "text-generation")
+
+    if task == "text-generation":
+        # Handle text generation responses
+        if isinstance(response, list) and len(response) > 0:
+            first_entry = response[0]
+            if isinstance(first_entry, dict):
+                if isinstance(first_entry.get("generated_text"), list):
+                    # Handle nested list format
+                    last_element = first_entry.get("generated_text")[-1]
+                    scope._completion = last_element.get("content", str(last_element))
+                else:
+                    # Handle standard format
+                    scope._completion = first_entry.get("generated_text", "")
+            else:
+                scope._completion = str(first_entry)
+        else:
+            scope._completion = ""
+
+    elif task == "automatic-speech-recognition":
+        scope._completion = response.get("text", "") if isinstance(response, dict) else ""
+
+    elif task == "image-classification":
+        scope._completion = str(response[0]) if isinstance(response, list) and len(response) > 0 else ""
+
+    elif task == "visual-question-answering":
+        if isinstance(response, list) and len(response) > 0 and isinstance(response[0], dict):
+            scope._completion = response[0].get("answer", "")
+        else:
+            scope._completion = ""
+    else:
+        # Default handling for other tasks
+        scope._completion = format_content(response)
+
+    # Initialize timing attributes
+    scope._tbt = 0
+    scope._ttft = scope._end_time - scope._start_time
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+
+    return response
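To make the branching in format_content concrete, here is what the two list shapes it distinguishes produce (hypothetical inputs, derived only from the code above):

from openlit.instrumentation.transformers.utils import format_content

# Chat-message lists are flattened to "role: content" lines
print(format_content([
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]))
# user: Hi
# assistant: Hello!

# Pipeline-style response lists join their generated_text values
print(format_content([{"generated_text": "a reply"}]))
# a reply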
--- openlit-1.34.12/src/openlit/instrumentation/vllm/__init__.py
+++ openlit-1.34.14/src/openlit/instrumentation/vllm/__init__.py
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
        return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "
-        environment = kwargs.get("environment", "
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
        tracer = kwargs.get("tracer")
        metrics = kwargs.get("metrics_dict")
        pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
        disable_metrics = kwargs.get("disable_metrics")
        version = importlib.metadata.version("vllm")
 
-        #
+        # Chat completions
        wrap_function_wrapper(
            "vllm.entrypoints.llm",
            "LLM.generate",
            generate(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
        )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
        pass
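For context, a usage sketch of the call path this instrumentor traces (openlit.init() registering VLLMInstrumentor is assumed from OpenLIT's docs, not shown in this diff):

import openlit
from vllm import LLM, SamplingParams

openlit.init(application_name="demo-app", environment="dev")

llm = LLM(model="facebook/opt-125m")
sampling = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=32)
# LLM.generate is the patched entry point, so this call is traced
outputs = llm.generate(["Hello, my name is"], sampling)
for output in outputs:
    print(output.outputs[0].text)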
--- /dev/null
+++ openlit-1.34.14/src/openlit/instrumentation/vllm/utils.py
@@ -0,0 +1,143 @@
+"""
+vLLM OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    general_tokens,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
+)
+from openlit.semcov import SemanticConvention
+
+def get_inference_config(args, kwargs):
+    """
+    Safely extract inference configuration from args or kwargs.
+    """
+
+    if 'sampling_params' in kwargs:
+        return kwargs['sampling_params']
+    if len(args) > 1:
+        return args[1]
+    return None
+
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    inference_config = get_inference_config(scope._args, scope._kwargs)
+    if inference_config:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, args, kwargs,
+    capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
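The lookup order in get_inference_config mirrors the two ways sampling parameters reach LLM.generate: as the sampling_params keyword or as the second positional argument. A quick illustration (hypothetical, based only on the code above):

from vllm import SamplingParams
from openlit.instrumentation.vllm.utils import get_inference_config

sp = SamplingParams(temperature=0.2)

# Keyword form: llm.generate(prompts, sampling_params=sp)
assert get_inference_config((["prompt"],), {"sampling_params": sp}) is sp
# Positional form: llm.generate(prompts, sp)
assert get_inference_config((["prompt"], sp), {}) is sp
# Nothing supplied -> None, and the span falls back to getattr defaults
assert get_inference_config((["prompt"],), {}) is None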
--- openlit-1.34.12/src/openlit/instrumentation/vllm/vllm.py
+++ openlit-1.34.14/src/openlit/instrumentation/vllm/vllm.py
@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-
-
-
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
         """
         Wraps the GenAI function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
                 disable_metrics=disable_metrics,
                 version=version,
             )
+
         except Exception as e:
             handle_exception(span, e)
-            logger.error("Error in trace creation: %s", e)
 
         return response
 
--- openlit-1.34.12/src/openlit/instrumentation/transformers/transformers.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-Module for monitoring HF Transformers API calls.
-"""
-
-import logging
-import time
-from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    set_server_address_and_port
-)
-from openlit.instrumentation.transformers.utils import (
-    process_chat_response,
-)
-from openlit.semcov import SemanticConvention
-
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def pipeline_wrapper(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for GenAI function call
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the GenAI function call.
-        """
-
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
-        request_model = instance.model.config.name_or_path
-
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
-
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-            start_time = time.time()
-            response = wrapped(*args, **kwargs)
-
-            response = process_chat_response(
-                instance = instance,
-                response=response,
-                request_model=request_model,
-                pricing_info=pricing_info,
-                server_port=server_port,
-                server_address=server_address,
-                environment=environment,
-                application_name=application_name,
-                metrics=metrics,
-                start_time=start_time,
-                span=span,
-                args=args,
-                kwargs=kwargs,
-                capture_message_content=capture_message_content,
-                disable_metrics=disable_metrics,
-                version=version,
-            )
-
-            return response
-
-    return wrapper