openlit 1.34.12__tar.gz → 1.34.13__tar.gz
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {openlit-1.34.12 → openlit-1.34.13}/PKG-INFO +1 -1
- {openlit-1.34.12 → openlit-1.34.13}/pyproject.toml +1 -1
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/__init__.py +5 -7
- openlit-1.34.13/src/openlit/instrumentation/vllm/utils.py +143 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/vllm.py +3 -8
- openlit-1.34.12/src/openlit/instrumentation/vllm/utils.py +0 -161
- {openlit-1.34.12 → openlit-1.34.13}/LICENSE +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/README.md +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/__helpers.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/bias_detection.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/hallucination.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/toxicity.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/prompt_injection.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/restrict_topic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/sensitive_topic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ag2/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ag2/ag2.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/ai21.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/astra.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/async_astra.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/chroma/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/chroma/chroma.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/cohere.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crewai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crewai/crewai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpu/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/async_groq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/groq.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/haystack/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/haystack/haystack.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/async_julep.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/julep.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/langchain.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/letta/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/letta/letta.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/litellm.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mem0/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mem0/mem0.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/milvus/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/milvus/milvus.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/mistral.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/async_multion.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/multion.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/ollama.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/async_openai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/openai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/phidata/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/phidata/phidata.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/premai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/pydantic_ai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/async_reka.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/reka.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/async_together.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/together.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/transformers.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/utils.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/events.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/metrics.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/tracing.py +0 -0
- {openlit-1.34.12 → openlit-1.34.13}/src/openlit/semcov/__init__.py +0 -0
{openlit-1.34.12 → openlit-1.34.13}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.34.12 → openlit-1.34.13}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.34.12"
+version = "1.34.13"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
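The only change in the packaging metadata is the version bump. As a quick post-upgrade sanity check, the installed version can be read back with the standard library (a minimal sketch; only the `openlit` distribution name is taken from the metadata above):

    import importlib.metadata

    # Read the installed distribution's version string; the vLLM instrumentor
    # below uses the same API via importlib.metadata.version("vllm").
    version = importlib.metadata.version("openlit")
    assert version == "1.34.13", f"unexpected openlit version: {version}"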
{openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "
-        environment = kwargs.get("environment", "
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        #
+        # Chat completions
        wrap_function_wrapper(
            "vllm.entrypoints.llm",
            "LLM.generate",
            generate(version, environment, application_name,
-
+                    tracer, pricing_info, capture_message_content, metrics, disable_metrics),
        )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
        pass
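The `_instrument` hook uses the factory-wrapper pattern: `generate(...)` is invoked once with the telemetry configuration and returns the callable that wrapt installs over `LLM.generate`. A minimal sketch of that mechanism using the same `wrapt` API, with a toy target (`json.JSONEncoder.encode`) standing in for vLLM:

    import json
    import wrapt

    def make_wrapper(version, application_name):
        # Factory: capture configuration once, return the wrapper wrapt installs.
        def wrapper(wrapped, instance, args, kwargs):
            # Real instrumentation would open a span around this call.
            print(f"[{application_name} v{version}] calling {wrapped.__name__}")
            return wrapped(*args, **kwargs)
        return wrapper

    # Same call shape as wrap_function_wrapper("vllm.entrypoints.llm", "LLM.generate", ...):
    # module path, "Class.method", and the wrapper produced by the factory.
    wrapt.wrap_function_wrapper("json", "JSONEncoder.encode", make_wrapper("1.0", "demo"))

    json.dumps({"hello": "world"})  # routes through the patched JSONEncoder.encode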
openlit-1.34.13/src/openlit/instrumentation/vllm/utils.py

@@ -0,0 +1,143 @@
+"""
+vLLM OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    general_tokens,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
+)
+from openlit.semcov import SemanticConvention
+
+def get_inference_config(args, kwargs):
+    """
+    Safely extract inference configuration from args or kwargs.
+    """
+
+    if 'sampling_params' in kwargs:
+        return kwargs['sampling_params']
+    if len(args) > 1:
+        return args[1]
+    return None
+
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    inference_config = get_inference_config(scope._args, scope._kwargs)
+    if inference_config:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, args, kwargs,
+    capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
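The two small helpers at the top of the new module are pure functions, so their contract is easy to pin down outside the instrumentor. A sketch exercising `get_inference_config` and `format_content` as defined above (the `FakeSamplingParams` class is a hypothetical stand-in for `vllm.SamplingParams`):

    from dataclasses import dataclass

    @dataclass
    class FakeSamplingParams:
        # Hypothetical stand-in for vllm.SamplingParams.
        temperature: float = 0.8
        top_p: float = 0.95

    def get_inference_config(args, kwargs):
        # Copied from the new utils.py: prefer the keyword, else the second
        # positional argument of LLM.generate(prompts, sampling_params).
        if 'sampling_params' in kwargs:
            return kwargs['sampling_params']
        if len(args) > 1:
            return args[1]
        return None

    def format_content(prompts):
        # Copied from the new utils.py: normalise str / list / other to one string.
        if isinstance(prompts, str):
            return prompts
        elif isinstance(prompts, list):
            return "\n".join(str(prompt) for prompt in prompts)
        else:
            return str(prompts)

    params = FakeSamplingParams()
    assert get_inference_config(("hi", params), {}) is params
    assert get_inference_config(("hi",), {"sampling_params": params}) is params
    assert get_inference_config(("hi",), {}) is None
    assert format_content(["a", "b"]) == "a\nb"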
{openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/vllm.py

@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-
-
-
-def generate(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
        """
        Wraps the GenAI function call.
        """
-
        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
        request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@
                disable_metrics=disable_metrics,
                version=version,
            )
+
        except Exception as e:
            handle_exception(span, e)
-            logger.error("Error in trace creation: %s", e)
 
        return response
 
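End to end, the instrumentation stays transparent to callers: once OpenLIT is initialised, a plain `LLM.generate()` call goes through the `generate` wrapper above and produces the chat span and metrics built in `process_chat_response`. A usage sketch (model name and settings are illustrative, and it assumes the vLLM instrumentor is enabled by the default `openlit.init()` configuration):

    import openlit
    from vllm import LLM

    # Initialising OpenLIT patches LLM.generate via VLLMInstrumentor.
    openlit.init(application_name="vllm-demo", environment="dev")

    llm = LLM(model="facebook/opt-125m")  # the same model the wrapper falls back to
    outputs = llm.generate(["Hello, my name is"])
    # The call above now also emits a chat span with token counts, cost and,
    # when capture_message_content is enabled, the prompt/completion text.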
openlit-1.34.12/src/openlit/instrumentation/vllm/utils.py

@@ -1,161 +0,0 @@
-"""
-Utility functions for vLLM instrumentation.
-"""
-
-import time
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
-from opentelemetry.trace import Status, StatusCode
-from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
-    general_tokens,
-    create_metrics_attributes,
-)
-from openlit.semcov import SemanticConvention
-
-def get_inference_config(args, kwargs):
-    """
-    Safely extract inference configuration from args or kwargs.
-    """
-
-    if 'sampling_params' in kwargs:
-        return kwargs['sampling_params']
-    if len(args) > 1:
-        return args[1]
-    return None
-
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
-    """
-    Process chat request and generate Telemetry
-    """
-
-    scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
-    # Set base span attributes
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    # Handle inference configuration
-    inference_config = get_inference_config(scope._args, scope._kwargs)
-    if inference_config:
-        attributes = [
-            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-        ]
-
-        for attribute, key in attributes:
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
-
-    # Set base span attributes (Extras)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
-
-    if capture_message_content:
-        prompt = ""
-        completion = ""
-
-        for output in scope._response:
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
-        scope._span.add_event(
-            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-            attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-            },
-        )
-
-        # Add a single event for completion
-        scope._span.add_event(
-            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-            attributes={
-                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
-            },
-        )
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-        input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-        output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
-    scope._span.set_status(Status(StatusCode.OK))
-
-    if disable_metrics is False:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
-
-def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span, args, kwargs,
-    capture_message_content=False, disable_metrics=False, version="1.0.0"):
-    """
-    Process chat request and generate Telemetry
-    """
-    self = type('GenericScope', (), {})()
-    self._response = response
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address = server_address
-    self._server_port = server_port
-    self._request_model = request_model
-    self._timestamps = []
-    self._args = args
-    self._kwargs = kwargs
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
-
-    return response