openlit 1.34.12__tar.gz → 1.34.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {openlit-1.34.12 → openlit-1.34.13}/PKG-INFO +1 -1
  2. {openlit-1.34.12 → openlit-1.34.13}/pyproject.toml +1 -1
  3. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/__init__.py +5 -7
  4. openlit-1.34.13/src/openlit/instrumentation/vllm/utils.py +143 -0
  5. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vllm/vllm.py +3 -8
  6. openlit-1.34.12/src/openlit/instrumentation/vllm/utils.py +0 -161
  7. {openlit-1.34.12 → openlit-1.34.13}/LICENSE +0 -0
  8. {openlit-1.34.12 → openlit-1.34.13}/README.md +0 -0
  9. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/__helpers.py +0 -0
  10. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/__init__.py +0 -0
  11. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/__init__.py +0 -0
  12. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/all.py +0 -0
  13. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/bias_detection.py +0 -0
  14. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/hallucination.py +0 -0
  15. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/toxicity.py +0 -0
  16. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/evals/utils.py +0 -0
  17. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/__init__.py +0 -0
  18. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/all.py +0 -0
  19. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/prompt_injection.py +0 -0
  20. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/restrict_topic.py +0 -0
  21. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/sensitive_topic.py +0 -0
  22. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/guard/utils.py +0 -0
  23. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ag2/__init__.py +0 -0
  24. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ag2/ag2.py +0 -0
  25. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/__init__.py +0 -0
  26. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/ai21.py +0 -0
  27. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
  28. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ai21/utils.py +0 -0
  29. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
  30. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
  31. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
  32. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/anthropic/utils.py +0 -0
  33. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
  34. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
  35. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/assemblyai/utils.py +0 -0
  36. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/__init__.py +0 -0
  37. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/astra.py +0 -0
  38. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/async_astra.py +0 -0
  39. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/astra/utils.py +0 -0
  40. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/__init__.py +0 -0
  41. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -0
  42. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -0
  43. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/azure_ai_inference/utils.py +0 -0
  44. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
  45. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
  46. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/bedrock/utils.py +0 -0
  47. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/chroma/__init__.py +0 -0
  48. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/chroma/chroma.py +0 -0
  49. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/__init__.py +0 -0
  50. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
  51. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/cohere/cohere.py +0 -0
  52. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
  53. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
  54. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
  55. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
  56. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
  57. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crewai/__init__.py +0 -0
  58. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/crewai/crewai.py +0 -0
  59. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
  60. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
  61. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
  62. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
  63. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
  64. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/elevenlabs/utils.py +0 -0
  65. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
  66. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
  67. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
  68. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
  69. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
  70. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -0
  71. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -0
  72. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/google_ai_studio/utils.py +0 -0
  73. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
  74. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
  75. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpt4all/utils.py +0 -0
  76. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/gpu/__init__.py +0 -0
  77. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/__init__.py +0 -0
  78. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/async_groq.py +0 -0
  79. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/groq.py +0 -0
  80. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/groq/utils.py +0 -0
  81. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/haystack/__init__.py +0 -0
  82. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/haystack/haystack.py +0 -0
  83. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/__init__.py +0 -0
  84. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/async_julep.py +0 -0
  85. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/julep/julep.py +0 -0
  86. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/__init__.py +0 -0
  87. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
  88. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/langchain/langchain.py +0 -0
  89. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/letta/__init__.py +0 -0
  90. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/letta/letta.py +0 -0
  91. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/__init__.py +0 -0
  92. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
  93. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/litellm/litellm.py +0 -0
  94. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
  95. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
  96. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mem0/__init__.py +0 -0
  97. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mem0/mem0.py +0 -0
  98. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/milvus/__init__.py +0 -0
  99. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/milvus/milvus.py +0 -0
  100. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/__init__.py +0 -0
  101. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
  102. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/mistral/mistral.py +0 -0
  103. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/__init__.py +0 -0
  104. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/async_multion.py +0 -0
  105. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/multion/multion.py +0 -0
  106. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/__init__.py +0 -0
  107. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
  108. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/ollama.py +0 -0
  109. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/ollama/utils.py +0 -0
  110. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/__init__.py +0 -0
  111. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/async_openai.py +0 -0
  112. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai/openai.py +0 -0
  113. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
  114. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
  115. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/phidata/__init__.py +0 -0
  116. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/phidata/phidata.py +0 -0
  117. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
  118. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
  119. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/__init__.py +0 -0
  120. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/premai.py +0 -0
  121. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/premai/utils.py +0 -0
  122. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/__init__.py +0 -0
  123. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/pydantic_ai.py +0 -0
  124. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/pydantic_ai/utils.py +0 -0
  125. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
  126. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
  127. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
  128. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/__init__.py +0 -0
  129. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/async_reka.py +0 -0
  130. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/reka.py +0 -0
  131. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/reka/utils.py +0 -0
  132. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/__init__.py +0 -0
  133. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/async_together.py +0 -0
  134. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/together.py +0 -0
  135. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/together/utils.py +0 -0
  136. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/__init__.py +0 -0
  137. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/transformers.py +0 -0
  138. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/transformers/utils.py +0 -0
  139. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
  140. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
  141. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
  142. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/events.py +0 -0
  143. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/metrics.py +0 -0
  144. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/otel/tracing.py +0 -0
  145. {openlit-1.34.12 → openlit-1.34.13}/src/openlit/semcov/__init__.py +0 -0
--- openlit-1.34.12/PKG-INFO
+++ openlit-1.34.13/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
--- openlit-1.34.12/pyproject.toml
+++ openlit-1.34.13/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.34.12"
+version = "1.34.13"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
--- openlit-1.34.12/src/openlit/instrumentation/vllm/__init__.py
+++ openlit-1.34.13/src/openlit/instrumentation/vllm/__init__.py
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM's client library.
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
        metrics = kwargs.get("metrics_dict")
        pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        # sync chat
+        # Chat completions
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
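
In normal use this instrumentor is activated by openlit's own setup rather than called directly. A minimal sketch of driving it by hand, assuming vllm and openlit 1.34.13 are installed; the tracer wiring below is illustrative and not part of this diff, and the kwargs mirror the ones read in _instrument() above:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    from openlit.instrumentation.vllm import VLLMInstrumentor

    # Illustrative tracer setup; in practice openlit.init() performs this wiring
    trace.set_tracer_provider(TracerProvider())
    tracer = trace.get_tracer(__name__)

    # instrument() is inherited from BaseInstrumentor and forwards to _instrument()
    VLLMInstrumentor().instrument(
        application_name="default",   # new default introduced in this release
        environment="default",
        tracer=tracer,
        pricing_info={},
        capture_message_content=True,
        disable_metrics=True,         # skip metrics_dict wiring in this sketch
    )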
--- /dev/null
+++ openlit-1.34.13/src/openlit/instrumentation/vllm/utils.py
@@ -0,0 +1,143 @@
+"""
+vLLM OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    general_tokens,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
+)
+from openlit.semcov import SemanticConvention
+
+def get_inference_config(args, kwargs):
+    """
+    Safely extract inference configuration from args or kwargs.
+    """
+
+    if 'sampling_params' in kwargs:
+        return kwargs['sampling_params']
+    if len(args) > 1:
+        return args[1]
+    return None
+
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    inference_config = get_inference_config(scope._args, scope._kwargs)
+    if inference_config:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                  getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                  getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, start_time, span, args, kwargs,
+                          capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
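
The two extraction helpers added above are pure functions, so their behavior is easy to check in isolation. A small sketch, assuming only that openlit 1.34.13 is installed; SimpleNamespace stands in for vLLM's SamplingParams so the example runs without vLLM:

    from types import SimpleNamespace

    from openlit.instrumentation.vllm.utils import format_content, get_inference_config

    params = SimpleNamespace(temperature=0.2, top_p=0.9, max_tokens=64)

    # Positional form: LLM.generate(prompts, sampling_params) -> args[1]
    assert get_inference_config(("hello", params), {}) is params
    # Keyword lookup takes precedence over positional args
    assert get_inference_config((), {"sampling_params": params}) is params
    # No config supplied -> None
    assert get_inference_config(("hello",), {}) is None

    # format_content passes strings through and newline-joins prompt lists
    assert format_content("hi") == "hi"
    assert format_content(["a", "b"]) == "a\nb"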
--- openlit-1.34.12/src/openlit/instrumentation/vllm/vllm.py
+++ openlit-1.34.13/src/openlit/instrumentation/vllm/vllm.py
@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+             capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
         """
         Wraps the GenAI function call.
        """
-
        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
        request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
                 disable_metrics=disable_metrics,
                 version=version,
             )
+
         except Exception as e:
             handle_exception(span, e)
-            logger.error("Error in trace creation: %s", e)
 
         return response
 
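
Once this wrapper is registered, tracing is transparent to application code. A sketch of the instrumented call path, assuming the instrumentor has already been set up as shown earlier and that vLLM can load a local model; the model name here is illustrative (it happens to match the fallback the wrapper assumes):

    from vllm import LLM, SamplingParams

    llm = LLM(model="facebook/opt-125m")       # illustrative local model
    outputs = llm.generate(                    # patched LLM.generate -> one chat span
        ["What is observability?"],
        SamplingParams(temperature=0.8, max_tokens=32),
    )
    for output in outputs:                     # RequestOutput objects, iterated the same way in utils.py
        print(output.outputs[0].text)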
--- openlit-1.34.12/src/openlit/instrumentation/vllm/utils.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""
-Utility functions for vLLM instrumentation.
-"""
-
-import time
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
-from opentelemetry.trace import Status, StatusCode
-from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
-    general_tokens,
-    create_metrics_attributes,
-)
-from openlit.semcov import SemanticConvention
-
-def get_inference_config(args, kwargs):
-    """
-    Safely extract inference configuration from args or kwargs.
-    """
-
-    if 'sampling_params' in kwargs:
-        return kwargs['sampling_params']
-    if len(args) > 1:
-        return args[1]
-    return None
-
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream):
-    """
-    Process chat request and generate Telemetry
-    """
-
-    scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
-    # Set base span attributes
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    # Handle inference configuration
-    inference_config = get_inference_config(scope._args, scope._kwargs)
-    if inference_config:
-        attributes = [
-            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-        ]
-
-        for attribute, key in attributes:
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
-
-    # Set base span attributes (Extras)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
-
-    if capture_message_content:
-        prompt = ""
-        completion = ""
-
-        for output in scope._response:
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
-        scope._span.add_event(
-            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-            attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-            },
-        )
-
-        # Add a single event for completion
-        scope._span.add_event(
-            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-            attributes={
-                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
-            },
-        )
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-        input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-        output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
-    scope._span.set_status(Status(StatusCode.OK))
-
-    if disable_metrics is False:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
-
-def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
-                          environment, application_name, metrics, start_time, span, args, kwargs,
-                          capture_message_content=False, disable_metrics=False, version="1.0.0"):
-    """
-    Process chat request and generate Telemetry
-    """
-    self = type('GenericScope', (), {})()
-    self._response = response
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address = server_address
-    self._server_port = server_port
-    self._request_model = request_model
-    self._timestamps = []
-    self._args = args
-    self._kwargs = kwargs
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
-
-    return response
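
Both the removed and the new helper build their scope with type("GenericScope", (), {})(), an anonymous class used as a plain attribute bag. A standalone sketch of the pattern, independent of openlit:

    # Anonymous class created on the fly, instantiated once, then used as an
    # attribute container -- equivalent in spirit to types.SimpleNamespace()
    scope = type("GenericScope", (), {})()
    scope._request_model = "facebook/opt-125m"
    scope._server_port = 443
    print(scope._request_model, scope._server_port)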