openlit 1.34.0__tar.gz → 1.34.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {openlit-1.34.0 → openlit-1.34.2}/PKG-INFO +1 -1
  2. {openlit-1.34.0 → openlit-1.34.2}/pyproject.toml +1 -1
  3. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +26 -20
  4. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py +26 -20
  5. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/utils.py +1 -3
  6. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/vllm/__init__.py +1 -1
  7. openlit-1.34.2/src/openlit/instrumentation/vllm/utils.py +161 -0
  8. openlit-1.34.2/src/openlit/instrumentation/vllm/vllm.py +65 -0
  9. openlit-1.34.0/src/openlit/instrumentation/vllm/vllm.py +0 -173
  10. {openlit-1.34.0 → openlit-1.34.2}/LICENSE +0 -0
  11. {openlit-1.34.0 → openlit-1.34.2}/README.md +0 -0
  12. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/__helpers.py +0 -0
  13. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/__init__.py +0 -0
  14. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/__init__.py +0 -0
  15. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/all.py +0 -0
  16. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/bias_detection.py +0 -0
  17. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/hallucination.py +0 -0
  18. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/toxicity.py +0 -0
  19. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/evals/utils.py +0 -0
  20. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/__init__.py +0 -0
  21. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/all.py +0 -0
  22. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/prompt_injection.py +0 -0
  23. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/restrict_topic.py +0 -0
  24. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/sensitive_topic.py +0 -0
  25. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/guard/utils.py +0 -0
  26. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ag2/__init__.py +0 -0
  27. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ag2/ag2.py +0 -0
  28. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ai21/__init__.py +0 -0
  29. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ai21/ai21.py +0 -0
  30. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ai21/async_ai21.py +0 -0
  31. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ai21/utils.py +0 -0
  32. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
  33. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
  34. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
  35. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/anthropic/utils.py +0 -0
  36. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/assemblyai/__init__.py +0 -0
  37. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/assemblyai/assemblyai.py +0 -0
  38. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/astra/__init__.py +0 -0
  39. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/astra/astra.py +0 -0
  40. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/astra/async_astra.py +0 -0
  41. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/astra/utils.py +0 -0
  42. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/azure_ai_inference/__init__.py +0 -0
  43. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -0
  44. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -0
  45. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/azure_ai_inference/utils.py +0 -0
  46. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
  47. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
  48. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/bedrock/utils.py +0 -0
  49. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/chroma/__init__.py +0 -0
  50. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/chroma/chroma.py +0 -0
  51. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/cohere/__init__.py +0 -0
  52. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/cohere/async_cohere.py +0 -0
  53. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/cohere/cohere.py +0 -0
  54. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/controlflow/__init__.py +0 -0
  55. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/controlflow/controlflow.py +0 -0
  56. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/crawl4ai/__init__.py +0 -0
  57. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/crawl4ai/async_crawl4ai.py +0 -0
  58. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/crawl4ai/crawl4ai.py +0 -0
  59. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/crewai/__init__.py +0 -0
  60. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/crewai/crewai.py +0 -0
  61. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/dynamiq/__init__.py +0 -0
  62. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/dynamiq/dynamiq.py +0 -0
  63. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/elevenlabs/__init__.py +0 -0
  64. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/elevenlabs/async_elevenlabs.py +0 -0
  65. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/elevenlabs/elevenlabs.py +0 -0
  66. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
  67. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
  68. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/firecrawl/__init__.py +0 -0
  69. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/firecrawl/firecrawl.py +0 -0
  70. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/__init__.py +0 -0
  71. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
  72. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
  73. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/gpu/__init__.py +0 -0
  74. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/groq/__init__.py +0 -0
  75. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/groq/async_groq.py +0 -0
  76. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/groq/groq.py +0 -0
  77. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/haystack/__init__.py +0 -0
  78. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/haystack/haystack.py +0 -0
  79. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/julep/__init__.py +0 -0
  80. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/julep/async_julep.py +0 -0
  81. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/julep/julep.py +0 -0
  82. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/langchain/__init__.py +0 -0
  83. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/langchain/async_langchain.py +0 -0
  84. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/langchain/langchain.py +0 -0
  85. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/letta/__init__.py +0 -0
  86. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/letta/letta.py +0 -0
  87. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/litellm/__init__.py +0 -0
  88. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/litellm/async_litellm.py +0 -0
  89. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/litellm/litellm.py +0 -0
  90. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
  91. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
  92. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/mem0/__init__.py +0 -0
  93. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/mem0/mem0.py +0 -0
  94. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/milvus/__init__.py +0 -0
  95. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/milvus/milvus.py +0 -0
  96. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/mistral/__init__.py +0 -0
  97. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
  98. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/mistral/mistral.py +0 -0
  99. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/multion/__init__.py +0 -0
  100. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/multion/async_multion.py +0 -0
  101. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/multion/multion.py +0 -0
  102. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ollama/__init__.py +0 -0
  103. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
  104. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ollama/ollama.py +0 -0
  105. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/ollama/utils.py +0 -0
  106. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/openai/__init__.py +0 -0
  107. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/openai/async_openai.py +0 -0
  108. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/openai/openai.py +0 -0
  109. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/openai_agents/__init__.py +0 -0
  110. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/openai_agents/openai_agents.py +0 -0
  111. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/phidata/__init__.py +0 -0
  112. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/phidata/phidata.py +0 -0
  113. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
  114. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
  115. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/premai/__init__.py +0 -0
  116. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/premai/premai.py +0 -0
  117. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/pydantic_ai/__init__.py +0 -0
  118. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/pydantic_ai/pydantic_ai.py +0 -0
  119. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/pydantic_ai/utils.py +0 -0
  120. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
  121. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/qdrant/async_qdrant.py +0 -0
  122. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
  123. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/reka/__init__.py +0 -0
  124. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/reka/async_reka.py +0 -0
  125. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/reka/reka.py +0 -0
  126. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/together/__init__.py +0 -0
  127. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/together/async_together.py +0 -0
  128. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/together/together.py +0 -0
  129. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/transformers/__init__.py +0 -0
  130. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/transformers/transformers.py +0 -0
  131. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/transformers/utils.py +0 -0
  132. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
  133. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
  134. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
  135. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/otel/events.py +0 -0
  136. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/otel/metrics.py +0 -0
  137. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/otel/tracing.py +0 -0
  138. {openlit-1.34.0 → openlit-1.34.2}/src/openlit/semcov/__init__.py +0 -0
{openlit-1.34.0 → openlit-1.34.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.0
+Version: 1.34.2
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.34.0 → openlit-1.34.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.34.0"
+version = "1.34.2"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 license = "Apache-2.0"
{openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/async_google_ai_studio.py
@@ -39,26 +39,32 @@ def async_generate(version, environment, application_name,
             start_time = time.time()
             response = await wrapped(*args, **kwargs)
 
-            response = process_chat_response(
-                instance = instance,
-                response=response,
-                request_model=request_model,
-                pricing_info=pricing_info,
-                server_port=server_port,
-                server_address=server_address,
-                environment=environment,
-                application_name=application_name,
-                metrics=metrics,
-                start_time=start_time,
-                span=span,
-                args=args,
-                kwargs=kwargs,
-                capture_message_content=capture_message_content,
-                disable_metrics=disable_metrics,
-                version=version,
-            )
-
-            return response
+            try:
+                response = process_chat_response(
+                    instance = instance,
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    args=args,
+                    kwargs=kwargs,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                )
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+            # Return original response
+            return response
 
     return wrapper
 
{openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/google_ai_studio.py
@@ -39,26 +39,32 @@ def generate(version, environment, application_name,
             start_time = time.time()
             response = wrapped(*args, **kwargs)
 
-            response = process_chat_response(
-                instance = instance,
-                response=response,
-                request_model=request_model,
-                pricing_info=pricing_info,
-                server_port=server_port,
-                server_address=server_address,
-                environment=environment,
-                application_name=application_name,
-                metrics=metrics,
-                start_time=start_time,
-                span=span,
-                args=args,
-                kwargs=kwargs,
-                capture_message_content=capture_message_content,
-                disable_metrics=disable_metrics,
-                version=version,
-            )
-
-            return response
+            try:
+                response = process_chat_response(
+                    instance = instance,
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    args=args,
+                    kwargs=kwargs,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                )
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+            # Return original response
+            return response
 
     return wrapper
 
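Both google_ai_studio hunks apply the same hardening pattern: telemetry post-processing is wrapped in try/except so a failure inside process_chat_response is recorded on the span and logged, while the caller still receives the model's original response. A minimal standalone sketch of the pattern (guarded_call and process_telemetry are hypothetical names, not part of the package):

import logging

logger = logging.getLogger(__name__)

def guarded_call(wrapped, process_telemetry, span):
    # The user-facing call itself is not guarded; its errors should propagate.
    response = wrapped()
    try:
        # Only the instrumentation step is guarded; it must never mask the result.
        response = process_telemetry(response, span)
    except Exception as e:
        logger.error("Error in trace creation: %s", e)
    # Return original response even if telemetry processing failed
    return response
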
{openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/google_ai_studio/utils.py
@@ -2,10 +2,8 @@
 Google AI Studio OpenTelemetry instrumentation utility functions
 """
 import time
-
 from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
-
 from openlit.__helpers import (
     calculate_ttft,
     response_as_dict,
@@ -117,6 +115,7 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_GEMINI)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
 
     inference_config = scope._kwargs.get('config', {})
 
@@ -142,7 +141,6 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_REASONING_TOKENS, scope._reasoning_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
 
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
         'text' if isinstance(scope._llmresponse, str) else 'json')
{openlit-1.34.0 → openlit-1.34.2}/src/openlit/instrumentation/vllm/__init__.py
@@ -32,7 +32,7 @@ class VLLMInstrumentor(BaseInstrumentor):
 
         # sync chat
         wrap_function_wrapper(
-            "vllm",
+            "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
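
The only change in this hunk is the module path handed to the wrapper: the patch now targets vllm.entrypoints.llm, the module where the LLM class is defined, instead of the top-level vllm re-export. A minimal sketch of the same wrapt-based patching pattern (demo_wrapper is a hypothetical stand-in for the wrapper returned by generate()):

from wrapt import wrap_function_wrapper

def demo_wrapper(wrapped, instance, args, kwargs):
    # Pre-call instrumentation (start a span, note a timestamp) goes here.
    response = wrapped(*args, **kwargs)
    # Post-call instrumentation (set attributes, record metrics) goes here.
    return response

# Same target strings as the hunk above.
wrap_function_wrapper("vllm.entrypoints.llm", "LLM.generate", demo_wrapper)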
openlit-1.34.2/src/openlit/instrumentation/vllm/utils.py (new file)
@@ -0,0 +1,161 @@
+"""
+Utility functions for vLLM instrumentation.
+"""
+
+import time
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+from openlit.__helpers import (
+    calculate_tbt,
+    get_chat_model_cost,
+    general_tokens,
+    create_metrics_attributes,
+)
+from openlit.semcov import SemanticConvention
+
+def get_inference_config(args, kwargs):
+    """
+    Safely extract inference configuration from args or kwargs.
+    """
+
+    if 'sampling_params' in kwargs:
+        return kwargs['sampling_params']
+    if len(args) > 1:
+        return args[1]
+    return None
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    # Set base span attributes
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+
+    # Handle inference configuration
+    inference_config = get_inference_config(scope._args, scope._kwargs)
+    if inference_config:
+        attributes = [
+            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
+        ]
+
+        for attribute, key in attributes:
+            value = getattr(inference_config, key, None)
+            if value is not None:
+                scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+    # Set base span attributes (Extras)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    input_tokens = 0
+    output_tokens = 0
+    cost = 0
+
+    if capture_message_content:
+        prompt = ""
+        completion = ""
+
+        for output in scope._response:
+            prompt += output.prompt + "\n"
+            if output.outputs and len(output.outputs) > 0:
+                completion += output.outputs[0].text + "\n"
+            input_tokens += general_tokens(output.prompt)
+            output_tokens += general_tokens(output.outputs[0].text)
+
+        # Add a single event for prompt
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+
+        # Add a single event for completion
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
+            },
+        )
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
+        input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
+        output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        input_tokens + output_tokens)
+
+    # Calculate cost of the operation
+    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if disable_metrics is False:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            request_model=scope._request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._request_model,
+        )
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+        metrics['genai_client_usage_tokens'].record(
+            input_tokens + output_tokens, metrics_attributes)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, start_time, span, args, kwargs,
+                          capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+    self = type('GenericScope', (), {})()
+    self._response = response
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address = server_address
+    self._server_port = server_port
+    self._request_model = request_model
+    self._timestamps = []
+    self._args = args
+    self._kwargs = kwargs
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
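
get_inference_config() above mirrors the two shapes a vLLM LLM.generate() call can take. A hedged sketch of those call shapes using vLLM's public API (model name and prompts are illustrative only):

from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")
params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=64)

# Keyword form: the wrapper finds the config via kwargs['sampling_params'].
outputs = llm.generate(["Hello, my name is"], sampling_params=params)

# Positional form: wrapt binds self, so the prompts arrive as args[0] and
# the sampling params as args[1], which is the len(args) > 1 branch above.
outputs = llm.generate(["Hello, my name is"], params)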
openlit-1.34.2/src/openlit/instrumentation/vllm/vllm.py (new file)
@@ -0,0 +1,65 @@
+"""
+Module for monitoring vLLM API calls.
+"""
+
+import logging
+import time
+from opentelemetry.trace import SpanKind
+from openlit.__helpers import (
+    handle_exception,
+    set_server_address_and_port
+)
+from openlit.instrumentation.vllm.utils import (
+    process_chat_response
+)
+from openlit.semcov import SemanticConvention
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def generate(version, environment, application_name,
+             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI function call
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the GenAI function call.
+        """
+
+        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
+        request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
+
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+
+            try:
+                response = process_chat_response(
+                    instance=instance,
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    args=args,
+                    kwargs=kwargs,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                )
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+            return response
+
+    return wrapper
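
With the wrapper above registered by VLLMInstrumentor, enabling the instrumentation from application code should only require initializing the SDK before using vLLM. A hedged end-to-end sketch (the endpoint and names are illustrative; openlit.init() accepts further options):

import openlit
from vllm import LLM

openlit.init(
    application_name="vllm-demo",           # surfaces as SERVICE_NAME on spans
    environment="development",              # surfaces as DEPLOYMENT_ENVIRONMENT
    otlp_endpoint="http://127.0.0.1:4318",  # any OTLP-compatible collector
)

llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["The capital of France is"])
# Each generate() call now emits a chat span plus token, cost, and
# latency metrics through the wrapper defined in this file.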
openlit-1.34.0/src/openlit/instrumentation/vllm/vllm.py (deleted)
@@ -1,173 +0,0 @@
-"""
-Module for monitoring vLLM API calls.
-"""
-
-import logging
-import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
-from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    general_tokens,
-    create_metrics_attributes,
-    set_server_address_and_port
-)
-from openlit.semcov import SemanticConvention
-
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the vLLM API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of vLLM usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'generate' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-
-        Returns:
-            The response from the original 'generate' method.
-        """
-
-        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
-        request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
-
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
-
-        # pylint: disable=line-too-long
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-            start_time = time.time()
-            response = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            try:
-                # Set base span attribues
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                    SemanticConvention.GEN_AI_SYSTEM_VLLM)
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                    SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                    server_address)
-                span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                    "text")
-
-                # Set base span attribues (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SERVICE_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                    end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                    version)
-
-                input_tokens = 0
-                output_tokens = 0
-                cost = 0
-
-                if capture_message_content:
-                    prompt_attributes = {}
-                    completion_attributes = {}
-
-                    for i, output in enumerate(response):
-                        prompt_attributes[f"{SemanticConvention.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
-                        completion_attributes[f"{SemanticConvention.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
-                        input_tokens += general_tokens(output.prompt)
-                        output_tokens += general_tokens(output.outputs[0].text)
-
-                    # Add a single event for all prompts
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes=prompt_attributes,
-                    )
-
-                    # Add a single event for all completions
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes=completion_attributes,
-                    )
-
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model, pricing_info,
-                                            input_tokens, output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                    cost)
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper