openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff compares the published contents of two package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
openlit/instrumentation/qdrant/async_qdrant.py

@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
 
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion
 
@@ -73,12 +73,12 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_VECTORDB)
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
                 span.set_attribute(SemanticConvetion.DB_SYSTEM,
                                    SemanticConvetion.DB_SYSTEM_QDRANT)
 
@@ -241,14 +241,14 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
                     attributes = {
                         TELEMETRY_SDK_NAME:
                             "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                        SERVICE_NAME:
                             application_name,
                         SemanticConvetion.DB_SYSTEM:
                             SemanticConvetion.DB_SYSTEM_QDRANT,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                        DEPLOYMENT_ENVIRONMENT:
                             environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_VECTORDB,
+                        SemanticConvetion.GEN_AI_OPERATION:
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
                         SemanticConvetion.DB_OPERATION:
                             db_operation
                     }
openlit/instrumentation/qdrant/qdrant.py

@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
 
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion
 
@@ -73,12 +73,12 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_VECTORDB)
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
                 span.set_attribute(SemanticConvetion.DB_SYSTEM,
                                    SemanticConvetion.DB_SYSTEM_QDRANT)
 
@@ -248,14 +248,14 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
                     attributes = {
                         TELEMETRY_SDK_NAME:
                             "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                        SERVICE_NAME:
                             application_name,
                         SemanticConvetion.DB_SYSTEM:
                             SemanticConvetion.DB_SYSTEM_QDRANT,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                        DEPLOYMENT_ENVIRONMENT:
                             environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_VECTORDB,
+                        SemanticConvetion.GEN_AI_OPERATION:
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
                         SemanticConvetion.DB_OPERATION:
                             db_operation
                     }
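
Both Qdrant wrappers now stamp spans with the standard OpenTelemetry resource-attribute keys (SERVICE_NAME, DEPLOYMENT_ENVIRONMENT) in place of the OpenLIT-specific GEN_AI_APPLICATION_NAME and GEN_AI_ENVIRONMENT, and GEN_AI_TYPE becomes GEN_AI_OPERATION. A minimal sketch of the resulting attribute mapping, assuming an already-configured tracer (the span name and literal values here are illustrative):

import opentelemetry.trace as trace
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("qdrant.upsert") as span:
    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")         # unchanged
    span.set_attribute(SERVICE_NAME, "my-app")                # was GEN_AI_APPLICATION_NAME
    span.set_attribute(DEPLOYMENT_ENVIRONMENT, "production")  # was GEN_AI_ENVIRONMENT

Because these are the same keys the OTel SDK uses for resource attributes, backends can correlate the spans with service.name and deployment.environment without a custom mapping.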
openlit/instrumentation/reka/__init__.py

@@ -37,7 +37,7 @@ class RekaInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "reka.chat.client",
             "ChatClient.create",
-            chat("reka.chat", version, environment, application_name,
+            chat(version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
@@ -45,7 +45,7 @@ class RekaInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "reka.chat.client",
             "AsyncChatClient.create",
-            async_chat("reka.chat", version, environment, application_name,
+            async_chat(version, environment, application_name,
                        tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
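The factories no longer take a gen_ai_endpoint argument; spans are named from the operation and request model instead (see the reka wrapper diffs below). A minimal sketch of the factory shape these instrumentors use, following wrapt's calling convention, where the returned wrapper receives (wrapped, instance, args, kwargs) — this instance is what the wrappers below pass to set_server_address_and_port:

def chat(version, environment, application_name,
         tracer, pricing_info, trace_content, metrics, disable_metrics):
    # wrapt calls the returned wrapper with the original method, its bound
    # instance, and the call's args/kwargs.
    def wrapper(wrapped, instance, args, kwargs):
        response = wrapped(*args, **kwargs)  # telemetry is recorded around this call
        return response
    return wrapper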
openlit/instrumentation/reka/async_reka.py

@@ -1,27 +1,28 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Reka API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
     get_chat_model_cost,
+    handle_exception,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
+def async_chat(version, environment, application_name,
                tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the Reka API.
@@ -50,8 +51,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
+        request_model = kwargs.get("model", "reka-core-20240501")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                     if isinstance(content, list):
                         content_str = ", ".join(
-                            # pylint: disable=line-too-long
                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                            if "type" in item else f'text: {item["text"]}'
                            for item in content
@@ -73,22 +80,68 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
 
-                # Set base span attribues
+                input_tokens = response.usage.input_tokens
+                output_tokens = response.usage.output_tokens
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens, output_tokens)
+
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                   kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                   kwargs.get("top_k", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   [response.responses[0].finish_reason])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                   response.id)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response.model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   'text')
+
+                # Set Span attributes (Extra)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "reka-core"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                    False)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -99,51 +152,36 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            # pylint: disable=line-too-long
                             SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
                         },
                     )
 
-                prompt_tokens = response.usage.input_tokens
-                completion_tokens = response.usage.output_tokens
-                total_tokens = prompt_tokens + completion_tokens
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
-                                           pricing_info, prompt_tokens, completion_tokens)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                   completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   total_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                   [response.responses[0].finish_reason])
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "reka-core")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(total_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
2
1
  """
3
2
  Module for monitoring Reka API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import (
10
- handle_exception,
11
10
  get_chat_model_cost,
11
+ handle_exception,
12
+ create_metrics_attributes,
13
+ set_server_address_and_port
12
14
  )
13
15
  from openlit.semcov import SemanticConvetion
14
16
 
15
17
  # Initialize logger for logging potential issues and operations
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def chat(gen_ai_endpoint, version, environment, application_name,
20
+ def chat(version, environment, application_name,
19
21
  tracer, pricing_info, trace_content, metrics, disable_metrics):
20
22
  """
21
23
  Generates a telemetry wrapper for chat to collect metrics.
22
24
 
23
25
  Args:
24
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
25
26
  version: Version of the monitoring package.
26
27
  environment: Deployment environment (e.g., production, staging).
27
28
  application_name: Name of the application using the Reka API.
@@ -50,8 +51,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
50
51
  The response from the original 'chat' method.
51
52
  """
52
53
 
53
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
54
+ server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
55
+ request_model = kwargs.get("model", "reka-core-20240501")
56
+
57
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
+
59
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
60
+ start_time = time.time()
54
61
  response = wrapped(*args, **kwargs)
62
+ end_time = time.time()
55
63
 
56
64
  try:
57
65
  # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
63
71
 
64
72
  if isinstance(content, list):
65
73
  content_str = ", ".join(
66
- # pylint: disable=line-too-long
67
74
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
68
75
  if "type" in item else f'text: {item["text"]}'
69
76
  for item in content
@@ -73,22 +80,68 @@ def chat(gen_ai_endpoint, version, environment, application_name,
73
80
  formatted_messages.append(f"{role}: {content}")
74
81
  prompt = "\n".join(formatted_messages)
75
82
 
76
- # Set base span attribues
83
+ input_tokens = response.usage.input_tokens
84
+ output_tokens = response.usage.output_tokens
85
+
86
+ # Calculate cost of the operation
87
+ cost = get_chat_model_cost(request_model,
88
+ pricing_info, input_tokens, output_tokens)
89
+
90
+ # Set Span attributes (OTel Semconv)
77
91
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
92
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
93
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
78
94
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
79
95
  SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
80
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
81
- SemanticConvetion.GEN_AI_TYPE_CHAT)
82
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
83
- gen_ai_endpoint)
84
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
96
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
97
+ request_model)
98
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
99
+ server_port)
100
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
101
+ kwargs.get("seed", ""))
102
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
103
+ kwargs.get("max_tokens", -1))
104
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
105
+ kwargs.get("stop", []))
106
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
+ kwargs.get("presence_penalty", 0.0))
108
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
109
+ kwargs.get("temperature", 0.4))
110
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
111
+ kwargs.get("top_k", 1.0))
112
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
113
+ kwargs.get("top_p", 1.0))
114
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
115
+ [response.responses[0].finish_reason])
116
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
117
+ response.id)
118
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
119
+ response.model)
120
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
121
+ input_tokens)
122
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
123
+ output_tokens)
124
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
125
+ server_address)
126
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
127
+ 'text')
128
+
129
+ # Set Span attributes (Extra)
130
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
85
131
  environment)
86
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
132
+ span.set_attribute(SERVICE_NAME,
87
133
  application_name)
88
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
89
- kwargs.get("model", "reka-core"))
90
134
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
91
135
  False)
136
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
137
+ input_tokens + output_tokens)
138
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
+ cost)
140
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
141
+ end_time - start_time)
142
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
143
+ version)
144
+
92
145
  if trace_content:
93
146
  span.add_event(
94
147
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -99,51 +152,36 @@ def chat(gen_ai_endpoint, version, environment, application_name,
99
152
  span.add_event(
100
153
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
101
154
  attributes={
102
- # pylint: disable=line-too-long
103
155
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
104
156
  },
105
157
  )
106
158
 
107
- prompt_tokens = response.usage.input_tokens
108
- completion_tokens = response.usage.output_tokens
109
- total_tokens = prompt_tokens + completion_tokens
110
- # Calculate cost of the operation
111
- cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
112
- pricing_info, prompt_tokens, completion_tokens)
113
-
114
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
115
- prompt_tokens)
116
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
117
- completion_tokens)
118
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
119
- total_tokens)
120
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
121
- [response.responses[0].finish_reason])
122
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
123
- cost)
124
-
125
159
  span.set_status(Status(StatusCode.OK))
126
160
 
127
161
  if disable_metrics is False:
128
- attributes = {
129
- TELEMETRY_SDK_NAME:
130
- "openlit",
131
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
132
- application_name,
133
- SemanticConvetion.GEN_AI_SYSTEM:
134
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
135
- SemanticConvetion.GEN_AI_ENVIRONMENT:
136
- environment,
137
- SemanticConvetion.GEN_AI_TYPE:
138
- SemanticConvetion.GEN_AI_TYPE_CHAT,
139
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
140
- kwargs.get("model", "reka-core")
141
- }
162
+ attributes = create_metrics_attributes(
163
+ service_name=application_name,
164
+ deployment_environment=environment,
165
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
166
+ system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
167
+ request_model=request_model,
168
+ server_address=server_address,
169
+ server_port=server_port,
170
+ response_model=response.model,
171
+ )
142
172
 
173
+ metrics["genai_client_usage_tokens"].record(
174
+ input_tokens + output_tokens, attributes
175
+ )
176
+ metrics["genai_client_operation_duration"].record(
177
+ end_time - start_time, attributes
178
+ )
179
+ metrics["genai_server_ttft"].record(
180
+ end_time - start_time, attributes
181
+ )
143
182
  metrics["genai_requests"].add(1, attributes)
144
- metrics["genai_total_tokens"].add(total_tokens, attributes)
145
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
146
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
183
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
147
185
  metrics["genai_cost"].record(cost, attributes)
148
186
 
149
187
  # Return original response
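
The hand-built per-provider attribute dicts give way to the shared create_metrics_attributes helper, and three instruments are recorded that did not exist before: genai_client_usage_tokens, genai_client_operation_duration, and genai_server_ttft (openlit's instruments live in openlit/otel/metrics.py, +98 lines in this release). A hedged sketch of the recording pattern in plain OTel metrics — the meter setup, instrument names, and attribute dict below are illustrative stand-ins, not openlit's actual definitions:

import time
from opentelemetry import metrics as otel_metrics

meter = otel_metrics.get_meter(__name__)
instruments = {
    "genai_client_operation_duration": meter.create_histogram(
        "gen_ai.client.operation.duration", unit="s"),
    "genai_server_ttft": meter.create_histogram(
        "gen_ai.server.time_to_first_token", unit="s"),
    "genai_requests": meter.create_counter("gen_ai.total.requests"),
}

start_time = time.time()
# ... the wrapped Reka call would run here ...
end_time = time.time()

attributes = {"gen_ai.operation.name": "chat"}  # stand-in for create_metrics_attributes(...)
instruments["genai_client_operation_duration"].record(end_time - start_time, attributes)
instruments["genai_server_ttft"].record(end_time - start_time, attributes)
instruments["genai_requests"].add(1, attributes)

Note that with no streaming in these wrappers, TTFT is recorded as the full request duration (end_time - start_time).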
openlit/instrumentation/together/__init__.py

@@ -37,7 +37,7 @@ class TogetherInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "together.resources.chat.completions",
             "ChatCompletions.create",
-            completion("together.chat.completions", version, environment, application_name,
+            completion(version, environment, application_name,
                        tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
@@ -45,7 +45,7 @@ class TogetherInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "together.resources.images",
             "Images.generate",
-            image_generate("together.image.generate", version, environment, application_name,
+            image_generate(version, environment, application_name,
                            tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
@@ -53,7 +53,7 @@ class TogetherInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "together.resources.chat.completions",
             "AsyncChatCompletions.create",
-            async_completion("together.chat.completions", version, environment, application_name,
+            async_completion(version, environment, application_name,
                              tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
@@ -61,7 +61,7 @@ class TogetherInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "together.resources.images",
             "AsyncImages.generate",
-            async_image_generate("together.image.generate", version, environment, application_name,
+            async_image_generate(version, environment, application_name,
                                  tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 