openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. openlit/__helpers.py +5 -0
  2. openlit/__init__.py +3 -2
  3. openlit/instrumentation/ag2/ag2.py +3 -3
  4. openlit/instrumentation/ai21/ai21.py +1 -1
  5. openlit/instrumentation/ai21/async_ai21.py +1 -1
  6. openlit/instrumentation/anthropic/anthropic.py +1 -1
  7. openlit/instrumentation/anthropic/async_anthropic.py +1 -1
  8. openlit/instrumentation/astra/astra.py +5 -5
  9. openlit/instrumentation/astra/async_astra.py +5 -5
  10. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
  11. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
  12. openlit/instrumentation/chroma/chroma.py +5 -5
  13. openlit/instrumentation/cohere/async_cohere.py +1 -1
  14. openlit/instrumentation/cohere/cohere.py +2 -2
  15. openlit/instrumentation/controlflow/controlflow.py +3 -3
  16. openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
  17. openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
  18. openlit/instrumentation/crewai/crewai.py +4 -2
  19. openlit/instrumentation/dynamiq/dynamiq.py +3 -3
  20. openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
  21. openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
  22. openlit/instrumentation/embedchain/embedchain.py +5 -5
  23. openlit/instrumentation/firecrawl/firecrawl.py +3 -3
  24. openlit/instrumentation/gpt4all/__init__.py +2 -2
  25. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  26. openlit/instrumentation/gpu/__init__.py +5 -5
  27. openlit/instrumentation/groq/__init__.py +2 -2
  28. openlit/instrumentation/groq/async_groq.py +356 -240
  29. openlit/instrumentation/groq/groq.py +356 -240
  30. openlit/instrumentation/haystack/haystack.py +3 -3
  31. openlit/instrumentation/julep/async_julep.py +3 -3
  32. openlit/instrumentation/julep/julep.py +3 -3
  33. openlit/instrumentation/langchain/__init__.py +13 -7
  34. openlit/instrumentation/langchain/async_langchain.py +384 -0
  35. openlit/instrumentation/langchain/langchain.py +98 -490
  36. openlit/instrumentation/letta/letta.py +5 -3
  37. openlit/instrumentation/litellm/__init__.py +4 -5
  38. openlit/instrumentation/litellm/async_litellm.py +316 -245
  39. openlit/instrumentation/litellm/litellm.py +312 -241
  40. openlit/instrumentation/llamaindex/llamaindex.py +3 -3
  41. openlit/instrumentation/mem0/mem0.py +3 -3
  42. openlit/instrumentation/milvus/milvus.py +5 -5
  43. openlit/instrumentation/mistral/__init__.py +6 -6
  44. openlit/instrumentation/mistral/async_mistral.py +421 -248
  45. openlit/instrumentation/mistral/mistral.py +418 -244
  46. openlit/instrumentation/multion/async_multion.py +4 -2
  47. openlit/instrumentation/multion/multion.py +4 -2
  48. openlit/instrumentation/ollama/__init__.py +8 -30
  49. openlit/instrumentation/ollama/async_ollama.py +385 -417
  50. openlit/instrumentation/ollama/ollama.py +384 -417
  51. openlit/instrumentation/openai/async_openai.py +7 -9
  52. openlit/instrumentation/openai/openai.py +7 -9
  53. openlit/instrumentation/phidata/phidata.py +4 -2
  54. openlit/instrumentation/pinecone/pinecone.py +5 -5
  55. openlit/instrumentation/premai/__init__.py +2 -2
  56. openlit/instrumentation/premai/premai.py +262 -213
  57. openlit/instrumentation/qdrant/async_qdrant.py +5 -5
  58. openlit/instrumentation/qdrant/qdrant.py +5 -5
  59. openlit/instrumentation/reka/__init__.py +2 -2
  60. openlit/instrumentation/reka/async_reka.py +90 -52
  61. openlit/instrumentation/reka/reka.py +90 -52
  62. openlit/instrumentation/together/__init__.py +4 -4
  63. openlit/instrumentation/together/async_together.py +278 -236
  64. openlit/instrumentation/together/together.py +278 -236
  65. openlit/instrumentation/transformers/__init__.py +1 -1
  66. openlit/instrumentation/transformers/transformers.py +75 -44
  67. openlit/instrumentation/vertexai/__init__.py +14 -64
  68. openlit/instrumentation/vertexai/async_vertexai.py +329 -986
  69. openlit/instrumentation/vertexai/vertexai.py +329 -986
  70. openlit/instrumentation/vllm/__init__.py +1 -1
  71. openlit/instrumentation/vllm/vllm.py +62 -32
  72. openlit/semcov/__init__.py +3 -3
  73. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  74. openlit-1.33.10.dist-info/RECORD +122 -0
  75. openlit-1.33.9.dist-info/RECORD +0 -121
  76. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
  77. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
5
5
 
6
6
  import logging
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import handle_exception
10
10
  from openlit.semcov import SemanticConvetion
11
11
 
@@ -73,9 +73,9 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
73
73
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
74
74
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
75
75
  gen_ai_endpoint)
76
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
76
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
77
77
  environment)
78
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
78
+ span.set_attribute(SERVICE_NAME,
79
79
  application_name)
80
80
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
81
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -241,11 +241,11 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
241
241
  attributes = {
242
242
  TELEMETRY_SDK_NAME:
243
243
  "openlit",
244
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
244
+ SERVICE_NAME:
245
245
  application_name,
246
246
  SemanticConvetion.DB_SYSTEM:
247
247
  SemanticConvetion.DB_SYSTEM_QDRANT,
248
- SemanticConvetion.GEN_AI_ENVIRONMENT:
248
+ DEPLOYMENT_ENVIRONMENT:
249
249
  environment,
250
250
  SemanticConvetion.GEN_AI_OPERATION:
251
251
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
5
5
 
6
6
  import logging
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import handle_exception
10
10
  from openlit.semcov import SemanticConvetion
11
11
 
@@ -73,9 +73,9 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
73
73
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
74
74
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
75
75
  gen_ai_endpoint)
76
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
76
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
77
77
  environment)
78
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
78
+ span.set_attribute(SERVICE_NAME,
79
79
  application_name)
80
80
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
81
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -248,11 +248,11 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
248
248
  attributes = {
249
249
  TELEMETRY_SDK_NAME:
250
250
  "openlit",
251
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
251
+ SERVICE_NAME:
252
252
  application_name,
253
253
  SemanticConvetion.DB_SYSTEM:
254
254
  SemanticConvetion.DB_SYSTEM_QDRANT,
255
- SemanticConvetion.GEN_AI_ENVIRONMENT:
255
+ DEPLOYMENT_ENVIRONMENT:
256
256
  environment,
257
257
  SemanticConvetion.GEN_AI_OPERATION:
258
258
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
@@ -37,7 +37,7 @@ class RekaInstrumentor(BaseInstrumentor):
37
37
  wrap_function_wrapper(
38
38
  "reka.chat.client",
39
39
  "ChatClient.create",
40
- chat("reka.chat", version, environment, application_name,
40
+ chat(version, environment, application_name,
41
41
  tracer, pricing_info, trace_content, metrics, disable_metrics),
42
42
  )
43
43
 
@@ -45,7 +45,7 @@ class RekaInstrumentor(BaseInstrumentor):
45
45
  wrap_function_wrapper(
46
46
  "reka.chat.client",
47
47
  "AsyncChatClient.create",
48
- async_chat("reka.chat", version, environment, application_name,
48
+ async_chat(version, environment, application_name,
49
49
  tracer, pricing_info, trace_content, metrics, disable_metrics),
50
50
  )
51
51
 
@@ -1,27 +1,28 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
2
1
  """
3
2
  Module for monitoring Reka API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import (
10
- handle_exception,
11
10
  get_chat_model_cost,
11
+ handle_exception,
12
+ create_metrics_attributes,
13
+ set_server_address_and_port
12
14
  )
13
15
  from openlit.semcov import SemanticConvetion
14
16
 
15
17
  # Initialize logger for logging potential issues and operations
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def async_chat(gen_ai_endpoint, version, environment, application_name,
20
+ def async_chat(version, environment, application_name,
19
21
  tracer, pricing_info, trace_content, metrics, disable_metrics):
20
22
  """
21
23
  Generates a telemetry wrapper for chat to collect metrics.
22
24
 
23
25
  Args:
24
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
25
26
  version: Version of the monitoring package.
26
27
  environment: Deployment environment (e.g., production, staging).
27
28
  application_name: Name of the application using the Reka API.
@@ -50,8 +51,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
50
51
  The response from the original 'chat' method.
51
52
  """
52
53
 
53
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
54
+ server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
55
+ request_model = kwargs.get("model", "reka-core-20240501")
56
+
57
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
+
59
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
60
+ start_time = time.time()
54
61
  response = await wrapped(*args, **kwargs)
62
+ end_time = time.time()
55
63
 
56
64
  try:
57
65
  # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
63
71
 
64
72
  if isinstance(content, list):
65
73
  content_str = ", ".join(
66
- # pylint: disable=line-too-long
67
74
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
68
75
  if "type" in item else f'text: {item["text"]}'
69
76
  for item in content
@@ -73,22 +80,68 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
73
80
  formatted_messages.append(f"{role}: {content}")
74
81
  prompt = "\n".join(formatted_messages)
75
82
 
76
- # Set base span attribues
83
+ input_tokens = response.usage.input_tokens
84
+ output_tokens = response.usage.output_tokens
85
+
86
+ # Calculate cost of the operation
87
+ cost = get_chat_model_cost(request_model,
88
+ pricing_info, input_tokens, output_tokens)
89
+
90
+ # Set Span attributes (OTel Semconv)
77
91
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
78
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
79
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
80
92
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
93
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
82
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
83
- gen_ai_endpoint)
84
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
94
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
95
+ SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
96
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
97
+ request_model)
98
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
99
+ server_port)
100
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
101
+ kwargs.get("seed", ""))
102
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
103
+ kwargs.get("max_tokens", -1))
104
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
105
+ kwargs.get("stop", []))
106
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
+ kwargs.get("presence_penalty", 0.0))
108
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
109
+ kwargs.get("temperature", 0.4))
110
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
111
+ kwargs.get("top_k", 1.0))
112
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
113
+ kwargs.get("top_p", 1.0))
114
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
115
+ [response.responses[0].finish_reason])
116
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
117
+ response.id)
118
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
119
+ response.model)
120
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
121
+ input_tokens)
122
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
123
+ output_tokens)
124
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
125
+ server_address)
126
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
127
+ 'text')
128
+
129
+ # Set Span attributes (Extra)
130
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
85
131
  environment)
86
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
132
+ span.set_attribute(SERVICE_NAME,
87
133
  application_name)
88
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
89
- kwargs.get("model", "reka-core"))
90
134
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
91
135
  False)
136
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
137
+ input_tokens + output_tokens)
138
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
+ cost)
140
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
141
+ end_time - start_time)
142
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
143
+ version)
144
+
92
145
  if trace_content:
93
146
  span.add_event(
94
147
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -99,51 +152,36 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
99
152
  span.add_event(
100
153
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
101
154
  attributes={
102
- # pylint: disable=line-too-long
103
155
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
104
156
  },
105
157
  )
106
158
 
107
- prompt_tokens = response.usage.input_tokens
108
- completion_tokens = response.usage.output_tokens
109
- total_tokens = prompt_tokens + completion_tokens
110
- # Calculate cost of the operation
111
- cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
112
- pricing_info, prompt_tokens, completion_tokens)
113
-
114
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
115
- prompt_tokens)
116
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
117
- completion_tokens)
118
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
119
- total_tokens)
120
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
121
- [response.responses[0].finish_reason])
122
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
123
- cost)
124
-
125
159
  span.set_status(Status(StatusCode.OK))
126
160
 
127
161
  if disable_metrics is False:
128
- attributes = {
129
- TELEMETRY_SDK_NAME:
130
- "openlit",
131
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
132
- application_name,
133
- SemanticConvetion.GEN_AI_SYSTEM:
134
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
135
- SemanticConvetion.GEN_AI_ENVIRONMENT:
136
- environment,
137
- SemanticConvetion.GEN_AI_OPERATION:
138
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
139
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
140
- kwargs.get("model", "reka-core")
141
- }
162
+ attributes = create_metrics_attributes(
163
+ service_name=application_name,
164
+ deployment_environment=environment,
165
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
166
+ system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
167
+ request_model=request_model,
168
+ server_address=server_address,
169
+ server_port=server_port,
170
+ response_model=response.model,
171
+ )
142
172
 
173
+ metrics["genai_client_usage_tokens"].record(
174
+ input_tokens + output_tokens, attributes
175
+ )
176
+ metrics["genai_client_operation_duration"].record(
177
+ end_time - start_time, attributes
178
+ )
179
+ metrics["genai_server_ttft"].record(
180
+ end_time - start_time, attributes
181
+ )
143
182
  metrics["genai_requests"].add(1, attributes)
144
- metrics["genai_total_tokens"].add(total_tokens, attributes)
145
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
146
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
183
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
147
185
  metrics["genai_cost"].record(cost, attributes)
148
186
 
149
187
  # Return original response
@@ -1,27 +1,28 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
2
1
  """
3
2
  Module for monitoring Reka API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import (
10
- handle_exception,
11
10
  get_chat_model_cost,
11
+ handle_exception,
12
+ create_metrics_attributes,
13
+ set_server_address_and_port
12
14
  )
13
15
  from openlit.semcov import SemanticConvetion
14
16
 
15
17
  # Initialize logger for logging potential issues and operations
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def chat(gen_ai_endpoint, version, environment, application_name,
20
+ def chat(version, environment, application_name,
19
21
  tracer, pricing_info, trace_content, metrics, disable_metrics):
20
22
  """
21
23
  Generates a telemetry wrapper for chat to collect metrics.
22
24
 
23
25
  Args:
24
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
25
26
  version: Version of the monitoring package.
26
27
  environment: Deployment environment (e.g., production, staging).
27
28
  application_name: Name of the application using the Reka API.
@@ -50,8 +51,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
50
51
  The response from the original 'chat' method.
51
52
  """
52
53
 
53
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
54
+ server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
55
+ request_model = kwargs.get("model", "reka-core-20240501")
56
+
57
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
+
59
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
60
+ start_time = time.time()
54
61
  response = wrapped(*args, **kwargs)
62
+ end_time = time.time()
55
63
 
56
64
  try:
57
65
  # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
63
71
 
64
72
  if isinstance(content, list):
65
73
  content_str = ", ".join(
66
- # pylint: disable=line-too-long
67
74
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
68
75
  if "type" in item else f'text: {item["text"]}'
69
76
  for item in content
@@ -73,22 +80,68 @@ def chat(gen_ai_endpoint, version, environment, application_name,
73
80
  formatted_messages.append(f"{role}: {content}")
74
81
  prompt = "\n".join(formatted_messages)
75
82
 
76
- # Set base span attribues
83
+ input_tokens = response.usage.input_tokens
84
+ output_tokens = response.usage.output_tokens
85
+
86
+ # Calculate cost of the operation
87
+ cost = get_chat_model_cost(request_model,
88
+ pricing_info, input_tokens, output_tokens)
89
+
90
+ # Set Span attributes (OTel Semconv)
77
91
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
78
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
79
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
80
92
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
93
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
82
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
83
- gen_ai_endpoint)
84
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
94
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
95
+ SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
96
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
97
+ request_model)
98
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
99
+ server_port)
100
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
101
+ kwargs.get("seed", ""))
102
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
103
+ kwargs.get("max_tokens", -1))
104
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
105
+ kwargs.get("stop", []))
106
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
+ kwargs.get("presence_penalty", 0.0))
108
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
109
+ kwargs.get("temperature", 0.4))
110
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
111
+ kwargs.get("top_k", 1.0))
112
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
113
+ kwargs.get("top_p", 1.0))
114
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
115
+ [response.responses[0].finish_reason])
116
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
117
+ response.id)
118
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
119
+ response.model)
120
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
121
+ input_tokens)
122
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
123
+ output_tokens)
124
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
125
+ server_address)
126
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
127
+ 'text')
128
+
129
+ # Set Span attributes (Extra)
130
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
85
131
  environment)
86
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
132
+ span.set_attribute(SERVICE_NAME,
87
133
  application_name)
88
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
89
- kwargs.get("model", "reka-core"))
90
134
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
91
135
  False)
136
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
137
+ input_tokens + output_tokens)
138
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
+ cost)
140
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
141
+ end_time - start_time)
142
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
143
+ version)
144
+
92
145
  if trace_content:
93
146
  span.add_event(
94
147
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -99,51 +152,36 @@ def chat(gen_ai_endpoint, version, environment, application_name,
99
152
  span.add_event(
100
153
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
101
154
  attributes={
102
- # pylint: disable=line-too-long
103
155
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
104
156
  },
105
157
  )
106
158
 
107
- prompt_tokens = response.usage.input_tokens
108
- completion_tokens = response.usage.output_tokens
109
- total_tokens = prompt_tokens + completion_tokens
110
- # Calculate cost of the operation
111
- cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
112
- pricing_info, prompt_tokens, completion_tokens)
113
-
114
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
115
- prompt_tokens)
116
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
117
- completion_tokens)
118
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
119
- total_tokens)
120
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
121
- [response.responses[0].finish_reason])
122
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
123
- cost)
124
-
125
159
  span.set_status(Status(StatusCode.OK))
126
160
 
127
161
  if disable_metrics is False:
128
- attributes = {
129
- TELEMETRY_SDK_NAME:
130
- "openlit",
131
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
132
- application_name,
133
- SemanticConvetion.GEN_AI_SYSTEM:
134
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
135
- SemanticConvetion.GEN_AI_ENVIRONMENT:
136
- environment,
137
- SemanticConvetion.GEN_AI_OPERATION:
138
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
139
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
140
- kwargs.get("model", "reka-core")
141
- }
162
+ attributes = create_metrics_attributes(
163
+ service_name=application_name,
164
+ deployment_environment=environment,
165
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
166
+ system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
167
+ request_model=request_model,
168
+ server_address=server_address,
169
+ server_port=server_port,
170
+ response_model=response.model,
171
+ )
142
172
 
173
+ metrics["genai_client_usage_tokens"].record(
174
+ input_tokens + output_tokens, attributes
175
+ )
176
+ metrics["genai_client_operation_duration"].record(
177
+ end_time - start_time, attributes
178
+ )
179
+ metrics["genai_server_ttft"].record(
180
+ end_time - start_time, attributes
181
+ )
143
182
  metrics["genai_requests"].add(1, attributes)
144
- metrics["genai_total_tokens"].add(total_tokens, attributes)
145
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
146
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
183
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
147
185
  metrics["genai_cost"].record(cost, attributes)
148
186
 
149
187
  # Return original response
@@ -37,7 +37,7 @@ class TogetherInstrumentor(BaseInstrumentor):
37
37
  wrap_function_wrapper(
38
38
  "together.resources.chat.completions",
39
39
  "ChatCompletions.create",
40
- completion("together.chat.completions", version, environment, application_name,
40
+ completion(version, environment, application_name,
41
41
  tracer, pricing_info, trace_content, metrics, disable_metrics),
42
42
  )
43
43
 
@@ -45,7 +45,7 @@ class TogetherInstrumentor(BaseInstrumentor):
45
45
  wrap_function_wrapper(
46
46
  "together.resources.images",
47
47
  "Images.generate",
48
- image_generate("together.image.generate", version, environment, application_name,
48
+ image_generate(version, environment, application_name,
49
49
  tracer, pricing_info, trace_content, metrics, disable_metrics),
50
50
  )
51
51
 
@@ -53,7 +53,7 @@ class TogetherInstrumentor(BaseInstrumentor):
53
53
  wrap_function_wrapper(
54
54
  "together.resources.chat.completions",
55
55
  "AsyncChatCompletions.create",
56
- async_completion("together.chat.completions", version, environment, application_name,
56
+ async_completion(version, environment, application_name,
57
57
  tracer, pricing_info, trace_content, metrics, disable_metrics),
58
58
  )
59
59
 
@@ -61,7 +61,7 @@ class TogetherInstrumentor(BaseInstrumentor):
61
61
  wrap_function_wrapper(
62
62
  "together.resources.images",
63
63
  "AsyncImages.generate",
64
- async_image_generate("together.image.generate", version, environment, application_name,
64
+ async_image_generate(version, environment, application_name,
65
65
  tracer, pricing_info, trace_content, metrics, disable_metrics),
66
66
  )
67
67