openlit 1.34.4__py3-none-any.whl → 1.34.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,53 +2,26 @@
2
2
  Module for monitoring Reka API calls.
3
3
  """
4
4
 
5
- import logging
6
5
  import time
7
- from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
6
+ from opentelemetry.trace import SpanKind
9
7
  from openlit.__helpers import (
10
- get_chat_model_cost,
11
8
  handle_exception,
12
- create_metrics_attributes,
13
9
  set_server_address_and_port
14
10
  )
11
+ from openlit.instrumentation.reka.utils import (
12
+ process_chat_response
13
+ )
15
14
  from openlit.semcov import SemanticConvention
16
15
 
17
- # Initialize logger for logging potential issues and operations
18
- logger = logging.getLogger(__name__)
19
-
20
16
  def chat(version, environment, application_name,
21
17
  tracer, pricing_info, capture_message_content, metrics, disable_metrics):
22
18
  """
23
- Generates a telemetry wrapper for chat to collect metrics.
24
-
25
- Args:
26
- version: Version of the monitoring package.
27
- environment: Deployment environment (e.g., production, staging).
28
- application_name: Name of the application using the Reka API.
29
- tracer: OpenTelemetry tracer for creating spans.
30
- pricing_info: Information used for calculating the cost of Reka usage.
31
- capture_message_content: Flag indicating whether to trace the actual content.
32
-
33
- Returns:
34
- A function that wraps the chat method to add telemetry.
19
+ Generates a telemetry wrapper for GenAI function call
35
20
  """
36
21
 
37
22
  def wrapper(wrapped, instance, args, kwargs):
38
23
  """
39
- Wraps the 'chat' API call to add telemetry.
40
-
41
- This collects metrics such as execution time, cost, and token usage, and handles errors
42
- gracefully, adding details to the trace for observability.
43
-
44
- Args:
45
- wrapped: The original 'chat' method to be wrapped.
46
- instance: The instance of the class where the original method is defined.
47
- args: Positional arguments for the 'chat' method.
48
- kwargs: Keyword arguments for the 'chat' method.
49
-
50
- Returns:
51
- The response from the original 'chat' method.
24
+ Wraps the GenAI function call.
52
25
  """
53
26
 
54
27
  server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
@@ -56,142 +29,31 @@ def chat(version, environment, application_name,
56
29
 
57
30
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
31
 
59
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
32
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
60
33
  start_time = time.time()
61
34
  response = wrapped(*args, **kwargs)
62
- end_time = time.time()
63
35
 
64
36
  try:
65
- # Format 'messages' into a single string
66
- message_prompt = kwargs.get("messages", "")
67
- formatted_messages = []
68
- for message in message_prompt:
69
- role = message["role"]
70
- content = message["content"]
71
-
72
- if isinstance(content, list):
73
- content_str = ", ".join(
74
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
75
- if "type" in item else f'text: {item["text"]}'
76
- for item in content
77
- )
78
- formatted_messages.append(f"{role}: {content_str}")
79
- else:
80
- formatted_messages.append(f"{role}: {content}")
81
- prompt = "\n".join(formatted_messages)
82
-
83
- input_tokens = response.usage.input_tokens
84
- output_tokens = response.usage.output_tokens
85
-
86
- # Calculate cost of the operation
87
- cost = get_chat_model_cost(request_model,
88
- pricing_info, input_tokens, output_tokens)
89
-
90
- # Set Span attributes (OTel Semconv)
91
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
92
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
93
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
94
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
95
- SemanticConvention.GEN_AI_SYSTEM_REKAAI)
96
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
97
- request_model)
98
- span.set_attribute(SemanticConvention.SERVER_PORT,
99
- server_port)
100
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
101
- kwargs.get("seed", ""))
102
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
103
- kwargs.get("max_tokens", -1))
104
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
105
- kwargs.get("stop", []))
106
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
- kwargs.get("presence_penalty", 0.0))
108
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
109
- kwargs.get("temperature", 0.4))
110
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
111
- kwargs.get("top_k", 1.0))
112
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
113
- kwargs.get("top_p", 1.0))
114
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
115
- [response.responses[0].finish_reason])
116
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
117
- response.id)
118
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
119
- response.model)
120
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
121
- input_tokens)
122
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
123
- output_tokens)
124
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
125
- server_address)
126
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
127
- 'text')
128
-
129
- # Set Span attributes (Extra)
130
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
131
- environment)
132
- span.set_attribute(SERVICE_NAME,
133
- application_name)
134
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
135
- False)
136
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
137
- input_tokens + output_tokens)
138
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
139
- cost)
140
- span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
141
- end_time - start_time)
142
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
143
- version)
144
-
145
- if capture_message_content:
146
- span.add_event(
147
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
148
- attributes={
149
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
150
- },
151
- )
152
- span.add_event(
153
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
154
- attributes={
155
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
156
- },
157
- )
158
-
159
- span.set_status(Status(StatusCode.OK))
160
-
161
- if disable_metrics is False:
162
- attributes = create_metrics_attributes(
163
- service_name=application_name,
164
- deployment_environment=environment,
165
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
166
- system=SemanticConvention.GEN_AI_SYSTEM_REKAAI,
167
- request_model=request_model,
168
- server_address=server_address,
169
- server_port=server_port,
170
- response_model=response.model,
171
- )
172
-
173
- metrics["genai_client_usage_tokens"].record(
174
- input_tokens + output_tokens, attributes
175
- )
176
- metrics["genai_client_operation_duration"].record(
177
- end_time - start_time, attributes
178
- )
179
- metrics["genai_server_ttft"].record(
180
- end_time - start_time, attributes
181
- )
182
- metrics["genai_requests"].add(1, attributes)
183
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
185
- metrics["genai_cost"].record(cost, attributes)
186
-
187
- # Return original response
188
- return response
37
+ response = process_chat_response(
38
+ response=response,
39
+ request_model=request_model,
40
+ pricing_info=pricing_info,
41
+ server_port=server_port,
42
+ server_address=server_address,
43
+ environment=environment,
44
+ application_name=application_name,
45
+ metrics=metrics,
46
+ start_time=start_time,
47
+ span=span,
48
+ capture_message_content=capture_message_content,
49
+ disable_metrics=disable_metrics,
50
+ version=version,
51
+ **kwargs
52
+ )
189
53
 
190
54
  except Exception as e:
191
55
  handle_exception(span, e)
192
- logger.error("Error in trace creation: %s", e)
193
56
 
194
- # Return original response
195
- return response
57
+ return response
196
58
 
197
59
  return wrapper
@@ -0,0 +1,193 @@
1
+ """
2
+ Reka OpenTelemetry instrumentation utility functions
3
+ """
4
+ import time
5
+
6
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
7
+ from opentelemetry.trace import Status, StatusCode
8
+
9
+ from openlit.__helpers import (
10
+ response_as_dict,
11
+ calculate_tbt,
12
+ get_chat_model_cost,
13
+ create_metrics_attributes,
14
+ )
15
+ from openlit.semcov import SemanticConvention
16
+
17
def _message_field(message, key, default=""):
    """
    Read `key` from a message given either as a mapping or as a plain object.
    """

    if isinstance(message, dict):
        return message.get(key, default)
    # NOTE(review): non-dict messages are presumably SDK model objects exposing
    # `role` / `content` as attributes -- confirm against the Reka client.
    return getattr(message, key, default)

def _format_part(part):
    """
    Render one entry of a list-valued message content as "<type>: <payload>".
    """

    if not isinstance(part, dict):
        # Unknown part shape: fall back to its string form instead of raising.
        return f"text: {part}"
    if "type" in part:
        payload = part["text"] if "text" in part else part.get("image_url", "")
        return f'{part["type"]}: {payload}'
    return f'text: {part.get("text", "")}'

def format_content(messages):
    """
    Flatten chat messages into a single newline-separated "role: content" string.

    Each message contributes one line. When a message's content is a list, its
    parts are rendered as "<type>: <text or image_url>" and joined with ", ".

    Args:
        messages: Iterable of messages (dicts with "role" and "content"), or a
            falsy value / plain string.

    Returns:
        The formatted prompt string. Falsy input yields "" and a plain string
        is returned unchanged. Missing keys are rendered as empty strings
        rather than raising, so telemetry is not lost on unusual payloads.
    """

    if not messages:
        return ""
    if isinstance(messages, str):
        return messages

    formatted_messages = []
    for message in messages:
        role = _message_field(message, "role")
        content = _message_field(message, "content")

        if isinstance(content, list):
            content_str = ", ".join(_format_part(part) for part in content)
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")

    return "\n".join(formatted_messages)
38
+
39
def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
    request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
    """
    Set the span attributes shared by every instrumented operation.

    Args:
        scope: Object carrying the active span as `scope._span`.
        gen_ai_operation: Value recorded under GEN_AI_OPERATION.
        gen_ai_system: Value recorded under GEN_AI_SYSTEM.
        server_address: Value recorded under SERVER_ADDRESS.
        server_port: Value recorded under SERVER_PORT.
        request_model: Value recorded under GEN_AI_REQUEST_MODEL.
        response_model: Value recorded under GEN_AI_RESPONSE_MODEL.
        environment: Value recorded under DEPLOYMENT_ENVIRONMENT.
        application_name: Value recorded under SERVICE_NAME.
        is_stream: Value recorded under GEN_AI_REQUEST_IS_STREAM.
        tbt: Value recorded under GEN_AI_SERVER_TBT.
        ttft: Value recorded under GEN_AI_SERVER_TTFT.
        version: Value recorded under GEN_AI_SDK_VERSION.
    """

    span = scope._span

    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
    span.set_attribute(SemanticConvention.GEN_AI_OPERATION, gen_ai_operation)
    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, gen_ai_system)
    span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
    span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
    # Use the explicit arguments: previously `response_model`, `tbt` and `ttft`
    # were accepted but silently ignored in favour of the scope's own fields.
    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
    span.set_attribute(SERVICE_NAME, application_name)
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
58
+
59
def record_common_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
    request_model, response_model, environment, application_name, start_time, end_time,
    input_tokens, output_tokens, cost, tbt=None, ttft=None):
    """
    Record the per-request metrics for one operation.

    Always records operation duration (`end_time - start_time`), a request
    count of 1, prompt and completion token counts, their sum, and cost.
    `tbt` and `ttft` are recorded only when they are not None.

    Args:
        metrics: Mapping from metric name to the instrument to record on.
        gen_ai_operation, gen_ai_system, server_address, server_port,
        request_model, response_model, environment, application_name:
            Passed to `create_metrics_attributes` to build the attribute set
            attached to every recorded value.
        start_time: Start of the operation.
        end_time: End of the operation.
        input_tokens: Prompt token count.
        output_tokens: Completion token count.
        cost: Cost of the operation.
        tbt: Optional value for the "genai_server_tbt" metric.
        ttft: Optional value for the "genai_server_ttft" metric.
    """

    metric_attrs = create_metrics_attributes(
        service_name=application_name,
        deployment_environment=environment,
        operation=gen_ai_operation,
        system=gen_ai_system,
        request_model=request_model,
        server_address=server_address,
        server_port=server_port,
        response_model=response_model,
    )

    metrics["genai_client_operation_duration"].record(end_time - start_time, metric_attrs)
    metrics["genai_requests"].add(1, metric_attrs)
    metrics["genai_prompt_tokens"].add(input_tokens, metric_attrs)
    metrics["genai_completion_tokens"].add(output_tokens, metric_attrs)
    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metric_attrs)
    metrics["genai_cost"].record(cost, metric_attrs)

    # Latency metrics are optional; skip whichever one was not supplied.
    for metric_name, latency in (("genai_server_tbt", tbt), ("genai_server_ttft", ttft)):
        if latency is not None:
            metrics[metric_name].record(latency, metric_attrs)
86
+
87
def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
    capture_message_content, disable_metrics, version, is_stream):
    """
    Populate the span, and optionally the metrics, for one completed chat call.

    Args:
        scope: Object holding the per-call state read here (`_span`, `_kwargs`,
            `_timestamps`, `_tbt`, `_ttft`, `_input_tokens`, `_output_tokens`,
            `_response_model`, `_response_id`, `_finish_reason`, `_llmresponse`,
            `_tools`, `_server_address`, `_server_port`, `_start_time`,
            `_end_time`).
        pricing_info: Pricing data passed to `get_chat_model_cost`.
        environment: Deployment environment recorded on the span and metrics.
        application_name: Service name recorded on the span and metrics.
        metrics: Mapping of metric instruments; used unless `disable_metrics`.
        capture_message_content: When truthy, prompt and completion text are
            attached to the span as attributes and as events.
        disable_metrics: When truthy, no metrics are recorded.
        version: SDK version recorded on the span.
        is_stream: Recorded on the span as the request's streaming flag.
    """

    # TBT is only recomputed when more than one timestamp was collected.
    if len(scope._timestamps) > 1:
        scope._tbt = calculate_tbt(scope._timestamps)

    prompt = format_content(scope._kwargs.get("messages", ""))
    request_model = scope._kwargs.get("model", "reka-core-20240501")

    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)

    # Common Span Attributes
    common_span_attributes(scope,
        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_REKAAI,
        scope._server_address, scope._server_port, request_model, scope._response_model,
        environment, application_name, is_stream, scope._tbt, scope._ttft, version)

    span = scope._span
    request_args = scope._kwargs

    # Span Attributes for Response parameters
    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, request_args.get("seed", ""))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, request_args.get("max_tokens", -1))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, request_args.get("presence_penalty", 0.0))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, request_args.get("stop", []))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, request_args.get("temperature", 0.4))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, request_args.get("top_k", 1.0))
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, request_args.get("top_p", 1.0))
    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
    span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")

    # Span Attributes for Cost and Tokens
    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
    span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)

    # Span Attributes for Tools
    # `_tools` is filled from the response's `tool_calls` field, which is
    # presumably a list of calls (TODO confirm against the Reka SDK). Calling
    # `.get` on a list raised AttributeError, so report the first call and
    # still accept a single mapping.
    tool_call = scope._tools
    if isinstance(tool_call, (list, tuple)):
        tool_call = tool_call[0] if tool_call else None
    if isinstance(tool_call, dict) and tool_call:
        span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, tool_call.get("name", ""))
        span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(tool_call.get("id", "")))
        span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(tool_call.get("parameters", "")))

    # Span Attributes for Content
    if capture_message_content:
        span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
        span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)

        # To be removed once the change to span_attributes (from span events) is complete
        span.add_event(
            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
            attributes={
                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
            },
        )
        span.add_event(
            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
            attributes={
                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
            },
        )

    span.set_status(Status(StatusCode.OK))

    # Metrics
    if not disable_metrics:
        record_common_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_REKAAI,
            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
            cost, scope._tbt, scope._ttft)
158
+
159
def process_chat_response(response, request_model, pricing_info, server_port, server_address,
    environment, application_name, metrics, start_time, span, capture_message_content=False,
    disable_metrics=False, version="1.0.0", **kwargs):
    """
    Build the telemetry scope for a non-streaming chat response and emit it.

    The response is converted with `response_as_dict`, the fields needed for
    telemetry are copied onto a throwaway scope object, and
    `common_chat_logic` is called with `is_stream=False`.

    Missing or `None` sections of the response (`usage`, `responses`,
    `message`) are treated as empty, and missing token counts as 0, so that an
    unusual payload degrades the telemetry instead of raising.

    Args:
        response: The object returned by the wrapped chat call.
        request_model: Accepted for interface compatibility; the model name is
            re-read from `kwargs` inside `common_chat_logic`.
        pricing_info: Pricing data used for cost calculation.
        server_port: Server port recorded on the span and metrics.
        server_address: Server address recorded on the span and metrics.
        environment: Deployment environment name.
        application_name: Service name.
        metrics: Mapping of metric instruments.
        start_time: Timestamp taken before the wrapped call.
        span: The active span to annotate.
        capture_message_content: Whether prompt/completion text is recorded.
        disable_metrics: Whether metric recording is skipped.
        version: SDK version recorded on the span.
        **kwargs: The keyword arguments of the original chat call.

    Returns:
        The original `response`, unchanged.
    """

    scope = type("GenericScope", (), {})()
    response_dict = response_as_dict(response)

    # `or` fallbacks cover both an absent key and an explicit None value.
    responses = response_dict.get("responses") or []
    first_response = responses[0] if responses else {}
    usage = response_dict.get("usage") or {}

    scope._start_time = start_time
    scope._end_time = time.time()
    scope._span = span
    scope._llmresponse = " ".join(
        ((choice.get("message") or {}).get("content") or "")
        for choice in responses
    )
    scope._response_id = response_dict.get("id")
    scope._response_model = response_dict.get("model")
    # Token counts are added together downstream, so they must be numeric.
    scope._input_tokens = usage.get("input_tokens") or 0
    scope._output_tokens = usage.get("output_tokens") or 0
    scope._timestamps = []
    # Without streaming the whole call duration is used as TTFT; TBT is 0.
    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
    scope._server_address, scope._server_port = server_address, server_port
    scope._kwargs = kwargs
    scope._finish_reason = str(first_response.get("finish_reason"))
    # Tool calls are only looked up when the request itself supplied tools.
    if scope._kwargs.get("tools"):
        scope._tools = (first_response.get("message") or {}).get("tool_calls")
    else:
        scope._tools = None

    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
        capture_message_content, disable_metrics, version, is_stream=False)

    return response
@@ -1,4 +1,3 @@
1
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
2
1
  """Initializer of Auto Instrumentation of Together AI Functions"""
3
2
 
4
3
  from typing import Collection
@@ -17,15 +16,15 @@ _instruments = ("together >= 1.3.5",)
17
16
 
18
17
  class TogetherInstrumentor(BaseInstrumentor):
19
18
  """
20
- An instrumentor for Together's client library.
19
+ An instrumentor for Together client library.
21
20
  """
22
21
 
23
22
  def instrumentation_dependencies(self) -> Collection[str]:
24
23
  return _instruments
25
24
 
26
25
  def _instrument(self, **kwargs):
27
- application_name = kwargs.get("application_name", "default_application")
28
- environment = kwargs.get("environment", "default_environment")
26
+ application_name = kwargs.get("application_name", "default")
27
+ environment = kwargs.get("environment", "default")
29
28
  tracer = kwargs.get("tracer")
30
29
  metrics = kwargs.get("metrics_dict")
31
30
  pricing_info = kwargs.get("pricing_info", {})
@@ -66,5 +65,4 @@ class TogetherInstrumentor(BaseInstrumentor):
66
65
  )
67
66
 
68
67
  def _uninstrument(self, **kwargs):
69
- # Proper uninstrumentation logic to revert patched methods
70
68
  pass