openlit 1.33.11__py3-none-any.whl → 1.33.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +73 -109
- openlit/instrumentation/ag2/__init__.py +14 -14
- openlit/instrumentation/ag2/ag2.py +11 -11
- openlit/instrumentation/ai21/__init__.py +18 -18
- openlit/instrumentation/ai21/ai21.py +13 -14
- openlit/instrumentation/ai21/async_ai21.py +13 -14
- openlit/instrumentation/ai21/utils.py +86 -84
- openlit/instrumentation/anthropic/__init__.py +16 -16
- openlit/instrumentation/anthropic/anthropic.py +60 -352
- openlit/instrumentation/anthropic/async_anthropic.py +61 -353
- openlit/instrumentation/anthropic/utils.py +251 -0
- openlit/instrumentation/ollama/utils.py +0 -1
- {openlit-1.33.11.dist-info → openlit-1.33.12.dist-info}/METADATA +1 -1
- {openlit-1.33.11.dist-info → openlit-1.33.12.dist-info}/RECORD +16 -15
- {openlit-1.33.11.dist-info → openlit-1.33.12.dist-info}/LICENSE +0 -0
- {openlit-1.33.11.dist-info → openlit-1.33.12.dist-info}/WHEEL +0 -0
@@ -7,7 +7,6 @@ import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-    response_as_dict,
     set_server_address_and_port,
 )
 from openlit.instrumentation.ai21.utils import (
@@ -47,9 +46,9 @@ def async_chat(version, environment, application_name,
             self._span = span
             self._span_name = span_name
             # Placeholder for aggregating streaming response
-            self._llmresponse =
-            self._response_id =
-            self._finish_reason =
+            self._llmresponse = ''
+            self._response_id = ''
+            self._finish_reason = ''
             self._input_tokens = 0
             self._output_tokens = 0
             self._choices = []
@@ -100,7 +99,7 @@ def async_chat(version, environment, application_name,
                         )
                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error(
+                    logger.error('Error in trace creation: %s', e)
                 raise

     async def wrapper(wrapped, instance, args, kwargs):
@@ -109,12 +108,12 @@ def async_chat(version, environment, application_name,
         """

         # Check if streaming is enabled for the API call
-        streaming = kwargs.get(
+        streaming = kwargs.get('stream', False)

-        server_address, server_port = set_server_address_and_port(instance,
-        request_model = kwargs.get(
+        server_address, server_port = set_server_address_and_port(instance, 'api.ai21.com', 443)
+        request_model = kwargs.get('model', 'jamba-1.5-mini')

-        span_name = f
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'

         # pylint: disable=no-else-return
         if streaming:
@@ -129,7 +128,7 @@ def async_chat(version, environment, application_name,
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
                 response = process_chat_response(
-                    response=
+                    response=response,
                     request_model=request_model,
                     pricing_info=pricing_info,
                     server_port=server_port,
@@ -161,16 +160,16 @@ def async_chat_rag(version, environment, application_name,
         Wraps the GenAI function call.
         """

-        server_address, server_port = set_server_address_and_port(instance,
-        request_model = kwargs.get(
+        server_address, server_port = set_server_address_and_port(instance, 'api.ai21.com', 443)
+        request_model = kwargs.get('model', 'jamba-1.5-mini')

-        span_name = f
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'

         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
             response = await wrapped(*args, **kwargs)
             response = process_chat_rag_response(
-                response=
+                response=response,
                 request_model=request_model,
                 pricing_info=pricing_info,
                 server_port=server_port,
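The two async wrappers above follow the usual wrapt pattern: derive a span name from the requested model, await the wrapped call, then hand the result to a shared processing helper. A minimal sketch of that pattern, with illustrative names only (the attribute key and the commented-out attach target are assumptions, not openlit's actual wiring):

```python
# Minimal sketch of the async wrapper pattern used above; illustrative only.
import time
from wrapt import wrap_function_wrapper

def make_async_wrapper(tracer, operation_name):
    # wrapt expects a callable taking (wrapped, instance, args, kwargs).
    async def wrapper(wrapped, instance, args, kwargs):
        request_model = kwargs.get('model', 'jamba-1.5-mini')
        span_name = f'{operation_name} {request_model}'
        with tracer.start_as_current_span(span_name) as span:
            start_time = time.time()
            response = await wrapped(*args, **kwargs)
            # Hypothetical attribute name, for illustration only.
            span.set_attribute('gen_ai.server.ttft', time.time() - start_time)
            return response
    return wrapper

# Hypothetical attach point; the real wiring lives in openlit/instrumentation/ai21/__init__.py:
# wrap_function_wrapper('<ai21 module>', '<AsyncChatCompletions.create>',
#                       make_async_wrapper(tracer, 'chat'))
```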
@@ -29,30 +29,30 @@ def setup_common_span_attributes(span, request_model, kwargs, tokens,
     """

     # Base attributes from SDK and operation settings.
-    span.set_attribute(TELEMETRY_SDK_NAME,
+    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AI21)
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
     span.set_attribute(SemanticConvetion.SERVER_PORT, server_port)
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, kwargs.get(
-    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, kwargs.get(
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED, kwargs.get('seed', ''))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get('frequency_penalty', 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get('max_tokens', -1))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get('presence_penalty', 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get('stop', []))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, kwargs.get('temperature', 0.4))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, kwargs.get('top_p', 1.0))

     # Add token-related attributes if available.
-    if
-        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [tokens[
-    if
-        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, tokens[
-    if
-        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, tokens[
-    if
-        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, tokens[
-    if
-        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, tokens[
+    if 'finish_reason' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [tokens['finish_reason']])
+    if 'response_id' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, tokens['response_id'])
+    if 'input_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, tokens['input_tokens'])
+    if 'output_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, tokens['output_tokens'])
+    if 'total_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, tokens['total_tokens'])

     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, request_model)
     span.set_attribute(SemanticConvetion.SERVER_ADDRESS, server_address)
@@ -80,15 +80,15 @@ def record_common_metrics(metrics, application_name, environment, request_model,
         server_port=server_port,
         response_model=request_model,
     )
-    metrics[
-    metrics[
+    metrics['genai_client_usage_tokens'].record(input_tokens + output_tokens, attributes)
+    metrics['genai_client_operation_duration'].record(end_time - start_time, attributes)
     if include_tbt and tbt_value is not None:
-        metrics[
-    metrics[
-    metrics[
-    metrics[
-    metrics[
-    metrics[
+        metrics['genai_server_tbt'].record(tbt_value, attributes)
+    metrics['genai_server_ttft'].record(end_time - start_time, attributes)
+    metrics['genai_requests'].add(1, attributes)
+    metrics['genai_completion_tokens'].add(output_tokens, attributes)
+    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+    metrics['genai_cost'].record(cost, attributes)

 def emit_common_events(event_provider, choices, finish_reason, llmresponse, formatted_messages,
                        capture_message_content, n):
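`record_common_metrics` records into a dict of pre-created OpenTelemetry instruments keyed by the names seen above. A hedged sketch of how such a dict could be built with the OTel metrics API (the instrument names, units, and descriptions here are illustrative, not openlit's exact definitions):

```python
# Sketch of a metrics dict like the one consumed above; names and units are illustrative.
from opentelemetry import metrics

meter = metrics.get_meter('openlit.sketch')

metrics_dict = {
    'genai_client_usage_tokens': meter.create_histogram(
        name='gen_ai.client.token.usage', unit='{token}',
        description='Tokens used per GenAI operation'),
    'genai_client_operation_duration': meter.create_histogram(
        name='gen_ai.client.operation.duration', unit='s',
        description='GenAI operation duration'),
    'genai_requests': meter.create_counter(
        name='gen_ai.total.requests', unit='1',
        description='Number of GenAI requests'),
    'genai_cost': meter.create_histogram(
        name='gen_ai.usage.cost', unit='USD',
        description='Estimated cost per request'),
}

# record_common_metrics() would then do, for example:
# metrics_dict['genai_requests'].add(1, attributes)
# metrics_dict['genai_cost'].record(cost, attributes)
```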
@@ -99,25 +99,25 @@ def emit_common_events(event_provider, choices, finish_reason, llmresponse, form
     if n > 1:
         for choice in choices:
             choice_event_body = {
-
-
-
-                **({
-
+                'finish_reason': finish_reason,
+                'index': choice.get('index', 0),
+                'message': {
+                    **({'content': choice.get('message', {}).get('content', '')} if capture_message_content else {}),
+                    'role': choice.get('message', {}).get('role', 'assistant')
                 }
             }
             # If tool calls exist, emit an event for each tool call.
             tool_calls = choice.get('message', {}).get('tool_calls')
             if tool_calls:
                 for tool_call in tool_calls:
-                    choice_event_body[
-
-
-
-
+                    choice_event_body['message'].update({
+                        'tool_calls': {
+                            'function': {
+                                'name': tool_call.get('function', {}).get('name', ''),
+                                'arguments': tool_call.get('function', {}).get('arguments', '')
                             },
-
-
+                            'id': tool_call.get('id', ''),
+                            'type': tool_call.get('type', 'function')
                         }
                     })
                     event = otel_event(
@@ -136,11 +136,11 @@ def emit_common_events(event_provider, choices, finish_reason, llmresponse, form
     else:
         # Single choice case.
         choice_event_body = {
-
-
-
-            **({
-
+            'finish_reason': finish_reason,
+            'index': 0,
+            'message': {
+                **({'content': llmresponse} if capture_message_content else {}),
+                'role': 'assistant'
             }
         }
         event = otel_event(
@@ -155,25 +155,25 @@ def emit_common_events(event_provider, choices, finish_reason, llmresponse, form
         msg = formatted_messages.get(role, {})
         if msg.get('content', ''):
             event_body = {
-                **({
-
+                **({'content': msg.get('content', '')} if capture_message_content else {}),
+                'role': msg.get('role', [])
             }
             # For assistant messages, attach tool call details if they exist.
             if role == 'assistant' and choices:
                 tool_calls = choices[0].get('message', {}).get('tool_calls', [])
                 if tool_calls:
-                    event_body[
-
-
-
+                    event_body['tool_calls'] = {
+                        'function': {
+                            'name': tool_calls[0].get('function', {}).get('name', ''),
+                            'arguments': tool_calls[0].get('function', {}).get('arguments', '')
                         },
-
-
+                        'id': tool_calls[0].get('id', ''),
+                        'type': 'function'
                     }
             if role == 'tool' and choices:
                 tool_calls = choices[0].get('message', {}).get('tool_calls', [])
                 if tool_calls:
-                    event_body[
+                    event_body['id'] = tool_calls[0].get('id', '')
             event = otel_event(
                 name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
                 attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
@@ -201,8 +201,8 @@ def process_chunk(self, chunk):
     if content:
         self._llmresponse += content
     if chunked.get('usage'):
-        self._input_tokens = chunked.get('usage').get(
-        self._output_tokens = chunked.get('usage').get(
+        self._input_tokens = chunked.get('usage').get('prompt_tokens')
+        self._output_tokens = chunked.get('usage').get('completion_tokens')
     self._response_id = chunked.get('id')
     self._choices += chunked.get('choices')
     self._finish_reason = chunked.get('choices')[0].get('finish_reason')
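`process_chunk` folds each streamed chunk into the scope: content deltas are concatenated into `_llmresponse`, and the usage block supplies the token counts. A rough, self-contained sketch of that aggregation (the chunk shape and sample data are assumptions for illustration; only the field names read in the hunk above come from the diff):

```python
# Standalone sketch of the streaming aggregation shown above; the sample chunks are made up.
chunks = [
    {'id': 'chat-1', 'choices': [{'index': 0, 'delta': {'content': 'Hel'}, 'finish_reason': None}]},
    {'id': 'chat-1', 'choices': [{'index': 0, 'delta': {'content': 'lo'}, 'finish_reason': 'stop'}],
     'usage': {'prompt_tokens': 12, 'completion_tokens': 2}},
]

llmresponse, input_tokens, output_tokens, finish_reason = '', 0, 0, None
for chunked in chunks:
    content = chunked.get('choices')[0].get('delta', {}).get('content')
    if content:
        llmresponse += content
    if chunked.get('usage'):
        input_tokens = chunked.get('usage').get('prompt_tokens')
        output_tokens = chunked.get('usage').get('completion_tokens')
    finish_reason = chunked.get('choices')[0].get('finish_reason')

print(llmresponse, input_tokens, output_tokens, finish_reason)  # Hello 12 2 stop
```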
@@ -218,19 +218,19 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._tbt = calculate_tbt(scope._timestamps)

     # Extract and format input messages.
-    formatted_messages = extract_and_format_input(scope._kwargs.get(
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
     prompt = concatenate_all_contents(formatted_messages)
-    request_model = scope._kwargs.get(
+    request_model = scope._kwargs.get('model', 'jamba-1.5-mini')

     # Calculate cost based on token usage.
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
     # Prepare tokens dictionary.
     tokens = {
-
-
-
-
-
+        'finish_reason': scope._finish_reason,
+        'response_id': scope._response_id,
+        'input_tokens': scope._input_tokens,
+        'output_tokens': scope._output_tokens,
+        'total_tokens': scope._input_tokens + scope._output_tokens,
     }
     extra_attrs = {
         SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: is_stream,
@@ -239,7 +239,7 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
         SemanticConvetion.GEN_AI_SERVER_TBT: scope._tbt,
         SemanticConvetion.GEN_AI_SERVER_TTFT: scope._ttft,
         SemanticConvetion.GEN_AI_SDK_VERSION: version,
-        SemanticConvetion.GEN_AI_OUTPUT_TYPE:
+        SemanticConvetion.GEN_AI_OUTPUT_TYPE: 'text' if isinstance(scope._llmresponse, str) else 'json'
     }
     # Set span attributes.
     setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
@@ -282,13 +282,14 @@ def process_streaming_chat_response(self, pricing_info, environment, application
 
 def process_chat_response(response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, event_provider, start_time,
-                          span, capture_message_content=False, disable_metrics=False, version=
+                          span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
     """
     Process a synchronous chat response and generate Telemetry.
     """

     # Create a generic scope object to hold telemetry data.
     self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)

     # pylint: disable = no-member
     self._start_time = start_time
@@ -298,19 +299,19 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
     # Concatenate content from all choices.
     self._llmresponse = ''.join(
         (choice.get('message', {}).get('content') or '')
-        for choice in
+        for choice in response_dict.get('choices', [])
     )
-    self._response_role =
-    self._input_tokens =
-    self._output_tokens =
-    self._response_id =
+    self._response_role = response_dict.get('message', {}).get('role', 'assistant')
+    self._input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
+    self._output_tokens = response_dict.get('usage', {}).get('completion_tokens', 0)
+    self._response_id = response_dict.get('id', '')
     self._response_model = request_model
-    self._finish_reason =
+    self._finish_reason = response_dict.get('choices', [{}])[0].get('finish_reason')
     self._timestamps = []
     self._ttft, self._tbt = self._end_time - self._start_time, 0
     self._server_address, self._server_port = server_address, server_port
     self._kwargs = kwargs
-    self._choices =
+    self._choices = response_dict.get('choices')

     common_chat_logic(self, pricing_info, environment, application_name, metrics,
                       event_provider, capture_message_content, disable_metrics, version, is_stream=False)
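The non-streaming path now normalises the SDK response through `response_as_dict` before reading usage, id, and choices. A hedged sketch of what a helper like that generally does (an illustration, not openlit's exact `__helpers.response_as_dict`):

```python
# Illustrative normaliser: turn an SDK response (often a pydantic model) into a plain dict.
def response_as_dict_sketch(response):
    if isinstance(response, dict):
        return response
    if hasattr(response, 'model_dump'):   # pydantic v2 models
        return response.model_dump()
    if hasattr(response, 'to_dict'):      # some SDK response types
        return response.to_dict()
    return response

# Usage mirroring the hunk above:
# response_dict = response_as_dict_sketch(response)
# input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
# finish_reason = response_dict.get('choices', [{}])[0].get('finish_reason')
```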
@@ -319,30 +320,31 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
 
 def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
                               environment, application_name, metrics, event_provider, start_time,
-                              span, capture_message_content=False, disable_metrics=False, version=
+                              span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
     """
     Process a chat response and generate Telemetry.
     """
     end_time = time.time()
+    response_dict = response_as_dict(response)
     try:
         # Format input messages into a single prompt string.
-        messages_input = kwargs.get(
+        messages_input = kwargs.get('messages', '')
         formatted_messages = extract_and_format_input(messages_input)
         prompt = concatenate_all_contents(formatted_messages)
         input_tokens = general_tokens(prompt)

         # Create tokens dict and RAG-specific extra attributes.
-        tokens = {
+        tokens = {'response_id': response_dict.get('id'), 'input_tokens': input_tokens}
         extra_attrs = {
             SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: False,
             SemanticConvetion.GEN_AI_SERVER_TTFT: end_time - start_time,
             SemanticConvetion.GEN_AI_SDK_VERSION: version,
-            SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get(
-            SemanticConvetion.GEN_AI_RAG_STRATEGY: kwargs.get(
-            SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get(
-            SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get(
-            SemanticConvetion.GEN_AI_RAG_FILE_IDS: str(kwargs.get(
-            SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get(
+            SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get('max_segments', -1),
+            SemanticConvetion.GEN_AI_RAG_STRATEGY: kwargs.get('retrieval_strategy', 'segments'),
+            SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get('retrieval_similarity_threshold', -1),
+            SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get('max_neighbors', -1),
+            SemanticConvetion.GEN_AI_RAG_FILE_IDS: str(kwargs.get('file_ids', '')),
+            SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get('path', '')
         }
         # Set common span attributes.
         setup_common_span_attributes(span, request_model, kwargs, tokens,
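The RAG path above derives `input_tokens` by tokenizing the prompt text locally (`general_tokens(prompt)`) rather than reading a usage field. One plausible way to do that kind of local estimate, assuming a tiktoken-style tokenizer (openlit's actual helper may differ):

```python
# Hedged sketch of a local token-count estimate; not openlit's exact general_tokens().
import tiktoken

def count_tokens(text: str, encoding_name: str = 'cl100k_base') -> int:
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(text))

# count_tokens('Summarise the attached contract.')  # -> a small integer
```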
@@ -357,7 +359,7 @@ def process_chat_rag_response(response, request_model, pricing_info, server_port
         )

         output_tokens = 0
-        choices =
+        choices = response_dict.get('choices', [])
         # Instead of adding a separate event per choice, we aggregate all completion content.
         aggregated_completion = []
         for i in range(kwargs.get('n', 1)):
@@ -370,14 +372,14 @@ def process_chat_rag_response(response, request_model, pricing_info, server_port
                     str(choices[i].get('message', {}).get('tool_calls')))
             # Set output type based on actual content type.
             if isinstance(content, str):
-                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, 'text')
             elif content is not None:
-                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, 'json')

         # Concatenate completion responses.
         llmresponse = ''.join(aggregated_completion)
-        tokens[
-        tokens[
+        tokens['output_tokens'] = output_tokens
+        tokens['total_tokens'] = input_tokens + output_tokens

         cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of Anthropic Functions"""

 from typing import Collection
@@ -9,7 +8,7 @@ from wrapt import wrap_function_wrapper
 from openlit.instrumentation.anthropic.anthropic import messages
 from openlit.instrumentation.anthropic.async_anthropic import async_messages

-_instruments = (
+_instruments = ('anthropic >= 0.21.0',)

 class AnthropicInstrumentor(BaseInstrumentor):
     """
@@ -20,29 +19,30 @@ class AnthropicInstrumentor(BaseInstrumentor):
         return _instruments

     def _instrument(self, **kwargs):
-        application_name = kwargs.get(
-        environment = kwargs.get(
-        tracer = kwargs.get(
-
-
-
-
-
+        application_name = kwargs.get('application_name', 'default')
+        environment = kwargs.get('environment', 'default')
+        tracer = kwargs.get('tracer')
+        event_provider = kwargs.get('event_provider')
+        metrics = kwargs.get('metrics_dict')
+        pricing_info = kwargs.get('pricing_info', {})
+        capture_message_content = kwargs.get('capture_message_content', False)
+        disable_metrics = kwargs.get('disable_metrics')
+        version = importlib.metadata.version('anthropic')

         #sync
         wrap_function_wrapper(
-
-
+            'anthropic.resources.messages',
+            'Messages.create',
             messages(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )

         #async
         wrap_function_wrapper(
-
-
+            'anthropic.resources.messages',
+            'AsyncMessages.create',
             async_messages(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )

     def _uninstrument(self, **kwargs):
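Both Anthropic wrappers now receive the new `event_provider` alongside the tracer, pricing info, and metrics. For context, an instrumentor like this is activated through `BaseInstrumentor.instrument(**kwargs)`, which forwards keyword arguments to `_instrument`; a hedged sketch of driving it manually (normally `openlit.init()` does this wiring, and the values below are illustrative assumptions):

```python
# Sketch of activating the instrumentor directly; values are illustrative.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

from openlit.instrumentation.anthropic import AnthropicInstrumentor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

AnthropicInstrumentor().instrument(
    application_name='demo-app',
    environment='dev',
    tracer=trace.get_tracer(__name__),
    event_provider=None,            # assumed optional for this sketch
    pricing_info={},
    capture_message_content=False,
    metrics_dict=None,
    disable_metrics=True,
)
```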