openlit 1.33.20__py3-none-any.whl → 1.33.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +57 -0
- openlit/instrumentation/ollama/__init__.py +47 -34
- openlit/instrumentation/ollama/async_ollama.py +4 -2
- openlit/instrumentation/ollama/ollama.py +4 -2
- openlit/instrumentation/ollama/utils.py +8 -4
- openlit/instrumentation/transformers/__init__.py +11 -7
- openlit/instrumentation/transformers/transformers.py +30 -166
- openlit/instrumentation/transformers/utils.py +183 -0
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/METADATA +1 -1
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/RECORD +12 -11
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0
openlit/__helpers.py
CHANGED
@@ -240,6 +240,11 @@ def extract_and_format_input(messages):
     fixed_roles = ['user', 'assistant', 'system', 'tool', 'developer']
     formatted_messages = {role_key: {'role': '', 'content': ''} for role_key in fixed_roles}
 
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+        return formatted_messages
+
     for message in messages:
         message = response_as_dict(message)
 
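The early return added above lets plain-string input short-circuit before the loop tries to treat it as a list of message dicts. A quick sketch of the resulting behavior, importing the real helper from this release:

from openlit.__helpers import extract_and_format_input

formatted = extract_and_format_input("What is OpenTelemetry?")
# The 'user' slot is filled; the other fixed roles stay empty.
print(formatted["user"])       # {'role': 'user', 'content': 'What is OpenTelemetry?'}
print(formatted["assistant"])  # {'role': '', 'content': ''}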
@@ -276,3 +281,55 @@ def concatenate_all_contents(formatted_messages):
         for message_data in formatted_messages.values()
         if message_data['content']
     )
+
+def format_and_concatenate(messages):
+    """
+    Process a list of messages to extract content, categorize them by role,
+    and concatenate all 'content' fields into a single string with role: content format.
+    """
+
+    formatted_messages = {}
+
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+    elif isinstance(messages, list) and all(isinstance(m, str) for m in messages):
+        # If it's a list of strings, each string is 'user' input
+        user_content = ' '.join(messages)
+        formatted_messages['user'] = {'role': 'user', 'content': user_content}
+    else:
+        for message in messages:
+            message = response_as_dict(message)
+            role = message.get('role', 'unknown')  # Default to 'unknown' if no role is specified
+            content = message.get('content', '')
+
+            # Initialize role in formatted messages if not present
+            if role not in formatted_messages:
+                formatted_messages[role] = {'role': role, 'content': ''}
+
+            # Handle list of dictionaries in content
+            if isinstance(content, list):
+                content_str = []
+                for item in content:
+                    if isinstance(item, dict):
+                        # Collect text or other attributes as needed
+                        text = item.get('text', '')
+                        image_url = item.get('image_url', '')
+                        content_str.append(text)
+                        content_str.append(image_url)
+                content_str = ", ".join(filter(None, content_str))
+            else:
+                content_str = content
+
+            # Concatenate content
+            if formatted_messages[role]['content']:
+                formatted_messages[role]['content'] += ' ' + content_str
+            else:
+                formatted_messages[role]['content'] = content_str
+
+    # Concatenate role and content for all messages
+    return ' '.join(
+        f"{message_data['role']}: {message_data['content']}"
+        for message_data in formatted_messages.values()
+        if message_data['content']
+    )
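A usage sketch of the new helper (message payloads are illustrative): per the code above, roles are joined as "role: content" with single spaces between messages.

from openlit.__helpers import format_and_concatenate

print(format_and_concatenate("Hi there"))
# -> user: Hi there

messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
]
print(format_and_concatenate(messages))
# -> user: What is 2 + 2? assistant: 4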
openlit/instrumentation/ollama/__init__.py
CHANGED
@@ -16,6 +16,29 @@ from openlit.instrumentation.ollama.async_ollama import (
 
 _instruments = ("ollama >= 0.2.0",)
 
+# Dispatch wrapper to route instrumentation to chat or embeddings based on path
+def _dispatch(sync_chat_wrap, sync_emb_wrap):
+    def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return sync_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return sync_emb_wrap(wrapped, instance, args, kwargs)
+        return wrapped(*args, **kwargs)
+    return wrapper
+
+def _dispatch_async(async_chat_wrap, async_emb_wrap):
+    async def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return await async_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return await async_emb_wrap(wrapped, instance, args, kwargs)
+        return await wrapped(*args, **kwargs)
+    return wrapper
+
 class OllamaInstrumentor(BaseInstrumentor):
     """
     An instrumentor for Ollama's client library.
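The dispatchers assume the wrapped `_request` methods receive the endpoint path as their third positional argument; routing is just the last path segment, as this standalone sketch of the same expression shows (paths are illustrative):

def route(path):
    # Same expression as in _dispatch above: take the last path segment.
    return path.rstrip("/").split("/")[-1]

print(route("/api/chat"))        # -> chat        (handled by the chat wrapper)
print(route("/api/embeddings"))  # -> embeddings  (handled by the embeddings wrapper)
print(route("/api/generate"))    # -> generate    (falls through uninstrumented)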
@@ -35,48 +58,38 @@ class OllamaInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("ollama")
 
-        #
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        # Build wrapper factories for chat and embeddings
+        sync_chat_wrap = chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        sync_emb_wrap = embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-            "embeddings",
-            embeddings(version, environment, application_name,
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_chat_wrap = async_chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_emb_wrap = async_embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
 
-        #
+        # Patch underlying request methods to ensure instrumentation regardless of import order
         wrap_function_wrapper(
-            "ollama",
-            "
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "Client._request",
+            _dispatch(sync_chat_wrap, sync_emb_wrap),
         )
-
-        # async embeddings
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "AsyncClient._request",
+            _dispatch_async(async_chat_wrap, async_emb_wrap),
         )
 
     def _uninstrument(self, **kwargs):
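For reference, `wrap_function_wrapper(module, name, wrapper)` is wrapt's standard patch entry point; a minimal self-contained illustration of the pattern used above, with `Greeter.greet` as a made-up stand-in for `ollama._client.Client._request`:

from wrapt import wrap_function_wrapper

class Greeter:
    def greet(self, name):
        return f"hello {name}"

def traced(wrapped, instance, args, kwargs):
    # Telemetry would be emitted around the call here.
    print("span start")
    try:
        return wrapped(*args, **kwargs)
    finally:
        print("span end")

wrap_function_wrapper(__name__, "Greeter.greet", traced)
print(Greeter().greet("ollama"))  # span start / span end / hello ollama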
openlit/instrumentation/ollama/async_ollama.py
CHANGED
@@ -106,7 +106,8 @@ def async_chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def async_embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
openlit/instrumentation/ollama/ollama.py
CHANGED
@@ -106,7 +106,8 @@ def chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
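Because the patch point is now `Client._request` rather than the public `chat()`/`embeddings()` methods, the model name usually arrives inside the `json=` payload; the fallback above (repeated in the utils hunks below) keeps the old kwarg working. A sketch with an illustrative payload:

# Illustrative kwargs as seen by the patched _request call.
kwargs = {"json": {"model": "llama3", "messages": [{"role": "user", "content": "hi"}]}}

json_body = kwargs.get("json", {}) or {}
request_model = json_body.get("model") or kwargs.get("model")
print(request_model)  # -> llama3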
openlit/instrumentation/ollama/utils.py
CHANGED
@@ -57,8 +57,10 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-
-    request_model = scope._kwargs.get("model"
+    json_body = scope._kwargs.get("json", {}) or {}
+    request_model = json_body.get("model") or scope._kwargs.get("model")
+    messages = json_body.get("messages", scope._kwargs.get("messages", ""))
+    formatted_messages = extract_and_format_input(messages)
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
@@ -252,7 +254,9 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     end_time = time.time()
 
     try:
-
+        json_body = kwargs.get("json", {}) or {}
+        prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
+        input_tokens = general_tokens(str(prompt_val))
 
         # Calculate cost of the operation
         cost = get_embed_model_cost(request_model,
@@ -293,7 +297,7 @@ def process_embedding_response(response, request_model, pricing_info, server_por
                 SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
             },
             body={
-                **({"content":
+                **({"content": prompt_val} if capture_message_content else {}),
                 "role": 'user'
             }
         )
openlit/instrumentation/transformers/__init__.py
CHANGED
@@ -1,16 +1,20 @@
-
-
+"""
+Initializer of Auto Instrumentation of HuggingFace Transformer Functions
+"""
+
 from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.transformers.transformers import
+from openlit.instrumentation.transformers.transformers import pipeline_wrapper
 
-_instruments = ("transformers >= 4.
+_instruments = ("transformers >= 4.48.0",)
 
 class TransformersInstrumentor(BaseInstrumentor):
-    """
+    """
+    An instrumentor for HuggingFace Transformer library.
+    """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
@@ -28,10 +32,10 @@ class TransformersInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "transformers",
             "TextGenerationPipeline.__call__",
-
+            pipeline_wrapper(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-    @staticmethod
     def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
         pass
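End to end, this instrumentor is switched on through `openlit.init()`, after which any `TextGenerationPipeline.__call__` goes through `pipeline_wrapper`; a sketch assuming a local OTLP collector, with an illustrative model:

import openlit
from transformers import pipeline

# Patches TextGenerationPipeline.__call__ with pipeline_wrapper.
openlit.init(otlp_endpoint="http://127.0.0.1:4318")

generator = pipeline("text-generation", model="distilgpt2")
print(generator("Hello, world", max_length=20)[0]["generated_text"])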
openlit/instrumentation/transformers/transformers.py
CHANGED
@@ -1,63 +1,31 @@
 """
-Module for monitoring
+Module for monitoring HF Transformers API calls.
 """
 
 import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    general_tokens,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+
+from openlit.instrumentation.transformers.utils import (
+    process_chat_response,
+)
 from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def
-
+def pipeline_wrapper(version, environment, application_name,
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
        """
-
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-          is used for instance methods. For static and classmethods,
-          this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
+        Wraps the GenAI function call.
        """
 
        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
@@ -68,130 +36,26 @@ def text_wrap(version, environment, application_name,
         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            # pylint: disable=protected-access
-            forward_params = instance._forward_params
-
-            try:
-                if args and len(args) > 0:
-                    prompt = args[0]
-                else:
-                    prompt = kwargs.get("args", "")
-
-                input_tokens = general_tokens(prompt[0])
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                    SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                    SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                    forward_params.get("temperature", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                    forward_params.get("top_p", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                    forward_params.get("max_length", -1))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                    request_model)
-
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SERVICE_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                    end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                    version)
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-
-                i = 0
-                output_tokens = 0
-                for completion in response:
-                    if len(response) > 1:
-                        attribute_name = f"gen_ai.content.completion.{i}"
-                    else:
-                        attribute_name = SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT
-                    if capture_message_content:
-                        # pylint: disable=bare-except
-                        try:
-                            llm_response = completion.get('generated_text', '')
-                        except:
-                            llm_response = completion[i].get('generated_text', '')
-
-                        span.add_event(
-                            name=attribute_name,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
-                            },
-                        )
-                        output_tokens += general_tokens(llm_response)
-
-                    i=i+1
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model,
-                                            pricing_info, input_tokens,
-                                            output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                    cost)
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-
-
+            response = process_chat_response(
+                instance = instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+            )
+
+            return response
 
     return wrapper
openlit/instrumentation/transformers/utils.py
ADDED
@@ -0,0 +1,183 @@
+"""
+HF Transformers OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    format_and_concatenate
+)
+from openlit.semcov import SemanticConvention
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, args, kwargs, is_stream):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    forward_params = scope._instance._forward_params
+    request_model = scope._instance.model.config.name_or_path
+
+    input_tokens = general_tokens(scope._prompt)
+    output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+
+    # List of attributes and their config keys
+    attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_length"),
+    ]
+
+    # Set each attribute if the corresponding value exists and is not None
+    for attribute, key in attributes:
+        value = forward_params.get(key)
+        if value is not None:
+            scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    # To be removed one the change to span_attributes (from span events) is complete
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse,)
+
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: scope._prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=request_model,
+        )
+
+        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
+        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
+        metrics["genai_cost"].record(cost, metrics_attributes)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time,
+    span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._instance = instance
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._args = args
+
+    if self._args and len(self._args) > 0:
+        self._prompt = args[0]
+    else:
+        self._prompt = (
+            kwargs.get("text_inputs") or
+            (kwargs.get("image") and kwargs.get("question") and
+             ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            kwargs.get("fallback") or
+            ""
+        )
+    self._prompt = format_and_concatenate(self._prompt)
+
+    self._llmresponse = []
+    if self._kwargs.get("task", "text-generation") == "text-generation":
+        first_entry = response_dict[0]
+
+        if isinstance(first_entry, dict) and isinstance(first_entry.get("generated_text"), list):
+            last_element = first_entry.get("generated_text")[-1]
+            self._llmresponse = last_element.get("content", last_element)
+        else:
+            def extract_text(entry):
+                if isinstance(entry, dict):
+                    return entry.get("generated_text")
+                if isinstance(entry, list):
+                    return " ".join(
+                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
+                    )
+                return ""
+
+            # Process and collect all generated texts
+            self._llmresponse = [
+                extract_text(entry) for entry in response_dict
+            ]
+
+            # Join all non-empty responses into a single string
+            self._llmresponse = " ".join(filter(None, self._llmresponse))
+
+    elif self._kwargs.get("task", "text-generation") == "automatic-speech-recognition":
+        self._llmresponse = response_dict.get("text", "")
+
+    elif self._kwargs.get("task", "text-generation") == "image-classification":
+        self._llmresponse = str(response_dict[0])
+
+    elif self._kwargs.get("task", "text-generation") == "visual-question-answering":
+        self._llmresponse = str(response_dict[0]).get("answer")
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+
+    return response
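The `type("GenericScope", (), {})()` line above is a small idiom for an anonymous attribute bag, so `common_chat_logic` can read everything off one object; the same trick in isolation:

# An empty class built at runtime; instances accept arbitrary attributes.
scope = type("GenericScope", (), {})()
scope._prompt = "user: hi"
scope._timestamps = []
print(scope._prompt)  # -> user: hi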
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.20
+Version: 1.33.21
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-openlit/__helpers.py,sha256=
+openlit/__helpers.py,sha256=sg0EGJGC_OlZePR84cLK77l_lZRBPJwdjWjq_RuaYS0,11444
 openlit/__init__.py,sha256=1OzJQmiZrTlT3Aze_l8GOf1GXH7dAHztJn0Uzd1LAPc,23924
 openlit/evals/__init__.py,sha256=nJe99nuLo1b5rf7pt9U9BCdSDedzbVi2Fj96cgl7msM,380
 openlit/evals/all.py,sha256=oWrue3PotE-rB5WePG3MRYSA-ro6WivkclSHjYlAqGs,7154
@@ -90,10 +90,10 @@ openlit/instrumentation/mistral/mistral.py,sha256=_2qM8v4RCL-S0Mm1vbW77m5vUm8aPD
 openlit/instrumentation/multion/__init__.py,sha256=Wr3lcDyG_YbOLkCUzBFhraAedF6E113tce8eSWlcz10,3149
 openlit/instrumentation/multion/async_multion.py,sha256=XutZnayCJOZ_NA9bvE1NUoej41KOGR7FRn2tpoGKMEU,6092
 openlit/instrumentation/multion/multion.py,sha256=-WqRAcu5qiEMY9XDmlJTQHuQiWfdwms9JDn127QCNb8,6074
-openlit/instrumentation/ollama/__init__.py,sha256=
-openlit/instrumentation/ollama/async_ollama.py,sha256=
-openlit/instrumentation/ollama/ollama.py,sha256=
-openlit/instrumentation/ollama/utils.py,sha256=
+openlit/instrumentation/ollama/__init__.py,sha256=v7VhVxHw_c6QtMznxe6a7z6QrYHZsH_NSXfiXao83Ns,3707
+openlit/instrumentation/ollama/async_ollama.py,sha256=zJPDr2ROh1nvFGoxgdTbe04Zr1KhmgJUYFPeuRLQGLk,6667
+openlit/instrumentation/ollama/ollama.py,sha256=MNUltiP9XVT4azmO_-E2vjhFaoHQyJ0Z6c-HnB0_jCE,6563
+openlit/instrumentation/ollama/utils.py,sha256=41uvYaYkGwWfRyHYqhOOwrFy6cMzBlG1urJYUat9Q24,14819
 openlit/instrumentation/openai/__init__.py,sha256=FiL4OHDhs957spa3k9sNC_VLt0-txtwbnujQwnevQ5I,5564
 openlit/instrumentation/openai/async_openai.py,sha256=CiyBpn8Evnd_gh3Cm1WbfkN7eUpDmFh4KMvxka-B4og,71764
 openlit/instrumentation/openai/openai.py,sha256=r8ZNVoAFTPuCUf18a5v1Lp48LXwCeT9paEB-3USSiiU,71507
@@ -114,8 +114,9 @@ openlit/instrumentation/reka/reka.py,sha256=L6gH7j94tcYlc_FCkQP6SrxH7yBr4uSgtN8B
 openlit/instrumentation/together/__init__.py,sha256=MLLL2t8FyrytpfMueqcwekiqTKn-JN40HBD_LbZS_jQ,2661
 openlit/instrumentation/together/async_together.py,sha256=ToSeYqE0mCgSsCNSO0pqoyS7WU6YarHxa3I7ZrzH-d8,30634
 openlit/instrumentation/together/together.py,sha256=7Da9fjHaZk_ObXMnSZA79-RktgwHRVYevsZAA-OpcXY,30530
-openlit/instrumentation/transformers/__init__.py,sha256=
-openlit/instrumentation/transformers/transformers.py,sha256=
+openlit/instrumentation/transformers/__init__.py,sha256=9Ubss5nlumcypxprxff8Fv3sst7II27SsvCzqkBX9Kg,1457
+openlit/instrumentation/transformers/transformers.py,sha256=zCAwfXu77HPlhy7vuU-nvNvsmmU4fs4aaFMCBG5AOLA,1993
+openlit/instrumentation/transformers/utils.py,sha256=UP-aB_hP4SVLQ1A0I-PrNXC3mPJkVZnS7UOkQGe6OXc,8087
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
@@ -125,7 +126,7 @@ openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=urpadRfC_BjLCPxWgk5J6NGStECeJA55LFkyTD43Jd4,6837
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=JF9MwflazC8jHOiQdPYshfv1q5Z9bhB4OGa0N_fr9d4,13305
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
+openlit-1.33.21.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.33.21.dist-info/METADATA,sha256=RP9M2CRa9gXnTVN6I_YjyjoGS8C3zUI04w4lbI6q3yE,23470
+openlit-1.33.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.33.21.dist-info/RECORD,,
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/LICENSE
File without changes