openlit 1.34.10__py3-none-any.whl → 1.34.12__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- openlit/__helpers.py +26 -3
- openlit/instrumentation/ai21/__init__.py +10 -8
- openlit/instrumentation/ai21/ai21.py +15 -27
- openlit/instrumentation/ai21/async_ai21.py +15 -27
- openlit/instrumentation/ai21/utils.py +229 -212
- openlit/instrumentation/groq/__init__.py +7 -9
- openlit/instrumentation/groq/async_groq.py +50 -374
- openlit/instrumentation/groq/groq.py +49 -373
- openlit/instrumentation/groq/utils.py +199 -0
- openlit/instrumentation/ollama/async_ollama.py +3 -2
- openlit/instrumentation/ollama/ollama.py +3 -2
- openlit/instrumentation/ollama/utils.py +10 -6
- openlit/instrumentation/openai/__init__.py +3 -3
- openlit/instrumentation/premai/utils.py +3 -73
- openlit/instrumentation/reka/utils.py +3 -51
- {openlit-1.34.10.dist-info → openlit-1.34.12.dist-info}/METADATA +1 -1
- {openlit-1.34.10.dist-info → openlit-1.34.12.dist-info}/RECORD +19 -18
- {openlit-1.34.10.dist-info → openlit-1.34.12.dist-info}/LICENSE +0 -0
- {openlit-1.34.10.dist-info → openlit-1.34.12.dist-info}/WHEEL +0 -0
openlit/__helpers.py
CHANGED
@@ -346,12 +346,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

 def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
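This hunk rewrites three set_attribute calls in common_span_attributes so the value arguments (response_model, tbt, ttft) are passed inline. For orientation, a minimal sketch of the plain OpenTelemetry pattern the helper builds on, assuming opentelemetry-api is installed; the string keys are illustrative stand-ins for the SemanticConvention constants:

from opentelemetry import trace

tracer = trace.get_tracer("demo")

with tracer.start_as_current_span("chat jamba-1.5-mini") as span:
    # Illustrative keys; openlit resolves these via SemanticConvention constants.
    span.set_attribute("gen_ai.response.model", "jamba-1.5-mini")
    span.set_attribute("gen_ai.server.tbt", 0.021)   # time between tokens, seconds
    span.set_attribute("gen_ai.server.ttft", 0.350)  # time to first token, seconds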
@@ -379,3 +379,26 @@ def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_a
     metrics["genai_completion_tokens"].add(output_tokens, attributes)
     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
     metrics["genai_cost"].record(cost, attributes)
+
+def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+    request_model, response_model, environment, application_name, start_time, end_time,
+    input_tokens, cost):
+    """
+    Record embedding-specific metrics for the operation.
+    """
+
+    attributes = create_metrics_attributes(
+        operation=gen_ai_operation,
+        system=gen_ai_system,
+        server_address=server_address,
+        server_port=server_port,
+        request_model=request_model,
+        response_model=response_model,
+        service_name=application_name,
+        deployment_environment=environment,
+    )
+    metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
+    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+    metrics["genai_requests"].add(1, attributes)
+    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+    metrics["genai_cost"].record(cost, attributes)
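The new helper mirrors record_completion_metrics but records only input-token usage, since an embedding call produces no completion tokens. A hedged usage sketch, assuming openlit >= 1.34.12 is installed; the stub instruments and the model/application names are invented for illustration:

import time

from openlit.__helpers import record_embedding_metrics

class _StubInstrument:
    """Stand-in for an OpenTelemetry counter/histogram, just to make this runnable."""
    def __init__(self, name): self.name = name
    def add(self, value, attributes): print(f"{self.name} += {value}")
    def record(self, value, attributes): print(f"{self.name} -> {value}")

metrics = {name: _StubInstrument(name) for name in (
    "genai_client_usage_tokens", "genai_client_operation_duration",
    "genai_requests", "genai_prompt_tokens", "genai_cost",
)}

start = time.time()
# ... the embedding request itself would run here ...
record_embedding_metrics(
    metrics, "embedding", "ai21", "api.ai21.com", 443,
    "jamba-embed", "jamba-embed",     # request/response model (hypothetical names)
    "production", "demo-app",         # environment, application_name
    start, time.time(), 128, 0.0004,  # start/end time, input_tokens, cost
)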
openlit/instrumentation/ai21/__init__.py
CHANGED
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of AI21 Functions"""

 from typing import Collection
@@ -33,34 +32,37 @@ class AI21Instrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("ai21")

-        #
+        # Chat completions
         wrap_function_wrapper(
             "ai21.clients.studio.resources.chat.chat_completions",
             "ChatCompletions.create",
             chat(version, environment, application_name,
-
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
+
+        # RAG completions
         wrap_function_wrapper(
             "ai21.clients.studio.resources.studio_conversational_rag",
             "StudioConversationalRag.create",
             chat_rag(version, environment, application_name,
-
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )

-        #Async
+        # Async chat completions
         wrap_function_wrapper(
             "ai21.clients.studio.resources.chat.async_chat_completions",
             "AsyncChatCompletions.create",
             async_chat(version, environment, application_name,
-
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
+
+        # Async RAG completions
         wrap_function_wrapper(
             "ai21.clients.studio.resources.studio_conversational_rag",
             "AsyncStudioConversationalRag.create",
             async_chat_rag(version, environment, application_name,
-
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )

     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
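Each wrap_function_wrapper call above patches one AI21 client method in place via wrapt, and the chat()/chat_rag() factories return wrappers with the standard wrapt signature (wrapped, instance, args, kwargs). A minimal, self-contained sketch of the same mechanism against a harmless stand-in target (json.dumps rather than the AI21 client), not OpenLIT's actual wrapper:

import json
import time

from wrapt import wrap_function_wrapper

def timing_wrapper(wrapped, instance, args, kwargs):
    # wrapped is the original callable; instance is its bound object (or None).
    start = time.time()
    try:
        return wrapped(*args, **kwargs)
    finally:
        print(f"json.dumps took {time.time() - start:.6f}s")

wrap_function_wrapper("json", "dumps", timing_wrapper)  # same call shape as above
json.dumps({"hello": "world"})  # prints the timing line, then returns the JSON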
openlit/instrumentation/ai21/ai21.py
CHANGED
@@ -1,8 +1,7 @@
 """
-Module for monitoring AI21 calls.
+Module for monitoring AI21 API calls.
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -15,14 +14,10 @@ from openlit.instrumentation.ai21.utils import (
     process_streaming_chat_response,
     process_chat_rag_response
 )
-
 from openlit.semcov import SemanticConvention

-
-
-
-def chat(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def chat(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -45,14 +40,12 @@ def chat(version, environment, application_name,
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
-            # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
             self._finish_reason = ""
+            self._tools = None
             self._input_tokens = 0
             self._output_tokens = 0
-            self._choices = []
-
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()
@@ -83,9 +76,8 @@ def chat(version, environment, application_name,
                 process_chunk(self, chunk)
                 return chunk
             except StopIteration:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    with tracer.start_as_current_span(self._span_name, kind=
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
                         process_streaming_chat_response(
                             self,
                             pricing_info=pricing_info,
@@ -96,34 +88,31 @@ def chat(version, environment, application_name,
                             disable_metrics=disable_metrics,
                             version=version
                         )
+
                 except Exception as e:
                     handle_exception(self._span, e)
-
+
                 raise

     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the GenAI function call.
         """
-
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
-
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
         request_model = kwargs.get("model", "jamba-1.5-mini")

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        # pylint: disable=no-else-return
         if streaming:
-            # Special handling for streaming response
+            # Special handling for streaming response
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
             return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
-
-        # Handling for non-streaming responses
         else:
-
+            # Handling for non-streaming responses
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = wrapped(*args, **kwargs)
@@ -152,23 +141,22 @@ def chat(version, environment, application_name,

     return wrapper

-def chat_rag(version, environment, application_name,
-
+def chat_rag(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for GenAI function call
+    Generates a telemetry wrapper for GenAI RAG function call
     """

     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the GenAI function call.
+        Wraps the GenAI RAG function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
         request_model = kwargs.get("model", "jamba-1.5-mini")

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        with tracer.start_as_current_span(span_name, kind=
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
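TracedSyncStream implements a common proxy-iterator pattern: delegate __next__ to the wrapped stream, feed each chunk through process_chunk, and only when the stream raises StopIteration re-enter the span to record the aggregated telemetry. A stripped-down sketch of that pattern, with invented callback names standing in for the real span logic:

class StreamProxy:
    """Minimal telemetry-collecting iterator proxy (a simplified sketch)."""

    def __init__(self, wrapped, on_chunk, on_done):
        self.__wrapped__ = wrapped
        self._on_chunk = on_chunk  # per-chunk hook, like process_chunk
        self._on_done = on_done    # runs once the stream is exhausted

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = self.__wrapped__.__next__()
        except StopIteration:
            self._on_done()  # the real class records span attributes/metrics here
            raise            # re-raise so callers see a normal end of stream
        self._on_chunk(chunk)
        return chunk

chunks = []
proxy = StreamProxy(iter(["Hel", "lo"]), chunks.append,
                    lambda: print("done:", "".join(chunks)))
assert list(proxy) == ["Hel", "lo"]  # prints "done: Hello" on exhaustion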
openlit/instrumentation/ai21/async_ai21.py
CHANGED
@@ -1,8 +1,7 @@
 """
-Module for monitoring AI21 calls.
+Module for monitoring AI21 API calls (async version).
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -15,21 +14,17 @@ from openlit.instrumentation.ai21.utils import (
     process_streaming_chat_response,
     process_chat_rag_response
 )
-
 from openlit.semcov import SemanticConvention

-
-
-
-def async_chat(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def async_chat(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """

     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect telemetry.
+        Wrapper for async streaming responses to collect telemetry.
         """

         def __init__(
@@ -45,14 +40,12 @@ def async_chat(version, environment, application_name,
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
-            # Placeholder for aggregating streaming response
            self._llmresponse = ""
             self._response_id = ""
             self._finish_reason = ""
+            self._tools = None
             self._input_tokens = 0
             self._output_tokens = 0
-            self._choices = []
-
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()
@@ -83,9 +76,8 @@ def async_chat(version, environment, application_name,
                 process_chunk(self, chunk)
                 return chunk
             except StopAsyncIteration:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    with tracer.start_as_current_span(self._span_name, kind=
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
                         process_streaming_chat_response(
                             self,
                             pricing_info=pricing_info,
@@ -96,6 +88,7 @@ def async_chat(version, environment, application_name,
                             disable_metrics=disable_metrics,
                             version=version
                         )
+
                 except Exception as e:
                     handle_exception(self._span, e)
@@ -105,25 +98,21 @@ def async_chat(version, environment, application_name,
         """
         Wraps the GenAI function call.
         """
-
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
-
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
         request_model = kwargs.get("model", "jamba-1.5-mini")

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        # pylint: disable=no-else-return
         if streaming:
-            # Special handling for streaming response
+            # Special handling for streaming response
             awaited_wrapped = await wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
             return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
-
-        # Handling for non-streaming responses
         else:
-
+            # Handling for non-streaming responses
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
@@ -152,23 +141,22 @@ def async_chat(version, environment, application_name,

     return wrapper

-def async_chat_rag(version, environment, application_name,
-
+def async_chat_rag(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for GenAI function call
+    Generates a telemetry wrapper for GenAI RAG function call
     """

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the GenAI function call.
+        Wraps the GenAI RAG function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
         request_model = kwargs.get("model", "jamba-1.5-mini")

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        with tracer.start_as_current_span(span_name, kind=
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = await wrapped(*args, **kwargs)
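async_ai21.py mirrors the sync module almost line for line; the structural difference is the async iterator protocol (__anext__ and StopAsyncIteration instead of __next__ and StopIteration) and awaiting the wrapped call. A compact sketch of just that boundary, again with invented names:

import asyncio

class AsyncStreamProxy:
    """Async counterpart of the sync proxy sketch above (simplified)."""

    def __init__(self, wrapped, on_chunk, on_done):
        self.__wrapped__ = wrapped
        self._on_chunk = on_chunk
        self._on_done = on_done

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            chunk = await self.__wrapped__.__anext__()
        except StopAsyncIteration:
            self._on_done()  # span finalization happens here in the real class
            raise
        self._on_chunk(chunk)
        return chunk

async def main():
    async def fake_stream():
        for part in ("Hel", "lo"):
            yield part

    parts = []
    async for _ in AsyncStreamProxy(fake_stream(), parts.append, lambda: print("done")):
        pass
    print("".join(parts))  # -> Hello

asyncio.run(main())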