openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +88 -0
- openlit/__init__.py +4 -3
- openlit/instrumentation/ag2/ag2.py +5 -5
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +9 -9
- openlit/instrumentation/astra/async_astra.py +9 -9
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +9 -9
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
- openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/async_julep.py +5 -5
- openlit/instrumentation/julep/julep.py +5 -5
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +105 -492
- openlit/instrumentation/letta/letta.py +11 -9
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +318 -247
- openlit/instrumentation/litellm/litellm.py +314 -243
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/milvus.py +9 -9
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +423 -250
- openlit/instrumentation/mistral/mistral.py +420 -246
- openlit/instrumentation/multion/async_multion.py +6 -4
- openlit/instrumentation/multion/multion.py +6 -4
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +433 -410
- openlit/instrumentation/openai/openai.py +414 -394
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/pinecone.py +9 -9
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +9 -9
- openlit/instrumentation/qdrant/qdrant.py +9 -9
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +76 -45
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +330 -987
- openlit/instrumentation/vertexai/vertexai.py +330 -987
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +66 -36
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
|
|
5
5
|
|
6
6
|
import logging
|
7
7
|
from opentelemetry.trace import SpanKind, Status, StatusCode
|
8
|
-
from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
|
8
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
9
9
|
from openlit.__helpers import handle_exception
|
10
10
|
from openlit.semcov import SemanticConvetion
|
11
11
|
|
@@ -73,12 +73,12 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
|
|
73
73
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
74
74
|
span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
|
75
75
|
gen_ai_endpoint)
|
76
|
-
span.set_attribute(
|
76
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
77
77
|
environment)
|
78
|
-
span.set_attribute(
|
78
|
+
span.set_attribute(SERVICE_NAME,
|
79
79
|
application_name)
|
80
|
-
span.set_attribute(SemanticConvetion.
|
81
|
-
SemanticConvetion.
|
80
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
81
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
|
82
82
|
span.set_attribute(SemanticConvetion.DB_SYSTEM,
|
83
83
|
SemanticConvetion.DB_SYSTEM_QDRANT)
|
84
84
|
|
@@ -241,14 +241,14 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
|
|
241
241
|
attributes = {
|
242
242
|
TELEMETRY_SDK_NAME:
|
243
243
|
"openlit",
|
244
|
-
|
244
|
+
SERVICE_NAME:
|
245
245
|
application_name,
|
246
246
|
SemanticConvetion.DB_SYSTEM:
|
247
247
|
SemanticConvetion.DB_SYSTEM_QDRANT,
|
248
|
-
|
248
|
+
DEPLOYMENT_ENVIRONMENT:
|
249
249
|
environment,
|
250
|
-
SemanticConvetion.
|
251
|
-
SemanticConvetion.
|
250
|
+
SemanticConvetion.GEN_AI_OPERATION:
|
251
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
|
252
252
|
SemanticConvetion.DB_OPERATION:
|
253
253
|
db_operation
|
254
254
|
}
|
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
|
|
5
5
|
|
6
6
|
import logging
|
7
7
|
from opentelemetry.trace import SpanKind, Status, StatusCode
|
8
|
-
from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
|
8
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
9
9
|
from openlit.__helpers import handle_exception
|
10
10
|
from openlit.semcov import SemanticConvetion
|
11
11
|
|
@@ -73,12 +73,12 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
|
|
73
73
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
74
74
|
span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
|
75
75
|
gen_ai_endpoint)
|
76
|
-
span.set_attribute(
|
76
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
77
77
|
environment)
|
78
|
-
span.set_attribute(
|
78
|
+
span.set_attribute(SERVICE_NAME,
|
79
79
|
application_name)
|
80
|
-
span.set_attribute(SemanticConvetion.
|
81
|
-
SemanticConvetion.
|
80
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
81
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
|
82
82
|
span.set_attribute(SemanticConvetion.DB_SYSTEM,
|
83
83
|
SemanticConvetion.DB_SYSTEM_QDRANT)
|
84
84
|
|
@@ -248,14 +248,14 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
|
|
248
248
|
attributes = {
|
249
249
|
TELEMETRY_SDK_NAME:
|
250
250
|
"openlit",
|
251
|
-
|
251
|
+
SERVICE_NAME:
|
252
252
|
application_name,
|
253
253
|
SemanticConvetion.DB_SYSTEM:
|
254
254
|
SemanticConvetion.DB_SYSTEM_QDRANT,
|
255
|
-
|
255
|
+
DEPLOYMENT_ENVIRONMENT:
|
256
256
|
environment,
|
257
|
-
SemanticConvetion.
|
258
|
-
SemanticConvetion.
|
257
|
+
SemanticConvetion.GEN_AI_OPERATION:
|
258
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
|
259
259
|
SemanticConvetion.DB_OPERATION:
|
260
260
|
db_operation
|
261
261
|
}
|
@@ -37,7 +37,7 @@ class RekaInstrumentor(BaseInstrumentor):
|
|
37
37
|
wrap_function_wrapper(
|
38
38
|
"reka.chat.client",
|
39
39
|
"ChatClient.create",
|
40
|
-
chat(
|
40
|
+
chat(version, environment, application_name,
|
41
41
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
42
42
|
)
|
43
43
|
|
@@ -45,7 +45,7 @@ class RekaInstrumentor(BaseInstrumentor):
|
|
45
45
|
wrap_function_wrapper(
|
46
46
|
"reka.chat.client",
|
47
47
|
"AsyncChatClient.create",
|
48
|
-
async_chat(
|
48
|
+
async_chat(version, environment, application_name,
|
49
49
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
50
50
|
)
|
51
51
|
|
@@ -1,27 +1,28 @@
|
|
1
|
-
# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
|
2
1
|
"""
|
3
2
|
Module for monitoring Reka API calls.
|
4
3
|
"""
|
5
4
|
|
6
5
|
import logging
|
6
|
+
import time
|
7
7
|
from opentelemetry.trace import SpanKind, Status, StatusCode
|
8
|
-
from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
|
8
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
9
9
|
from openlit.__helpers import (
|
10
|
-
handle_exception,
|
11
10
|
get_chat_model_cost,
|
11
|
+
handle_exception,
|
12
|
+
create_metrics_attributes,
|
13
|
+
set_server_address_and_port
|
12
14
|
)
|
13
15
|
from openlit.semcov import SemanticConvetion
|
14
16
|
|
15
17
|
# Initialize logger for logging potential issues and operations
|
16
18
|
logger = logging.getLogger(__name__)
|
17
19
|
|
18
|
-
def async_chat(
|
20
|
+
def async_chat(version, environment, application_name,
|
19
21
|
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
20
22
|
"""
|
21
23
|
Generates a telemetry wrapper for chat to collect metrics.
|
22
24
|
|
23
25
|
Args:
|
24
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
25
26
|
version: Version of the monitoring package.
|
26
27
|
environment: Deployment environment (e.g., production, staging).
|
27
28
|
application_name: Name of the application using the Reka API.
|
@@ -50,8 +51,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
|
|
50
51
|
The response from the original 'chat' method.
|
51
52
|
"""
|
52
53
|
|
53
|
-
|
54
|
+
server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
|
55
|
+
request_model = kwargs.get("model", "reka-core-20240501")
|
56
|
+
|
57
|
+
span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
58
|
+
|
59
|
+
with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
|
60
|
+
start_time = time.time()
|
54
61
|
response = await wrapped(*args, **kwargs)
|
62
|
+
end_time = time.time()
|
55
63
|
|
56
64
|
try:
|
57
65
|
# Format 'messages' into a single string
|
@@ -63,7 +71,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
|
|
63
71
|
|
64
72
|
if isinstance(content, list):
|
65
73
|
content_str = ", ".join(
|
66
|
-
# pylint: disable=line-too-long
|
67
74
|
f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
|
68
75
|
if "type" in item else f'text: {item["text"]}'
|
69
76
|
for item in content
|
@@ -73,22 +80,68 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
|
|
73
80
|
formatted_messages.append(f"{role}: {content}")
|
74
81
|
prompt = "\n".join(formatted_messages)
|
75
82
|
|
76
|
-
|
83
|
+
input_tokens = response.usage.input_tokens
|
84
|
+
output_tokens = response.usage.output_tokens
|
85
|
+
|
86
|
+
# Calculate cost of the operation
|
87
|
+
cost = get_chat_model_cost(request_model,
|
88
|
+
pricing_info, input_tokens, output_tokens)
|
89
|
+
|
90
|
+
# Set Span attributes (OTel Semconv)
|
77
91
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
92
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
93
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
|
78
94
|
span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
|
79
95
|
SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
|
80
|
-
span.set_attribute(SemanticConvetion.
|
81
|
-
|
82
|
-
span.set_attribute(SemanticConvetion.
|
83
|
-
|
84
|
-
span.set_attribute(SemanticConvetion.
|
96
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
97
|
+
request_model)
|
98
|
+
span.set_attribute(SemanticConvetion.SERVER_PORT,
|
99
|
+
server_port)
|
100
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
|
101
|
+
kwargs.get("seed", ""))
|
102
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
|
103
|
+
kwargs.get("max_tokens", -1))
|
104
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
|
105
|
+
kwargs.get("stop", []))
|
106
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
107
|
+
kwargs.get("presence_penalty", 0.0))
|
108
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
|
109
|
+
kwargs.get("temperature", 0.4))
|
110
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
|
111
|
+
kwargs.get("top_k", 1.0))
|
112
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
|
113
|
+
kwargs.get("top_p", 1.0))
|
114
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
|
115
|
+
[response.responses[0].finish_reason])
|
116
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
|
117
|
+
response.id)
|
118
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
|
119
|
+
response.model)
|
120
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
|
121
|
+
input_tokens)
|
122
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
|
123
|
+
output_tokens)
|
124
|
+
span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
|
125
|
+
server_address)
|
126
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
|
127
|
+
'text')
|
128
|
+
|
129
|
+
# Set Span attributes (Extra)
|
130
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
85
131
|
environment)
|
86
|
-
span.set_attribute(
|
132
|
+
span.set_attribute(SERVICE_NAME,
|
87
133
|
application_name)
|
88
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
89
|
-
kwargs.get("model", "reka-core"))
|
90
134
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
|
91
135
|
False)
|
136
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
|
137
|
+
input_tokens + output_tokens)
|
138
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
|
139
|
+
cost)
|
140
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
|
141
|
+
end_time - start_time)
|
142
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
|
143
|
+
version)
|
144
|
+
|
92
145
|
if trace_content:
|
93
146
|
span.add_event(
|
94
147
|
name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
|
@@ -99,51 +152,36 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
|
|
99
152
|
span.add_event(
|
100
153
|
name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
|
101
154
|
attributes={
|
102
|
-
# pylint: disable=line-too-long
|
103
155
|
SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
|
104
156
|
},
|
105
157
|
)
|
106
158
|
|
107
|
-
prompt_tokens = response.usage.input_tokens
|
108
|
-
completion_tokens = response.usage.output_tokens
|
109
|
-
total_tokens = prompt_tokens + completion_tokens
|
110
|
-
# Calculate cost of the operation
|
111
|
-
cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
|
112
|
-
pricing_info, prompt_tokens, completion_tokens)
|
113
|
-
|
114
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
|
115
|
-
prompt_tokens)
|
116
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
|
117
|
-
completion_tokens)
|
118
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
|
119
|
-
total_tokens)
|
120
|
-
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
|
121
|
-
[response.responses[0].finish_reason])
|
122
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
|
123
|
-
cost)
|
124
|
-
|
125
159
|
span.set_status(Status(StatusCode.OK))
|
126
160
|
|
127
161
|
if disable_metrics is False:
|
128
|
-
attributes =
|
129
|
-
|
130
|
-
|
131
|
-
SemanticConvetion.
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
SemanticConvetion.GEN_AI_TYPE_CHAT,
|
139
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
140
|
-
kwargs.get("model", "reka-core")
|
141
|
-
}
|
162
|
+
attributes = create_metrics_attributes(
|
163
|
+
service_name=application_name,
|
164
|
+
deployment_environment=environment,
|
165
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
|
166
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
|
167
|
+
request_model=request_model,
|
168
|
+
server_address=server_address,
|
169
|
+
server_port=server_port,
|
170
|
+
response_model=response.model,
|
171
|
+
)
|
142
172
|
|
173
|
+
metrics["genai_client_usage_tokens"].record(
|
174
|
+
input_tokens + output_tokens, attributes
|
175
|
+
)
|
176
|
+
metrics["genai_client_operation_duration"].record(
|
177
|
+
end_time - start_time, attributes
|
178
|
+
)
|
179
|
+
metrics["genai_server_ttft"].record(
|
180
|
+
end_time - start_time, attributes
|
181
|
+
)
|
143
182
|
metrics["genai_requests"].add(1, attributes)
|
144
|
-
metrics["
|
145
|
-
metrics["
|
146
|
-
metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
|
183
|
+
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
184
|
+
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
147
185
|
metrics["genai_cost"].record(cost, attributes)
|
148
186
|
|
149
187
|
# Return original response
|
@@ -1,27 +1,28 @@
|
|
1
|
-
# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
|
2
1
|
"""
|
3
2
|
Module for monitoring Reka API calls.
|
4
3
|
"""
|
5
4
|
|
6
5
|
import logging
|
6
|
+
import time
|
7
7
|
from opentelemetry.trace import SpanKind, Status, StatusCode
|
8
|
-
from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
|
8
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
9
9
|
from openlit.__helpers import (
|
10
|
-
handle_exception,
|
11
10
|
get_chat_model_cost,
|
11
|
+
handle_exception,
|
12
|
+
create_metrics_attributes,
|
13
|
+
set_server_address_and_port
|
12
14
|
)
|
13
15
|
from openlit.semcov import SemanticConvetion
|
14
16
|
|
15
17
|
# Initialize logger for logging potential issues and operations
|
16
18
|
logger = logging.getLogger(__name__)
|
17
19
|
|
18
|
-
def chat(
|
20
|
+
def chat(version, environment, application_name,
|
19
21
|
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
20
22
|
"""
|
21
23
|
Generates a telemetry wrapper for chat to collect metrics.
|
22
24
|
|
23
25
|
Args:
|
24
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
25
26
|
version: Version of the monitoring package.
|
26
27
|
environment: Deployment environment (e.g., production, staging).
|
27
28
|
application_name: Name of the application using the Reka API.
|
@@ -50,8 +51,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
|
|
50
51
|
The response from the original 'chat' method.
|
51
52
|
"""
|
52
53
|
|
53
|
-
|
54
|
+
server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
|
55
|
+
request_model = kwargs.get("model", "reka-core-20240501")
|
56
|
+
|
57
|
+
span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
58
|
+
|
59
|
+
with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
|
60
|
+
start_time = time.time()
|
54
61
|
response = wrapped(*args, **kwargs)
|
62
|
+
end_time = time.time()
|
55
63
|
|
56
64
|
try:
|
57
65
|
# Format 'messages' into a single string
|
@@ -63,7 +71,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
|
|
63
71
|
|
64
72
|
if isinstance(content, list):
|
65
73
|
content_str = ", ".join(
|
66
|
-
# pylint: disable=line-too-long
|
67
74
|
f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
|
68
75
|
if "type" in item else f'text: {item["text"]}'
|
69
76
|
for item in content
|
@@ -73,22 +80,68 @@ def chat(gen_ai_endpoint, version, environment, application_name,
|
|
73
80
|
formatted_messages.append(f"{role}: {content}")
|
74
81
|
prompt = "\n".join(formatted_messages)
|
75
82
|
|
76
|
-
|
83
|
+
input_tokens = response.usage.input_tokens
|
84
|
+
output_tokens = response.usage.output_tokens
|
85
|
+
|
86
|
+
# Calculate cost of the operation
|
87
|
+
cost = get_chat_model_cost(request_model,
|
88
|
+
pricing_info, input_tokens, output_tokens)
|
89
|
+
|
90
|
+
# Set Span attributes (OTel Semconv)
|
77
91
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
92
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
93
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
|
78
94
|
span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
|
79
95
|
SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
|
80
|
-
span.set_attribute(SemanticConvetion.
|
81
|
-
|
82
|
-
span.set_attribute(SemanticConvetion.
|
83
|
-
|
84
|
-
span.set_attribute(SemanticConvetion.
|
96
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
97
|
+
request_model)
|
98
|
+
span.set_attribute(SemanticConvetion.SERVER_PORT,
|
99
|
+
server_port)
|
100
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
|
101
|
+
kwargs.get("seed", ""))
|
102
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
|
103
|
+
kwargs.get("max_tokens", -1))
|
104
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
|
105
|
+
kwargs.get("stop", []))
|
106
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
107
|
+
kwargs.get("presence_penalty", 0.0))
|
108
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
|
109
|
+
kwargs.get("temperature", 0.4))
|
110
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
|
111
|
+
kwargs.get("top_k", 1.0))
|
112
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
|
113
|
+
kwargs.get("top_p", 1.0))
|
114
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
|
115
|
+
[response.responses[0].finish_reason])
|
116
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
|
117
|
+
response.id)
|
118
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
|
119
|
+
response.model)
|
120
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
|
121
|
+
input_tokens)
|
122
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
|
123
|
+
output_tokens)
|
124
|
+
span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
|
125
|
+
server_address)
|
126
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
|
127
|
+
'text')
|
128
|
+
|
129
|
+
# Set Span attributes (Extra)
|
130
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
85
131
|
environment)
|
86
|
-
span.set_attribute(
|
132
|
+
span.set_attribute(SERVICE_NAME,
|
87
133
|
application_name)
|
88
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
89
|
-
kwargs.get("model", "reka-core"))
|
90
134
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
|
91
135
|
False)
|
136
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
|
137
|
+
input_tokens + output_tokens)
|
138
|
+
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
|
139
|
+
cost)
|
140
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
|
141
|
+
end_time - start_time)
|
142
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
|
143
|
+
version)
|
144
|
+
|
92
145
|
if trace_content:
|
93
146
|
span.add_event(
|
94
147
|
name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
|
@@ -99,51 +152,36 @@ def chat(gen_ai_endpoint, version, environment, application_name,
|
|
99
152
|
span.add_event(
|
100
153
|
name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
|
101
154
|
attributes={
|
102
|
-
# pylint: disable=line-too-long
|
103
155
|
SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
|
104
156
|
},
|
105
157
|
)
|
106
158
|
|
107
|
-
prompt_tokens = response.usage.input_tokens
|
108
|
-
completion_tokens = response.usage.output_tokens
|
109
|
-
total_tokens = prompt_tokens + completion_tokens
|
110
|
-
# Calculate cost of the operation
|
111
|
-
cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
|
112
|
-
pricing_info, prompt_tokens, completion_tokens)
|
113
|
-
|
114
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
|
115
|
-
prompt_tokens)
|
116
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
|
117
|
-
completion_tokens)
|
118
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
|
119
|
-
total_tokens)
|
120
|
-
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
|
121
|
-
[response.responses[0].finish_reason])
|
122
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
|
123
|
-
cost)
|
124
|
-
|
125
159
|
span.set_status(Status(StatusCode.OK))
|
126
160
|
|
127
161
|
if disable_metrics is False:
|
128
|
-
attributes =
|
129
|
-
|
130
|
-
|
131
|
-
SemanticConvetion.
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
SemanticConvetion.GEN_AI_TYPE_CHAT,
|
139
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
140
|
-
kwargs.get("model", "reka-core")
|
141
|
-
}
|
162
|
+
attributes = create_metrics_attributes(
|
163
|
+
service_name=application_name,
|
164
|
+
deployment_environment=environment,
|
165
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
|
166
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
|
167
|
+
request_model=request_model,
|
168
|
+
server_address=server_address,
|
169
|
+
server_port=server_port,
|
170
|
+
response_model=response.model,
|
171
|
+
)
|
142
172
|
|
173
|
+
metrics["genai_client_usage_tokens"].record(
|
174
|
+
input_tokens + output_tokens, attributes
|
175
|
+
)
|
176
|
+
metrics["genai_client_operation_duration"].record(
|
177
|
+
end_time - start_time, attributes
|
178
|
+
)
|
179
|
+
metrics["genai_server_ttft"].record(
|
180
|
+
end_time - start_time, attributes
|
181
|
+
)
|
143
182
|
metrics["genai_requests"].add(1, attributes)
|
144
|
-
metrics["
|
145
|
-
metrics["
|
146
|
-
metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
|
183
|
+
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
184
|
+
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
147
185
|
metrics["genai_cost"].record(cost, attributes)
|
148
186
|
|
149
187
|
# Return original response
|
@@ -37,7 +37,7 @@ class TogetherInstrumentor(BaseInstrumentor):
|
|
37
37
|
wrap_function_wrapper(
|
38
38
|
"together.resources.chat.completions",
|
39
39
|
"ChatCompletions.create",
|
40
|
-
completion(
|
40
|
+
completion(version, environment, application_name,
|
41
41
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
42
42
|
)
|
43
43
|
|
@@ -45,7 +45,7 @@ class TogetherInstrumentor(BaseInstrumentor):
|
|
45
45
|
wrap_function_wrapper(
|
46
46
|
"together.resources.images",
|
47
47
|
"Images.generate",
|
48
|
-
image_generate(
|
48
|
+
image_generate(version, environment, application_name,
|
49
49
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
50
50
|
)
|
51
51
|
|
@@ -53,7 +53,7 @@ class TogetherInstrumentor(BaseInstrumentor):
|
|
53
53
|
wrap_function_wrapper(
|
54
54
|
"together.resources.chat.completions",
|
55
55
|
"AsyncChatCompletions.create",
|
56
|
-
async_completion(
|
56
|
+
async_completion(version, environment, application_name,
|
57
57
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
58
58
|
)
|
59
59
|
|
@@ -61,7 +61,7 @@ class TogetherInstrumentor(BaseInstrumentor):
|
|
61
61
|
wrap_function_wrapper(
|
62
62
|
"together.resources.images",
|
63
63
|
"AsyncImages.generate",
|
64
|
-
async_image_generate(
|
64
|
+
async_image_generate(version, environment, application_name,
|
65
65
|
tracer, pricing_info, trace_content, metrics, disable_metrics),
|
66
66
|
)
|
67
67
|
|