openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl
This diff compares the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- openlit/__helpers.py +78 -0
- openlit/__init__.py +41 -13
- openlit/instrumentation/ag2/__init__.py +9 -10
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +6 -5
- openlit/instrumentation/ai21/ai21.py +71 -534
- openlit/instrumentation/ai21/async_ai21.py +71 -534
- openlit/instrumentation/ai21/utils.py +407 -0
- openlit/instrumentation/anthropic/__init__.py +3 -3
- openlit/instrumentation/anthropic/anthropic.py +5 -5
- openlit/instrumentation/anthropic/async_anthropic.py +5 -5
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +7 -7
- openlit/instrumentation/astra/async_astra.py +7 -7
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +7 -7
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +10 -10
- openlit/instrumentation/cohere/cohere.py +11 -11
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
- openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +5 -5
- openlit/instrumentation/gpt4all/gpt4all.py +350 -225
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +5 -5
- openlit/instrumentation/groq/async_groq.py +359 -243
- openlit/instrumentation/groq/groq.py +359 -243
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +6 -6
- openlit/instrumentation/julep/julep.py +6 -6
- openlit/instrumentation/langchain/__init__.py +15 -9
- openlit/instrumentation/langchain/async_langchain.py +388 -0
- openlit/instrumentation/langchain/langchain.py +110 -497
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +10 -8
- openlit/instrumentation/litellm/__init__.py +9 -10
- openlit/instrumentation/litellm/async_litellm.py +321 -250
- openlit/instrumentation/litellm/litellm.py +319 -248
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +7 -7
- openlit/instrumentation/mistral/__init__.py +13 -13
- openlit/instrumentation/mistral/async_mistral.py +426 -253
- openlit/instrumentation/mistral/mistral.py +424 -250
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +9 -7
- openlit/instrumentation/multion/multion.py +9 -7
- openlit/instrumentation/ollama/__init__.py +19 -39
- openlit/instrumentation/ollama/async_ollama.py +137 -563
- openlit/instrumentation/ollama/ollama.py +136 -563
- openlit/instrumentation/ollama/utils.py +333 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +25 -27
- openlit/instrumentation/openai/openai.py +25 -27
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +7 -7
- openlit/instrumentation/premai/__init__.py +5 -5
- openlit/instrumentation/premai/premai.py +268 -219
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +7 -7
- openlit/instrumentation/qdrant/qdrant.py +7 -7
- openlit/instrumentation/reka/__init__.py +5 -5
- openlit/instrumentation/reka/async_reka.py +93 -55
- openlit/instrumentation/reka/reka.py +93 -55
- openlit/instrumentation/together/__init__.py +9 -9
- openlit/instrumentation/together/async_together.py +284 -242
- openlit/instrumentation/together/together.py +284 -242
- openlit/instrumentation/transformers/__init__.py +3 -3
- openlit/instrumentation/transformers/transformers.py +79 -48
- openlit/instrumentation/vertexai/__init__.py +19 -69
- openlit/instrumentation/vertexai/async_vertexai.py +333 -990
- openlit/instrumentation/vertexai/vertexai.py +333 -990
- openlit/instrumentation/vllm/__init__.py +3 -3
- openlit/instrumentation/vllm/vllm.py +65 -35
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +16 -4
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
- openlit-1.33.11.dist-info/RECORD +125 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
openlit/instrumentation/qdrant/__init__.py

```diff
@@ -273,7 +273,7 @@ class QdrantInstrumentor(BaseInstrumentor):
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info")
-
+        capture_message_content = kwargs.get("capture_message_content")
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("qdrant-client")
 
@@ -286,7 +286,7 @@ class QdrantInstrumentor(BaseInstrumentor):
                 wrap_package,
                 wrap_object,
                 wrapper(gen_ai_endpoint, version, environment, application_name,
-                        tracer, pricing_info,
+                        tracer, pricing_info, capture_message_content, metrics, disable_metrics),
             )
 
 
```
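
Both hunks follow the same mechanical change: `_instrument` now pulls `capture_message_content` out of its kwargs and threads it, together with `metrics` and `disable_metrics`, into every wrapper factory. A minimal sketch of that plumbing, assuming a hypothetical `WRAPPED_METHODS` table and a pass-through `general_wrap` in place of openlit's real ones:

```python
# Sketch only: WRAPPED_METHODS and this general_wrap are illustrative
# stand-ins; openlit's real table lists every traced Qdrant method.
import importlib.metadata
from wrapt import wrap_function_wrapper

WRAPPED_METHODS = [
    # (package to patch, dotted object path, endpoint label)
    ("qdrant_client", "QdrantClient.upsert", "qdrant.upsert"),
]

def general_wrap(gen_ai_endpoint, version, environment, application_name,
                 tracer, pricing_info, capture_message_content,
                 metrics, disable_metrics):
    def wrapper(wrapped, instance, args, kwargs):
        # Span and metric logic lives here in the real wrapper.
        return wrapped(*args, **kwargs)
    return wrapper

def _instrument(**kwargs):
    tracer = kwargs.get("tracer")
    metrics = kwargs.get("metrics_dict")
    pricing_info = kwargs.get("pricing_info")
    capture_message_content = kwargs.get("capture_message_content")
    disable_metrics = kwargs.get("disable_metrics")
    version = importlib.metadata.version("qdrant-client")

    for wrap_package, wrap_object, gen_ai_endpoint in WRAPPED_METHODS:
        wrap_function_wrapper(
            wrap_package,
            wrap_object,
            general_wrap(gen_ai_endpoint, version, kwargs.get("environment"),
                         kwargs.get("application_name"), tracer, pricing_info,
                         capture_message_content, metrics, disable_metrics),
        )
```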
openlit/instrumentation/qdrant/async_qdrant.py

```diff
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
 
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion
 
@@ -25,7 +25,7 @@ def object_count(obj):
     return cnt
 
 def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
-                       tracer, pricing_info,
+                       tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Creates a wrapper around a function call to trace and log its execution metrics.
 
@@ -39,7 +39,7 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
         - application_name (str): Name of the Langchain application.
         - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
         - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-        -
+        - capture_message_content (bool): Flag indicating whether to trace the content of the response.
 
     Returns:
         - function: A higher-order function that takes a function 'wrapped' and returns
@@ -73,9 +73,9 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
             span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
             span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                gen_ai_endpoint)
-            span.set_attribute(
+            span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                environment)
-            span.set_attribute(
+            span.set_attribute(SERVICE_NAME,
                                application_name)
            span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                               SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -241,11 +241,11 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
                 attributes = {
                     TELEMETRY_SDK_NAME:
                         "openlit",
-
+                    SERVICE_NAME:
                         application_name,
                     SemanticConvetion.DB_SYSTEM:
                         SemanticConvetion.DB_SYSTEM_QDRANT,
-
+                    DEPLOYMENT_ENVIRONMENT:
                         environment,
                     SemanticConvetion.GEN_AI_OPERATION:
                         SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
```
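
The substantive change in this module is the import line: the environment and application name move off truncated ad-hoc attribute calls onto OpenTelemetry's standard resource attribute constants. A minimal, standalone illustration of the resulting span attributes (the span name and values here are made up):

```python
# Illustration of the attribute change above; "qdrant.upsert",
# "production" and "my-app" are placeholder values.
from opentelemetry import trace
from opentelemetry.sdk.resources import (
    SERVICE_NAME,            # "service.name"
    DEPLOYMENT_ENVIRONMENT,  # "deployment.environment"
    TELEMETRY_SDK_NAME,      # "telemetry.sdk.name"
)

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("qdrant.upsert") as span:
    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
    span.set_attribute(DEPLOYMENT_ENVIRONMENT, "production")
    span.set_attribute(SERVICE_NAME, "my-app")
```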
openlit/instrumentation/qdrant/qdrant.py

```diff
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
 
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion
 
@@ -25,7 +25,7 @@ def object_count(obj):
     return cnt
 
 def general_wrap(gen_ai_endpoint, version, environment, application_name,
-                 tracer, pricing_info,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Creates a wrapper around a function call to trace and log its execution metrics.
 
@@ -39,7 +39,7 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
         - application_name (str): Name of the Langchain application.
         - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
         - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-        -
+        - capture_message_content (bool): Flag indicating whether to trace the content of the response.
 
     Returns:
         - function: A higher-order function that takes a function 'wrapped' and returns
@@ -73,9 +73,9 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
             span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
             span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                gen_ai_endpoint)
-            span.set_attribute(
+            span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                environment)
-            span.set_attribute(
+            span.set_attribute(SERVICE_NAME,
                                application_name)
             span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -248,11 +248,11 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
                 attributes = {
                     TELEMETRY_SDK_NAME:
                         "openlit",
-
+                    SERVICE_NAME:
                         application_name,
                     SemanticConvetion.DB_SYSTEM:
                         SemanticConvetion.DB_SYSTEM_QDRANT,
-
+                    DEPLOYMENT_ENVIRONMENT:
                         environment,
                     SemanticConvetion.GEN_AI_OPERATION:
                         SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
```
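
The sync module receives exactly the same seven-line change as its async sibling, keeping the two code paths line-for-line identical. One way to picture that parity (this helper is illustrative, not how openlit is actually factored) is a shared attribute-setter used by both wrapper factories:

```python
# Illustrative only: a shared helper both the sync and async wrappers
# could call, expressing why the two diffs above are identical.
from opentelemetry.sdk.resources import (
    SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT,
)

def _set_common_attributes(span, environment, application_name):
    """Attributes shared by the sync and async code paths."""
    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
    span.set_attribute(SERVICE_NAME, application_name)

def general_wrap(endpoint, environment, application_name, tracer):
    def wrapper(wrapped, instance, args, kwargs):
        with tracer.start_as_current_span(endpoint) as span:
            _set_common_attributes(span, environment, application_name)
            return wrapped(*args, **kwargs)
    return wrapper

def async_general_wrap(endpoint, environment, application_name, tracer):
    async def wrapper(wrapped, instance, args, kwargs):
        with tracer.start_as_current_span(endpoint) as span:
            _set_common_attributes(span, environment, application_name)
            return await wrapped(*args, **kwargs)
    return wrapper
```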
openlit/instrumentation/reka/__init__.py

```diff
@@ -29,7 +29,7 @@ class RekaInstrumentor(BaseInstrumentor):
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
-
+        capture_message_content = kwargs.get("capture_message_content", False)
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("reka-api")
 
@@ -37,16 +37,16 @@ class RekaInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "reka.chat.client",
             "ChatClient.create",
-            chat(
-                tracer, pricing_info,
+            chat(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # async chat
         wrap_function_wrapper(
             "reka.chat.client",
             "AsyncChatClient.create",
-            async_chat(
-                tracer, pricing_info,
+            async_chat(version, environment, application_name,
+                       tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
```
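
`openlit.init()` normally constructs these kwargs, but the instrumentor can also be exercised directly, which makes the new parameter visible. A hedged sketch; the exact set of kwargs `_instrument` reads beyond those shown in the hunk is an assumption:

```python
# Hedged sketch: kwargs mirror those read in the hunk above;
# environment/application_name are assumed to arrive the same way.
from opentelemetry import trace
from openlit.instrumentation.reka import RekaInstrumentor

RekaInstrumentor().instrument(
    tracer=trace.get_tracer("openlit"),
    environment="production",
    application_name="demo-app",
    pricing_info={},               # pricing table for cost estimation
    metrics_dict=None,             # would be openlit's metrics dict
    capture_message_content=True,  # record prompts/completions on spans
    disable_metrics=True,          # skip metric recording in this sketch
)
```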
openlit/instrumentation/reka/async_reka.py

```diff
@@ -1,33 +1,34 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Reka API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
     get_chat_model_cost,
+    handle_exception,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(
-    tracer, pricing_info,
+def async_chat(version, environment, application_name,
+               tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the Reka API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Reka usage.
-
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat method to add telemetry.
@@ -50,8 +51,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat' method.
         """
 
-
+        server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
+        request_model = kwargs.get("model", "reka-core-20240501")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                 if isinstance(content, list):
                     content_str = ", ".join(
-                        # pylint: disable=line-too-long
                         f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                         if "type" in item else f'text: {item["text"]}'
                         for item in content
@@ -73,23 +80,69 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
 
-
+                input_tokens = response.usage.input_tokens
+                output_tokens = response.usage.output_tokens
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens, output_tokens)
+
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                   kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                   kwargs.get("top_k", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   [response.responses[0].finish_reason])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                   response.id)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response.model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   'text')
+
+                # Set Span attributes (Extra)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "reka-core"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                    False)
-
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+
+                if capture_message_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
@@ -99,51 +152,36 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            # pylint: disable=line-too-long
                             SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
                         },
                     )
 
-                prompt_tokens = response.usage.input_tokens
-                completion_tokens = response.usage.output_tokens
-                total_tokens = prompt_tokens + completion_tokens
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
-                                           pricing_info, prompt_tokens, completion_tokens)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                   completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   total_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                   [response.responses[0].finish_reason])
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "reka-core")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["
-                    metrics["
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
             # Return original response
```
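
Functionally, the rewrite wraps the awaited call in start/end timestamps and reports the latency both as the `GEN_AI_SERVER_TTFT` span attribute and as histogram metrics. A stripped-down sketch of that recording pattern; the instrument names and units are assumptions modeled on the dictionary keys used above:

```python
# Sketch of the timing/metrics pattern; instrument names and units are
# assumptions, not openlit's exact definitions.
import time
from opentelemetry import metrics as otel_metrics

meter = otel_metrics.get_meter("openlit-sketch")

metrics = {
    "genai_client_operation_duration": meter.create_histogram(
        "gen_ai.client.operation.duration", unit="s"),
    "genai_server_ttft": meter.create_histogram(
        "gen_ai.server.time_to_first_token", unit="s"),
    "genai_requests": meter.create_counter("gen_ai.total.requests"),
}

async def timed_call(wrapped, attributes, *args, **kwargs):
    # Same shape as the wrapper: stamp before and after the awaited call.
    start_time = time.time()
    response = await wrapped(*args, **kwargs)
    end_time = time.time()

    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
    # For a non-streaming call the whole response arrives at once, so
    # TTFT degenerates to the total duration, which is what gets recorded.
    metrics["genai_server_ttft"].record(end_time - start_time, attributes)
    metrics["genai_requests"].add(1, attributes)
    return response
```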
openlit/instrumentation/reka/reka.py

```diff
@@ -1,33 +1,34 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Reka API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
     get_chat_model_cost,
+    handle_exception,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat(
-    tracer, pricing_info,
+def chat(version, environment, application_name,
+         tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the Reka API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Reka usage.
-
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat method to add telemetry.
@@ -50,8 +51,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat' method.
         """
 
-
+        server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
+        request_model = kwargs.get("model", "reka-core-20240501")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                 if isinstance(content, list):
                     content_str = ", ".join(
-                        # pylint: disable=line-too-long
                         f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                         if "type" in item else f'text: {item["text"]}'
                         for item in content
@@ -73,23 +80,69 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                     formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
 
-
+                input_tokens = response.usage.input_tokens
+                output_tokens = response.usage.output_tokens
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens, output_tokens)
+
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                   kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                   kwargs.get("top_k", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   [response.responses[0].finish_reason])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                   response.id)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response.model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   'text')
+
+                # Set Span attributes (Extra)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "reka-core"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                    False)
-
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+
+                if capture_message_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
@@ -99,51 +152,36 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            # pylint: disable=line-too-long
                             SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
                         },
                     )
 
-                prompt_tokens = response.usage.input_tokens
-                completion_tokens = response.usage.output_tokens
-                total_tokens = prompt_tokens + completion_tokens
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
-                                           pricing_info, prompt_tokens, completion_tokens)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                   completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   total_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                   [response.responses[0].finish_reason])
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "reka-core")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["
-                    metrics["
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
             # Return original response
```
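
Cost now comes from `get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)` computed up front, before the span attributes are set, rather than after the content events. Its contract can be sketched as a per-1k-token lookup; the pricing-table shape and rates below are assumptions for illustration:

```python
def get_chat_model_cost_sketch(model, pricing_info, input_tokens, output_tokens):
    """Assumed table shape: pricing_info["chat"][model] holds per-1k-token
    "promptPrice" and "completionPrice". Illustrative, not openlit's helper."""
    try:
        rates = pricing_info["chat"][model]
        return ((input_tokens / 1000) * rates["promptPrice"]
                + (output_tokens / 1000) * rates["completionPrice"])
    except (KeyError, TypeError):
        return 0  # unknown model or malformed table: report zero cost

# Example with a made-up rate card:
pricing = {"chat": {"reka-core-20240501": {"promptPrice": 3.0, "completionPrice": 15.0}}}
print(get_chat_model_cost_sketch("reka-core-20240501", pricing, 1200, 300))  # 8.1
```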
openlit/instrumentation/together/__init__.py

```diff
@@ -29,7 +29,7 @@ class TogetherInstrumentor(BaseInstrumentor):
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
-
+        capture_message_content = kwargs.get("capture_message_content", False)
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("together")
 
@@ -37,32 +37,32 @@ class TogetherInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "together.resources.chat.completions",
             "ChatCompletions.create",
-            completion(
-                tracer, pricing_info,
+            completion(version, environment, application_name,
+                       tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # Image generate
         wrap_function_wrapper(
             "together.resources.images",
             "Images.generate",
-            image_generate(
-                tracer, pricing_info,
+            image_generate(version, environment, application_name,
+                           tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # Chat completions
         wrap_function_wrapper(
             "together.resources.chat.completions",
             "AsyncChatCompletions.create",
-            async_completion(
-                tracer, pricing_info,
+            async_completion(version, environment, application_name,
+                             tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # Image generate
         wrap_function_wrapper(
             "together.resources.images",
             "AsyncImages.generate",
-            async_image_generate(
-                tracer, pricing_info,
+            async_image_generate(version, environment, application_name,
+                                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
```
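
Every instrumentor touched by this release reads its content-capture flag as `capture_message_content`. From the application side the flag surfaces through the usual entry point; a hedged sketch, with parameter names matching the kwargs read in the hunks above (verify against the 1.33.11 `openlit/__init__.py`):

```python
import openlit

# The instrumentors above read capture_message_content from their
# _instrument kwargs; openlit.init() is the usual way to supply it.
openlit.init(
    application_name="demo-app",
    environment="production",
    capture_message_content=False,  # keep prompts/completions off spans
)
```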