openlit 0.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +1 -1
- openlit/__init__.py +4 -1
- openlit/instrumentation/bedrock/__init__.py +41 -0
- openlit/instrumentation/bedrock/bedrock.py +436 -0
- openlit/otel/metrics.py +2 -2
- openlit/semcov/__init__.py +1 -0
- {openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/METADATA +71 -7
- {openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/RECORD +10 -8
- {openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/LICENSE +0 -0
- {openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/WHEEL +0 -0
openlit/__helpers.py
CHANGED
@@ -124,7 +124,7 @@ def get_audio_model_cost(model, pricing_info, prompt):
 
 def fetch_pricing_info():
     """Fetches pricing information from a specified URL."""
-    pricing_url = "https://raw.githubusercontent.com/
+    pricing_url = "https://raw.githubusercontent.com/openlit/openlit/main/assets/pricing.json"
     try:
         # Set a timeout of 10 seconds for both the connection and the read
         response = requests.get(pricing_url, timeout=20)
openlit/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from openlit.instrumentation.openai import OpenAIInstrumentor
 from openlit.instrumentation.anthropic import AnthropicInstrumentor
 from openlit.instrumentation.cohere import CohereInstrumentor
 from openlit.instrumentation.mistral import MistralInstrumentor
+from openlit.instrumentation.bedrock import BedrockInstrumentor
 from openlit.instrumentation.langchain import LangChainInstrumentor
 from openlit.instrumentation.chroma import ChromaInstrumentor
 from openlit.instrumentation.pinecone import PineconeInstrumentor
@@ -145,7 +146,8 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
         "openai": "openai",
         "anthropic": "anthropic",
         "cohere": "cohere",
-        "mistral": "mistralai",
+        "mistral": "mistralai",
+        "bedrock": "boto3",
         "langchain": "langchain",
         "chroma": "chromadb",
         "pinecone": "pincone",
@@ -192,6 +194,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
         "anthropic": AnthropicInstrumentor(),
         "cohere": CohereInstrumentor(),
         "mistral": MistralInstrumentor(),
+        "bedrock": BedrockInstrumentor(),
         "langchain": LangChainInstrumentor(),
         "chroma": ChromaInstrumentor(),
         "pinecone": PineconeInstrumentor(),
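With `"bedrock"` registered in the module map and instrumentor dict above, the new instrumentation is enabled the same way as the existing providers. A minimal usage sketch (the region, prompt body, and application/environment names are illustrative placeholders, not taken from this diff; `amazon.titan-text-express-v1` is the default model ID used in `bedrock.py` below):

```python
# Minimal sketch: enabling the new Bedrock auto-instrumentation.
import json

import boto3
import openlit

# One line of init; the module map above resolves "bedrock" -> boto3
openlit.init(environment="production", application_name="demo-app")

# A standard boto3 Bedrock Runtime call; invoke_model is now instrumented
client = boto3.client("bedrock-runtime", region_name="us-east-1")
response = client.invoke_model(
    modelId="amazon.titan-text-express-v1",
    body=json.dumps({"inputText": "Hello, Bedrock!"}),
)
print(json.loads(response["body"].read()))
```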
openlit/instrumentation/bedrock/__init__.py
ADDED
@@ -0,0 +1,41 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+"""Initializer of Auto Instrumentation of AWS Bedrock Functions"""
+
+from typing import Collection
+import importlib.metadata
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from wrapt import wrap_function_wrapper
+
+from openlit.instrumentation.bedrock.bedrock import chat
+
+_instruments = ("boto3 >= 1.34.93",)
+
+class BedrockInstrumentor(BaseInstrumentor):
+    """
+    An instrumentor for AWS Bedrock's client library.
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        application_name = kwargs.get("application_name", "default_application")
+        environment = kwargs.get("environment", "default_environment")
+        tracer = kwargs.get("tracer")
+        metrics = kwargs.get("metrics_dict")
+        pricing_info = kwargs.get("pricing_info", {})
+        trace_content = kwargs.get("trace_content", False)
+        disable_metrics = kwargs.get("disable_metrics")
+        version = importlib.metadata.version("boto3")
+
+        #sync
+        wrap_function_wrapper(
+            "botocore.client",
+            "ClientCreator.create_client",
+            chat("bedrock.invoke_model", version, environment, application_name,
+                 tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+    def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
+        pass
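The `wrap_function_wrapper` call above hooks `botocore.client.ClientCreator.create_client`, so the `chat(...)` wrapper sees every client boto3 constructs and can patch `invoke_model` on it. A self-contained sketch of that wrapt pattern, using stand-in names (`Factory`, `make_wrapper`) rather than the openlit code itself:

```python
# Sketch of the wrapt patching pattern used by the instrumentor above.
from wrapt import wrap_function_wrapper

class Factory:
    """Stand-in for botocore.client.ClientCreator."""
    def create(self, name):
        return f"widget:{name}"

def make_wrapper(label):
    # wrapt wrappers receive (wrapped, instance, args, kwargs)
    def wrapper(wrapped, instance, args, kwargs):
        result = wrapped(*args, **kwargs)   # run the original method
        print(f"[{label}] created {result}")  # post-process, like patching a client
        return result
    return wrapper

# Patch by (module, "Class.method"), mirroring
# ("botocore.client", "ClientCreator.create_client") in the diff.
wrap_function_wrapper(__name__, "Factory.create", make_wrapper("telemetry"))

Factory().create("a")  # prints: [telemetry] created widget:a
```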
openlit/instrumentation/bedrock/bedrock.py
ADDED
@@ -0,0 +1,436 @@
+# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, protected-access, too-many-branches
+"""
+Module for monitoring Amazon Bedrock API calls.
+"""
+
+import logging
+import json
+from botocore.response import StreamingBody
+from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
+from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
+from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_image_model_cost
+from openlit.__helpers import handle_exception, general_tokens
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+class CustomStreamWrapper(StreamingBody):
+    """Handle streaming responses with the ability to read multiple times."""
+
+    def __init__(self, stream_source, length):
+        super().__init__(stream_source, length)
+        self._stream_data = None
+        self._read_position = 0
+
+    def read(self, amt=None):
+        if self._stream_data is None:
+            try:
+                self._stream_data = self._raw_stream.read()
+            except URLLib3ReadTimeoutError as error:
+                raise ReadTimeoutError(endpoint_url=error.url, error=error) from error
+            except URLLib3ProtocolError as error:
+                raise ResponseStreamingError(error=error) from error
+
+        self._amount_read += len(self._stream_data)
+        if amt is None or (not self._stream_data and amt > 0):
+            self._verify_content_length()
+
+        if amt is None:
+            data_chunk = self._stream_data[self._read_position:]
+        else:
+            data_start = self._read_position
+            self._read_position += amt
+            data_chunk = self._stream_data[data_start:self._read_position]
+
+        return data_chunk
+
+
+def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+         pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for messages to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: The monitoring package version.
+        environment: Deployment environment (e.g. production, staging).
+        application_name: Name of the application using the Bedrock API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information for calculating Bedrock usage cost.
+        trace_content: Whether to trace the actual content.
+        metrics: Metrics collector.
+        disable_metrics: Flag to toggle metrics collection.
+    Returns:
+        A function that wraps the chat method to add telemetry.
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps an API call to add telemetry.
+
+        Args:
+            wrapped: Original method.
+            instance: Instance of the class.
+            args: Positional arguments of the 'messages' method.
+            kwargs: Keyword arguments of the 'messages' method.
+        Returns:
+            Response from the original method.
+        """
+        def handle_image(span, model, request_body, response_body):
+            cost = 0
+            if "amazon" in model:
+                # pylint: disable=line-too-long
+                size = str(request_body.get("imageGenerationConfig", {}).get("width", 1024)) + "x" + str(request_body.get("imageGenerationConfig", {}).get("height", 1024))
+                quality = request_body.get("imageGenerationConfig", {}).get("quality", "standard")
+                n = request_body.get("imageGenerationConfig", {}).get("numberOfImages", 1)
+
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_SIZE,
+                                   size)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_QUALITY,
+                                   quality)
+                # Calculate cost of the operation
+                cost = n * get_image_model_cost(model,
+                                                pricing_info, size, quality)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body.get("textToImageParams")["text"])
+
+            span.set_status(Status(StatusCode.OK))
+
+            if disable_metrics is False:
+                attributes = {
+                    TELEMETRY_SDK_NAME:
+                        "openlit",
+                    SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                        application_name,
+                    SemanticConvetion.GEN_AI_SYSTEM:
+                        SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                    SemanticConvetion.GEN_AI_ENVIRONMENT:
+                        environment,
+                    SemanticConvetion.GEN_AI_TYPE:
+                        SemanticConvetion.GEN_AI_TYPE_IMAGE,
+                    SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                        model
+                }
+
+                metrics["genai_requests"].add(1, attributes)
+                metrics["genai_cost"].record(cost, attributes)
+
+        def handle_embed(span, model, request_body, response_body):
+            prompt_tokens, cost = 0, 0
+            if "amazon" in model:
+                prompt_tokens = response_body["inputTextTokenCount"]
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   prompt_tokens)
+                # Calculate cost of the operation
+                cost = get_embed_model_cost(model,
+                                            pricing_info, prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["inputText"])
+
+            span.set_status(Status(StatusCode.OK))
+
+            if disable_metrics is False:
+                attributes = {
+                    TELEMETRY_SDK_NAME:
+                        "openlit",
+                    SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                        application_name,
+                    SemanticConvetion.GEN_AI_SYSTEM:
+                        SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                    SemanticConvetion.GEN_AI_ENVIRONMENT:
+                        environment,
+                    SemanticConvetion.GEN_AI_TYPE:
+                        SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                    SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                        model
+                }
+
+                metrics["genai_requests"].add(1, attributes)
+                metrics["genai_total_tokens"].add(
+                    prompt_tokens, attributes
+                )
+                metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                metrics["genai_cost"].record(cost, attributes)
+
+        def handle_chat(span, model, request_body, response_body):
+            prompt_tokens, completion_tokens, cost = 0, 0, 0
+
+            if "amazon" in model:
+                prompt_tokens = response_body["inputTextTokenCount"]
+                completion_tokens = response_body["results"][0]["tokenCount"]
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   completion_tokens +
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["results"][0]["completionReason"])
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["inputText"])
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["results"][0]["outputText"])
+
+            elif "mistral" in model:
+                prompt_tokens = general_tokens(request_body["prompt"])
+                completion_tokens = general_tokens(response_body["outputs"][0]["text"])
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   prompt_tokens + completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["outputs"][0]["stop_reason"])
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["prompt"])
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["outputs"][0]["text"])
+
+            elif "anthropic" in model:
+                prompt_tokens = response_body["usage"]["input_tokens"]
+                completion_tokens = response_body["usage"]["output_tokens"]
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   completion_tokens +
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["stop_reason"])
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    # Format 'messages' into a single string
+                    message_prompt = request_body["messages"]
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                # pylint: disable=line-too-long
+                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                if "type" in item else f'text: {item["text"]}'
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       prompt)
+
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["content"][0]["text"])
+            elif "meta" in model:
+                prompt_tokens = response_body["prompt_token_count"]
+                completion_tokens = response_body["generation_token_count"]
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   completion_tokens +
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["stop_reason"])
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["prompt"])
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["generation"])
+
+            elif "cohere" in model and "command-r" not in model:
+                prompt_tokens = general_tokens(request_body["prompt"])
+                completion_tokens = general_tokens(response_body["generations"][0]["text"])
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   prompt_tokens + completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["generations"][0]["finish_reason"])
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["prompt"])
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["generations"][0]["text"])
+            elif "ai21" in model:
+                prompt_tokens = general_tokens(request_body["prompt"])
+                completion_tokens = general_tokens(response_body["completions"][0]["data"]["text"])
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   prompt_tokens + completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response_body["completions"][0]["finishReason"]["reason"])
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, prompt_tokens,
+                                           completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       request_body["prompt"])
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                       response_body["completions"][0]["data"]["text"])
+
+            span.set_status(Status(StatusCode.OK))
+
+            if disable_metrics is False:
+                attributes = {
+                    TELEMETRY_SDK_NAME:
+                        "openlit",
+                    SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                        application_name,
+                    SemanticConvetion.GEN_AI_SYSTEM:
+                        SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                    SemanticConvetion.GEN_AI_ENVIRONMENT:
+                        environment,
+                    SemanticConvetion.GEN_AI_TYPE:
+                        SemanticConvetion.GEN_AI_TYPE_CHAT,
+                    SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                        model
+                }
+
+                metrics["genai_requests"].add(1, attributes)
+                metrics["genai_total_tokens"].add(
+                    prompt_tokens + completion_tokens, attributes
+                )
+                metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                metrics["genai_cost"].record(cost, attributes)
+
+        def add_instrumentation(original_method, *method_args, **method_kwargs):
+            """
+            Adds instrumentation to the invoke model call.
+
+            Args:
+                original_method: The original invoke model method.
+                *method_args: Positional arguments for the method.
+                **method_kwargs: Keyword arguments for the method.
+            Returns:
+                The modified response with telemetry.
+            """
+            with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                response = original_method(*method_args, **method_kwargs)
+
+                try:
+                    # Modify the response body to be reusable
+                    response["body"] = CustomStreamWrapper(
+                        response["body"]._raw_stream, response["body"]._content_length
+                    )
+                    request_body = json.loads(method_kwargs.get("body"))
+                    response_body = json.loads(response.get("body").read())
+
+                    model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
+                    if ("stability" in model or "image" in model) and "embed-image" not in model:
+                        generation = "image"
+                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                           SemanticConvetion.GEN_AI_TYPE_IMAGE)
+                    elif "embed" in model and "embed-image" not in model:
+                        generation = "embeddings"
+                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                           SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                    else:
+                        generation = "chat"
+                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
+
+                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_BEDROCK)
+                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                       environment)
+                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       model)
+                    if generation == "chat":
+                        handle_chat(span, model, request_body, response_body)
+                    elif generation == "embeddings":
+                        handle_embed(span, model, request_body, response_body)
+                    elif generation == "image":
+                        handle_image(span, model, request_body, response_body)
+
+                    return response
+
+                except Exception as e:
+                    handle_exception(span, e)
+                    logger.error("Error in trace creation: %s", e)
+
+                    # Return original response
+                    return response
+
+        # Get the original client instance from the wrapper
+        client = wrapped(*args, **kwargs)
+
+        # Replace the original method with the instrumented one
+        original_invoke_model = client.invoke_model
+        client.invoke_model = lambda *args, **kwargs: add_instrumentation(original_invoke_model,
+                                                                          *args, **kwargs)
+
+        return client
+
+    return wrapper
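A note on `CustomStreamWrapper` above: `botocore.response.StreamingBody` can only be consumed once, and the wrapper reads the body to parse token counts, so it must hand the caller a body that can still be read. A small sketch of the read-once behavior being worked around (`io.BytesIO` stands in for the underlying HTTP stream; the payload is an illustrative placeholder):

```python
# Demonstrates why the response body must be re-wrapped before it is read.
import io
from botocore.response import StreamingBody

payload = b'{"results": [{"outputText": "hi"}]}'
body = StreamingBody(io.BytesIO(payload), len(payload))

print(body.read())  # b'{"results": [{"outputText": "hi"}]}'
print(body.read())  # b'' -- exhausted; a second consumer would see nothing
```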
openlit/otel/metrics.py
CHANGED
@@ -72,7 +72,7 @@ def setup_meter(application_name, environment, meter, otlp_endpoint, otlp_header
     metrics_dict = {
         "genai_requests": meter.create_counter(
             name=SemanticConvetion.GEN_AI_REQUESTS,
-            description="Number of requests to
+            description="Number of requests to GenAI",
             unit="1",
         ),
         "genai_prompt_tokens": meter.create_counter(
@@ -92,7 +92,7 @@ def setup_meter(application_name, environment, meter, otlp_endpoint, otlp_header
         ),
         "genai_cost": meter.create_histogram(
             name=SemanticConvetion.GEN_AI_USAGE_COST,
-            description="The distribution of
+            description="The distribution of GenAI request costs.",
             unit="USD",
         ),
         "db_requests": meter.create_counter(
{openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: openlit
-Version:
+Version: 1.1.0
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
-Home-page: https://github.com/
+Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
 Author: OpenLIT
 Requires-Python: >=3.7.1,<4.0.0
@@ -12,16 +12,33 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: boto3 (>=1.34.0,<2.0.0)
+Requires-Dist: botocore (>=1.34.0,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-instrumentation (>=0.45b0,<0.46)
 Requires-Dist: opentelemetry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: requests (>=2.26.0,<3.0.0)
 Requires-Dist: tiktoken (>=0.1.1,<0.2.0)
-Project-URL: Repository, https://github.com/
+Project-URL: Repository, https://github.com/openlit/openlit/tree/main/openlit/python
 Description-Content-Type: text/markdown
 
-
+<div align="center">
+<img src="https://github.com/openlit/.github/blob/main/profile/assets/wide-logo-no-bg.png?raw=true" alt="OpenLIT Logo" width="30%"><h1>
+OpenTelemetry Auto-Instrumentation for GenAI & LLM Applications</h1>
+
+**[Documentation](https://docs.openlit.io/) | [Quickstart](#-getting-started) | [Python SDK](https://github.com/openlit/openlit/tree/main/sdk/python)**
+
+[](https://github.com/openlit/openlit)
+[](https://github.com/openlit/openlit/blob/main/LICENSE)
+[](https://pepy.tech/project/openlit)
+[](https://github.com/openlit/openlit/pulse)
+[](https://github.com/openlit/openlit/graphs/contributors)
+
+[](https://join.slack.com/t/openlit/shared_invite/zt-2etnfttwg-TjP_7BZXfYg84oAukY8QRQ)
+[](https://twitter.com/openlit_io)
+
+</div>
 
 OpenLIT Python SDK is an **OpenTelemetry-native** Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects. Designed with simplicity and efficiency, OpenLIT offers the ability to embed observability into your GenAI-driven projects effortlessly using just **a single line of code**.
 
@@ -63,10 +80,20 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem
 pip install openlit
 ```
 
-##
+## 🚀 Getting Started
+
+## Step 1: Install OpenLIT SDK
+
+```bash
+pip install openlit
+```
+
+### Step 2: Instrument your Application
+Integrating the OpenLIT into LLM applications is straightforward. Start monitoring for your LLM Application with just **one line of code**:
 
 ```python
 import openlit
+
 openlit.init()
 ```
 
@@ -74,6 +101,42 @@ By default, OpenLIT directs traces and metrics straight to your console. To forw
 
 To send telemetry to OpenTelemetry backends requiring authentication, set the `otlp_headers` parameter with its desired value. Alternatively, you can configure the endpoint by setting the `OTEL_EXPORTER_OTLP_HEADERS` environment variable as recommended in the OpenTelemetry documentation.
 
+#### Example
+
+Here is how you can send telemetry from OpenLIT to Grafana Cloud
+
+```python
+openlit.init(
+  otlp_endpoint="https://otlp-gateway-prod-us-east-0.grafana.net/otlp",
+  otlp_headers="Authorization=Basic%20<base64 encoded Instance ID and API Token>"
+)
+```
+
+Alternatively, You can also choose to set these values using `OTEL_EXPORTER_OTLP_ENDPOINT` and `OTEL_EXPORTER_OTLP_HEADERS` environment variables
+
+```python
+openlit.init()
+```
+
+```env
+export OTEL_EXPORTER_OTLP_ENDPOINT = "https://otlp-gateway-prod-us-east-0.grafana.net/otlp"
+export OTEL_EXPORTER_OTLP_HEADERS = "Authorization=Basic%20<base64 encoded Instance ID and API Token>"
+```
+
+### Step 3: Visualize and Optimize!
+With the LLM Observability data now being collected and sent to your chosen OpenTelemetry backend, the next step is to visualize and analyze this data to glean insights into your application's performance, behavior, and identify areas of improvement. Here is how you would use the data in Grafana, follow these detailed instructions to explore your LLM application's Telemetry data.
+
+- Select the **Explore** option from Grafana's sidebar.
+- At the top, ensure the correct Tempo data source is selected from the dropdown menu.
+- Use the **Query** field to specify any particular traces you are interested in, or leave it empty to browse through all the available traces.
+- You can adjust the time range to focus on specific periods of interest.
+- Hit **Run Query** to fetch your trace data. You'll see a visual representation of your traces along with detailed information on particular spans when clicked.
+
+#### Next Steps
+
+- **Create Dashboards:** Beyond just exploring traces, consider creating dashboards in Grafana to monitor key performance indicators (KPIs) and metrics over time. Dashboards can be customized with various panels to display graphs, logs, and single stats that are most relevant to your application's performance and usage patterns.
+- **Set Alerts:** Grafana also allows you to set up alerts based on specific thresholds. This feature can be invaluable in proactively managing your application's health by notifying you of potential issues before they impact users.
+- **Iterate and Optimize:** Use the insights gained from your observability data to make informed decisions on optimizing your LLM application. This might involve refining model parameters, adjusting scaling strategies, or identifying and resolving bottlenecks.
 
 
 ### Configuration
@@ -107,7 +170,8 @@ Your input helps us grow and improve, and we're here to support you every step o
 
 Connect with the OpenLIT community and maintainers for support, discussions, and updates:
 
-- 🌟 If you like it, Leave a star on our [GitHub](https://github.com/
+- 🌟 If you like it, Leave a star on our [GitHub](https://github.com/openlit/openlit/)
 - 🌍 Join our [Slack](https://join.slack.com/t/openlit/shared_invite/zt-2etnfttwg-TjP_7BZXfYg84oAukY8QRQ) Community for live interactions and questions.
-- 🐞 Report bugs on our [GitHub Issues](https://github.com/
+- 🐞 Report bugs on our [GitHub Issues](https://github.com/openlit/openlit/issues) to help us improve OpenLIT.
 - 𝕏 Follow us on [X](https://twitter.com/openlit) for the latest updates and news.
+
{openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/RECORD
CHANGED
@@ -1,8 +1,10 @@
-openlit/__helpers.py,sha256=
-openlit/__init__.py,sha256=
+openlit/__helpers.py,sha256=EEbLEUKuCiBp0WiieAvUnGcaU5D7grFgNVDCBgMKjQE,4651
+openlit/__init__.py,sha256=wUvAHi_LxSKpBVSsrTtzA91dpoa18ftqHB7JUzSgAUU,8931
 openlit/instrumentation/anthropic/__init__.py,sha256=oaU53BOPyfUKbEzYvLr1DPymDluurSnwo4Hernf2XdU,1955
 openlit/instrumentation/anthropic/anthropic.py,sha256=gLN7LrgbTTOxgO8TEn-mX7WCYVGExrIGB-_ueCLPMEY,15993
 openlit/instrumentation/anthropic/async_anthropic.py,sha256=wb5U9aF3FtgPZ_1EZudsuKaB6wmOrEVwDIlfcEWnQqU,16035
+openlit/instrumentation/bedrock/__init__.py,sha256=QPvDMQde6Meodu5JvosHdZsnyExS19lcoP5Li4YrOkw,1540
+openlit/instrumentation/bedrock/bedrock.py,sha256=Q5t5283LGEvhyrUCr9ofEQF22JTkc1UvT2_6u7e7gmA,22278
 openlit/instrumentation/chroma/__init__.py,sha256=61lFpHlUEQUobsUJZHXdvOViKwsOH8AOvSfc4VgCmiM,3253
 openlit/instrumentation/chroma/chroma.py,sha256=wcY5sN-Lfdr4P56FDy8O_ft20gfxTDP12c2vIUF7Qno,10374
 openlit/instrumentation/cohere/__init__.py,sha256=PC5T1qIg9pwLNocBP_WjG5B_6p_z019s8quk_fNLAMs,1920
@@ -21,10 +23,10 @@ openlit/instrumentation/pinecone/__init__.py,sha256=Mv9bElqNs07_JQkYyNnO0wOM3hdb
 openlit/instrumentation/pinecone/pinecone.py,sha256=0C-Dd4YOlBCKQ7vmWvFsvokjFCKCn-snquHp7n12yPM,8732
 openlit/instrumentation/transformers/__init__.py,sha256=9-KLjq-aPTh13gTBYsWltV6hokGwt3mP4759SwsaaCk,1478
 openlit/instrumentation/transformers/transformers.py,sha256=peT0BGskYt7AZ0b93TZ7qECXfZRgDQMasUeamexYdZI,7592
-openlit/otel/metrics.py,sha256=
+openlit/otel/metrics.py,sha256=O7NoaDz0bY19mqpE4-0PcKwEe-B-iJFRgOCaanAuZAc,4291
 openlit/otel/tracing.py,sha256=peismkno0YPoRezHPbF5Ycz15_oOBErn_coW1CPspHg,3612
-openlit/semcov/__init__.py,sha256=
-openlit-
-openlit-
-openlit-
-openlit-
+openlit/semcov/__init__.py,sha256=l8Vd1-HqSOD8d7V_D4HIALBNfHeaI4uYeo-bGiMS5tQ,5724
+openlit-1.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.1.0.dist-info/METADATA,sha256=I9ovMr25H4pdo_qkIg8YpjbO4pxkV90p7YNeBFaUby0,10535
+openlit-1.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+openlit-1.1.0.dist-info/RECORD,,
{openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/LICENSE
File without changes
{openlit-0.0.2.dist-info → openlit-1.1.0.dist-info}/WHEEL
File without changes