openlit 1.33.19__py3-none-any.whl → 1.33.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +64 -7
- openlit/__init__.py +3 -3
- openlit/evals/utils.py +7 -7
- openlit/guard/utils.py +7 -7
- openlit/instrumentation/ag2/ag2.py +24 -24
- openlit/instrumentation/ai21/ai21.py +3 -3
- openlit/instrumentation/ai21/async_ai21.py +3 -3
- openlit/instrumentation/ai21/utils.py +59 -59
- openlit/instrumentation/anthropic/anthropic.py +2 -2
- openlit/instrumentation/anthropic/async_anthropic.py +2 -2
- openlit/instrumentation/anthropic/utils.py +34 -34
- openlit/instrumentation/assemblyai/assemblyai.py +24 -24
- openlit/instrumentation/astra/astra.py +3 -3
- openlit/instrumentation/astra/async_astra.py +3 -3
- openlit/instrumentation/astra/utils.py +39 -39
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/utils.py +36 -36
- openlit/instrumentation/bedrock/bedrock.py +2 -2
- openlit/instrumentation/bedrock/utils.py +35 -35
- openlit/instrumentation/chroma/chroma.py +57 -57
- openlit/instrumentation/cohere/async_cohere.py +88 -88
- openlit/instrumentation/cohere/cohere.py +88 -88
- openlit/instrumentation/controlflow/controlflow.py +15 -15
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
- openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
- openlit/instrumentation/crewai/crewai.py +22 -22
- openlit/instrumentation/dynamiq/dynamiq.py +19 -19
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
- openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
- openlit/instrumentation/embedchain/embedchain.py +15 -15
- openlit/instrumentation/firecrawl/firecrawl.py +10 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
- openlit/instrumentation/gpt4all/gpt4all.py +78 -78
- openlit/instrumentation/gpu/__init__.py +8 -8
- openlit/instrumentation/groq/async_groq.py +74 -74
- openlit/instrumentation/groq/groq.py +74 -74
- openlit/instrumentation/haystack/haystack.py +6 -6
- openlit/instrumentation/julep/async_julep.py +14 -14
- openlit/instrumentation/julep/julep.py +14 -14
- openlit/instrumentation/langchain/async_langchain.py +39 -39
- openlit/instrumentation/langchain/langchain.py +39 -39
- openlit/instrumentation/letta/letta.py +26 -26
- openlit/instrumentation/litellm/async_litellm.py +94 -94
- openlit/instrumentation/litellm/litellm.py +94 -94
- openlit/instrumentation/llamaindex/llamaindex.py +7 -7
- openlit/instrumentation/mem0/mem0.py +13 -13
- openlit/instrumentation/milvus/milvus.py +47 -47
- openlit/instrumentation/mistral/async_mistral.py +88 -88
- openlit/instrumentation/mistral/mistral.py +88 -88
- openlit/instrumentation/multion/async_multion.py +21 -21
- openlit/instrumentation/multion/multion.py +21 -21
- openlit/instrumentation/ollama/__init__.py +47 -34
- openlit/instrumentation/ollama/async_ollama.py +7 -5
- openlit/instrumentation/ollama/ollama.py +7 -5
- openlit/instrumentation/ollama/utils.py +58 -54
- openlit/instrumentation/openai/async_openai.py +225 -225
- openlit/instrumentation/openai/openai.py +225 -225
- openlit/instrumentation/openai_agents/openai_agents.py +11 -11
- openlit/instrumentation/phidata/phidata.py +15 -15
- openlit/instrumentation/pinecone/pinecone.py +43 -43
- openlit/instrumentation/premai/premai.py +86 -86
- openlit/instrumentation/qdrant/async_qdrant.py +95 -95
- openlit/instrumentation/qdrant/qdrant.py +99 -99
- openlit/instrumentation/reka/async_reka.py +33 -33
- openlit/instrumentation/reka/reka.py +33 -33
- openlit/instrumentation/together/async_together.py +90 -90
- openlit/instrumentation/together/together.py +90 -90
- openlit/instrumentation/transformers/__init__.py +11 -7
- openlit/instrumentation/transformers/transformers.py +32 -168
- openlit/instrumentation/transformers/utils.py +183 -0
- openlit/instrumentation/vertexai/async_vertexai.py +64 -64
- openlit/instrumentation/vertexai/vertexai.py +64 -64
- openlit/instrumentation/vllm/vllm.py +24 -24
- openlit/otel/metrics.py +11 -11
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/METADATA +8 -8
- openlit-1.33.21.dist-info/RECORD +132 -0
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
- openlit-1.33.19.dist-info/RECORD +0 -131
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0
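
Nearly every module above changes by symmetric +N/-N line counts because this release renames the misspelled semantic-convention class SemanticConvetion to SemanticConvention across the instrumentation packages, as the hunks below show. A minimal before/after sketch of the repeated pattern; the span object and the attribute pair are chosen for illustration:

    # 1.33.19: misspelled class name
    from openlit.semcov import SemanticConvetion
    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)

    # 1.33.21: corrected spelling; attribute keys and values are unchanged
    from openlit.semcov import SemanticConvention
    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
                       SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)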
@@ -16,7 +16,7 @@ from openlit.__helpers import (
     create_metrics_attributes,
     set_server_address_and_port
 )
-from openlit.semcov import SemanticConvetion
+from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
@@ -157,46 +157,46 @@ def completion(version, environment, application_name,
 
         # Set Span attributes (OTel Semconv)
         self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                 SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+        self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                                 SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+        self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                                 SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                                  request_model)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
                                  self._kwargs.get("seed", ""))
-        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+        self._span.set_attribute(SemanticConvention.SERVER_PORT,
                                  self._server_port)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                                  self._kwargs.get("frequency_penalty", 0.0))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
                                  self._kwargs.get("max_tokens", -1))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                  self._kwargs.get("presence_penalty", 0.0))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
                                  self._kwargs.get("stop", []))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
                                  self._kwargs.get("temperature", 1.0))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
                                  self._kwargs.get("top_p", 1.0))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
                                  [self._finish_reason])
-        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
                                  self._response_id)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                                  self._response_model)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
                                  self._input_tokens)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
                                  self._output_tokens)
-        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+        self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                                  self._server_address)
 
         if isinstance(self._llmresponse, str):
-            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                      "text")
         else:
-            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                      "json")
 
         # Set Span attributes (Extra)
@@ -204,31 +204,31 @@ def completion(version, environment, application_name,
                                  environment)
         self._span.set_attribute(SERVICE_NAME,
                                  application_name)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
                                  self._kwargs.get("user", ""))
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
                                  True)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
                                  self._input_tokens + self._output_tokens)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                                  cost)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
                                  self._tbt)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
                                  self._ttft)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                                  version)
         if capture_message_content:
             self._span.add_event(
-                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
                 attributes={
-                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                    SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
                 },
             )
             self._span.add_event(
-                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
                 attributes={
-                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                    SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                 },
             )
         self._span.set_status(Status(StatusCode.OK))
@@ -237,8 +237,8 @@ def completion(version, environment, application_name,
         attributes = create_metrics_attributes(
             service_name=application_name,
             deployment_environment=environment,
-            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
             request_model=request_model,
             server_address=self._server_address,
             server_port=self._server_port,
@@ -290,7 +290,7 @@ def completion(version, environment, application_name,
         server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
         request_model = kwargs.get("model", "gpt-4o")
 
-        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
@@ -338,37 +338,37 @@ def completion(version, environment, application_name,
 
            # Set base span attribues (OTel Semconv)
            span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-           span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                              SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-           span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                              SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+           span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                              SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+           span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                              SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                               request_model)
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
                               kwargs.get("seed", ""))
-           span.set_attribute(SemanticConvetion.SERVER_PORT,
+           span.set_attribute(SemanticConvention.SERVER_PORT,
                               server_port)
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                               kwargs.get("frequency_penalty", 0.0))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
                               kwargs.get("max_tokens", -1))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
                               kwargs.get("presence_penalty", 0.0))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
                               kwargs.get("stop", []))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
                               kwargs.get("temperature", 1.0))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
                               kwargs.get("top_p", 1.0))
-           span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+           span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
                               response_dict.get("id"))
-           span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+           span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                               response_dict.get('model'))
-           span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+           span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
                               input_tokens)
-           span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+           span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
                               output_tokens)
-           span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+           span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                               server_address)
 
            # Set base span attribues (Extras)
@@ -376,46 +376,46 @@ def completion(version, environment, application_name,
                               environment)
            span.set_attribute(SERVICE_NAME,
                               application_name)
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
                               kwargs.get("user", ""))
-           span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+           span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
                               False)
-           span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+           span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
                               input_tokens + output_tokens)
-           span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+           span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                               cost)
-           span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+           span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
                               end_time - start_time)
-           span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+           span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                               version)
            if capture_message_content:
                span.add_event(
-                   name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                   name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
                    attributes={
-                       SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                       SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
                    },
                )
 
            for i in range(kwargs.get('n',1)):
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+               span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
                                   [str(response_dict.get('choices')[i].get('finish_reason'))])
                if capture_message_content:
                    span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                       name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
                        attributes={
                            # pylint: disable=line-too-long
-                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                           SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
                        },
                    )
                if kwargs.get('tools'):
-                   span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                   span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
                                       str(response_dict.get('choices')[i].get('message').get('tool_calls')))
 
                if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                   span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                       "text")
                elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                   span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                       "json")
 
            span.set_status(Status(StatusCode.OK))
@@ -424,8 +424,8 @@ def completion(version, environment, application_name,
            attributes = create_metrics_attributes(
                service_name=application_name,
                deployment_environment=environment,
-               operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-               system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+               operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+               system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
                request_model=request_model,
                server_address=server_address,
                server_port=server_port,
@@ -495,7 +495,7 @@ def image_generate(version, environment, application_name,
         server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
         request_model = kwargs.get("model", "dall-e-2")
 
-        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
 
         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
@@ -521,21 +521,21 @@ def image_generate(version, environment, application_name,
            for items in response.data:
                # Set Span attributes (OTel Semconv)
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-               span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
-               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                  SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+               span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                                  SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
+               span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                                  SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
+               span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                                   request_model)
-               span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+               span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                                   server_address)
-               span.set_attribute(SemanticConvetion.SERVER_PORT,
+               span.set_attribute(SemanticConvention.SERVER_PORT,
                                   server_port)
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+               span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
                                   response.id)
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+               span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                                   response.model)
-               span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+               span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                   "image")
 
                # Set Span attributes (Extras)
@@ -543,29 +543,29 @@ def image_generate(version, environment, application_name,
                                  environment)
                span.set_attribute(SERVICE_NAME,
                                   application_name)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
+               span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
                                   image_size)
-               span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+               span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                                   version)
 
                if capture_message_content:
                    span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                       name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
+                           SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                        },
                    )
-                   attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
+                   attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                    span.add_event(
                        name=attribute_name,
                        attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
+                           SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
                        },
                    )
 
                images_count+=1
 
-           span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+           span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                               len(response.data) * cost)
            span.set_status(Status(StatusCode.OK))
 
@@ -573,8 +573,8 @@ def image_generate(version, environment, application_name,
            attributes = create_metrics_attributes(
                service_name=application_name,
                deployment_environment=environment,
-               operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
-               system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+               operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
+               system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
                request_model=request_model,
                server_address=server_address,
                server_port=server_port,
@@ -1,16 +1,20 @@
-
-
+"""
+Initializer of Auto Instrumentation of HuggingFace Transformer Functions
+"""
+
 from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.transformers.transformers import
+from openlit.instrumentation.transformers.transformers import pipeline_wrapper
 
-_instruments = ("transformers >= 4.
+_instruments = ("transformers >= 4.48.0",)
 
 class TransformersInstrumentor(BaseInstrumentor):
-    """
+    """
+    An instrumentor for HuggingFace Transformer library.
+    """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
@@ -28,10 +32,10 @@ class TransformersInstrumentor(BaseInstrumentor):
        wrap_function_wrapper(
            "transformers",
            "TextGenerationPipeline.__call__",
-
+           pipeline_wrapper(version, environment, application_name,
                            tracer, pricing_info, capture_message_content, metrics, disable_metrics),
        )
 
-   @staticmethod
    def _uninstrument(self, **kwargs):
+       # Proper uninstrumentation logic to revert patched methods
        pass
@@ -1,197 +1,61 @@
 """
-Module for monitoring
+Module for monitoring HF Transformers API calls.
 """
 
 import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    general_tokens,
-    create_metrics_attributes,
     set_server_address_and_port
 )
-
+
+from openlit.instrumentation.transformers.utils import (
+    process_chat_response,
+)
+from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def
-
+def pipeline_wrapper(version, environment, application_name,
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
        """
-
-       time, and records trace data using OpenTelemetry.
-
-       Parameters:
-       - wrapped (Callable): The original function that this wrapper will execute.
-       - instance (object): The instance to which the wrapped function belongs. This
-                            is used for instance methods. For static and classmethods,
-                            this may be None.
-       - args (tuple): Positional arguments passed to the wrapped function.
-       - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-       Returns:
-       - The result of the wrapped function call.
-
-       The wrapper initiates a span with the provided tracer, sets various attributes
-       on the span based on the function's execution and response, and ensures
-       errors are handled and logged appropriately.
+       Wraps the GenAI function call.
        """
 
        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
        request_model = instance.model.config.name_or_path
 
-       span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+       span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
-           end_time = time.time()
-
-           # pylint: disable=protected-access
-           forward_params = instance._forward_params
-
-           try:
-               if args and len(args) > 0:
-                   prompt = args[0]
-               else:
-                   prompt = kwargs.get("args", "")
-
-               input_tokens = general_tokens(prompt[0])
-
-               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-               span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                  SemanticConvetion.GEN_AI_SYSTEM_HUGGING_FACE)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                  request_model)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                  forward_params.get("temperature", "null"))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                  forward_params.get("top_p", "null"))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                  forward_params.get("max_length", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                  input_tokens)
-               span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                  server_address)
-               span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                  server_port)
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                  request_model)
-
-               span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                  environment)
-               span.set_attribute(SERVICE_NAME,
-                                  application_name)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                  False)
-               span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                  end_time - start_time)
-               span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                  version)
-               if capture_message_content:
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                       },
-                   )
-
-               i = 0
-               output_tokens = 0
-               for completion in response:
-                   if len(response) > 1:
-                       attribute_name = f"gen_ai.content.completion.{i}"
-                   else:
-                       attribute_name = SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT
-                   if capture_message_content:
-                       # pylint: disable=bare-except
-                       try:
-                           llm_response = completion.get('generated_text', '')
-                       except:
-                           llm_response = completion[i].get('generated_text', '')
-
-                       span.add_event(
-                           name=attribute_name,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
-                           },
-                       )
-                       output_tokens += general_tokens(llm_response)
-
-                   i=i+1
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                  output_tokens)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                  input_tokens + output_tokens)
-
-               # Calculate cost of the operation
-               cost = get_chat_model_cost(request_model,
-                                          pricing_info, input_tokens,
-                                          output_tokens)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                  cost)
-
-               span.set_status(Status(StatusCode.OK))
-
-               if disable_metrics is False:
-                   attributes = create_metrics_attributes(
-                       service_name=application_name,
-                       deployment_environment=environment,
-                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                       system=SemanticConvetion.GEN_AI_SYSTEM_HUGGING_FACE,
-                       request_model=request_model,
-                       server_address=server_address,
-                       server_port=server_port,
-                       response_model=request_model,
-                   )
-
-                   metrics["genai_client_usage_tokens"].record(
-                       input_tokens + output_tokens, attributes
-                   )
-                   metrics["genai_client_operation_duration"].record(
-                       end_time - start_time, attributes
-                   )
-                   metrics["genai_server_ttft"].record(
-                       end_time - start_time, attributes
-                   )
-                   metrics["genai_requests"].add(1, attributes)
-                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                   metrics["genai_cost"].record(cost, attributes)
-
-               # Return original response
-               return response
-
-           except Exception as e:
-               handle_exception(span, e)
-               logger.error("Error in trace creation: %s", e)
 
-
-
+           response = process_chat_response(
+               instance = instance,
+               response=response,
+               request_model=request_model,
+               pricing_info=pricing_info,
+               server_port=server_port,
+               server_address=server_address,
+               environment=environment,
+               application_name=application_name,
+               metrics=metrics,
+               start_time=start_time,
+               span=span,
+               args=args,
+               kwargs=kwargs,
+               capture_message_content=capture_message_content,
+               disable_metrics=disable_metrics,
+               version=version,
+           )
+
+           return response
 
     return wrapper