openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
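The diff below reproduces the most illustrative of these changes, openlit/instrumentation/langchain/langchain.py (+105 -492). The recurring pattern, here and across the other instrumentation modules, is a migration from OpenLIT-private span attributes to OpenTelemetry's standard keys: GEN_AI_ENVIRONMENT becomes the DEPLOYMENT_ENVIRONMENT resource key, GEN_AI_APPLICATION_NAME becomes SERVICE_NAME, and GEN_AI_TYPE becomes GEN_AI_OPERATION. A minimal sketch of the 1.33.10 style (the quoted strings in comments are assumed OTel semantic-convention values, not copied from openlit source):

# Hedged sketch of the new attribute style; not openlit's literal code.
from opentelemetry import trace
from opentelemetry.sdk.resources import SERVICE_NAME, DEPLOYMENT_ENVIRONMENT

tracer = trace.get_tracer("demo")

def annotate(environment: str, application_name: str) -> None:
    with tracer.start_as_current_span("langchain workflow") as span:
        # 1.33.8 used SemanticConvetion.GEN_AI_ENVIRONMENT / GEN_AI_APPLICATION_NAME;
        # 1.33.10 switches to the standard OTel resource keys.
        span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)  # "deployment.environment"
        span.set_attribute(SERVICE_NAME, application_name)       # "service.name"
        # GEN_AI_TYPE -> GEN_AI_OPERATION; "framework" is an assumed constant value.
        span.set_attribute("gen_ai.operation.name", "framework")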
@@ -4,9 +4,17 @@ Module for monitoring Langchain applications.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import handle_exception, get_chat_model_cost, general_tokens
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    handle_exception,
+    general_tokens,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
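The import block now pulls in time plus streaming-latency helpers (calculate_ttft, calculate_tbt) and create_metrics_attributes from the expanded openlit/__helpers.py (+88 lines in the file list). Their signatures are not visible in this hunk; as a hypothetical sketch, the two latency figures reduce to timestamp arithmetic:

# Hypothetical stand-ins for openlit.__helpers.calculate_ttft / calculate_tbt;
# the real signatures are not shown in this diff.
def time_to_first_token(request_start, first_token_at):
    """Seconds from sending the request to receiving the first token."""
    return first_token_at - request_start

def time_between_tokens(token_timestamps):
    """Mean gap, in seconds, between consecutive streamed tokens."""
    if len(token_timestamps) < 2:
        return 0.0
    gaps = [b - a for a, b in zip(token_timestamps, token_timestamps[1:])]
    return sum(gaps) / len(gaps)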
@@ -83,11 +91,11 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
                                     SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                     gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                     environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_FRAMEWORK)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_FRAMEWORK)
+                span.set_attribute(SERVICE_NAME,
                                     application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_RETRIEVAL_SOURCE,
                                     response[0].metadata["source"])
@@ -157,11 +165,11 @@ def hub(gen_ai_endpoint, version, environment, application_name, tracer,
                                     SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                     gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                     environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_FRAMEWORK)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_FRAMEWORK)
+                span.set_attribute(SERVICE_NAME,
                                     application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_HUB_OWNER,
                                     response.metadata["lc_hub_owner"])
@@ -180,148 +188,7 @@ def hub(gen_ai_endpoint, version, environment, application_name, tracer,
 
     return wrapper
 
-
-def allm(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Creates a wrapper around a function call to trace and log its execution metrics.
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - trace_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
-    """
-
-    async def wrapper(wrapped, instance, args, kwargs):
-        """
-        An inner wrapper function that executes the wrapped function, measures execution
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-                             is used for instance methods. For static and classmethods,
-                             this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = await wrapped(*args, **kwargs)
-
-            try:
-                if args:
-                    prompt = str(args[0]) if args[0] is not None else ""
-                else:
-                    prompt = ""
-                input_tokens = general_tokens(prompt)
-                output_tokens = general_tokens(response)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(
-                    str(get_attribute_from_instance_or_kwargs(instance, 'model')),
-                    pricing_info, input_tokens, output_tokens
-                )
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_FRAMEWORK)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'model')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'temperature')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'top_k')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'top_p')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response,
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            str(get_attribute_from_instance_or_kwargs(instance, 'model'))
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def llm(gen_ai_endpoint, version, environment, application_name,
+def chat(gen_ai_endpoint, version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Creates a wrapper around a function call to trace and log its execution metrics.
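Note that the async allm wrapper is deleted outright rather than updated: per the file list, async LangChain support moves to the new openlit/instrumentation/langchain/async_langchain.py (+384), and the synchronous llm is renamed to chat. Factories like chat return a wrapt-style wrapper; a rough sketch of how one gets attached (the target module and method below are illustrative assumptions, not openlit's actual patch points, which live in the package's langchain/__init__.py):

from opentelemetry import trace
from wrapt import wrap_function_wrapper
from openlit.instrumentation.langchain.langchain import chat

# Illustrative wiring only; openlit passes shared pricing/metrics state here.
wrap_function_wrapper(
    "langchain_core.language_models.chat_models",  # assumed target module
    "BaseChatModel.invoke",                        # assumed target method
    chat(
        gen_ai_endpoint="langchain.chat",
        version="1.33.10",
        environment="production",
        application_name="demo-app",
        tracer=trace.get_tracer("openlit-demo"),
        pricing_info={},
        trace_content=True,
        metrics={},
        disable_metrics=True,  # empty metrics dict is never touched
    ),
)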
@@ -330,7 +197,6 @@ def llm(gen_ai_endpoint, version, environment, application_name,
     log its operation, and trace its execution using OpenTelemetry.
 
     Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
     - version (str): The version of the Langchain application.
     - environment (str): The deployment environment (e.g., 'production', 'development').
     - application_name (str): Name of the Langchain application.
@@ -363,360 +229,105 @@ def llm(gen_ai_endpoint, version, environment, application_name,
         on the span based on the function's execution and response, and ensures
         errors are handled and logged appropriately.
         """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            try:
-                if args:
-                    prompt = str(args[0]) if args[0] is not None else ""
-                else:
-                    prompt = ""
-                input_tokens = general_tokens(prompt)
-                output_tokens = general_tokens(response)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(
-                    str(get_attribute_from_instance_or_kwargs(instance, 'model')),
-                    pricing_info, input_tokens, output_tokens
-                )
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_FRAMEWORK)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'model')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'temperature')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'top_k')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                    str(get_attribute_from_instance_or_kwargs(instance, 'top_p')))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response,
-                        },
-                    )
 
-                span.set_status(Status(StatusCode.OK))
+        server_address, server_port = "NOT_FOUND", "NOT_FOUND"
 
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            str(get_attribute_from_instance_or_kwargs(instance, 'model'))
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+        if hasattr(instance, "model_id"):
+            request_model = instance.model_id
+        elif hasattr(instance, "model"):
+            request_model = instance.model
+        elif hasattr(instance, "model_name"):
+            request_model = instance.model_name
+        else:
+            request_model = "NOT_FOUND"
 
-                # Return original response
-                return response
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Creates a wrapper around a function call to trace and log its execution metrics.
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - trace_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        An inner wrapper function that executes the wrapped function, measures execution
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-                             is used for instance methods. For static and classmethods,
-                             this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
-                prompt = ""
-                if hasattr(response, 'usage_metadata') and response.usage_metadata:
-                    token_usage = response.usage_metadata
-                    input_tokens = token_usage.get("input_tokens", 0)
-                    output_tokens = token_usage.get("output_tokens", 0)
-                    model = instance.model_id
-                    prompt = "" if isinstance(args[0], list) else args[0]
-                else:
-                    if not isinstance(response, dict) or "output_text" not in response:
-                        return response
-                    # Fallback: Calculate tokens manually if response_metadata is missing
-                    model = "gpt-4o-mini" # Fallback model
-                    input_texts = [
-                        doc.page_content for doc in response.get("input_documents", [])
-                        if isinstance(doc.page_content, str)
-                    ]
-                    input_tokens = sum(general_tokens(text) for text in input_texts)
-                    output_text = response.get("output_text", "")
-                    output_tokens = general_tokens(output_text)
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "") or args[0]
+                formatted_messages = []
+
+                for message in message_prompt:
+                    # Handle the case where message is a tuple
+                    if isinstance(message, tuple) and len(message) == 2:
+                        role, content = message
+                    # Handle the case where message is a dictionary
+                    elif isinstance(message, dict):
+                        role = message["role"]
+                        content = message["content"]
+                    else:
+                        continue
+
+                    # Check if the content is a list
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
+                        )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+
+                # Join all formatted messages with newline
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = general_tokens(str(prompt))
+                output_tokens = general_tokens(str(response))
 
                 # Calculate cost of the operation
                 cost = get_chat_model_cost(
-                    model,
+                    request_model,
                     pricing_info, input_tokens, output_tokens
                 )
 
+                # Set base span attribues (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                     SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    model)
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                     str(getattr(instance, 'temperature', 1)))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                                     str(getattr(instance, 'top_k', 1)))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                                     str(getattr(instance, 'top_p', 1)))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
                                     input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
                                     output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    completion_content = getattr(response, 'content', "")
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: completion_content,
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            model
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def achat(gen_ai_endpoint, version, environment, application_name,
-          tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Creates a wrapper around a function call to trace and log its execution metrics.
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - trace_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
-    """
-
-    async def wrapper(wrapped, instance, args, kwargs):
-        """
-        An inner wrapper function that executes the wrapped function, measures execution
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-                             is used for instance methods. For static and classmethods,
-                             this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = await wrapped(*args, **kwargs)
-
-            try:
-                prompt = ""
-                if hasattr(response, 'usage_metadata') and response.usage_metadata:
-                    token_usage = response.usage_metadata
-                    input_tokens = token_usage.get("input_tokens", 0)
-                    output_tokens = token_usage.get("output_tokens", 0)
-                    model = instance.model_id
-                    prompt = "" if isinstance(args[0], list) else args[0]
-
-                else:
-                    if not isinstance(response, dict) or "output_text" not in response:
-                        return response
-                    # Fallback: Calculate tokens manually if response_metadata is missing
-                    model = "gpt-4o-mini" # Fallback model
-                    input_texts = [
-                        doc.page_content for doc in response.get("input_documents", [])
-                        if isinstance(doc.page_content, str)
-                    ]
-                    input_tokens = sum(general_tokens(text) for text in input_texts)
-                    output_text = response.get("output_text", "")
-                    output_tokens = general_tokens(output_text)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(
-                    model,
-                    pricing_info, input_tokens, output_tokens
-                )
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
                                     application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    model)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    str(getattr(instance, 'temperature',1)))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                    str(getattr(instance, 'top_k',1)))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                    str(getattr(instance, 'top_p',1)))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                     False)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                     input_tokens + output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
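The prompt assembly added in this hunk is easy to exercise in isolation. A standalone copy of the same tuple/dict/list handling (span plumbing and general_tokens omitted):

# Standalone copy of the message-formatting logic added above, for illustration.
def format_messages(message_prompt):
    formatted_messages = []
    for message in message_prompt:
        if isinstance(message, tuple) and len(message) == 2:
            role, content = message            # ("user", "Hi") style
        elif isinstance(message, dict):
            role = message["role"]             # {"role": ..., "content": ...} style
            content = message["content"]
        else:
            continue                           # skip anything unrecognised
        if isinstance(content, list):          # multimodal content parts
            content_str = ", ".join(
                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                if "type" in item else f'text: {item["text"]}'
                for item in content
            )
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")
    return "\n".join(formatted_messages)

# format_messages([("system", "Be terse."), {"role": "user", "content": "Hi"}])
# -> "system: Be terse.\nuser: Hi"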
@@ -727,32 +338,34 @@ def achat(gen_ai_endpoint, version, environment, application_name,
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content,
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response,
                         },
                     )
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            model
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_LANGCHAIN,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
+                    metrics["genai_client_usage_tokens"].record(
                         input_tokens + output_tokens, attributes
                     )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_requests"].add(1, attributes)
                     metrics["genai_completion_tokens"].add(output_tokens, attributes)
                     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)