openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
@@ -273,7 +273,7 @@ class QdrantInstrumentor(BaseInstrumentor):
273
273
  tracer = kwargs.get("tracer")
274
274
  metrics = kwargs.get("metrics_dict")
275
275
  pricing_info = kwargs.get("pricing_info")
276
- trace_content = kwargs.get("trace_content")
276
+ capture_message_content = kwargs.get("capture_message_content")
277
277
  disable_metrics = kwargs.get("disable_metrics")
278
278
  version = importlib.metadata.version("qdrant-client")
279
279
 
@@ -286,7 +286,7 @@ class QdrantInstrumentor(BaseInstrumentor):
286
286
  wrap_package,
287
287
  wrap_object,
288
288
  wrapper(gen_ai_endpoint, version, environment, application_name,
289
- tracer, pricing_info, trace_content, metrics, disable_metrics),
289
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
290
290
  )
291
291
 
292
292
 
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
5
5
 
6
6
  import logging
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import handle_exception
10
10
  from openlit.semcov import SemanticConvetion
11
11
 
@@ -25,7 +25,7 @@ def object_count(obj):
25
25
  return cnt
26
26
 
27
27
  def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
28
- tracer, pricing_info, trace_content, metrics, disable_metrics):
28
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
29
29
  """
30
30
  Creates a wrapper around a function call to trace and log its execution metrics.
31
31
 
@@ -39,7 +39,7 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
39
39
  - application_name (str): Name of the Langchain application.
40
40
  - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
41
41
  - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
42
- - trace_content (bool): Flag indicating whether to trace the content of the response.
42
+ - capture_message_content (bool): Flag indicating whether to trace the content of the response.
43
43
 
44
44
  Returns:
45
45
  - function: A higher-order function that takes a function 'wrapped' and returns
@@ -73,9 +73,9 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
73
73
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
74
74
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
75
75
  gen_ai_endpoint)
76
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
76
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
77
77
  environment)
78
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
78
+ span.set_attribute(SERVICE_NAME,
79
79
  application_name)
80
80
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
81
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -241,11 +241,11 @@ def async_general_wrap(gen_ai_endpoint, version, environment, application_name,
241
241
  attributes = {
242
242
  TELEMETRY_SDK_NAME:
243
243
  "openlit",
244
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
244
+ SERVICE_NAME:
245
245
  application_name,
246
246
  SemanticConvetion.DB_SYSTEM:
247
247
  SemanticConvetion.DB_SYSTEM_QDRANT,
248
- SemanticConvetion.GEN_AI_ENVIRONMENT:
248
+ DEPLOYMENT_ENVIRONMENT:
249
249
  environment,
250
250
  SemanticConvetion.GEN_AI_OPERATION:
251
251
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
@@ -5,7 +5,7 @@ Module for monitoring Qdrant.
5
5
 
6
6
  import logging
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import handle_exception
10
10
  from openlit.semcov import SemanticConvetion
11
11
 
@@ -25,7 +25,7 @@ def object_count(obj):
25
25
  return cnt
26
26
 
27
27
  def general_wrap(gen_ai_endpoint, version, environment, application_name,
28
- tracer, pricing_info, trace_content, metrics, disable_metrics):
28
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
29
29
  """
30
30
  Creates a wrapper around a function call to trace and log its execution metrics.
31
31
 
@@ -39,7 +39,7 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
39
39
  - application_name (str): Name of the Langchain application.
40
40
  - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
41
41
  - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
42
- - trace_content (bool): Flag indicating whether to trace the content of the response.
42
+ - capture_message_content (bool): Flag indicating whether to trace the content of the response.
43
43
 
44
44
  Returns:
45
45
  - function: A higher-order function that takes a function 'wrapped' and returns
@@ -73,9 +73,9 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
73
73
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
74
74
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
75
75
  gen_ai_endpoint)
76
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
76
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
77
77
  environment)
78
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
78
+ span.set_attribute(SERVICE_NAME,
79
79
  application_name)
80
80
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
81
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
@@ -248,11 +248,11 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
248
248
  attributes = {
249
249
  TELEMETRY_SDK_NAME:
250
250
  "openlit",
251
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
251
+ SERVICE_NAME:
252
252
  application_name,
253
253
  SemanticConvetion.DB_SYSTEM:
254
254
  SemanticConvetion.DB_SYSTEM_QDRANT,
255
- SemanticConvetion.GEN_AI_ENVIRONMENT:
255
+ DEPLOYMENT_ENVIRONMENT:
256
256
  environment,
257
257
  SemanticConvetion.GEN_AI_OPERATION:
258
258
  SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
@@ -29,7 +29,7 @@ class RekaInstrumentor(BaseInstrumentor):
29
29
  tracer = kwargs.get("tracer")
30
30
  metrics = kwargs.get("metrics_dict")
31
31
  pricing_info = kwargs.get("pricing_info", {})
32
- trace_content = kwargs.get("trace_content", False)
32
+ capture_message_content = kwargs.get("capture_message_content", False)
33
33
  disable_metrics = kwargs.get("disable_metrics")
34
34
  version = importlib.metadata.version("reka-api")
35
35
 
@@ -37,16 +37,16 @@ class RekaInstrumentor(BaseInstrumentor):
37
37
  wrap_function_wrapper(
38
38
  "reka.chat.client",
39
39
  "ChatClient.create",
40
- chat("reka.chat", version, environment, application_name,
41
- tracer, pricing_info, trace_content, metrics, disable_metrics),
40
+ chat(version, environment, application_name,
41
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
42
42
  )
43
43
 
44
44
  # async chat
45
45
  wrap_function_wrapper(
46
46
  "reka.chat.client",
47
47
  "AsyncChatClient.create",
48
- async_chat("reka.chat", version, environment, application_name,
49
- tracer, pricing_info, trace_content, metrics, disable_metrics),
48
+ async_chat(version, environment, application_name,
49
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
50
50
  )
51
51
 
52
52
  def _uninstrument(self, **kwargs):
@@ -1,33 +1,34 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
2
1
  """
3
2
  Module for monitoring Reka API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import (
10
- handle_exception,
11
10
  get_chat_model_cost,
11
+ handle_exception,
12
+ create_metrics_attributes,
13
+ set_server_address_and_port
12
14
  )
13
15
  from openlit.semcov import SemanticConvetion
14
16
 
15
17
  # Initialize logger for logging potential issues and operations
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def async_chat(gen_ai_endpoint, version, environment, application_name,
19
- tracer, pricing_info, trace_content, metrics, disable_metrics):
20
+ def async_chat(version, environment, application_name,
21
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
20
22
  """
21
23
  Generates a telemetry wrapper for chat to collect metrics.
22
24
 
23
25
  Args:
24
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
25
26
  version: Version of the monitoring package.
26
27
  environment: Deployment environment (e.g., production, staging).
27
28
  application_name: Name of the application using the Reka API.
28
29
  tracer: OpenTelemetry tracer for creating spans.
29
30
  pricing_info: Information used for calculating the cost of Reka usage.
30
- trace_content: Flag indicating whether to trace the actual content.
31
+ capture_message_content: Flag indicating whether to trace the actual content.
31
32
 
32
33
  Returns:
33
34
  A function that wraps the chat method to add telemetry.
@@ -50,8 +51,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
50
51
  The response from the original 'chat' method.
51
52
  """
52
53
 
53
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
54
+ server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
55
+ request_model = kwargs.get("model", "reka-core-20240501")
56
+
57
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
+
59
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
60
+ start_time = time.time()
54
61
  response = await wrapped(*args, **kwargs)
62
+ end_time = time.time()
55
63
 
56
64
  try:
57
65
  # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
63
71
 
64
72
  if isinstance(content, list):
65
73
  content_str = ", ".join(
66
- # pylint: disable=line-too-long
67
74
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
68
75
  if "type" in item else f'text: {item["text"]}'
69
76
  for item in content
@@ -73,23 +80,69 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
73
80
  formatted_messages.append(f"{role}: {content}")
74
81
  prompt = "\n".join(formatted_messages)
75
82
 
76
- # Set base span attribues
83
+ input_tokens = response.usage.input_tokens
84
+ output_tokens = response.usage.output_tokens
85
+
86
+ # Calculate cost of the operation
87
+ cost = get_chat_model_cost(request_model,
88
+ pricing_info, input_tokens, output_tokens)
89
+
90
+ # Set Span attributes (OTel Semconv)
77
91
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
78
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
79
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
80
92
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
93
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
82
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
83
- gen_ai_endpoint)
84
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
94
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
95
+ SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
96
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
97
+ request_model)
98
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
99
+ server_port)
100
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
101
+ kwargs.get("seed", ""))
102
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
103
+ kwargs.get("max_tokens", -1))
104
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
105
+ kwargs.get("stop", []))
106
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
+ kwargs.get("presence_penalty", 0.0))
108
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
109
+ kwargs.get("temperature", 0.4))
110
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
111
+ kwargs.get("top_k", 1.0))
112
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
113
+ kwargs.get("top_p", 1.0))
114
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
115
+ [response.responses[0].finish_reason])
116
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
117
+ response.id)
118
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
119
+ response.model)
120
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
121
+ input_tokens)
122
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
123
+ output_tokens)
124
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
125
+ server_address)
126
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
127
+ 'text')
128
+
129
+ # Set Span attributes (Extra)
130
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
85
131
  environment)
86
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
132
+ span.set_attribute(SERVICE_NAME,
87
133
  application_name)
88
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
89
- kwargs.get("model", "reka-core"))
90
134
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
91
135
  False)
92
- if trace_content:
136
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
137
+ input_tokens + output_tokens)
138
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
+ cost)
140
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
141
+ end_time - start_time)
142
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
143
+ version)
144
+
145
+ if capture_message_content:
93
146
  span.add_event(
94
147
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
95
148
  attributes={
@@ -99,51 +152,36 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
99
152
  span.add_event(
100
153
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
101
154
  attributes={
102
- # pylint: disable=line-too-long
103
155
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
104
156
  },
105
157
  )
106
158
 
107
- prompt_tokens = response.usage.input_tokens
108
- completion_tokens = response.usage.output_tokens
109
- total_tokens = prompt_tokens + completion_tokens
110
- # Calculate cost of the operation
111
- cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
112
- pricing_info, prompt_tokens, completion_tokens)
113
-
114
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
115
- prompt_tokens)
116
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
117
- completion_tokens)
118
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
119
- total_tokens)
120
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
121
- [response.responses[0].finish_reason])
122
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
123
- cost)
124
-
125
159
  span.set_status(Status(StatusCode.OK))
126
160
 
127
161
  if disable_metrics is False:
128
- attributes = {
129
- TELEMETRY_SDK_NAME:
130
- "openlit",
131
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
132
- application_name,
133
- SemanticConvetion.GEN_AI_SYSTEM:
134
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
135
- SemanticConvetion.GEN_AI_ENVIRONMENT:
136
- environment,
137
- SemanticConvetion.GEN_AI_OPERATION:
138
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
139
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
140
- kwargs.get("model", "reka-core")
141
- }
162
+ attributes = create_metrics_attributes(
163
+ service_name=application_name,
164
+ deployment_environment=environment,
165
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
166
+ system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
167
+ request_model=request_model,
168
+ server_address=server_address,
169
+ server_port=server_port,
170
+ response_model=response.model,
171
+ )
142
172
 
173
+ metrics["genai_client_usage_tokens"].record(
174
+ input_tokens + output_tokens, attributes
175
+ )
176
+ metrics["genai_client_operation_duration"].record(
177
+ end_time - start_time, attributes
178
+ )
179
+ metrics["genai_server_ttft"].record(
180
+ end_time - start_time, attributes
181
+ )
143
182
  metrics["genai_requests"].add(1, attributes)
144
- metrics["genai_total_tokens"].add(total_tokens, attributes)
145
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
146
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
183
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
147
185
  metrics["genai_cost"].record(cost, attributes)
148
186
 
149
187
  # Return original response
@@ -1,33 +1,34 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
2
1
  """
3
2
  Module for monitoring Reka API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
8
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
9
9
  from openlit.__helpers import (
10
- handle_exception,
11
10
  get_chat_model_cost,
11
+ handle_exception,
12
+ create_metrics_attributes,
13
+ set_server_address_and_port
12
14
  )
13
15
  from openlit.semcov import SemanticConvetion
14
16
 
15
17
  # Initialize logger for logging potential issues and operations
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def chat(gen_ai_endpoint, version, environment, application_name,
19
- tracer, pricing_info, trace_content, metrics, disable_metrics):
20
+ def chat(version, environment, application_name,
21
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
20
22
  """
21
23
  Generates a telemetry wrapper for chat to collect metrics.
22
24
 
23
25
  Args:
24
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
25
26
  version: Version of the monitoring package.
26
27
  environment: Deployment environment (e.g., production, staging).
27
28
  application_name: Name of the application using the Reka API.
28
29
  tracer: OpenTelemetry tracer for creating spans.
29
30
  pricing_info: Information used for calculating the cost of Reka usage.
30
- trace_content: Flag indicating whether to trace the actual content.
31
+ capture_message_content: Flag indicating whether to trace the actual content.
31
32
 
32
33
  Returns:
33
34
  A function that wraps the chat method to add telemetry.
@@ -50,8 +51,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
50
51
  The response from the original 'chat' method.
51
52
  """
52
53
 
53
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
54
+ server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
55
+ request_model = kwargs.get("model", "reka-core-20240501")
56
+
57
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
58
+
59
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
60
+ start_time = time.time()
54
61
  response = wrapped(*args, **kwargs)
62
+ end_time = time.time()
55
63
 
56
64
  try:
57
65
  # Format 'messages' into a single string
@@ -63,7 +71,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
63
71
 
64
72
  if isinstance(content, list):
65
73
  content_str = ", ".join(
66
- # pylint: disable=line-too-long
67
74
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
68
75
  if "type" in item else f'text: {item["text"]}'
69
76
  for item in content
@@ -73,23 +80,69 @@ def chat(gen_ai_endpoint, version, environment, application_name,
73
80
  formatted_messages.append(f"{role}: {content}")
74
81
  prompt = "\n".join(formatted_messages)
75
82
 
76
- # Set base span attribues
83
+ input_tokens = response.usage.input_tokens
84
+ output_tokens = response.usage.output_tokens
85
+
86
+ # Calculate cost of the operation
87
+ cost = get_chat_model_cost(request_model,
88
+ pricing_info, input_tokens, output_tokens)
89
+
90
+ # Set Span attributes (OTel Semconv)
77
91
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
78
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
79
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
80
92
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
93
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
82
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
83
- gen_ai_endpoint)
84
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
94
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
95
+ SemanticConvetion.GEN_AI_SYSTEM_REKAAI)
96
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
97
+ request_model)
98
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
99
+ server_port)
100
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
101
+ kwargs.get("seed", ""))
102
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
103
+ kwargs.get("max_tokens", -1))
104
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
105
+ kwargs.get("stop", []))
106
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
107
+ kwargs.get("presence_penalty", 0.0))
108
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
109
+ kwargs.get("temperature", 0.4))
110
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
111
+ kwargs.get("top_k", 1.0))
112
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
113
+ kwargs.get("top_p", 1.0))
114
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
115
+ [response.responses[0].finish_reason])
116
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
117
+ response.id)
118
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
119
+ response.model)
120
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
121
+ input_tokens)
122
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
123
+ output_tokens)
124
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
125
+ server_address)
126
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
127
+ 'text')
128
+
129
+ # Set Span attributes (Extra)
130
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
85
131
  environment)
86
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
132
+ span.set_attribute(SERVICE_NAME,
87
133
  application_name)
88
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
89
- kwargs.get("model", "reka-core"))
90
134
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
91
135
  False)
92
- if trace_content:
136
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
137
+ input_tokens + output_tokens)
138
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
+ cost)
140
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
141
+ end_time - start_time)
142
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
143
+ version)
144
+
145
+ if capture_message_content:
93
146
  span.add_event(
94
147
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
95
148
  attributes={
@@ -99,51 +152,36 @@ def chat(gen_ai_endpoint, version, environment, application_name,
99
152
  span.add_event(
100
153
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
101
154
  attributes={
102
- # pylint: disable=line-too-long
103
155
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
104
156
  },
105
157
  )
106
158
 
107
- prompt_tokens = response.usage.input_tokens
108
- completion_tokens = response.usage.output_tokens
109
- total_tokens = prompt_tokens + completion_tokens
110
- # Calculate cost of the operation
111
- cost = get_chat_model_cost(kwargs.get("model", "reka-core"),
112
- pricing_info, prompt_tokens, completion_tokens)
113
-
114
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
115
- prompt_tokens)
116
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
117
- completion_tokens)
118
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
119
- total_tokens)
120
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
121
- [response.responses[0].finish_reason])
122
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
123
- cost)
124
-
125
159
  span.set_status(Status(StatusCode.OK))
126
160
 
127
161
  if disable_metrics is False:
128
- attributes = {
129
- TELEMETRY_SDK_NAME:
130
- "openlit",
131
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
132
- application_name,
133
- SemanticConvetion.GEN_AI_SYSTEM:
134
- SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
135
- SemanticConvetion.GEN_AI_ENVIRONMENT:
136
- environment,
137
- SemanticConvetion.GEN_AI_OPERATION:
138
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
139
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
140
- kwargs.get("model", "reka-core")
141
- }
162
+ attributes = create_metrics_attributes(
163
+ service_name=application_name,
164
+ deployment_environment=environment,
165
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
166
+ system=SemanticConvetion.GEN_AI_SYSTEM_REKAAI,
167
+ request_model=request_model,
168
+ server_address=server_address,
169
+ server_port=server_port,
170
+ response_model=response.model,
171
+ )
142
172
 
173
+ metrics["genai_client_usage_tokens"].record(
174
+ input_tokens + output_tokens, attributes
175
+ )
176
+ metrics["genai_client_operation_duration"].record(
177
+ end_time - start_time, attributes
178
+ )
179
+ metrics["genai_server_ttft"].record(
180
+ end_time - start_time, attributes
181
+ )
143
182
  metrics["genai_requests"].add(1, attributes)
144
- metrics["genai_total_tokens"].add(total_tokens, attributes)
145
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
146
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
183
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
184
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
147
185
  metrics["genai_cost"].record(cost, attributes)
148
186
 
149
187
  # Return original response
@@ -29,7 +29,7 @@ class TogetherInstrumentor(BaseInstrumentor):
29
29
  tracer = kwargs.get("tracer")
30
30
  metrics = kwargs.get("metrics_dict")
31
31
  pricing_info = kwargs.get("pricing_info", {})
32
- trace_content = kwargs.get("trace_content", False)
32
+ capture_message_content = kwargs.get("capture_message_content", False)
33
33
  disable_metrics = kwargs.get("disable_metrics")
34
34
  version = importlib.metadata.version("together")
35
35
 
@@ -37,32 +37,32 @@ class TogetherInstrumentor(BaseInstrumentor):
37
37
  wrap_function_wrapper(
38
38
  "together.resources.chat.completions",
39
39
  "ChatCompletions.create",
40
- completion("together.chat.completions", version, environment, application_name,
41
- tracer, pricing_info, trace_content, metrics, disable_metrics),
40
+ completion(version, environment, application_name,
41
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
42
42
  )
43
43
 
44
44
  # Image generate
45
45
  wrap_function_wrapper(
46
46
  "together.resources.images",
47
47
  "Images.generate",
48
- image_generate("together.image.generate", version, environment, application_name,
49
- tracer, pricing_info, trace_content, metrics, disable_metrics),
48
+ image_generate(version, environment, application_name,
49
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
50
50
  )
51
51
 
52
52
  # Chat completions
53
53
  wrap_function_wrapper(
54
54
  "together.resources.chat.completions",
55
55
  "AsyncChatCompletions.create",
56
- async_completion("together.chat.completions", version, environment, application_name,
57
- tracer, pricing_info, trace_content, metrics, disable_metrics),
56
+ async_completion(version, environment, application_name,
57
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
58
58
  )
59
59
 
60
60
  # Image generate
61
61
  wrap_function_wrapper(
62
62
  "together.resources.images",
63
63
  "AsyncImages.generate",
64
- async_image_generate("together.image.generate", version, environment, application_name,
65
- tracer, pricing_info, trace_content, metrics, disable_metrics),
64
+ async_image_generate(version, environment, application_name,
65
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics),
66
66
  )
67
67
 
68
68
  def _uninstrument(self, **kwargs):