openlit 1.34.8__py3-none-any.whl → 1.34.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
+ """
+ Groq OpenTelemetry instrumentation utility functions
+ """
+ import time
+
+ from opentelemetry.trace import Status, StatusCode
+
+ from openlit.__helpers import (
+     calculate_ttft,
+     response_as_dict,
+     calculate_tbt,
+     get_chat_model_cost,
+     common_span_attributes,
+     record_completion_metrics,
+ )
+ from openlit.semcov import SemanticConvention
+
+ def format_content(messages):
+     """
+     Process a list of messages to extract content.
+     """
+
+     formatted_messages = []
+     for message in messages:
+         role = message["role"]
+         content = message["content"]
+
+         if isinstance(content, list):
+             content_str = ", ".join(
+                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                 if "type" in item else f'text: {item["text"]}'
+                 for item in content
+             )
+             formatted_messages.append(f"{role}: {content_str}")
+         else:
+             formatted_messages.append(f"{role}: {content}")
+
+     return "\n".join(formatted_messages)
+
+ def process_chunk(scope, chunk):
+     """
+     Process a chunk of response data and update state.
+     """
+
+     end_time = time.time()
+     # Record the timestamp for the current chunk
+     scope._timestamps.append(end_time)
+
+     if len(scope._timestamps) == 1:
+         # Calculate time to first chunk
+         scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+     chunked = response_as_dict(chunk)
+
+     # Collect message IDs and aggregated response from events
+     if (len(chunked.get("choices", [])) > 0 and
+             "delta" in chunked.get("choices")[0] and
+             "content" in chunked.get("choices")[0].get("delta", {})):
+
+         content = chunked.get("choices")[0].get("delta").get("content")
+         if content:
+             scope._llmresponse += content
+
+     if chunked.get('x_groq') is not None:
+         if chunked.get('x_groq').get('usage') is not None:
+             scope._input_tokens = chunked.get('x_groq').get('usage').get('prompt_tokens')
+             scope._output_tokens = chunked.get('x_groq').get('usage').get('completion_tokens')
+             scope._response_id = chunked.get('x_groq').get('id')
+             scope._response_model = chunked.get('x_groq').get('model')
+             scope._finish_reason = chunked.get('choices', [{}])[0].get('finish_reason')
+             scope._system_fingerprint = chunked.get('x_groq').get('system_fingerprint')
+             scope._end_time = time.time()
+
+
+ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version, is_stream):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     if len(scope._timestamps) > 1:
+         scope._tbt = calculate_tbt(scope._timestamps)
+
+     prompt = format_content(scope._kwargs.get("messages", []))
+     request_model = scope._kwargs.get("model", "mixtral-8x7b-32768")
+
+     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GROQ,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_completion_tokens", -1))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._kwargs.get("service_tier", "on_demand"))
+
+     # Span Attributes for Response parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT, scope._system_fingerprint)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Tools
+     if scope._tools:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", {}).get("name", ""))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", {}).get("arguments", "")))
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed one the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+             },
+         )
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GROQ,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+             cost, scope._tbt, scope._ttft)
+
+ def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content=False, disable_metrics=False, version=""):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=True)
+
+ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     # Create scope object
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._llmresponse = " ".join(
+         (choice.get("message", {}).get("content") or "")
+         for choice in response_dict.get("choices", [])
+     )
+     scope._response_id = response_dict.get("id")
+     scope._response_model = response_dict.get("model")
+     scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+     scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._system_fingerprint = response_dict.get("system_fingerprint")
+     scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", ""))
+
+     # Handle tool calls
+     if scope._kwargs.get("tools"):
+         scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+     else:
+         scope._tools = None
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=False)
+
+     return response
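
For orientation, a minimal sketch of what the new format_content helper produces for a Groq-style messages list. The import path is an assumption based on the package layout (the diff viewer does not show the file name), and the sample messages are purely illustrative:

# Sketch: flattening a messages list with the helper added above.
# Assumption: the new file ships as openlit/instrumentation/groq/utils.py.
from openlit.instrumentation.groq.utils import format_content

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [{"type": "text", "text": "What is OpenTelemetry?"}]},
]

print(format_content(messages))
# Expected output:
# system: You are a helpful assistant.
# user: text: What is OpenTelemetry?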
@@ -41,7 +41,7 @@ def _dispatch_async(async_chat_wrap, async_emb_wrap):
 
  class OllamaInstrumentor(BaseInstrumentor):
      """
-     An instrumentor for Ollama's client library.
+     An instrumentor for Ollama client library.
      """
 
      def instrumentation_dependencies(self) -> Collection[str]:
@@ -51,7 +51,6 @@ class OllamaInstrumentor(BaseInstrumentor):
          application_name = kwargs.get("application_name", "default_application")
          environment = kwargs.get("environment", "default_environment")
          tracer = kwargs.get("tracer")
-         event_provider = kwargs.get("event_provider")
          metrics = kwargs.get("metrics_dict")
          pricing_info = kwargs.get("pricing_info", {})
          capture_message_content = kwargs.get("capture_message_content", False)
@@ -61,22 +60,22 @@ class OllamaInstrumentor(BaseInstrumentor):
          # Build wrapper factories for chat and embeddings
          sync_chat_wrap = chat(
              version, environment, application_name,
-             tracer, event_provider, pricing_info,
+             tracer, pricing_info,
              capture_message_content, metrics, disable_metrics
          )
          sync_emb_wrap = embeddings(
              version, environment, application_name,
-             tracer, event_provider, pricing_info,
+             tracer, pricing_info,
              capture_message_content, metrics, disable_metrics
          )
          async_chat_wrap = async_chat(
              version, environment, application_name,
-             tracer, event_provider, pricing_info,
+             tracer, pricing_info,
              capture_message_content, metrics, disable_metrics
          )
          async_emb_wrap = async_embeddings(
              version, environment, application_name,
-             tracer, event_provider, pricing_info,
+             tracer, pricing_info,
              capture_message_content, metrics, disable_metrics
          )
 
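
With event_provider gone, the wrapper factories take one fewer argument. A minimal sketch of the new call shape, using stand-in tracer and metrics objects rather than the ones openlit.init() normally supplies (the module path in the import is an assumption):

# Sketch: constructing a sync chat wrapper with the 1.34.11 factory signature.
from opentelemetry import trace
from openlit.instrumentation.ollama.ollama import chat  # module path assumed

sync_chat_wrap = chat(
    "1.34.11", "default_environment", "default_application",
    trace.get_tracer(__name__), pricing_info={},
    capture_message_content=False, metrics=None, disable_metrics=True,
)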
@@ -2,7 +2,6 @@
  Module for monitoring Ollama API calls.
  """
 
- import logging
  import time
  from opentelemetry.trace import SpanKind
  from openlit.__helpers import (
@@ -17,12 +16,10 @@ from openlit.instrumentation.ollama.utils import (
  )
  from openlit.semcov import SemanticConvention
 
- logger = logging.getLogger(__name__)
-
  def async_chat(version, environment, application_name,
-     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
      """
-     Generates a telemetry wrapper for GenAI function call
+     Generates a telemetry wrapper for Ollama async chat function call
      """
 
      class TracedAsyncStream:
@@ -38,21 +35,21 @@ def async_chat(version, environment, application_name,
              kwargs,
              server_address,
              server_port,
-             **args,
+             args,
          ):
              self.__wrapped__ = wrapped
              self._span = span
              self._llmresponse = ""
              self._response_model = ""
              self._finish_reason = ""
-             self._tool_calls = []
+             self._tools = []
              self._input_tokens = 0
              self._output_tokens = 0
-             self._response_role = ''
+             self._response_role = ""
              self._span_name = span_name
              self._args = args
              self._kwargs = kwargs
-             self._start_time = time.time()
+             self._start_time = time.monotonic()
              self._end_time = None
              self._timestamps = []
              self._ttft = 0
@@ -81,26 +78,25 @@ def async_chat(version, environment, application_name,
                  return chunk
              except StopAsyncIteration:
                  try:
-                     with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                     with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
                          process_streaming_chat_response(
                              self,
                              pricing_info=pricing_info,
                              environment=environment,
                              application_name=application_name,
                              metrics=metrics,
-                             event_provider=event_provider,
                              capture_message_content=capture_message_content,
                              disable_metrics=disable_metrics,
                              version=version
                          )
                  except Exception as e:
                      handle_exception(self._span, e)
-                     logger.error("Error in trace creation: %s", e)
+
                  raise
 
      async def wrapper(wrapped, instance, args, kwargs):
          """
-         Wraps the GenAI function call.
+         Wraps the Ollama async chat function call.
          """
 
          streaming = kwargs.get("stream", False)
@@ -111,26 +107,73 @@ def async_chat(version, environment, application_name,
 
          span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-         # pylint: disable=no-else-return
          if streaming:
              awaited_wrapped = await wrapped(*args, **kwargs)
              span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-             return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+             return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, args)
 
          else:
-             with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-                 start_time = time.time()
+             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                 start_time = time.monotonic()
+
+                 try:
+                     response = await wrapped(*args, **kwargs)
+
+                     response = process_chat_response(
+                         response=response,
+                         gen_ai_endpoint="ollama.chat",
+                         pricing_info=pricing_info,
+                         server_port=server_port,
+                         server_address=server_address,
+                         environment=environment,
+                         application_name=application_name,
+                         metrics=metrics,
+                         start_time=start_time,
+                         span=span,
+                         capture_message_content=capture_message_content,
+                         disable_metrics=disable_metrics,
+                         version=version,
+                         **kwargs
+                     )
+
+                 except Exception as e:
+                     handle_exception(span, e)
+
+                 return response
+
+     return wrapper
+
+ def async_embeddings(version, environment, application_name,
+     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for Ollama async embeddings function call
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the Ollama async embeddings function call.
+         """
+
+         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+         request_model = kwargs.get("model")
+
+         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+             start_time = time.monotonic()
+
+             try:
                  response = await wrapped(*args, **kwargs)
-                 response = process_chat_response(
+
+                 response = process_embedding_response(
                      response=response,
-                     request_model=request_model,
+                     gen_ai_endpoint="ollama.embeddings",
                      pricing_info=pricing_info,
                      server_port=server_port,
                      server_address=server_address,
                      environment=environment,
                      application_name=application_name,
                      metrics=metrics,
-                     event_provider=event_provider,
                      start_time=start_time,
                      span=span,
                      capture_message_content=capture_message_content,
@@ -139,47 +182,8 @@ def async_chat(version, environment, application_name,
                      **kwargs
                  )
 
-                 return response
-
-     return wrapper
-
- def async_embeddings(version, environment, application_name,
-     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for GenAI function call
-     """
-
-     async def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the GenAI function call.
-         """
-
-         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-         json_body = kwargs.get('json', {}) or {}
-         request_model = json_body.get('model') or kwargs.get('model')
-
-         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
-
-         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-             start_time = time.time()
-             response = await wrapped(*args, **kwargs)
-             response = process_embedding_response(
-                 response=response,
-                 request_model=request_model,
-                 pricing_info=pricing_info,
-                 server_port=server_port,
-                 server_address=server_address,
-                 environment=environment,
-                 application_name=application_name,
-                 metrics=metrics,
-                 event_provider=event_provider,
-                 start_time=start_time,
-                 span=span,
-                 capture_message_content=capture_message_content,
-                 disable_metrics=disable_metrics,
-                 version=version,
-                 **kwargs
-             )
+             except Exception as e:
+                 handle_exception(span, e)
 
              return response
 
@@ -2,7 +2,6 @@
  Module for monitoring Ollama API calls.
  """
 
- import logging
  import time
  from opentelemetry.trace import SpanKind
  from openlit.__helpers import (
@@ -17,12 +16,10 @@ from openlit.instrumentation.ollama.utils import (
  )
  from openlit.semcov import SemanticConvention
 
- logger = logging.getLogger(__name__)
-
  def chat(version, environment, application_name,
-     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
+     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
      """
-     Generates a telemetry wrapper for GenAI function call
+     Generates a telemetry wrapper for Ollama chat function call
      """
 
      class TracedSyncStream:
@@ -38,21 +35,21 @@ def chat(version, environment, application_name,
              kwargs,
              server_address,
              server_port,
-             **args,
+             args,
          ):
              self.__wrapped__ = wrapped
              self._span = span
              self._llmresponse = ""
              self._response_model = ""
              self._finish_reason = ""
-             self._tool_calls = []
+             self._tools = []
              self._input_tokens = 0
              self._output_tokens = 0
-             self._response_role = ''
+             self._response_role = ""
              self._span_name = span_name
              self._args = args
              self._kwargs = kwargs
-             self._start_time = time.time()
+             self._start_time = time.monotonic()
              self._end_time = None
              self._timestamps = []
              self._ttft = 0
@@ -81,26 +78,25 @@ def chat(version, environment, application_name,
                  return chunk
              except StopIteration:
                  try:
-                     with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                     with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
                          process_streaming_chat_response(
                              self,
                              pricing_info=pricing_info,
                              environment=environment,
                              application_name=application_name,
                              metrics=metrics,
-                             event_provider=event_provider,
                              capture_message_content=capture_message_content,
                              disable_metrics=disable_metrics,
                              version=version
                          )
                  except Exception as e:
                      handle_exception(self._span, e)
-                     logger.error("Error in trace creation: %s", e)
+
                  raise
 
      def wrapper(wrapped, instance, args, kwargs):
          """
-         Wraps the GenAI function call.
+         Wraps the Ollama chat function call.
          """
 
          streaming = kwargs.get("stream", False)
@@ -111,26 +107,73 @@ def chat(version, environment, application_name,
 
          span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-         # pylint: disable=no-else-return
          if streaming:
              awaited_wrapped = wrapped(*args, **kwargs)
              span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-             return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+             return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, args)
 
          else:
-             with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-                 start_time = time.time()
+             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                 start_time = time.monotonic()
+
+                 try:
+                     response = wrapped(*args, **kwargs)
+
+                     response = process_chat_response(
+                         response=response,
+                         gen_ai_endpoint="ollama.chat",
+                         pricing_info=pricing_info,
+                         server_port=server_port,
+                         server_address=server_address,
+                         environment=environment,
+                         application_name=application_name,
+                         metrics=metrics,
+                         start_time=start_time,
+                         span=span,
+                         capture_message_content=capture_message_content,
+                         disable_metrics=disable_metrics,
+                         version=version,
+                         **kwargs
+                     )
+
+                 except Exception as e:
+                     handle_exception(span, e)
+
+                 return response
+
+     return wrapper
+
+ def embeddings(version, environment, application_name,
+     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for Ollama embeddings function call
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the Ollama embeddings function call.
+         """
+
+         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+         request_model = kwargs.get("model")
+
+         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+             start_time = time.monotonic()
+
+             try:
                  response = wrapped(*args, **kwargs)
-                 response = process_chat_response(
+
+                 response = process_embedding_response(
                      response=response,
-                     request_model=request_model,
+                     gen_ai_endpoint="ollama.embeddings",
                      pricing_info=pricing_info,
                      server_port=server_port,
                      server_address=server_address,
                      environment=environment,
                      application_name=application_name,
                      metrics=metrics,
-                     event_provider=event_provider,
                      start_time=start_time,
                      span=span,
                      capture_message_content=capture_message_content,
@@ -139,47 +182,8 @@ def chat(version, environment, application_name,
                      **kwargs
                  )
 
-                 return response
-
-     return wrapper
-
- def embeddings(version, environment, application_name,
-     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for GenAI function call
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the GenAI function call.
-         """
-
-         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-         json_body = kwargs.get('json', {}) or {}
-         request_model = json_body.get('model') or kwargs.get('model')
-
-         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
-
-         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-             start_time = time.time()
-             response = wrapped(*args, **kwargs)
-             response = process_embedding_response(
-                 response=response,
-                 request_model=request_model,
-                 pricing_info=pricing_info,
-                 server_port=server_port,
-                 server_address=server_address,
-                 environment=environment,
-                 application_name=application_name,
-                 metrics=metrics,
-                 event_provider=event_provider,
-                 start_time=start_time,
-                 span=span,
-                 capture_message_content=capture_message_content,
-                 disable_metrics=disable_metrics,
-                 version=version,
-                 **kwargs
-             )
+             except Exception as e:
+                 handle_exception(span, e)
 
              return response
 
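
Taken together, the instrumented code paths above are normally reached through openlit's public entry point rather than by calling the wrappers directly. A hedged end-to-end sketch, assuming the documented openlit.init() API and the ollama Python client (the model name is illustrative):

# Sketch: exercising the instrumented sync chat paths shown in this diff.
import openlit
import ollama

openlit.init(application_name="demo-app", environment="dev")

# Non-streaming call: handled by the wrapper's else-branch via process_chat_response().
reply = ollama.chat(model="llama3", messages=[{"role": "user", "content": "Hello"}])

# Streaming call: the wrapper returns the TracedSyncStream added in this diff,
# and telemetry is emitted once the stream is exhausted.
for chunk in ollama.chat(model="llama3", messages=[{"role": "user", "content": "Hello"}], stream=True):
    pass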