openlit 1.33.19__py3-none-any.whl → 1.33.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. openlit/__helpers.py +64 -7
  2. openlit/__init__.py +3 -3
  3. openlit/evals/utils.py +7 -7
  4. openlit/guard/utils.py +7 -7
  5. openlit/instrumentation/ag2/ag2.py +24 -24
  6. openlit/instrumentation/ai21/ai21.py +3 -3
  7. openlit/instrumentation/ai21/async_ai21.py +3 -3
  8. openlit/instrumentation/ai21/utils.py +59 -59
  9. openlit/instrumentation/anthropic/anthropic.py +2 -2
  10. openlit/instrumentation/anthropic/async_anthropic.py +2 -2
  11. openlit/instrumentation/anthropic/utils.py +34 -34
  12. openlit/instrumentation/assemblyai/assemblyai.py +24 -24
  13. openlit/instrumentation/astra/astra.py +3 -3
  14. openlit/instrumentation/astra/async_astra.py +3 -3
  15. openlit/instrumentation/astra/utils.py +39 -39
  16. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +2 -2
  17. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +2 -2
  18. openlit/instrumentation/azure_ai_inference/utils.py +36 -36
  19. openlit/instrumentation/bedrock/bedrock.py +2 -2
  20. openlit/instrumentation/bedrock/utils.py +35 -35
  21. openlit/instrumentation/chroma/chroma.py +57 -57
  22. openlit/instrumentation/cohere/async_cohere.py +88 -88
  23. openlit/instrumentation/cohere/cohere.py +88 -88
  24. openlit/instrumentation/controlflow/controlflow.py +15 -15
  25. openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
  26. openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
  27. openlit/instrumentation/crewai/crewai.py +22 -22
  28. openlit/instrumentation/dynamiq/dynamiq.py +19 -19
  29. openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
  30. openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
  31. openlit/instrumentation/embedchain/embedchain.py +15 -15
  32. openlit/instrumentation/firecrawl/firecrawl.py +10 -10
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
  35. openlit/instrumentation/gpt4all/gpt4all.py +78 -78
  36. openlit/instrumentation/gpu/__init__.py +8 -8
  37. openlit/instrumentation/groq/async_groq.py +74 -74
  38. openlit/instrumentation/groq/groq.py +74 -74
  39. openlit/instrumentation/haystack/haystack.py +6 -6
  40. openlit/instrumentation/julep/async_julep.py +14 -14
  41. openlit/instrumentation/julep/julep.py +14 -14
  42. openlit/instrumentation/langchain/async_langchain.py +39 -39
  43. openlit/instrumentation/langchain/langchain.py +39 -39
  44. openlit/instrumentation/letta/letta.py +26 -26
  45. openlit/instrumentation/litellm/async_litellm.py +94 -94
  46. openlit/instrumentation/litellm/litellm.py +94 -94
  47. openlit/instrumentation/llamaindex/llamaindex.py +7 -7
  48. openlit/instrumentation/mem0/mem0.py +13 -13
  49. openlit/instrumentation/milvus/milvus.py +47 -47
  50. openlit/instrumentation/mistral/async_mistral.py +88 -88
  51. openlit/instrumentation/mistral/mistral.py +88 -88
  52. openlit/instrumentation/multion/async_multion.py +21 -21
  53. openlit/instrumentation/multion/multion.py +21 -21
  54. openlit/instrumentation/ollama/__init__.py +47 -34
  55. openlit/instrumentation/ollama/async_ollama.py +7 -5
  56. openlit/instrumentation/ollama/ollama.py +7 -5
  57. openlit/instrumentation/ollama/utils.py +58 -54
  58. openlit/instrumentation/openai/async_openai.py +225 -225
  59. openlit/instrumentation/openai/openai.py +225 -225
  60. openlit/instrumentation/openai_agents/openai_agents.py +11 -11
  61. openlit/instrumentation/phidata/phidata.py +15 -15
  62. openlit/instrumentation/pinecone/pinecone.py +43 -43
  63. openlit/instrumentation/premai/premai.py +86 -86
  64. openlit/instrumentation/qdrant/async_qdrant.py +95 -95
  65. openlit/instrumentation/qdrant/qdrant.py +99 -99
  66. openlit/instrumentation/reka/async_reka.py +33 -33
  67. openlit/instrumentation/reka/reka.py +33 -33
  68. openlit/instrumentation/together/async_together.py +90 -90
  69. openlit/instrumentation/together/together.py +90 -90
  70. openlit/instrumentation/transformers/__init__.py +11 -7
  71. openlit/instrumentation/transformers/transformers.py +32 -168
  72. openlit/instrumentation/transformers/utils.py +183 -0
  73. openlit/instrumentation/vertexai/async_vertexai.py +64 -64
  74. openlit/instrumentation/vertexai/vertexai.py +64 -64
  75. openlit/instrumentation/vllm/vllm.py +24 -24
  76. openlit/otel/metrics.py +11 -11
  77. openlit/semcov/__init__.py +3 -3
  78. {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/METADATA +8 -8
  79. openlit-1.33.21.dist-info/RECORD +132 -0
  80. {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
  81. openlit-1.33.19.dist-info/RECORD +0 -131
  82. {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0

openlit/instrumentation/ollama/__init__.py
@@ -16,6 +16,29 @@ from openlit.instrumentation.ollama.async_ollama import (
 
 _instruments = ("ollama >= 0.2.0",)
 
+# Dispatch wrapper to route instrumentation to chat or embeddings based on path
+def _dispatch(sync_chat_wrap, sync_emb_wrap):
+    def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return sync_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return sync_emb_wrap(wrapped, instance, args, kwargs)
+        return wrapped(*args, **kwargs)
+    return wrapper
+
+def _dispatch_async(async_chat_wrap, async_emb_wrap):
+    async def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return await async_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return await async_emb_wrap(wrapped, instance, args, kwargs)
+        return await wrapped(*args, **kwargs)
+    return wrapper
+
 class OllamaInstrumentor(BaseInstrumentor):
     """
     An instrumentor for Ollama's client library.
@@ -35,48 +58,38 @@ class OllamaInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("ollama")
 
-        # sync chat
-        wrap_function_wrapper(
-            "ollama",
-            "chat",
-            chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        # Build wrapper factories for chat and embeddings
+        sync_chat_wrap = chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-        wrap_function_wrapper(
-            "ollama",
-            "Client.chat",
-            chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        sync_emb_wrap = embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-        # sync embeddings
-        wrap_function_wrapper(
-            "ollama",
-            "embeddings",
-            embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_chat_wrap = async_chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
        )
-        wrap_function_wrapper(
-            "ollama",
-            "Client.embeddings",
-            embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_emb_wrap = async_embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
        )
 
-        # async chat
+        # Patch underlying request methods to ensure instrumentation regardless of import order
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.chat",
-            async_chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "Client._request",
+            _dispatch(sync_chat_wrap, sync_emb_wrap),
         )
-
-        # async embeddings
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.embeddings",
-            async_embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "AsyncClient._request",
+            _dispatch_async(async_chat_wrap, async_emb_wrap),
         )
 
     def _uninstrument(self, **kwargs):
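
Why this refactor matters: the public ollama.chat / ollama.embeddings entry points delegate to Client._request, so wrapping that single transport method (and its AsyncClient twin) covers module-level calls, client instances, and aliases in one place, regardless of import order. Below is a minimal, self-contained sketch of the same wrapt pattern; the Api class, its _request signature, and the print harness are stand-ins for illustration, not the real ollama client:

import sys
import wrapt

class Api:
    # stand-in for ollama's Client; positional args after self are (cls, method, path)
    def _request(self, cls, method, path, **kwargs):
        return f"raw {method} {path}"

def chat_wrap(wrapped, instance, args, kwargs):
    return "chat-instrumented: " + wrapped(*args, **kwargs)

def emb_wrap(wrapped, instance, args, kwargs):
    return "embeddings-instrumented: " + wrapped(*args, **kwargs)

def _dispatch(chat_wrap, emb_wrap):
    def wrapper(wrapped, instance, args, kwargs):
        # args[2] is the URL path; route on its last segment
        if len(args) > 2 and isinstance(args[2], str):
            op = args[2].rstrip("/").split("/")[-1]
            if op == "chat":
                return chat_wrap(wrapped, instance, args, kwargs)
            if op == "embeddings":
                return emb_wrap(wrapped, instance, args, kwargs)
        return wrapped(*args, **kwargs)  # everything else passes through untouched
    return wrapper

wrapt.wrap_function_wrapper(sys.modules[__name__], "Api._request",
                            _dispatch(chat_wrap, emb_wrap))

api = Api()
print(api._request(dict, "POST", "/api/chat"))        # chat-instrumented: raw POST /api/chat
print(api._request(dict, "POST", "/api/embeddings"))  # embeddings-instrumented: raw POST /api/embeddings
print(api._request(dict, "GET", "/api/tags"))         # raw GET /api/tags

The trade-off: the wrappers now receive transport-level kwargs (the json= payload) instead of the user-facing model=/messages= keywords, which is exactly what the json_body fallbacks in the hunks below compensate for.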

openlit/instrumentation/ollama/async_ollama.py
@@ -15,7 +15,7 @@ from openlit.instrumentation.ollama.utils import (
     process_streaming_chat_response,
     process_embedding_response
 )
-from openlit.semcov import SemanticConvetion
+from openlit.semcov import SemanticConvention
 
 logger = logging.getLogger(__name__)
 
@@ -106,9 +106,10 @@ def async_chat(version, environment, application_name,
        streaming = kwargs.get("stream", False)
 
        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-        request_model = kwargs.get("model", "gpt-4o")
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
-        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
@@ -154,9 +155,10 @@ def async_embeddings(version, environment, application_name,
        """
 
        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-        request_model = kwargs.get('model', 'all-minilm')
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
-        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+        span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
            start_time = time.time()
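
Since the wrapper now sees the raw request, the model name is read from the json= payload first, with the old keyword lookup kept as a fallback; the hard-coded "gpt-4o" / "all-minilm" defaults, which were wrong for Ollama anyway, are gone, so an unknown model now surfaces as None rather than a plausible-but-false name. A hedged sketch of just this lookup (resolve_model is a name invented here for illustration):

def resolve_model(kwargs):
    # `or {}` also guards against an explicit json=None reaching _request
    json_body = kwargs.get("json", {}) or {}
    return json_body.get("model") or kwargs.get("model")

print(resolve_model({"json": {"model": "llama3.2", "messages": []}}))  # llama3.2
print(resolve_model({"model": "all-minilm"}))                          # all-minilm
print(resolve_model({"json": None}))                                   # None

The sync chat/embeddings hunks below apply the identical change, and utils.py extends the same json-first pattern to the chat messages and the embedding prompt.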

openlit/instrumentation/ollama/ollama.py
@@ -15,7 +15,7 @@ from openlit.instrumentation.ollama.utils import (
     process_streaming_chat_response,
     process_embedding_response
 )
-from openlit.semcov import SemanticConvetion
+from openlit.semcov import SemanticConvention
 
 logger = logging.getLogger(__name__)
 
@@ -106,9 +106,10 @@ def chat(version, environment, application_name,
        streaming = kwargs.get("stream", False)
 
        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-        request_model = kwargs.get("model", "gpt-4o")
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
-        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
@@ -154,9 +155,10 @@ def embeddings(version, environment, application_name,
        """
 
        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-        request_model = kwargs.get('model', 'all-minilm')
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
-        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+        span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
            start_time = time.time()

openlit/instrumentation/ollama/utils.py
@@ -19,7 +19,7 @@ from openlit.__helpers import (
     otel_event,
     concatenate_all_contents
 )
-from openlit.semcov import SemanticConvetion
+from openlit.semcov import SemanticConvention
 
 def process_chunk(self, chunk):
     """
@@ -57,27 +57,29 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
-    request_model = scope._kwargs.get("model", "gpt-4o")
+    json_body = scope._kwargs.get("json", {}) or {}
+    request_model = json_body.get("model") or scope._kwargs.get("model")
+    messages = json_body.get("messages", scope._kwargs.get("messages", ""))
+    formatted_messages = extract_and_format_input(messages)
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
     # Set Span attributes (OTel Semconv)
     scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvetion.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
 
     options = scope._kwargs.get('options', {})
     attributes = [
-        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
-        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-        (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
-        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
-        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
-        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+        (SemanticConvention.GEN_AI_REQUEST_SEED, 'seed'),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
     ]
 
     for attribute, key in attributes:
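
The attribute list above (its loop body continues in the next hunk) maps Ollama's options dict onto span attributes only when a given option is actually present. A minimal sketch with a plain dict standing in for the OTel span; the gen_ai.* strings are the values these constants are assumed to hold under the OTel GenAI semantic conventions:

options = {"temperature": 0.7, "top_k": 40, "stop": ["###"]}
attributes = [
    ("gen_ai.request.temperature", "temperature"),
    ("gen_ai.request.top_p", "top_p"),            # absent from options, so skipped
    ("gen_ai.request.top_k", "top_k"),
    ("gen_ai.request.stop_sequences", "stop"),
]
span = {}  # stands in for scope._span.set_attribute
for attribute, key in attributes:
    value = options.get(key)
    if value is not None:
        span[attribute] = value
print(span)
# {'gen_ai.request.temperature': 0.7, 'gen_ai.request.top_k': 40, 'gen_ai.request.stop_sequences': ['###']}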
@@ -85,37 +87,37 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
         if value is not None:
             scope._span.set_attribute(attribute, value)
 
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, scope._response_model)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvetion.SERVER_ADDRESS, scope._server_address)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
 
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                               "text" if isinstance(scope._llmresponse, str) else "json")
 
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION, version)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
     # To be removed one the change to log events (from span events) is complete
     prompt = concatenate_all_contents(formatted_messages)
     if capture_message_content:
         scope._span.add_event(
-            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
-                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
         scope._span.add_event(
-            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
-                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
             },
         )
 
@@ -145,9 +147,9 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     for role in ['user', 'system', 'assistant', 'tool']:
         if formatted_messages.get(role, {}).get('content', ''):
             event = otel_event(
-                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
                 attributes={
-                    SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_OLLAMA
+                    SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
                 },
                 body = {
                     # pylint: disable=line-too-long
@@ -172,9 +174,9 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             event_provider.emit(event)
 
     choice_event = otel_event(
-        name=SemanticConvetion.GEN_AI_CHOICE,
+        name=SemanticConvention.GEN_AI_CHOICE,
         attributes={
-            SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_OLLAMA
+            SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
         },
         body=choice_event_body
     )
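
The per-role loop above derives each event name with getattr instead of four separate branches: the role string 'user' becomes the constant GEN_AI_USER_MESSAGE, and so on. A sketch with a stand-in constants class (values assumed to follow the OTel GenAI event names; the real constants live in openlit.semcov.SemanticConvention):

class SemanticConvention:
    GEN_AI_USER_MESSAGE = "gen_ai.user.message"
    GEN_AI_SYSTEM_MESSAGE = "gen_ai.system.message"
    GEN_AI_ASSISTANT_MESSAGE = "gen_ai.assistant.message"
    GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message"

for role in ["user", "system", "assistant", "tool"]:
    print(getattr(SemanticConvention, f"GEN_AI_{role.upper()}_MESSAGE"))

This is also why the class rename in this release had to be mechanical and complete: a stale SemanticConvetion reference inside a function body fails only when that code path runs, not at import time.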
@@ -186,8 +188,8 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     metrics_attributes = create_metrics_attributes(
         service_name=application_name,
         deployment_environment=environment,
-        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-        system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
         request_model=request_model,
         server_address=scope._server_address,
         server_port=scope._server_port,
@@ -252,7 +254,9 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     end_time = time.time()
 
     try:
-        input_tokens = general_tokens(str(kwargs.get('prompt')))
+        json_body = kwargs.get("json", {}) or {}
+        prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
+        input_tokens = general_tokens(str(prompt_val))
 
         # Calculate cost of the operation
         cost = get_embed_model_cost(request_model,
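
The embeddings path gets the same json-first treatment for its prompt, and prompt_val is reused further down for the captured message content, keeping the token count and the recorded prompt consistent. A sketch; general_tokens here is a crude whitespace counter standing in for openlit's real helper:

def general_tokens(text):
    # stand-in tokenizer for illustration; openlit's helper is more precise
    return len(text.split())

def embedding_input_tokens(kwargs):
    json_body = kwargs.get("json", {}) or {}
    prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
    return general_tokens(str(prompt_val))

print(embedding_input_tokens({"json": {"prompt": "embed these four words"}}))  # 4
print(embedding_input_tokens({"prompt": "legacy keyword path"}))               # 3

Note the new code also fixes a quiet bug in the removed line: str(kwargs.get('prompt')) rendered a missing prompt as the literal string 'None' and counted its tokens, whereas the fallback now defaults to ''.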
@@ -260,19 +264,19 @@
 
         # Set Span attributes (OTel Semconv)
         span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                           SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                           SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+        span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                           SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
+        span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                           SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
+        span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                            request_model)
-        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                            request_model)
-        span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+        span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                            server_address)
-        span.set_attribute(SemanticConvetion.SERVER_PORT,
+        span.set_attribute(SemanticConvention.SERVER_PORT,
                            server_port)
-        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+        span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
                            input_tokens)
 
         # Set Span attributes (Extras)
@@ -280,20 +284,20 @@ def process_embedding_response(response, request_model, pricing_info, server_por
                            environment)
         span.set_attribute(SERVICE_NAME,
                            application_name)
-        span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE,
+        span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
                            input_tokens)
-        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+        span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                            cost)
-        span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+        span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                            version)
 
         prompt_event = otel_event(
-            name=SemanticConvetion.GEN_AI_USER_MESSAGE,
+            name=SemanticConvention.GEN_AI_USER_MESSAGE,
             attributes={
-                SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_OLLAMA
+                SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
             },
             body={
-                **({"content": kwargs.get('prompt', '')} if capture_message_content else {}),
+                **({"content": prompt_val} if capture_message_content else {}),
                 "role": 'user'
             }
         )
@@ -305,8 +309,8 @@
         attributes = create_metrics_attributes(
             service_name=application_name,
             deployment_environment=environment,
-            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-            system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
             request_model=request_model,
             server_address=server_address,
             server_port=server_port,