openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
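
Nearly every file in the listing changes by a small, uniform amount, and the hunks below show why: call sites are exploded to one argument per line, trailing commas are added, long lines are wrapped, and a second blank line is inserted between top-level definitions — a change pattern consistent with running an auto-formatter such as Black over the package (an inference from the pattern; the diff itself does not name a tool). The one structural change visible in the listing is the LangChain instrumentation, where langchain.py, async_langchain.py, and utils.py are deleted in favor of a new 1,100-line callback_handler.py. The hunks that follow, from openlit/instrumentation/ollama/ollama.py and openlit/instrumentation/ollama/utils.py, are representative of the restyling. A quick way to check that such a change is purely cosmetic is to run the old source through the formatter and compare:

```python
# Hedged check that a restyle like this is behavior-preserving: format
# the 1.34.30 source with Black and diff it against 1.34.31. (Assumes a
# Black-compatible formatter produced the change; the diff doesn't say.)
import black

old_src = (
    "def chat(version, environment, application_name,\n"
    "    tracer, pricing_info, capture_message_content, metrics, disable_metrics):\n"
    "    pass\n"
)

print(black.format_str(old_src, mode=black.Mode()))
# def chat(
#     version,
#     environment,
#     application_name,
#     tracer,
#     pricing_info,
#     capture_message_content,
#     metrics,
#     disable_metrics,
# ):
#     pass
```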
```diff
--- a/openlit/instrumentation/ollama/ollama.py
+++ b/openlit/instrumentation/ollama/ollama.py
@@ -4,20 +4,26 @@ Module for monitoring Ollama API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
 from openlit.instrumentation.ollama.utils import (
     process_chunk,
     process_chat_response,
     process_streaming_chat_response,
-    process_embedding_response
+    process_embedding_response,
 )
 from openlit.semcov import SemanticConvention
 
-def chat(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def chat(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for Ollama chat function call
     """
@@ -28,15 +34,15 @@ def chat(version, environment, application_name,
         """
 
         def __init__(
-                self,
-                wrapped,
-                span,
-                span_name,
-                kwargs,
-                server_address,
-                server_port,
-                args,
-            ):
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._llmresponse = ""
@@ -78,7 +84,9 @@ def chat(version, environment, application_name,
                 return chunk
             except StopIteration:
                 try:
-                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                    with tracer.start_as_current_span(
+                        self._span_name, kind=SpanKind.CLIENT
+                    ) as self._span:
                         process_streaming_chat_response(
                             self,
                             pricing_info=pricing_info,
```
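
The TracedSyncStream changes above are whitespace-only, but the hunk outlines the streaming pattern: the wrapper stores the real iterator plus span bookkeeping, accumulates each chunk, and only when the stream raises StopIteration does it open the span and record what it saw. A runnable miniature of that pattern (names illustrative; openlit's real class also tracks timings, token counts, and tool calls):

```python
# Minimal sketch of the traced-stream pattern in this hunk: wrap a
# streaming response, accumulate the text, and emit telemetry once the
# stream is exhausted.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.trace import SpanKind

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer("sketch")

class TracedSyncStream:
    def __init__(self, wrapped, span_name):
        self.__wrapped__ = wrapped
        self._span_name = span_name
        self._llmresponse = ""

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
            self._llmresponse += chunk["message"]["content"]
            return chunk
        except StopIteration:
            # Stream done: open a span and record the accumulated output.
            with tracer.start_as_current_span(
                self._span_name, kind=SpanKind.CLIENT
            ) as span:
                span.set_attribute("gen_ai.completion", self._llmresponse)
            raise

chunks = iter([{"message": {"content": "Hel"}}, {"message": {"content": "lo"}}])
print("".join(c["message"]["content"] for c in TracedSyncStream(chunks, "chat")))
```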
```diff
@@ -87,7 +95,7 @@ def chat(version, environment, application_name,
                             metrics=metrics,
                             capture_message_content=capture_message_content,
                             disable_metrics=disable_metrics,
-                            version=version
+                            version=version,
                         )
                 except Exception as e:
                     handle_exception(self._span, e)
@@ -101,7 +109,9 @@ def chat(version, environment, application_name,
 
         streaming = kwargs.get("stream", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 11434
+        )
         json_body = kwargs.get("json", {}) or {}
         request_model = json_body.get("model") or kwargs.get("model")
 
```
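
11434 is the Ollama server's default local port, so the arguments here read as a fallback: derive the address and port from the client instance when it exposes one, otherwise assume a default local server. A sketch of that contract (the real helper lives in openlit.__helpers and may resolve the address differently):

```python
# Assumed contract of set_server_address_and_port, inferred from its
# call site: prefer the client's configured base URL, else fall back to
# the defaults (Ollama serves on 127.0.0.1:11434 out of the box).
from urllib.parse import urlparse

def set_server_address_and_port(instance, default_host, default_port):
    base_url = getattr(getattr(instance, "_client", None), "base_url", None)
    if base_url:
        parsed = urlparse(str(base_url))
        return parsed.hostname or default_host, parsed.port or default_port
    return default_host, default_port

class FakeClient:  # stand-in for an Ollama client object
    class _client:
        base_url = "http://localhost:11434"

print(set_server_address_and_port(FakeClient(), "127.0.0.1", 11434))
# ('localhost', 11434)
```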
```diff
@@ -110,7 +120,15 @@ def chat(version, environment, application_name,
         if streaming:
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, args)
+            return TracedSyncStream(
+                awaited_wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                args,
+            )
 
         else:
             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
```
```diff
@@ -133,7 +151,7 @@ def chat(version, environment, application_name,
                     capture_message_content=capture_message_content,
                     disable_metrics=disable_metrics,
                     version=version,
-                    **kwargs
+                    **kwargs,
                 )
 
             except Exception as e:
@@ -143,8 +161,17 @@ def chat(version, environment, application_name,
 
     return wrapper
 
-def embeddings(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def embeddings(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for Ollama embeddings function call
     """
@@ -154,10 +181,14 @@ def embeddings(version, environment, application_name,
         Wraps the Ollama embeddings function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 11434
+        )
         request_model = kwargs.get("model")
 
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        span_name = (
+            f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        )
 
         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.monotonic()
@@ -179,7 +210,7 @@ def embeddings(version, environment, application_name,
                     capture_message_content=capture_message_content,
                     disable_metrics=disable_metrics,
                     version=version,
-                    **kwargs
+                    **kwargs,
                 )
 
             except Exception as e:
```
```diff
--- a/openlit/instrumentation/ollama/utils.py
+++ b/openlit/instrumentation/ollama/utils.py
@@ -1,6 +1,7 @@
 """
 Ollama OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -18,6 +19,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -30,8 +32,9 @@ def format_content(messages):
 
         if isinstance(content, list):
             content_str = ", ".join(
-                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                if "type" in item else f'text: {item["text"]}'
+                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
+                if "type" in item
+                else f"text: {item['text']}"
                 for item in content
             )
             formatted_messages.append(f"{role}: {content_str}")
```
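
format_content flattens a chat history into a single prompt string; the reflowed conditional above handles multimodal list content by reading each item's type and pulling either its text or its image_url. A self-contained copy of that logic (the non-list branch is inferred from the surrounding function, which the hunk does not show):

```python
# Standalone copy of the hunk's content-flattening logic; the else
# branch for plain string content is an inference from context.
def format_content(messages):
    formatted_messages = []
    for message in messages:
        role, content = message["role"], message["content"]
        if isinstance(content, list):
            content_str = ", ".join(
                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
                if "type" in item
                else f"text: {item['text']}"
                for item in content
            )
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")
    return "\n".join(formatted_messages)

multimodal = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": "http://example.com/cat.png"},
        ],
    }
]
print(format_content(multimodal))
# user: text: What is in this image?, image_url: http://example.com/cat.png
```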
```diff
@@ -40,6 +43,7 @@ def format_content(messages):
 
     return "\n".join(formatted_messages)
 
+
 def process_chunk(self, chunk):
     """
     Process a chunk of response data and update state.
@@ -66,8 +70,22 @@ def process_chunk(self, chunk):
         self._response_model = chunked.get("model", "")
         self._finish_reason = chunked.get("done_reason", "")
 
-def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address,
-    server_port, request_model, response_model, environment, application_name, start_time, end_time, cost, input_tokens):
+
+def record_embedding_metrics(
+    metrics,
+    gen_ai_operation,
+    gen_ai_system,
+    server_address,
+    server_port,
+    request_model,
+    response_model,
+    environment,
+    application_name,
+    start_time,
+    end_time,
+    cost,
+    input_tokens,
+):
     """
     Record embedding metrics for the operation.
     """
```
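
record_embedding_metrics takes the operation identity, server endpoint, models, environment, timings, cost, and token count as flat arguments and writes them onto pre-created OpenTelemetry instruments — the next hunk's context shows counters written with .add() and histograms with .record(). A sketch of the instrument dictionary it expects (instrument names and kinds here are illustrative, mirroring only the two calls visible in the diff):

```python
# Sketch of the instrument dict record_embedding_metrics writes to:
# counters take .add(), histograms take .record(). Names/kinds are
# assumptions; only the two calls below appear in the diff.
from opentelemetry import metrics as otel_metrics

meter = otel_metrics.get_meter("sketch")
metrics = {
    "genai_prompt_tokens": meter.create_counter(
        "gen_ai.client.prompt.tokens", description="Input tokens"
    ),
    "genai_cost": meter.create_histogram(
        "gen_ai.client.cost", unit="USD", description="Request cost"
    ),
}

attributes = {"gen_ai.request.model": "all-minilm"}
metrics["genai_prompt_tokens"].add(128, attributes)
metrics["genai_cost"].record(0.0, attributes)
```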
```diff
@@ -88,8 +106,18 @@ def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_ad
     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
     metrics["genai_cost"].record(cost, attributes)
 
-def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
-    metrics, capture_message_content, disable_metrics, version):
+
+def common_chat_logic(
+    scope,
+    gen_ai_endpoint,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -103,13 +131,26 @@ def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, applica
     request_model = json_body.get("model") or scope._kwargs.get("model", "llama3.2")
     is_stream = scope._kwargs.get("stream", False)
 
-    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters
     options = json_body.get("options", scope._kwargs.get("options", {}))
@@ -128,25 +169,46 @@ def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, applica
             scope._span.set_attribute(attribute, value)
 
     # Span Attributes for Response parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Tools
     if scope._tools is not None:
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", "")).get("name", "")
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", "").get("arguments", "")))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", "")
+        ).get("name", "")
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", ""))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS,
+            str(scope._tools.get("function", "").get("arguments", "")),
+        )
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
         # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
```
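
The reflow also surfaces a pre-existing bug in the tools branch above: .get("name", "") is chained onto the return value of span.set_attribute(), which returns None in the OpenTelemetry API, so the GEN_AI_TOOL_NAME line raises AttributeError whenever scope._tools is set. Judging from the neighboring GEN_AI_TOOL_ARGS line, the lookup was presumably meant to sit inside the attribute value — an assumption, demonstrated on a plain dict:

```python
# Presumed intent of the GEN_AI_TOOL_NAME line (an assumption inferred
# from the neighboring GEN_AI_TOOL_ARGS line, which nests both .get()
# calls inside the attribute value), demonstrated on a plain dict:
tools = {
    "id": "call_0",
    "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
}

tool_name = tools.get("function", {}).get("name", "")
print(tool_name)  # get_weather

# The shipped line instead evaluates:
#     span.set_attribute(GEN_AI_TOOL_NAME, tools.get("function", "")).get("name", "")
# and set_attribute() returns None, so the trailing .get() raises
# AttributeError as soon as a tool call is present.
```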
```diff
@@ -166,13 +228,37 @@ def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, applica
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
-            request_model, scope._response_model, environment, application_name,
-            scope._start_time, scope._end_time, cost, scope._input_tokens, scope._output_tokens, scope._tbt, scope._ttft)
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            cost,
+            scope._input_tokens,
+            scope._output_tokens,
+            scope._tbt,
+            scope._ttft,
+        )
 
-def common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
-    metrics, capture_message_content, disable_metrics, version):
+
+def common_embedding_logic(
+    scope,
+    gen_ai_endpoint,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+):
     """
     Process embedding request and generate Telemetry
     """
@@ -186,14 +272,29 @@ def common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, ap
     cost = get_embed_model_cost(request_model, pricing_info, input_tokens)
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+        SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Embedding-specific parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens
+    )
 
     # Span Attributes for Cost
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
```
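
Embeddings produce no completion tokens, so GEN_AI_USAGE_INPUT_TOKENS and GEN_AI_CLIENT_TOKEN_USAGE both receive the same input_tokens value, and cost comes from get_embed_model_cost(request_model, pricing_info, input_tokens). A hedged sketch of a pricing lookup with that signature (openlit's real helper and pricing-table layout may differ):

```python
# Illustrative per-1k-token pricing lookup matching the call signature
# get_embed_model_cost(request_model, pricing_info, input_tokens); the
# actual pricing_info structure in openlit is an assumption here.
def get_embed_model_cost(request_model, pricing_info, input_tokens):
    price_per_1k = pricing_info.get("embeddings", {}).get(request_model, 0.0)
    return (input_tokens / 1000.0) * price_per_1k

print(get_embed_model_cost("all-minilm", {"embeddings": {"all-minilm": 0.02}}, 500))
# 0.01
```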
```diff
@@ -206,23 +307,66 @@
 
     # Metrics
     if not disable_metrics:
-        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-            SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
-            request_model, request_model, environment, application_name,
-            scope._start_time, scope._end_time, cost, input_tokens)
+        record_embedding_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            cost,
+            input_tokens,
+        )
 
-def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-    capture_message_content=False, disable_metrics=False, version=""):
+
+def process_streaming_chat_response(
+    self,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(self, "ollama.chat", pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version)
+    common_chat_logic(
+        self,
+        "ollama.chat",
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+    )
+
 
-def process_chat_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span,
-    capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def process_chat_response(
+    response,
+    gen_ai_endpoint,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process chat request and generate Telemetry
     """
```
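
process_chat_response and process_streaming_chat_response converge on common_chat_logic through a small "scope" object: the streaming path passes the stream wrapper itself (self, which already carries _span, _kwargs, token counts, and timings), while the non-streaming path assembles an equivalent object from the response. A runnable sketch of that duck-typed interface (openlit may build the object differently):

```python
# Sketch of the duck-typed scope both code paths hand to
# common_chat_logic: any object exposing the same _-prefixed fields
# works. SimpleNamespace is illustrative, not openlit's construction.
import time
from types import SimpleNamespace

def summarize(scope):
    total = scope._input_tokens + scope._output_tokens
    print(f"model={scope._response_model} tokens={total} ttft={scope._ttft:.4f}s")

start = time.monotonic()
scope = SimpleNamespace(
    _response_model="llama3.2",
    _input_tokens=12,
    _output_tokens=34,
    _start_time=start,
    _end_time=time.monotonic(),
    _tbt=0.0,
    _ttft=0.0,
)
scope._ttft = scope._end_time - scope._start_time
summarize(scope)
```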
```diff
@@ -250,14 +394,37 @@ def process_chat_response(response, gen_ai_endpoint, pricing_info, server_port,
     else:
         scope._tools = None
 
-    common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version)
+    common_chat_logic(
+        scope,
+        gen_ai_endpoint,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+    )
 
     return response
 
-def process_embedding_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span,
-    capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+
+def process_embedding_response(
+    response,
+    gen_ai_endpoint,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process embedding request and generate Telemetry
     """
@@ -275,7 +442,16 @@ def process_embedding_response(response, gen_ai_endpoint, pricing_info, server_p
     scope._tbt = 0.0
     scope._ttft = scope._end_time - scope._start_time
 
-    common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version)
+    common_embedding_logic(
+        scope,
+        gen_ai_endpoint,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+    )
 
     return response
```
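
None of this changes how the instrumentation is consumed: openlit patches the Ollama client when openlit.init() runs, and the spans and metrics in the hunks above are emitted without further code changes. A minimal end-to-end example (endpoint URL illustrative; assumes a local Ollama server and an OTLP-compatible backend):

```python
# Minimal usage: initialize openlit, then call Ollama as usual; the
# chat/embeddings wrappers in this diff do the rest.
import openlit
import ollama

openlit.init(
    application_name="demo-app",
    environment="dev",
    otlp_endpoint="http://127.0.0.1:4318",  # illustrative OTLP endpoint
)

response = ollama.chat(
    model="llama3.2",
    messages=[{"role": "user", "content": "Hello"}],
)
print(response["message"]["content"])
```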