openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff shows the publicly released contents of the two package versions as they appear in their respective supported registries. It is provided for informational purposes only.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
--- a/openlit/instrumentation/gpt4all/gpt4all.py
+++ b/openlit/instrumentation/gpt4all/gpt4all.py
@@ -4,20 +4,26 @@ Module for monitoring GPT4All API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
 from openlit.instrumentation.gpt4all.utils import (
     process_generate_response,
     process_chunk,
     process_streaming_generate_response,
-    process_embedding_response
+    process_embedding_response,
 )
 from openlit.semcov import SemanticConvention
 
-def generate(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def generate(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -28,16 +34,16 @@ def generate(version, environment, application_name,
     """
 
     def __init__(
-            self,
-            wrapped,
-            span,
-            span_name,
-            args,
-            kwargs,
-            server_address,
-            server_port,
-            request_model,
-        ):
+        self,
+        wrapped,
+        span,
+        span_name,
+        args,
+        kwargs,
+        server_address,
+        server_port,
+        request_model,
+    ):
         self.__wrapped__ = wrapped
         self._span = span
         self._span_name = span_name
@@ -75,7 +81,9 @@ def generate(version, environment, application_name,
             return chunk
         except StopIteration:
             try:
-                with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                with tracer.start_as_current_span(
+                    self._span_name, kind=SpanKind.CLIENT
+                ) as self._span:
                     process_streaming_generate_response(
                         self,
                         pricing_info=pricing_info,
@@ -84,7 +92,7 @@ def generate(version, environment, application_name,
                         metrics=metrics,
                         capture_message_content=capture_message_content,
                         disable_metrics=disable_metrics,
-                        version=version
+                        version=version,
                     )
 
         except Exception as e:
@@ -100,8 +108,13 @@ def generate(version, environment, application_name,
        # Check if streaming is enabled for the API call
        streaming = kwargs.get("streaming", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
-        request_model = str(instance.model.model_path).rsplit("/", maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 80
+        )
+        request_model = (
+            str(instance.model.model_path).rsplit("/", maxsplit=1)[-1]
+            or "orca-mini-3b-gguf2-q4_0.gguf"
+        )
 
        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -109,7 +122,16 @@ def generate(version, environment, application_name,
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = wrapped(*args, **kwargs)
            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-            return TracedSyncStream(awaited_wrapped, span, span_name, args, kwargs, server_address, server_port, request_model)
+            return TracedSyncStream(
+                awaited_wrapped,
+                span,
+                span_name,
+                args,
+                kwargs,
+                server_address,
+                server_port,
+                request_model,
+            )
 
        # Handling for non-streaming responses
        else:
@@ -133,7 +155,7 @@ def generate(version, environment, application_name,
                    kwargs=kwargs,
                    capture_message_content=capture_message_content,
                    disable_metrics=disable_metrics,
-                    version=version
+                    version=version,
                )
 
            except Exception as e:
@@ -143,8 +165,17 @@ def generate(version, environment, application_name,
 
    return wrapper
 
-def embed(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def embed(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
    """
    Generates a telemetry wrapper for GenAI function call
    """
@@ -154,10 +185,17 @@ def embed(version, environment, application_name,
        Wraps the GenAI function call.
        """
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
-        request_model = str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 80
+        )
+        request_model = (
+            str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1]
+            or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        )
 
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        span_name = (
+            f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        )
 
        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
@@ -178,7 +216,7 @@ def embed(version, environment, application_name,
                    capture_message_content=capture_message_content,
                    disable_metrics=disable_metrics,
                    version=version,
-                    **kwargs
+                    **kwargs,
                )
 
            except Exception as e:

--- a/openlit/instrumentation/gpt4all/utils.py
+++ b/openlit/instrumentation/gpt4all/utils.py
@@ -1,9 +1,14 @@
 """
 GPT4All OpenTelemetry instrumentation utility functions
 """
+
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.sdk.resources import (
+    SERVICE_NAME,
+    TELEMETRY_SDK_NAME,
+    DEPLOYMENT_ENVIRONMENT,
+)
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
@@ -16,12 +21,14 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(prompt):
     """
     Process a prompt to extract content.
     """
     return str(prompt) if prompt else ""
 
+
 def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
@@ -38,8 +45,22 @@ def process_chunk(scope, chunk):
        scope._llmresponse += chunk
        scope._end_time = time.time()
 
-def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address,
-    server_port, request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
+
+def common_span_attributes(
+    scope,
+    gen_ai_operation,
+    gen_ai_system,
+    server_address,
+    server_port,
+    request_model,
+    response_model,
+    environment,
+    application_name,
+    is_stream,
+    tbt,
+    ttft,
+    version,
+):
    """
    Set common span attributes for both generate and embed operations.
    """
@@ -58,9 +79,25 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address,
    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
-def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address,
-    server_port, request_model, response_model, environment, application_name, start_time,
-    end_time, input_tokens, output_tokens, cost, tbt=None, ttft=None):
+
+def record_completion_metrics(
+    metrics,
+    gen_ai_operation,
+    gen_ai_system,
+    server_address,
+    server_port,
+    request_model,
+    response_model,
+    environment,
+    application_name,
+    start_time,
+    end_time,
+    input_tokens,
+    output_tokens,
+    cost,
+    tbt=None,
+    ttft=None,
+):
    """
    Record completion-specific metrics for the operation.
    """
@@ -79,16 +116,31 @@ def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address,
    metrics["genai_requests"].add(1, attributes)
    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+    metrics["genai_client_usage_tokens"].record(
+        input_tokens + output_tokens, attributes
+    )
    metrics["genai_cost"].record(cost, attributes)
    if tbt is not None:
        metrics["genai_server_tbt"].record(tbt, attributes)
    if ttft is not None:
        metrics["genai_server_ttft"].record(ttft, attributes)
 
-def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address,
-    server_port, request_model, response_model, environment, application_name, start_time,
-    end_time, input_tokens, cost):
+
+def record_embedding_metrics(
+    metrics,
+    gen_ai_operation,
+    gen_ai_system,
+    server_address,
+    server_port,
+    request_model,
+    response_model,
+    environment,
+    application_name,
+    start_time,
+    end_time,
+    input_tokens,
+    cost,
+):
    """
    Record embedding-specific metrics for the operation.
    """
@@ -109,8 +161,18 @@ def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address,
    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
    metrics["genai_cost"].record(cost, attributes)
 
-def common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
+
+def common_t2s_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
    """
    Process generate request and generate Telemetry
    """
@@ -118,7 +180,9 @@ def common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
    if len(scope._timestamps) > 1:
        scope._tbt = calculate_tbt(scope._timestamps)
 
-    prompt = format_content(scope._kwargs.get("prompt") or (scope._args[0] if scope._args else "") or "")
+    prompt = format_content(
+        scope._kwargs.get("prompt") or (scope._args[0] if scope._args else "") or ""
+    )
    request_model = scope._request_model
 
    # Calculate tokens using input prompt and aggregated response
@@ -128,36 +192,80 @@ def common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
 
    # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
    # Span Attributes for Request parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("repeat_penalty", 1.18))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", 200))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temp", 0.7))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 0.4))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("top_k", 40))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+        scope._kwargs.get("repeat_penalty", 1.18),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+        scope._kwargs.get("max_tokens", 200),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+        scope._kwargs.get("presence_penalty", 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temp", 0.7)
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 0.4)
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("top_k", 40)
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
    # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens
+    )
    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
    # Span Attributes for Tools
    if scope._tools:
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", "")).get("name", "")
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", "").get("arguments", "")))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", "")
+        ).get("name", "")
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", ""))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS,
+            str(scope._tools.get("function", "").get("arguments", "")),
+        )
 
    # Span Attributes for Content
    if capture_message_content:
        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
    # To be removed one the change to span_attributes (from span events) is complete
    scope._span.add_event(
@@ -177,13 +285,36 @@ def common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
 
    # Metrics
    if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-            scope._server_address, scope._server_port, request_model, request_model, environment,
-            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
-            cost, scope._tbt, scope._ttft)
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            input_tokens,
+            output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
 
-def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version):
+
+def common_embedding_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+):
    """
    Process embedding request and generate Telemetry
    """
@@ -196,14 +327,29 @@ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
    cost = get_embed_model_cost(request_model, pricing_info, input_tokens)
 
    # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, False, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+        SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        False,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
    # Embedding-specific span attributes
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens
+    )
    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
    # Span Attributes for Content
@@ -211,7 +357,9 @@ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
        scope._span.add_event(
            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
            attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(scope._kwargs.get("input", "")),
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(
+                    scope._kwargs.get("input", "")
+                ),
            },
        )
 
@@ -219,21 +367,66 @@ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
 
    # Metrics
    if not disable_metrics:
-        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-            scope._server_address, scope._server_port, request_model, request_model, environment,
-            application_name, scope._start_time, scope._end_time, input_tokens, cost)
+        record_embedding_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            input_tokens,
+            cost,
+        )
 
-def process_streaming_generate_response(scope, pricing_info, environment, application_name,
-    metrics, capture_message_content=False, disable_metrics=False, version=""):
+
+def process_streaming_generate_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
    """
    Process generate request and generate Telemetry
    """
-    common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=True)
+    common_t2s_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
 
-def process_generate_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span, args, kwargs,
-    capture_message_content=False, disable_metrics=False, version="1.0.0"):
+
+def process_generate_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    args,
+    kwargs,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+):
    """
    Process generate request and generate Telemetry
    """
@@ -252,14 +445,37 @@ def process_generate_response(response, request_model, pricing_info, server_port,
    scope._args = args
    scope._tools = None
 
-    common_t2s_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
+    common_t2s_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
    return response
 
-def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span,
-    capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+
+def process_embedding_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
    """
    Process embedding request and generate Telemetry
    """
@@ -275,7 +491,15 @@ def process_embedding_response(response, request_model, pricing_info, server_port,
    scope._server_address, scope._server_port = server_address, server_port
    scope._kwargs = kwargs
 
-    common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version)
+    common_embedding_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+    )
 
    return response