openlit 1.34.29__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +111 -24
- openlit/instrumentation/crewai/async_crewai.py +114 -0
- openlit/instrumentation/crewai/crewai.py +104 -131
- openlit/instrumentation/crewai/utils.py +615 -0
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +312 -101
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +660 -186
- openlit/instrumentation/openai_agents/__init__.py +6 -2
- openlit/instrumentation/openai_agents/processor.py +409 -537
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +101 -7
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.29.dist-info/RECORD +0 -166
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
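Taken together, the body of the diff is dominated by formatter-style rewrites (long signatures and calls split one argument per line, with trailing commas), alongside functional additions visible in the file list: async CrewAI instrumentation, a new LangChain callback handler, and reasoning-token accounting in several LLM integrations. The excerpts below show representative hunks. For orientation, a minimal sketch of how these instrumentors are activated in an application, assuming openlit's documented init() entry point (the endpoint and names are illustrative):

    import openlit

    # Illustrative values; any OTLP-compatible endpoint works.
    openlit.init(
        otlp_endpoint="http://127.0.0.1:4318",
        application_name="demo-app",
        environment="development",
    )

    # From here on, calls made through instrumented clients (OpenAI, Anthropic,
    # GPT4All, Google AI Studio, ...) emit spans and metrics automatically.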
openlit/instrumentation/google_ai_studio/utils.py

@@ -1,8 +1,13 @@
 """
 Google AI Studio OpenTelemetry instrumentation utility functions
 """
+
 import time
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.sdk.resources import (
+    SERVICE_NAME,
+    TELEMETRY_SDK_NAME,
+    DEPLOYMENT_ENVIRONMENT,
+)
 from opentelemetry.trace import Status, StatusCode
 from openlit.__helpers import (
     calculate_ttft,
@@ -13,6 +18,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -37,7 +43,9 @@ def format_content(messages):
             if part.thought:
                 content_str.append(f"thought: {part.thought}")
             if part.code_execution_result:
-                content_str.append(f"code_execution_result: {part.code_execution_result}")
+                content_str.append(
+                    f"code_execution_result: {part.code_execution_result}"
+                )
             if part.executable_code:
                 content_str.append(f"executable_code: {part.executable_code}")
             if part.file_data:
@@ -45,7 +53,9 @@ def format_content(messages):
             if part.function_call:
                 content_str.append(f"function_call: {part.function_call}")
             if part.function_response:
-                content_str.append(f"function_response: {part.function_response}")
+                content_str.append(
+                    f"function_response: {part.function_response}"
+                )
             if part.inline_data:
                 content_str.append(f"inline_data: {part.inline_data}")
 
@@ -61,6 +71,7 @@ def format_content(messages):
 
     return prompt
 
+
 def process_chunk(self, chunk):
     """
     Process a chunk of response data and update state.
@@ -76,25 +87,41 @@ def process_chunk(self, chunk):
 
     chunked = response_as_dict(chunk)
 
-
-    self._response_id = str(chunked.get('response_id'))
-    self._input_tokens = chunked.get('usage_metadata').get('prompt_token_count')
-    self._response_model = chunked.get('model_version')
+    self._response_id = str(chunked.get("response_id"))
+    self._input_tokens = chunked.get("usage_metadata").get("prompt_token_count")
+    self._response_model = chunked.get("model_version")
 
     if chunk.text:
         self._llmresponse += str(chunk.text)
 
-    self._output_tokens = chunked.get('usage_metadata').get('candidates_token_count')
-    self._reasoning_tokens = chunked.get('usage_metadata').get('thoughts_token_count') or 0
-    self._finish_reason = str(chunked.get('candidates')[0].get('finish_reason'))
+    self._output_tokens = chunked.get("usage_metadata").get("candidates_token_count")
+    self._reasoning_tokens = (
+        chunked.get("usage_metadata").get("thoughts_token_count") or 0
+    )
+    self._finish_reason = str(chunked.get("candidates")[0].get("finish_reason"))
 
     try:
-        self._tools = chunked.get('candidates', [])[0].get('content', {}).get('parts', [])[0].get('function_call', '')
+        self._tools = (
+            chunked.get("candidates", [])[0]
+            .get("content", {})
+            .get("parts", [])[0]
+            .get("function_call", "")
+        )
     except:
         self._tools = None
 
-
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -103,29 +130,36 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    prompt = format_content(scope._kwargs.get('contents', ''))
+    prompt = format_content(scope._kwargs.get("contents", ""))
     request_model = scope._kwargs.get("model", "gemini-2.0-flash")
 
-    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Set Span attributes (OTel Semconv)
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_GEMINI)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OPERATION,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_GEMINI
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
 
-    inference_config = scope._kwargs.get('config', {})
+    inference_config = scope._kwargs.get("config", {})
 
     attributes = [
-        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, "frequency_penalty"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
+        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, "presence_penalty"),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, "stop_sequences"),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
     ]
 
     # Set each attribute if the corresponding value exists and is not None
@@ -135,14 +169,26 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
         if value is not None:
             scope._span.set_attribute(attribute, value)
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-        [scope._finish_reason])
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-        'text' if isinstance(scope._llmresponse, str) else 'json')
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_REASONING_TOKENS, scope._reasoning_tokens
+    )
+
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
@@ -151,18 +197,28 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens + scope._reasoning_tokens,
+    )
 
     if scope._tools:
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get('name', ''))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get('id', '')))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get('args', '')))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("name", "")
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", ""))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("args", ""))
+        )
 
     # To be removed one the change to span_attributes (from span events) is complete
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
@@ -190,57 +246,114 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             response_model=scope._response_model,
         )
 
-        metrics['genai_client_operation_duration'].record(
-            scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            scope._input_tokens + scope._output_tokens, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
+        metrics["genai_client_operation_duration"].record(
+            scope._end_time - scope._start_time, metrics_attributes
+        )
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(scope._output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(scope._input_tokens, metrics_attributes)
+        metrics["genai_reasoning_tokens"].add(
+            scope._reasoning_tokens, metrics_attributes
+        )
+        metrics["genai_cost"].record(cost, metrics_attributes)
+        metrics["genai_client_usage_tokens"].record(
+            scope._input_tokens + scope._output_tokens + scope._reasoning_tokens,
+            metrics_attributes,
+        )
 
 
-def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-    capture_message_content=False, disable_metrics=False, version=""):
+def process_streaming_chat_response(
+    self,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process chat request and generate Telemetry
     """
 
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=True)
-
-
-def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    common_chat_logic(
+        self,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
+
+def process_chat_response(
+    instance,
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    args,
+    kwargs,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+):
     """
     Process chat request and generate Telemetry
     """
 
-    self = type('GenericScope', (), {})()
+    self = type("GenericScope", (), {})()
    response_dict = response_as_dict(response)
 
     self._start_time = start_time
     self._end_time = time.time()
     self._span = span
     self._llmresponse = str(response.text)
-    self._input_tokens = response_dict.get('usage_metadata').get('prompt_token_count')
-    self._output_tokens = response_dict.get('usage_metadata').get('candidates_token_count')
-    self._reasoning_tokens = response_dict.get('usage_metadata').get('thoughts_token_count') or 0
-    self._response_model = response_dict.get('model_version')
+    self._input_tokens = response_dict.get("usage_metadata").get("prompt_token_count")
+    self._output_tokens = response_dict.get("usage_metadata").get(
+        "candidates_token_count"
+    )
+    self._reasoning_tokens = (
+        response_dict.get("usage_metadata").get("thoughts_token_count") or 0
+    )
+    self._response_model = response_dict.get("model_version")
     self._timestamps = []
     self._ttft, self._tbt = self._end_time - self._start_time, 0
     self._server_address, self._server_port = server_address, server_port
     self._kwargs = kwargs
-    self._finish_reason = str(response_dict.get('candidates')[0].get('finish_reason'))
+    self._finish_reason = str(response_dict.get("candidates")[0].get("finish_reason"))
 
     try:
-        self._tools = response_dict.get('candidates', [])[0].get('content', {}).get('parts', [])[0].get('function_call', '')
+        self._tools = (
+            response_dict.get("candidates", [])[0]
+            .get("content", {})
+            .get("parts", [])[0]
+            .get("function_call", "")
+        )
     except:
         self._tools = None
 
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        self,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
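Beyond the reformatting, the functional change in this file is reasoning-token accounting: `thoughts_token_count` from Gemini's `usage_metadata` is captured as `_reasoning_tokens`, folded into the client token-usage span attribute, and recorded on a new `genai_reasoning_tokens` metric. A condensed sketch of the arithmetic, using the same dict shape the diff reads (the sample numbers are illustrative):

    # Same usage_metadata shape the diff reads; numbers are illustrative.
    usage = {
        "prompt_token_count": 12,
        "candidates_token_count": 40,
        "thoughts_token_count": 9,  # None/absent for non-thinking models
    }

    input_tokens = usage.get("prompt_token_count")
    output_tokens = usage.get("candidates_token_count")
    reasoning_tokens = usage.get("thoughts_token_count") or 0

    # Value recorded on GEN_AI_CLIENT_TOKEN_USAGE and genai_client_usage_tokens:
    total = input_tokens + output_tokens + reasoning_tokens  # 61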
openlit/instrumentation/gpt4all/__init__.py

@@ -5,12 +5,11 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.gpt4all.gpt4all import (
-    embed, generate
-)
+from openlit.instrumentation.gpt4all.gpt4all import embed, generate
 
 _instruments = ("gpt4all >= 2.6.0",)
 
+
 class GPT4AllInstrumentor(BaseInstrumentor):
     """
     An instrumentor for GPT4All client library.
@@ -33,16 +32,32 @@ class GPT4AllInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "gpt4all",
             "GPT4All.generate",
-            generate(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            generate(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
         # embed
         wrap_function_wrapper(
             "gpt4all",
             "Embed4All.embed",
-            embed(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            embed(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
     def _uninstrument(self, **kwargs):
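The instrumentor keeps its factory pattern: generate(...) and embed(...) are not the wrappers themselves but return wrapt-compatible wrapper callables closed over the telemetry configuration. A stripped-down sketch of that shape (DemoClient and make_wrapper are hypothetical stand-ins, not part of openlit or gpt4all):

    import sys
    import wrapt

    class DemoClient:
        def generate(self, prompt):
            return f"echo: {prompt}"

    def make_wrapper(version, application_name):
        # Factory: returns the actual wrapper, mirroring how generate()/embed()
        # above return callables bound to version, tracer, metrics, etc.
        def wrapper(wrapped, instance, args, kwargs):
            print(f"[{application_name} v{version}] calling {wrapped.__name__}")
            return wrapped(*args, **kwargs)  # telemetry would wrap this call
        return wrapper

    # Equivalent in spirit to:
    #   wrap_function_wrapper("gpt4all", "GPT4All.generate", generate(...))
    wrapt.wrap_function_wrapper(
        sys.modules[__name__], "DemoClient.generate",
        make_wrapper("1.34.31", "demo-app"),
    )

    print(DemoClient().generate("hi"))  # wrapper fires, then the original runs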
openlit/instrumentation/gpt4all/gpt4all.py

@@ -4,20 +4,26 @@ Module for monitoring GPT4All API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
 from openlit.instrumentation.gpt4all.utils import (
     process_generate_response,
     process_chunk,
     process_streaming_generate_response,
-    process_embedding_response
+    process_embedding_response,
 )
 from openlit.semcov import SemanticConvention
 
-def generate(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def generate(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -28,16 +34,16 @@ def generate(version, environment, application_name,
        """
 
         def __init__(
-                self,
-                wrapped,
-                span,
-                span_name,
-                args,
-                kwargs,
-                server_address,
-                server_port,
-                request_model,
-            ):
+            self,
+            wrapped,
+            span,
+            span_name,
+            args,
+            kwargs,
+            server_address,
+            server_port,
+            request_model,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
@@ -75,7 +81,9 @@ def generate(version, environment, application_name,
                 return chunk
             except StopIteration:
                 try:
-                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                    with tracer.start_as_current_span(
+                        self._span_name, kind=SpanKind.CLIENT
+                    ) as self._span:
                         process_streaming_generate_response(
                             self,
                             pricing_info=pricing_info,
@@ -84,7 +92,7 @@ def generate(version, environment, application_name,
                             metrics=metrics,
                             capture_message_content=capture_message_content,
                             disable_metrics=disable_metrics,
-                            version=version
+                            version=version,
                         )
 
             except Exception as e:
@@ -100,8 +108,13 @@ def generate(version, environment, application_name,
        # Check if streaming is enabled for the API call
        streaming = kwargs.get("streaming", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
-        request_model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 80
+        )
+        request_model = (
+            str(instance.model.model_path).rsplit("/", maxsplit=1)[-1]
+            or "orca-mini-3b-gguf2-q4_0.gguf"
+        )
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -109,7 +122,16 @@ def generate(version, environment, application_name,
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = wrapped(*args, **kwargs)
            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-            return TracedSyncStream(awaited_wrapped, span, span_name, args, kwargs, server_address, server_port, request_model)
+            return TracedSyncStream(
+                awaited_wrapped,
+                span,
+                span_name,
+                args,
+                kwargs,
+                server_address,
+                server_port,
+                request_model,
+            )
 
         # Handling for non-streaming responses
         else:
@@ -133,7 +155,7 @@ def generate(version, environment, application_name,
                    kwargs=kwargs,
                    capture_message_content=capture_message_content,
                    disable_metrics=disable_metrics,
-                    version=version
+                    version=version,
                 )
 
             except Exception as e:
@@ -143,8 +165,17 @@ def generate(version, environment, application_name,
 
     return wrapper
 
-def embed(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def embed(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -154,10 +185,17 @@ def embed(version, environment, application_name,
         Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
-        request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        server_address, server_port = set_server_address_and_port(
+            instance, "127.0.0.1", 80
+        )
+        request_model = (
+            str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1]
+            or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        )
 
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        span_name = (
+            f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        )
 
         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
@@ -178,7 +216,7 @@ def embed(version, environment, application_name,
                    capture_message_content=capture_message_content,
                    disable_metrics=disable_metrics,
                    version=version,
-                    **kwargs
+                    **kwargs,
                 )
 
             except Exception as e:
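As the `streaming = kwargs.get("streaming", False)` check in the wrapper shows, the span lifecycle follows the call style: non-streaming calls are processed when generate() returns, while streaming calls are wrapped in a TracedSyncStream that finalizes the span once the iterator is exhausted (the StopIteration branch above). A usage sketch against the gpt4all client; the model name matches the diff's default, and downloading it is a side effect of the constructor:

    from gpt4all import GPT4All

    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")

    # Non-streaming: the span is finished as soon as generate() returns.
    text = model.generate("Why is the sky blue?", max_tokens=64)

    # Streaming: generate() yields tokens; the TracedSyncStream wrapper ends
    # the span when iteration stops.
    for token in model.generate("Why is the sky blue?", max_tokens=64, streaming=True):
        print(token, end="")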