openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/google_ai_studio/google_ai_studio.py
@@ -4,19 +4,25 @@ Module for monitoring Google AI Studio API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
 from openlit.instrumentation.google_ai_studio.utils import (
     process_chat_response,
     process_chunk,
-    process_streaming_chat_response
+    process_streaming_chat_response,
 )
 from openlit.semcov import SemanticConvention
 
-
-
+
+def generate(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -26,7 +32,9 @@ def generate(version, environment, application_name,
         Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(
+        server_address, server_port = set_server_address_and_port(
+            instance, "generativelanguage.googleapis.com", 443
+        )
         request_model = kwargs.get("model", "gemini-2.0-flash")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
@@ -37,22 +45,22 @@ def generate(version, environment, application_name,
 
         try:
             response = process_chat_response(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                instance=instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
             )
 
         except Exception as e:
@@ -63,8 +71,17 @@ def generate(version, environment, application_name,
 
     return wrapper
 
-
-
+
+def generate_stream(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -75,23 +92,23 @@ def generate_stream(version, environment, application_name,
         """
 
         def __init__(
-
-
-
-
-
-
-
-
-
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            **args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
-            self._llmresponse =
-            self._finish_reason =
-            self._output_tokens =
-            self._input_tokens =
-            self._response_model =
+            self._llmresponse = ""
+            self._finish_reason = ""
+            self._output_tokens = ""
+            self._input_tokens = ""
+            self._response_model = ""
            self._tools = None
 
             self._args = args
@@ -125,7 +142,9 @@ def generate_stream(version, environment, application_name,
                 return chunk
             except StopIteration:
                 try:
-                    with tracer.start_as_current_span(
+                    with tracer.start_as_current_span(
+                        self._span_name, kind=SpanKind.CLIENT
+                    ) as self._span:
                         process_streaming_chat_response(
                             self,
                             pricing_info=pricing_info,
@@ -134,7 +153,7 @@ def generate_stream(version, environment, application_name,
                             metrics=metrics,
                             capture_message_content=capture_message_content,
                             disable_metrics=disable_metrics,
-                            version=version
+                            version=version,
                         )
 
                 except Exception as e:
@@ -146,7 +165,9 @@ def generate_stream(version, environment, application_name,
         Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(
+        server_address, server_port = set_server_address_and_port(
+            instance, "generativelanguage.googleapis.com", 443
+        )
         request_model = kwargs.get("model", "gemini-2.0-flash")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
@@ -154,6 +175,8 @@ def generate_stream(version, environment, application_name,
         awaited_wrapped = wrapped(*args, **kwargs)
         span = tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT)
 
-        return TracedSyncStream(
+        return TracedSyncStream(
+            awaited_wrapped, span, span_name, kwargs, server_address, server_port
+        )
 
     return wrapper

openlit/instrumentation/google_ai_studio/utils.py
@@ -1,8 +1,13 @@
 """
 Google AI Studio OpenTelemetry instrumentation utility functions
 """
+
 import time
-from opentelemetry.sdk.resources import
+from opentelemetry.sdk.resources import (
+    SERVICE_NAME,
+    TELEMETRY_SDK_NAME,
+    DEPLOYMENT_ENVIRONMENT,
+)
 from opentelemetry.trace import Status, StatusCode
 from openlit.__helpers import (
     calculate_ttft,
@@ -13,6 +18,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -37,7 +43,9 @@ def format_content(messages):
             if part.thought:
                 content_str.append(f"thought: {part.thought}")
             if part.code_execution_result:
-                content_str.append(
+                content_str.append(
+                    f"code_execution_result: {part.code_execution_result}"
+                )
             if part.executable_code:
                 content_str.append(f"executable_code: {part.executable_code}")
             if part.file_data:
@@ -45,7 +53,9 @@ def format_content(messages):
             if part.function_call:
                 content_str.append(f"function_call: {part.function_call}")
             if part.function_response:
-                content_str.append(
+                content_str.append(
+                    f"function_response: {part.function_response}"
+                )
             if part.inline_data:
                 content_str.append(f"inline_data: {part.inline_data}")
 
@@ -61,6 +71,7 @@ def format_content(messages):
 
     return prompt
 
+
 def process_chunk(self, chunk):
     """
     Process a chunk of response data and update state.
@@ -76,25 +87,41 @@ def process_chunk(self, chunk):
 
     chunked = response_as_dict(chunk)
 
-
-    self.
-    self.
-    self._response_model = chunked.get('model_version')
+    self._response_id = str(chunked.get("response_id"))
+    self._input_tokens = chunked.get("usage_metadata").get("prompt_token_count")
+    self._response_model = chunked.get("model_version")
 
     if chunk.text:
         self._llmresponse += str(chunk.text)
 
-    self._output_tokens = chunked.get(
-    self._reasoning_tokens =
-
+    self._output_tokens = chunked.get("usage_metadata").get("candidates_token_count")
+    self._reasoning_tokens = (
+        chunked.get("usage_metadata").get("thoughts_token_count") or 0
+    )
+    self._finish_reason = str(chunked.get("candidates")[0].get("finish_reason"))
 
     try:
-        self._tools =
+        self._tools = (
+            chunked.get("candidates", [])[0]
+            .get("content", {})
+            .get("parts", [])[0]
+            .get("function_call", "")
+        )
     except:
         self._tools = None
 
-
-
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -103,29 +130,36 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    prompt = format_content(scope._kwargs.get(
+    prompt = format_content(scope._kwargs.get("contents", ""))
     request_model = scope._kwargs.get("model", "gemini-2.0-flash")
 
-    cost = get_chat_model_cost(
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Set Span attributes (OTel Semconv)
-    scope._span.set_attribute(TELEMETRY_SDK_NAME,
-    scope._span.set_attribute(
-
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OPERATION,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_GEMINI
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
 
-    inference_config = scope._kwargs.get(
+    inference_config = scope._kwargs.get("config", {})
 
     attributes = [
-        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-        (SemanticConvention.GEN_AI_REQUEST_TOP_P,
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K,
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, "frequency_penalty"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
+        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, "presence_penalty"),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, "stop_sequences"),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
     ]
 
     # Set each attribute if the corresponding value exists and is not None
@@ -135,14 +169,26 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
         if value is not None:
             scope._span.set_attribute(attribute, value)
 
-    scope._span.set_attribute(
-
-
-    scope._span.set_attribute(
-
-
-    scope._span.set_attribute(
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_REASONING_TOKENS, scope._reasoning_tokens
+    )
+
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
@@ -151,18 +197,28 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-    scope._span.set_attribute(
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens + scope._reasoning_tokens,
+    )
 
     if scope._tools:
-        scope._span.set_attribute(
-
-
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("name", "")
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", ""))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("args", ""))
+        )
 
     # To be removed one the change to span_attributes (from span events) is complete
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
@@ -190,57 +246,114 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             response_model=scope._response_model,
         )
 
-        metrics[
-
-
-        metrics[
-        metrics[
-        metrics[
-        metrics[
-        metrics[
-        metrics[
-        scope.
+        metrics["genai_client_operation_duration"].record(
+            scope._end_time - scope._start_time, metrics_attributes
+        )
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(scope._output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(scope._input_tokens, metrics_attributes)
+        metrics["genai_reasoning_tokens"].add(
+            scope._reasoning_tokens, metrics_attributes
+        )
+        metrics["genai_cost"].record(cost, metrics_attributes)
+        metrics["genai_client_usage_tokens"].record(
+            scope._input_tokens + scope._output_tokens + scope._reasoning_tokens,
+            metrics_attributes,
+        )
 
 
-def process_streaming_chat_response(
-
+def process_streaming_chat_response(
+    self,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process chat request and generate Telemetry
     """
 
-    common_chat_logic(
-
-
-
-
-
+    common_chat_logic(
+        self,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
+
+def process_chat_response(
+    instance,
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    args,
+    kwargs,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+):
     """
     Process chat request and generate Telemetry
     """
 
-    self = type(
+    self = type("GenericScope", (), {})()
     response_dict = response_as_dict(response)
 
     self._start_time = start_time
     self._end_time = time.time()
     self._span = span
     self._llmresponse = str(response.text)
-    self._input_tokens = response_dict.get(
-    self._output_tokens = response_dict.get(
-
-
+    self._input_tokens = response_dict.get("usage_metadata").get("prompt_token_count")
+    self._output_tokens = response_dict.get("usage_metadata").get(
+        "candidates_token_count"
+    )
+    self._reasoning_tokens = (
+        response_dict.get("usage_metadata").get("thoughts_token_count") or 0
+    )
+    self._response_model = response_dict.get("model_version")
     self._timestamps = []
     self._ttft, self._tbt = self._end_time - self._start_time, 0
     self._server_address, self._server_port = server_address, server_port
     self._kwargs = kwargs
-    self._finish_reason = str(response_dict.get(
+    self._finish_reason = str(response_dict.get("candidates")[0].get("finish_reason"))
 
     try:
-        self._tools =
+        self._tools = (
+            response_dict.get("candidates", [])[0]
+            .get("content", {})
+            .get("parts", [])[0]
+            .get("function_call", "")
+        )
     except:
         self._tools = None
 
-    common_chat_logic(
-
+    common_chat_logic(
+        self,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response

openlit/instrumentation/gpt4all/__init__.py
@@ -5,12 +5,11 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.gpt4all.gpt4all import (
-    embed, generate
-)
+from openlit.instrumentation.gpt4all.gpt4all import embed, generate
 
 _instruments = ("gpt4all >= 2.6.0",)
 
+
 class GPT4AllInstrumentor(BaseInstrumentor):
     """
     An instrumentor for GPT4All client library.
@@ -33,16 +32,32 @@ class GPT4AllInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "gpt4all",
             "GPT4All.generate",
-            generate(
-
+            generate(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
         # embed
         wrap_function_wrapper(
             "gpt4all",
             "Embed4All.embed",
-            embed(
-
+            embed(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
     def _uninstrument(self, **kwargs):