langtrace-python-sdk 2.1.28__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/cohere_example/chat.py +1 -0
- examples/cohere_example/chat_stream.py +3 -0
- examples/gemini_example/__init__.py +6 -0
- examples/gemini_example/function_tools.py +62 -0
- examples/gemini_example/main.py +91 -0
- examples/langchain_example/__init__.py +8 -0
- examples/langchain_example/groq_example.py +28 -15
- examples/ollama_example/basic.py +1 -0
- examples/openai_example/__init__.py +1 -0
- examples/openai_example/async_tool_calling_nonstreaming.py +1 -1
- examples/openai_example/chat_completion.py +1 -1
- examples/openai_example/embeddings_create.py +1 -0
- examples/openai_example/images_edit.py +2 -2
- examples/vertexai_example/__init__.py +6 -0
- examples/vertexai_example/main.py +214 -0
- langtrace_python_sdk/constants/instrumentation/common.py +2 -0
- langtrace_python_sdk/constants/instrumentation/gemini.py +12 -0
- langtrace_python_sdk/constants/instrumentation/vertexai.py +42 -0
- langtrace_python_sdk/instrumentation/__init__.py +4 -0
- langtrace_python_sdk/instrumentation/anthropic/patch.py +68 -96
- langtrace_python_sdk/instrumentation/chroma/patch.py +29 -29
- langtrace_python_sdk/instrumentation/cohere/patch.py +143 -242
- langtrace_python_sdk/instrumentation/gemini/__init__.py +3 -0
- langtrace_python_sdk/instrumentation/gemini/instrumentation.py +36 -0
- langtrace_python_sdk/instrumentation/gemini/patch.py +186 -0
- langtrace_python_sdk/instrumentation/groq/patch.py +82 -125
- langtrace_python_sdk/instrumentation/ollama/patch.py +62 -65
- langtrace_python_sdk/instrumentation/openai/patch.py +190 -494
- langtrace_python_sdk/instrumentation/qdrant/patch.py +6 -6
- langtrace_python_sdk/instrumentation/vertexai/__init__.py +3 -0
- langtrace_python_sdk/instrumentation/vertexai/instrumentation.py +33 -0
- langtrace_python_sdk/instrumentation/vertexai/patch.py +131 -0
- langtrace_python_sdk/langtrace.py +7 -1
- langtrace_python_sdk/utils/__init__.py +14 -3
- langtrace_python_sdk/utils/llm.py +311 -6
- langtrace_python_sdk/version.py +1 -1
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/METADATA +26 -19
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/RECORD +55 -36
- tests/anthropic/test_anthropic.py +28 -27
- tests/cohere/test_cohere_chat.py +36 -36
- tests/cohere/test_cohere_embed.py +12 -9
- tests/cohere/test_cohere_rerank.py +18 -11
- tests/groq/cassettes/test_async_chat_completion.yaml +113 -0
- tests/groq/cassettes/test_async_chat_completion_streaming.yaml +2232 -0
- tests/groq/cassettes/test_chat_completion.yaml +114 -0
- tests/groq/cassettes/test_chat_completion_streaming.yaml +2512 -0
- tests/groq/conftest.py +33 -0
- tests/groq/test_groq.py +142 -0
- tests/openai/cassettes/test_async_chat_completion_streaming.yaml +28 -28
- tests/openai/test_chat_completion.py +53 -67
- tests/openai/test_image_generation.py +47 -24
- tests/utils.py +40 -5
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/WHEEL +0 -0
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/entry_points.txt +0 -0
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/licenses/LICENSE +0 -0
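The most significant additions in this release are new auto-instrumentation modules for Google Gemini and Vertex AI, alongside a consolidation of per-provider span logic into shared helpers in langtrace_python_sdk/utils/llm.py (+311 lines). For orientation only (this example is not part of the diff), the new Gemini tracing should be picked up the same way as the existing providers, via the SDK's init entry point; the sketch below assumes the documented langtrace.init() call and an already-configured Gemini API key, and the model name is illustrative.

# Sketch: exercising the new Gemini instrumentation (assumes langtrace.init()
# as documented for this SDK and google-generativeai >= 0.5.0).
from langtrace_python_sdk import langtrace

langtrace.init()  # must run before the instrumented client is used

import google.generativeai as genai

model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Say hello in one sentence.")
print(response.text)  # the generate_content call is recorded as a span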
langtrace_python_sdk/instrumentation/cohere/patch.py

@@ -16,19 +16,24 @@ limitations under the License.
 
 import json
 
+from langtrace_python_sdk.utils.llm import (
+    get_langtrace_attributes,
+    get_llm_request_attributes,
+    get_extra_attributes,
+    get_llm_url,
+    set_event_completion,
+    set_usage_attributes,
+)
 from langtrace.trace_attributes import Event, LLMSpanAttributes
-from
+from langtrace_python_sdk.utils import set_span_attribute
 from opentelemetry.trace import SpanKind
 from opentelemetry.trace.status import Status, StatusCode
 
 from langtrace_python_sdk.constants.instrumentation.cohere import APIS
 from langtrace_python_sdk.constants.instrumentation.common import (
-    LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY,
     SERVICE_PROVIDERS,
 )
-from
-
-from langtrace_python_sdk.constants import LANGTRACE_SDK_NAME
+from langtrace.trace_attributes import SpanAttributes
 
 
 def rerank(original_method, version, tracer):
@@ -36,35 +41,24 @@ def rerank(original_method, version, tracer):
 
     def traced_method(wrapped, instance, args, kwargs):
         service_provider = SERVICE_PROVIDERS["COHERE"]
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
 
         span_attributes = {
-            …
-            "llm.documents": json.dumps(kwargs.get("documents")),
-            "llm.retrieval.query": kwargs.get("query"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_REQUEST_MODEL: kwargs.get("model") or "command-r-plus",
+            SpanAttributes.LLM_URL: APIS["RERANK"]["URL"],
+            SpanAttributes.LLM_PATH: APIS["RERANK"]["ENDPOINT"],
+            SpanAttributes.LLM_REQUEST_DOCUMENTS: json.dumps(kwargs.get("documents")),
+            SpanAttributes.LLM_COHERE_RERANK_QUERY: kwargs.get("query"),
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
-        if kwargs.get("top_n") is not None:
-            attributes.llm_top_k = kwargs.get("top_n")
-
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
-
         span = tracer.start_span(APIS["RERANK"]["METHOD"], kind=SpanKind.CLIENT)
         for field, value in attributes.model_dump(by_alias=True).items():
-            …
-            span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
@@ -73,10 +67,12 @@ def rerank(original_method, version, tracer):
                 results = []
                 for _, doc in enumerate(result.results):
                     results.append(doc.json())
-                span.set_attribute(
+                span.set_attribute(
+                    SpanAttributes.LLM_COHERE_RERANK_RESULTS, json.dumps(results)
+                )
 
             if (hasattr(result, "response_id")) and (result.response_id is not None):
-                span.set_attribute(
+                span.set_attribute(SpanAttributes.LLM_RESPONSE_ID, result.response_id)
 
             if hasattr(result, "meta") and result.meta is not None:
                 if (
@@ -85,30 +81,24 @@ def rerank(original_method, version, tracer):
                 ):
                     usage = result.meta.billed_units
                     if usage is not None:
-                        …
-                                usage.search_units
-                                if usage.search_units is not None
-                                else 0
-                            ),
-                        }
-                        span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                        span.set_attribute(
+                            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                            usage.input_tokens or 0,
+                        )
+                        span.set_attribute(
+                            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                            usage.output_tokens or 0,
+                        )
+
+                        span.set_attribute(
+                            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                            (usage.input_tokens or 0) + (usage.output_tokens or 0),
+                        )
+
+                        span.set_attribute(
+                            "search_units",
+                            usage.search_units or 0,
+                        )
 
             span.set_status(StatusCode.OK)
             span.end()
@@ -128,34 +118,27 @@ def embed(original_method, version, tracer):
 
     def traced_method(wrapped, instance, args, kwargs):
         service_provider = SERVICE_PROVIDERS["COHERE"]
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
 
         span_attributes = {
-            …
-            "llm.embedding_job_name": kwargs.get("name"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_URL: APIS["EMBED"]["URL"],
+            SpanAttributes.LLM_PATH: APIS["EMBED"]["ENDPOINT"],
+            SpanAttributes.LLM_REQUEST_EMBEDDING_INPUTS: json.dumps(
+                kwargs.get("texts")
+            ),
+            SpanAttributes.LLM_REQUEST_EMBEDDING_DATASET_ID: kwargs.get("dataset_id"),
+            SpanAttributes.LLM_REQUEST_EMBEDDING_INPUT_TYPE: kwargs.get("input_type"),
+            SpanAttributes.LLM_REQUEST_EMBEDDING_JOB_NAME: kwargs.get("name"),
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
-
         span = tracer.start_span(APIS["EMBED"]["METHOD"], kind=SpanKind.CLIENT)
         for field, value in attributes.model_dump(by_alias=True).items():
-            …
-            span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
@@ -166,31 +149,7 @@ def embed(original_method, version, tracer):
                 and result.meta.billed_units is not None
             ):
                 usage = result.meta.billed_units
-
-                usage_dict = {
-                    "input_tokens": (
-                        usage.input_tokens
-                        if usage.input_tokens is not None
-                        else 0
-                    ),
-                    "output_tokens": (
-                        usage.output_tokens
-                        if usage.output_tokens is not None
-                        else 0
-                    ),
-                    "total_tokens": (
-                        usage.input_tokens + usage.output_tokens
-                        if usage.input_tokens is not None
-                        and usage.output_tokens is not None
-                        else 0
-                    ),
-                    "search_units": (
-                        usage.search_units
-                        if usage.search_units is not None
-                        else 0
-                    ),
-                }
-                span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                set_usage_attributes(span, dict(usage))
 
             span.set_status(StatusCode.OK)
             span.end()
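The embed hunk above collapses the 25-line usage_dict construction into a single set_usage_attributes(span, dict(usage)) call. A plausible reading of that helper, inferred purely from the code it replaces (the real implementation ships in langtrace_python_sdk/utils/llm.py and may differ):

# Hypothetical sketch of set_usage_attributes, modeled on the removed inline
# code; the attribute names are the SpanAttributes constants used elsewhere
# in this diff.
from langtrace.trace_attributes import SpanAttributes

def set_usage_attributes(span, usage):
    # usage arrives as dict(result.meta.billed_units)
    input_tokens = usage.get("input_tokens") or 0
    output_tokens = usage.get("output_tokens") or 0
    span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
    span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens)
    span.set_attribute(
        SpanAttributes.LLM_USAGE_TOTAL_TOKENS, input_tokens + output_tokens
    )
    if usage.get("search_units") is not None:
        span.set_attribute("search_units", usage["search_units"])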
@@ -212,7 +171,7 @@ def chat_create(original_method, version, tracer):
         service_provider = SERVICE_PROVIDERS["COHERE"]
 
         message = kwargs.get("message", "")
-        prompts = [{"role": "
+        prompts = [{"role": "user", "content": message}]
         system_prompts = []
         history = []
         preamble = kwargs.get("preamble")
@@ -224,7 +183,7 @@ def chat_create(original_method, version, tracer):
             history = [
                 {
                     "role": (
-                        item.get("role") if item.get("role") is not None else "
+                        item.get("role") if item.get("role") is not None else "user"
                    ),
                     "content": (
                         item.get("message") if item.get("message") is not None else ""
@@ -236,48 +195,25 @@ def chat_create(original_method, version, tracer):
         prompts = history + prompts
         if len(system_prompts) > 0:
             prompts = system_prompts + prompts
-        prompts = json.dumps(prompts)
-
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
 
         span_attributes = {
-            …
-            "llm.model": (
-                kwargs.get("model") if kwargs.get("model") is not None else "command-r"
-            ),
-            "llm.stream": False,
-            "llm.prompts": prompts,
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_REQUEST_MODEL: kwargs.get("model") or "command-r-plus",
+            SpanAttributes.LLM_URL: APIS["CHAT_CREATE"]["URL"],
+            SpanAttributes.LLM_PATH: APIS["CHAT_CREATE"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("max_tokens") is not None:
-            attributes.llm_max_tokens = str(kwargs.get("max_tokens"))
         if kwargs.get("max_input_tokens") is not None:
             attributes.llm_max_input_tokens = str(kwargs.get("max_input_tokens"))
-        …
-            attributes.llm_top_p = kwargs.get("p")
-        if kwargs.get("k") is not None:
-            attributes.llm_top_k = kwargs.get("k")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
+
         if kwargs.get("conversation_id") is not None:
             attributes.conversation_id = kwargs.get("conversation_id")
-        …
-            attributes.seed = kwargs.get("seed")
-        if kwargs.get("frequency_penalty") is not None:
-            attributes.frequency_penalty = kwargs.get("frequency_penalty")
-        if kwargs.get("presence_penalty") is not None:
-            attributes.presence_penalty = kwargs.get("presence_penalty")
+
         if kwargs.get("connectors") is not None:
             # stringify the list of objects
             attributes.llm_connectors = json.dumps(kwargs.get("connectors"))
@@ -292,8 +228,7 @@ def chat_create(original_method, version, tracer):
 
         # Set the attributes on the span
         for field, value in attributes.model_dump(by_alias=True).items():
-            …
-            span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
@@ -302,13 +237,18 @@ def chat_create(original_method, version, tracer):
             if (hasattr(result, "generation_id")) and (
                 result.generation_id is not None
             ):
-                span.set_attribute(
+                span.set_attribute(
+                    SpanAttributes.LLM_GENERATION_ID, result.generation_id
+                )
             if (hasattr(result, "response_id")) and (result.response_id is not None):
-                span.set_attribute(
+                span.set_attribute(SpanAttributes.LLM_RESPONSE_ID, result.response_id)
             if (hasattr(result, "is_search_required")) and (
                 result.is_search_required is not None
             ):
-                span.set_attribute(
+                span.set_attribute(
+                    SpanAttributes.LLM_REQUEST_SEARCH_REQUIRED,
+                    result.is_search_required,
+                )
 
             if kwargs.get("stream") is False or kwargs.get("stream") is None:
                 if (
@@ -325,7 +265,7 @@ def chat_create(original_method, version, tracer):
                         "role": (
                             item.role
                             if hasattr(item, "role") and item.role is not None
-                            else "
+                            else "user"
                         ),
                         "content": (
                             item.message
@@ -336,19 +276,19 @@ def chat_create(original_method, version, tracer):
                         }
                         for item in result.chat_history
                     ]
-                    span
+                    set_event_completion(span, responses)
+
                 else:
                     responses = [{"role": "CHATBOT", "content": result.text}]
-                    span
+                    set_event_completion(span, responses)
+
             elif hasattr(result, "tool_calls") and result.tool_calls is not None:
                 tool_calls = []
                 for tool_call in result.tool_calls:
                     tool_calls.append(tool_call.json())
-                span.set_attribute(
-                …
-                responses = []
-                span.set_attribute("llm.responses", json.dumps(responses))
+                span.set_attribute(
+                    SpanAttributes.LLM_TOOL_RESULTS, json.dumps(tool_calls)
+                )
 
             # Get the usage
             if hasattr(result, "meta") and result.meta is not None:
@@ -358,31 +298,23 @@ def chat_create(original_method, version, tracer):
                 ):
                     usage = result.meta.billed_units
                     if usage is not None:
-                        usage_dict = {
-                            "input_tokens": (
-                                usage.input_tokens
-                                if usage.input_tokens is not None
-                                else 0
-                            ),
-                            "output_tokens": (
-                                usage.output_tokens
-                                if usage.output_tokens is not None
-                                else 0
-                            ),
-                            "total_tokens": (
-                                usage.input_tokens + usage.output_tokens
-                                if usage.input_tokens is not None
-                                and usage.output_tokens is not None
-                                else 0
-                            ),
-                            "search_units": (
-                                usage.search_units
-                                if usage.search_units is not None
-                                else 0
-                            ),
-                        }
                         span.set_attribute(
-                            …
+                            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                            usage.input_tokens or 0,
+                        )
+                        span.set_attribute(
+                            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                            usage.output_tokens or 0,
+                        )
+
+                        span.set_attribute(
+                            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                            (usage.input_tokens or 0) + (usage.output_tokens or 0),
+                        )
+
+                        span.set_attribute(
+                            "search_units",
+                            usage.search_units or 0,
                         )
             span.set_status(StatusCode.OK)
             span.end()
@@ -407,7 +339,7 @@ def chat_stream(original_method, version, tracer):
         service_provider = SERVICE_PROVIDERS["COHERE"]
 
         message = kwargs.get("message", "")
-        prompts = [{"role": "
+        prompts = [{"role": "user", "content": message}]
         system_prompts = []
         history = []
         preamble = kwargs.get("preamble")
@@ -419,7 +351,7 @@ def chat_stream(original_method, version, tracer):
             history = [
                 {
                     "role": (
-                        item.get("role") if item.get("role") is not None else "
+                        item.get("role") if item.get("role") is not None else "user"
                     ),
                     "content": (
                         item.get("message") if item.get("message") is not None else ""
@@ -431,48 +363,23 @@ def chat_stream(original_method, version, tracer):
         prompts = history + prompts
         if len(system_prompts) > 0:
             prompts = system_prompts + prompts
-        prompts = json.dumps(prompts)
-
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
 
         span_attributes = {
-            …
-                kwargs.get("model") if kwargs.get("model") is not None else "command-r"
-            ),
-            "llm.stream": True,
-            "llm.prompts": prompts,
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_REQUEST_MODEL: kwargs.get("model") or "command-r-plus",
+            SpanAttributes.LLM_IS_STREAMING: True,
+            SpanAttributes.LLM_URL: APIS["CHAT_STREAM"]["URL"],
+            SpanAttributes.LLM_PATH: APIS["CHAT_STREAM"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("max_tokens") is not None:
-            attributes.llm_max_tokens = str(kwargs.get("max_tokens"))
         if kwargs.get("max_input_tokens") is not None:
             attributes.llm_max_input_tokens = str(kwargs.get("max_input_tokens"))
-        …
-            attributes.llm_top_p = kwargs.get("p")
-        if kwargs.get("k") is not None:
-            attributes.llm_top_k = kwargs.get("k")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
-        if kwargs.get("conversation_id") is not None:
-            attributes.conversation_id = kwargs.get("conversation_id")
-        if kwargs.get("seed") is not None:
-            attributes.seed = kwargs.get("seed")
-        if kwargs.get("frequency_penalty") is not None:
-            attributes.frequency_penalty = kwargs.get("frequency_penalty")
-        if kwargs.get("presence_penalty") is not None:
-            attributes.presence_penalty = kwargs.get("presence_penalty")
+
         if kwargs.get("connectors") is not None:
             # stringify the list of objects
             attributes.llm_connectors = json.dumps(kwargs.get("connectors"))
@@ -485,8 +392,7 @@ def chat_stream(original_method, version, tracer):
 
         span = tracer.start_span(APIS["CHAT_STREAM"]["METHOD"], kind=SpanKind.CLIENT)
         for field, value in attributes.model_dump(by_alias=True).items():
-            …
-            span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
@@ -498,7 +404,8 @@ def chat_stream(original_method, version, tracer):
                 else:
                     content = ""
                 span.add_event(
-                    Event.STREAM_OUTPUT.value,
+                    Event.STREAM_OUTPUT.value,
+                    {SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: "".join(content)},
                 )
 
             if (
@@ -506,22 +413,26 @@ def chat_stream(original_method, version, tracer):
                 and event.finish_reason == "COMPLETE"
             ):
                 response = event.response
-
                 if (hasattr(response, "generation_id")) and (
                     response.generation_id is not None
                 ):
                     span.set_attribute(
-                        …
+                        SpanAttributes.LLM_GENERATION_ID,
+                        response.generation_id,
                     )
                 if (hasattr(response, "response_id")) and (
                     response.response_id is not None
                 ):
-                    span.set_attribute(
+                    span.set_attribute(
+                        SpanAttributes.LLM_RESPONSE_ID,
+                        response.response_id,
+                    )
                 if (hasattr(response, "is_search_required")) and (
                     response.is_search_required is not None
                 ):
                     span.set_attribute(
-                        …
+                        SpanAttributes.LLM_REQUEST_SEARCH_REQUIRED,
+                        response.is_search_required,
                     )
 
                 # Set the response attributes
@@ -536,7 +447,7 @@ def chat_stream(original_method, version, tracer):
                                 item.role
                                 if hasattr(item, "role")
                                 and item.role is not None
-                                else "
+                                else "user"
                             ),
                             "content": (
                                 item.message
@@ -547,16 +458,13 @@ def chat_stream(original_method, version, tracer):
                             }
                             for item in response.chat_history
                         ]
-                        span
-                        …
-                        )
+                        set_event_completion(span, responses)
+
                     else:
                         responses = [
                             {"role": "CHATBOT", "content": response.text}
                         ]
-                        span
-                            "llm.responses", json.dumps(responses)
-                        )
+                        set_event_completion(span, responses)
 
                     # Get the usage
                     if hasattr(response, "meta") and response.meta is not None:
@@ -566,31 +474,24 @@ def chat_stream(original_method, version, tracer):
                         ):
                             usage = response.meta.billed_units
                             if usage is not None:
-                                usage_dict = {
-                                    "input_tokens": (
-                                        usage.input_tokens
-                                        if usage.input_tokens is not None
-                                        else 0
-                                    ),
-                                    "output_tokens": (
-                                        usage.output_tokens
-                                        if usage.output_tokens is not None
-                                        else 0
-                                    ),
-                                    "total_tokens": (
-                                        usage.input_tokens + usage.output_tokens
-                                        if usage.input_tokens is not None
-                                        and usage.output_tokens is not None
-                                        else 0
-                                    ),
-                                    "search_units": (
-                                        usage.search_units
-                                        if usage.search_units is not None
-                                        else 0
-                                    ),
-                                }
                                 span.set_attribute(
-                                    …
+                                    SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                                    usage.input_tokens or 0,
+                                )
+                                span.set_attribute(
+                                    SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                                    usage.output_tokens or 0,
+                                )
+
+                                span.set_attribute(
+                                    SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                                    (usage.input_tokens or 0)
+                                    + (usage.output_tokens or 0),
+                                )
+
+                                span.set_attribute(
+                                    "search_units",
+                                    usage.search_units or 0,
                                 )
 
                 yield event
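Taken together, the cohere/patch.py hunks are one mechanical refactor: every hand-rolled span_attributes dictionary, None-guarded attribute assignment, and usage_dict block is replaced by the shared helpers imported at the top of the file (get_langtrace_attributes, get_llm_request_attributes, get_llm_url, get_extra_attributes, set_event_completion, set_usage_attributes, set_span_attribute). The helpers' bodies live in langtrace_python_sdk/utils/__init__.py and utils/llm.py and are not shown in this diff; a plausible minimal shape for the attribute setter, inferred only from the call sites above, would be:

# Hypothetical reconstruction -- the real set_span_attribute ships in
# langtrace_python_sdk/utils/__init__.py (+14 -3 in this diff) and may differ.
def set_span_attribute(span, name, value):
    # Centralizes the guard each patch previously inlined, so spans never
    # receive a None or empty attribute value.
    if value is not None and value != "":
        span.set_attribute(name, value)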
langtrace_python_sdk/instrumentation/gemini/instrumentation.py (new file)

@@ -0,0 +1,36 @@
+from typing import Collection
+from importlib_metadata import version as v
+from langtrace_python_sdk.constants.instrumentation.gemini import APIS
+from wrapt import wrap_function_wrapper as _W
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.trace import get_tracer
+from .patch import patch_gemini, apatch_gemini
+
+
+class GeminiInstrumentation(BaseInstrumentor):
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return ["google-generativeai >= 0.5.0"]
+
+    def _instrument(self, **kwargs):
+        trace_provider = kwargs.get("tracer_provider")
+        tracer = get_tracer(__name__, "", trace_provider)
+        version = v("google-cloud-aiplatform")
+
+        for _, api_config in APIS.items():
+            module = api_config.get("module")
+            operation = api_config.get("operation")
+            method = api_config.get("method")
+            name = f"{method}.{operation}"
+
+            _W(
+                module=module,
+                name=name,
+                wrapper=(
+                    apatch_gemini(name, version, tracer)
+                    if operation == "generate_content_async"
+                    else patch_gemini(name, version, tracer)
+                ),
+            )
+
+    def _uninstrument(self, **kwargs):
+        pass
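GeminiInstrumentation follows the standard OpenTelemetry BaseInstrumentor contract, so besides being registered by langtrace.init() (langtrace.py is updated in this release to wire it in), it can be driven manually. Below is a minimal sketch using only the public BaseInstrumentor API; the import path assumes the class is re-exported by the new gemini/__init__.py, which this diff adds as a 3-line file. Note that, as written, _instrument reads the installed version of google-cloud-aiplatform even though the declared dependency is google-generativeai.

# Manual wiring sketch; langtrace.init() normally does this for you.
from opentelemetry.sdk.trace import TracerProvider
from langtrace_python_sdk.instrumentation.gemini import GeminiInstrumentation

provider = TracerProvider()
# BaseInstrumentor.instrument() invokes _instrument() above, wrapping every
# generate_content / generate_content_async operation listed in APIS.
GeminiInstrumentation().instrument(tracer_provider=provider)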