langtrace-python-sdk 2.1.28__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/cohere_example/chat.py +1 -0
- examples/cohere_example/chat_stream.py +3 -0
- examples/gemini_example/__init__.py +6 -0
- examples/gemini_example/function_tools.py +62 -0
- examples/gemini_example/main.py +91 -0
- examples/langchain_example/__init__.py +8 -0
- examples/langchain_example/groq_example.py +28 -15
- examples/ollama_example/basic.py +1 -0
- examples/openai_example/__init__.py +1 -0
- examples/openai_example/async_tool_calling_nonstreaming.py +1 -1
- examples/openai_example/chat_completion.py +1 -1
- examples/openai_example/embeddings_create.py +1 -0
- examples/openai_example/images_edit.py +2 -2
- examples/vertexai_example/__init__.py +6 -0
- examples/vertexai_example/main.py +214 -0
- langtrace_python_sdk/constants/instrumentation/common.py +2 -0
- langtrace_python_sdk/constants/instrumentation/gemini.py +12 -0
- langtrace_python_sdk/constants/instrumentation/vertexai.py +42 -0
- langtrace_python_sdk/instrumentation/__init__.py +4 -0
- langtrace_python_sdk/instrumentation/anthropic/patch.py +68 -96
- langtrace_python_sdk/instrumentation/chroma/patch.py +29 -29
- langtrace_python_sdk/instrumentation/cohere/patch.py +143 -242
- langtrace_python_sdk/instrumentation/gemini/__init__.py +3 -0
- langtrace_python_sdk/instrumentation/gemini/instrumentation.py +36 -0
- langtrace_python_sdk/instrumentation/gemini/patch.py +186 -0
- langtrace_python_sdk/instrumentation/groq/patch.py +82 -125
- langtrace_python_sdk/instrumentation/ollama/patch.py +62 -65
- langtrace_python_sdk/instrumentation/openai/patch.py +190 -494
- langtrace_python_sdk/instrumentation/qdrant/patch.py +6 -6
- langtrace_python_sdk/instrumentation/vertexai/__init__.py +3 -0
- langtrace_python_sdk/instrumentation/vertexai/instrumentation.py +33 -0
- langtrace_python_sdk/instrumentation/vertexai/patch.py +131 -0
- langtrace_python_sdk/langtrace.py +7 -1
- langtrace_python_sdk/utils/__init__.py +14 -3
- langtrace_python_sdk/utils/llm.py +311 -6
- langtrace_python_sdk/version.py +1 -1
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/METADATA +26 -19
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/RECORD +55 -36
- tests/anthropic/test_anthropic.py +28 -27
- tests/cohere/test_cohere_chat.py +36 -36
- tests/cohere/test_cohere_embed.py +12 -9
- tests/cohere/test_cohere_rerank.py +18 -11
- tests/groq/cassettes/test_async_chat_completion.yaml +113 -0
- tests/groq/cassettes/test_async_chat_completion_streaming.yaml +2232 -0
- tests/groq/cassettes/test_chat_completion.yaml +114 -0
- tests/groq/cassettes/test_chat_completion_streaming.yaml +2512 -0
- tests/groq/conftest.py +33 -0
- tests/groq/test_groq.py +142 -0
- tests/openai/cassettes/test_async_chat_completion_streaming.yaml +28 -28
- tests/openai/test_chat_completion.py +53 -67
- tests/openai/test_image_generation.py +47 -24
- tests/utils.py +40 -5
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/WHEEL +0 -0
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/entry_points.txt +0 -0
- {langtrace_python_sdk-2.1.28.dist-info → langtrace_python_sdk-2.2.1.dist-info}/licenses/LICENSE +0 -0
langtrace_python_sdk/instrumentation/gemini/patch.py
@@ -0,0 +1,186 @@
+from langtrace.trace_attributes import LLMSpanAttributes, SpanAttributes
+from opentelemetry import trace
+from opentelemetry.trace import Span, SpanKind, Tracer
+from opentelemetry.trace.propagation import set_span_in_context
+from opentelemetry.trace.status import Status, StatusCode
+
+from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
+from langtrace_python_sdk.utils.llm import (
+    get_extra_attributes,
+    get_langtrace_attributes,
+    get_llm_request_attributes,
+    get_llm_url,
+    is_streaming,
+    set_event_completion,
+    set_event_completion_chunk,
+    set_span_attributes,
+    set_usage_attributes,
+)
+
+
+def patch_gemini(name, version, tracer: Tracer):
+    def traced_method(wrapped, instance, args, kwargs):
+        service_provider = SERVICE_PROVIDERS["GEMINI"]
+        prompts = serialize_prompts(args, kwargs, instance)
+        span_attributes = {
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(
+                kwargs,
+                prompts=prompts,
+                model=get_llm_model(instance),
+            ),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: "",
+            **get_extra_attributes(),
+        }
+        attributes = LLMSpanAttributes(**span_attributes)
+        span = tracer.start_span(
+            name=name,
+            kind=SpanKind.CLIENT,
+            context=set_span_in_context(trace.get_current_span()),
+        )
+
+        try:
+            set_span_attributes(span, attributes)
+            result = wrapped(*args, **kwargs)
+            if is_streaming(kwargs):
+                return build_streaming_response(span, result)
+
+            else:
+                set_response_attributes(span, result)
+                span.end()
+                return result
+        except Exception as error:
+            span.record_exception(error)
+            span.set_status(Status(StatusCode.ERROR, str(error)))
+            span.end()
+            raise
+
+    return traced_method
+
+
+def apatch_gemini(name, version, tracer: Tracer):
+    async def traced_method(wrapped, instance, args, kwargs):
+        service_provider = SERVICE_PROVIDERS["GEMINI"]
+        prompts = serialize_prompts(args, kwargs, instance)
+        span_attributes = {
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(
+                kwargs,
+                prompts=prompts,
+                model=get_llm_model(instance),
+            ),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: "",
+            **get_extra_attributes(),
+        }
+        attributes = LLMSpanAttributes(**span_attributes)
+        span = tracer.start_span(
+            name=name,
+            kind=SpanKind.CLIENT,
+            context=set_span_in_context(trace.get_current_span()),
+        )
+
+        try:
+            set_span_attributes(span, attributes)
+            result = await wrapped(*args, **kwargs)
+            if is_streaming(kwargs):
+                return abuild_streaming_response(span, result)
+            else:
+                set_response_attributes(span, result)
+                span.end()
+                return result
+        except Exception as error:
+            span.record_exception(error)
+            span.set_status(Status(StatusCode.ERROR, str(error)))
+            span.end()
+            raise
+
+    return traced_method
+
+
+def get_llm_model(instance):
+    llm_model = "unknown"
+    if hasattr(instance, "_model_id"):
+        llm_model = instance._model_id
+    if hasattr(instance, "_model_name"):
+        llm_model = instance._model_name.replace("models/", "")
+    return llm_model
+
+
+def serialize_prompts(args, kwargs, instance):
+    prompts = []
+    if hasattr(instance, "_system_instruction") and instance._system_instruction is not None:
+        system_prompt = {
+            "role": "system",
+            "content": instance._system_instruction.__dict__["_pb"].parts[0].text,
+        }
+        prompts.append(system_prompt)
+
+    if args is not None and len(args) > 0:
+        content = ""
+        for arg in args:
+            if isinstance(arg, str):
+                content = f"{content}{arg}\n"
+            elif isinstance(arg, list):
+                for subarg in arg:
+                    content = f"{content}{subarg}\n"
+        prompts.append({"role": "user", "content": content})
+    return prompts
+
+
+def set_response_attributes(
+    span: Span,
+    result,
+):
+    span.set_status(Status(StatusCode.OK))
+    if hasattr(result, "text"):
+        set_event_completion(span, [{"role": "assistant", "content": result.text}])
+
+    if hasattr(result, "usage_metadata"):
+        usage = result.usage_metadata
+        input_tokens = usage.prompt_token_count
+        output_tokens = usage.candidates_token_count
+        set_usage_attributes(
+            span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+        )
+
+
+def build_streaming_response(span, response):
+    complete_response = ""
+    for item in response:
+        item_to_yield = item
+        complete_response += str(item.text)
+        yield item_to_yield
+        set_event_completion_chunk(span, item.text)
+        if hasattr(item, "usage_metadata"):
+            usage = item.usage_metadata
+            input_tokens = usage.prompt_token_count
+            output_tokens = usage.candidates_token_count
+            set_usage_attributes(
+                span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+            )
+
+    set_response_attributes(span, response)
+    span.set_status(Status(StatusCode.OK))
+    span.end()
+
+
+async def abuild_streaming_response(span, response):
+    complete_response = ""
+    async for item in response:
+        item_to_yield = item
+        complete_response += str(item.text)
+        yield item_to_yield
+        set_event_completion_chunk(span, item.text)
+        if hasattr(item, "usage_metadata"):
+            usage = item.usage_metadata
+            input_tokens = usage.prompt_token_count
+            output_tokens = usage.candidates_token_count
+            set_usage_attributes(
+                span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+            )
+
+    set_response_attributes(span, response)
+    span.set_status(Status(StatusCode.OK))
+    span.end()
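Note: patch.py above only defines the sync and async wrappers; the new gemini/instrumentation.py (+36 lines, not shown in full here) is what attaches them to the client library. A minimal registration sketch with wrapt, assuming google.generativeai's GenerativeModel.generate_content as the target — the module path and method name are assumptions, not taken from this diff:

from opentelemetry import trace
from wrapt import wrap_function_wrapper

def instrument_gemini(version: str) -> None:
    tracer = trace.get_tracer(__name__)
    # patch_gemini returns a (wrapped, instance, args, kwargs) callable,
    # which is exactly the wrapper signature wrapt expects.
    wrap_function_wrapper(
        "google.generativeai.generative_models",  # assumed module path
        "GenerativeModel.generate_content",       # assumed method name
        patch_gemini("GenerativeModel.generate_content", version, tracer),
    )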
langtrace_python_sdk/instrumentation/groq/patch.py
@@ -17,11 +17,21 @@ limitations under the License.
 import json
 
 from langtrace.trace_attributes import Event, LLMSpanAttributes
+from langtrace_python_sdk.utils import set_span_attribute
 from opentelemetry import baggage, trace
 from opentelemetry.trace.propagation import set_span_in_context
 from opentelemetry.trace import SpanKind
 from opentelemetry.trace.status import Status, StatusCode
 
+from langtrace_python_sdk.utils.llm import (
+    get_base_url,
+    get_extra_attributes,
+    get_llm_request_attributes,
+    get_llm_url,
+    get_langtrace_attributes,
+    set_event_completion,
+    set_usage_attributes,
+)
 from langtrace_python_sdk.constants.instrumentation.common import (
     LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY,
     SERVICE_PROVIDERS,
@@ -31,26 +41,20 @@ from langtrace_python_sdk.utils.llm import calculate_prompt_tokens, estimate_tok
 from importlib_metadata import version as v
 
 from langtrace_python_sdk.constants import LANGTRACE_SDK_NAME
+from langtrace.trace_attributes import SpanAttributes
 
 
 def chat_completions_create(original_method, version, tracer):
     """Wrap the `create` method of the `ChatCompletion` class to trace it."""
 
     def traced_method(wrapped, instance, args, kwargs):
-        base_url = (
-            str(instance._client._base_url)
-            if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-            else ""
-        )
         service_provider = SERVICE_PROVIDERS["GROQ"]
         # If base url contains perplexity or azure, set the service provider accordingly
-        if "perplexity" in base_url:
+        if "perplexity" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["PPLX"]
-        elif "azure" in base_url:
+        elif "azure" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["AZURE"]
 
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
         # handle tool calls in the kwargs
         llm_prompts = []
         for item in kwargs.get("messages", []):
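The removed inline base_url expression reappears in this release as the shared helper get_base_url, and the removed per-field None checks as set_span_attribute. Plausible sketches inferred from the call sites in these hunks — the real definitions live in langtrace_python_sdk/utils/__init__.py and utils/llm.py and may differ:

def get_base_url(instance) -> str:
    # Same logic as the deleted inline expression, now reusable across providers.
    return (
        str(instance._client._base_url)
        if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
        else ""
    )

def set_span_attribute(span, name, value) -> None:
    # Replaces the repeated "if value is not None: span.set_attribute(...)" pattern.
    if value is not None and value != "":
        span.set_attribute(name, value)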
@@ -80,27 +84,16 @@ def chat_completions_create(original_method, version, tracer):
                 llm_prompts.append(item)
 
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "url.full": base_url,
-            "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-            "llm.prompts": json.dumps(llm_prompts),
-            "llm.stream": kwargs.get("stream"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
         tools = []
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("top_p") is not None:
-            attributes.llm_top_p = kwargs.get("top_p")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
         if kwargs.get("functions") is not None:
             for function in kwargs.get("functions"):
                 tools.append(json.dumps({"type": "function", "function": function}))
@@ -111,20 +104,21 @@ def chat_completions_create(original_method, version, tracer):
 
         # TODO(Karthik): Gotta figure out how to handle streaming with context
         # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-        #                       kind=SpanKind.CLIENT) as span:
+        #                       kind=SpanKind.CLIENT.value) as span:
         span = tracer.start_span(
             APIS["CHAT_COMPLETION"]["METHOD"],
-            kind=SpanKind.CLIENT,
+            kind=SpanKind.CLIENT.value,
             context=set_span_in_context(trace.get_current_span()),
         )
         for field, value in attributes.model_dump(by_alias=True).items():
-            if value is not None:
-                span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
             if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                span.set_attribute("llm.model", result.model)
+                set_span_attribute(
+                    span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                )
                 if hasattr(result, "choices") and result.choices is not None:
                     responses = [
                         {
@@ -146,27 +140,23 @@ def chat_completions_create(original_method, version, tracer):
                         }
                         for choice in result.choices
                     ]
-                    span.set_attribute("llm.responses", json.dumps(responses))
-                else:
-                    responses = []
-                    span.set_attribute("llm.responses", json.dumps(responses))
+                    set_event_completion(span, responses)
+
                 if (
                     hasattr(result, "system_fingerprint")
                     and result.system_fingerprint is not None
                 ):
-                    span.set_attribute(
-                        "llm.system.fingerprint", result.system_fingerprint
+                    set_span_attribute(
+                        span,
+                        SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                        result.system_fingerprint,
                     )
+
                 # Get the usage
                 if hasattr(result, "usage") and result.usage is not None:
                     usage = result.usage
-                    if usage is not None:
-                        usage_dict = {
-                            "input_tokens": result.usage.prompt_tokens,
-                            "output_tokens": usage.completion_tokens,
-                            "total_tokens": usage.total_tokens,
-                        }
-                        span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                    set_usage_attributes(span, dict(usage))
+
                 span.set_status(StatusCode.OK)
                 span.end()
                 return result
@@ -255,7 +245,7 @@ def chat_completions_create(original_method, version, tracer):
                     span.add_event(
                         Event.STREAM_OUTPUT.value,
                         {
-                            "response": (
+                            SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: (
                                 "".join(content)
                                 if len(content) > 0 and content[0] is not None
                                 else ""
@@ -267,27 +257,14 @@ def chat_completions_create(original_method, version, tracer):
         finally:
             # Finalize span after processing all chunks
             span.add_event(Event.STREAM_END.value)
-            span.set_attribute(
-                "llm.token.counts",
-                json.dumps(
-                    {
-                        "input_tokens": prompt_tokens,
-                        "output_tokens": completion_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens,
-                    }
-                ),
+            set_usage_attributes(
+                span,
+                {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
             )
-            span.set_attribute(
-                "llm.responses",
-                json.dumps(
-                    [
-                        {
-                            "role": "assistant",
-                            "content": "".join(result_content),
-                        }
-                    ]
-                ),
+            set_event_completion(
+                span, [{"role": "assistant", "content": "".join(result_content)}]
             )
+
             span.set_status(StatusCode.OK)
             span.end()
 
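The same consolidation shows up in the streaming path: raw json.dumps attributes give way to set_usage_attributes and set_event_completion. Sketches of what those helpers plausibly do, inferred only from how they are called above — the event and attribute names are assumptions; the real code lives in utils/llm.py (+311 lines in this release):

import json

def set_event_completion(span, completions: list) -> None:
    # Record completions as a span event rather than an "llm.responses" attribute.
    span.add_event(
        "gen_ai.content.completion",                     # assumed event name
        {"gen_ai.completion": json.dumps(completions)},  # assumed attribute key
    )

def set_usage_attributes(span, usage: dict) -> None:
    # Flatten whatever token counts the provider reports onto the span.
    for key, value in usage.items():
        if value is not None:
            span.set_attribute(f"gen_ai.usage.{key}", value)  # assumed key prefix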
@@ -299,20 +276,13 @@ def async_chat_completions_create(original_method, version, tracer):
     """Wrap the `create` method of the `ChatCompletion` class to trace it."""
 
     async def traced_method(wrapped, instance, args, kwargs):
-        base_url = (
-            str(instance._client._base_url)
-            if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-            else ""
-        )
         service_provider = SERVICE_PROVIDERS["GROQ"]
         # If base url contains perplexity or azure, set the service provider accordingly
-        if "perplexity" in base_url:
+        if "perplexity" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["PPLX"]
-        elif "azure" in base_url:
+        elif "azure" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["AZURE"]
 
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
         # handle tool calls in the kwargs
         llm_prompts = []
         for item in kwargs.get("messages", []):
@@ -342,27 +312,17 @@ def async_chat_completions_create(original_method, version, tracer):
                 llm_prompts.append(item)
 
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "url.full": base_url,
-            "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-            "llm.prompts": json.dumps(llm_prompts),
-            "llm.stream": kwargs.get("stream"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
         tools = []
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("top_p") is not None:
-            attributes.llm_top_p = kwargs.get("top_p")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
+
         if kwargs.get("functions") is not None:
             for function in kwargs.get("functions"):
                 tools.append(json.dumps({"type": "function", "function": function}))
@@ -373,18 +333,19 @@ def async_chat_completions_create(original_method, version, tracer):
 
         # TODO(Karthik): Gotta figure out how to handle streaming with context
         # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-        #                       kind=SpanKind.CLIENT) as span:
+        #                       kind=SpanKind.CLIENT.value) as span:
         span = tracer.start_span(
-            APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT
+            APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT.value
        )
         for field, value in attributes.model_dump(by_alias=True).items():
-            if value is not None:
-                span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = await wrapped(*args, **kwargs)
             if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                span.set_attribute("llm.model", result.model)
+                set_span_attribute(
+                    span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                )
                 if hasattr(result, "choices") and result.choices is not None:
                     responses = [
                         {
@@ -406,27 +367,25 @@ def async_chat_completions_create(original_method, version, tracer):
                         }
                         for choice in result.choices
                     ]
-                    span.set_attribute("llm.responses", json.dumps(responses))
-                else:
-                    responses = []
-                    span.set_attribute("llm.responses", json.dumps(responses))
+
+                    set_event_completion(span, responses)
+
                 if (
                     hasattr(result, "system_fingerprint")
                     and result.system_fingerprint is not None
                 ):
-                    span.set_attribute(
-                        "llm.system.fingerprint", result.system_fingerprint
+                    set_span_attribute(
+                        span,
+                        SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                        result.system_fingerprint,
                     )
+
                 # Get the usage
                 if hasattr(result, "usage") and result.usage is not None:
                     usage = result.usage
                     if usage is not None:
-                        usage_dict = {
-                            "input_tokens": usage.prompt_tokens,
-                            "output_tokens": usage.completion_tokens,
-                            "total_tokens": usage.total_tokens,
-                        }
-                        span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                        set_usage_attributes(span, dict(usage))
+
                 span.set_status(StatusCode.OK)
                 span.end()
                 return result
@@ -469,6 +428,9 @@ def async_chat_completions_create(original_method, version, tracer):
         try:
             async for chunk in result:
                 if hasattr(chunk, "model") and chunk.model is not None:
+                    set_span_attribute(
+                        span, SpanAttributes.LLM_RESPONSE_MODEL, chunk.model
+                    )
                     span.set_attribute("llm.model", chunk.model)
                 if hasattr(chunk, "choices") and chunk.choices is not None:
                     if not function_call and not tool_calls:
@@ -513,9 +475,9 @@ def async_chat_completions_create(original_method, version, tracer):
                 else:
                     content = []
                 span.add_event(
-                    Event.STREAM_OUTPUT.value,
+                    Event.RESPONSE.value,
                     {
-                        "response": (
+                        SpanAttributes.LLM_COMPLETIONS: (
                             "".join(content)
                             if len(content) > 0 and content[0] is not None
                             else ""
@@ -527,27 +489,22 @@ def async_chat_completions_create(original_method, version, tracer):
         finally:
             # Finalize span after processing all chunks
             span.add_event(Event.STREAM_END.value)
-            span.set_attribute(
-                "llm.token.counts",
-                json.dumps(
+
+            set_usage_attributes(
+                span,
+                {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
+            )
+
+            set_event_completion(
+                span,
+                [
                     {
-                        "input_tokens": prompt_tokens,
-                        "output_tokens": completion_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens,
+                        "role": "assistant",
+                        "content": "".join(result_content),
                     }
-                ),
-            )
-            span.set_attribute(
-                "llm.responses",
-                json.dumps(
-                    [
-                        {
-                            "role": "assistant",
-                            "content": "".join(result_content),
-                        }
+                ],
-                    ]
-                ),
             )
+
             span.set_status(StatusCode.OK)
             span.end()
 
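For context on how these patches get exercised end to end: initializing the SDK wires up every instrumentation, including the new Gemini one. A minimal usage sketch — the init arguments and model name are illustrative, not taken from this diff; consult the package README for the supported options:

from langtrace_python_sdk import langtrace
import google.generativeai as genai

langtrace.init(api_key="<langtrace-api-key>")  # registers all instrumentations
genai.configure(api_key="<google-api-key>")

model = genai.GenerativeModel("gemini-1.5-flash")  # illustrative model name
model.generate_content("Say hello")  # traced by patch_gemini above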