langtrace-python-sdk 2.1.29__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
Files changed (55)
  1. examples/cohere_example/chat.py +1 -0
  2. examples/cohere_example/chat_stream.py +3 -0
  3. examples/gemini_example/__init__.py +6 -0
  4. examples/gemini_example/function_tools.py +62 -0
  5. examples/gemini_example/main.py +91 -0
  6. examples/langchain_example/__init__.py +8 -0
  7. examples/langchain_example/groq_example.py +28 -15
  8. examples/ollama_example/basic.py +1 -0
  9. examples/openai_example/__init__.py +1 -0
  10. examples/openai_example/async_tool_calling_nonstreaming.py +1 -1
  11. examples/openai_example/chat_completion.py +1 -1
  12. examples/openai_example/embeddings_create.py +1 -0
  13. examples/openai_example/images_edit.py +2 -2
  14. examples/vertexai_example/__init__.py +6 -0
  15. examples/vertexai_example/main.py +214 -0
  16. langtrace_python_sdk/constants/instrumentation/common.py +2 -0
  17. langtrace_python_sdk/constants/instrumentation/gemini.py +12 -0
  18. langtrace_python_sdk/constants/instrumentation/vertexai.py +42 -0
  19. langtrace_python_sdk/instrumentation/__init__.py +4 -0
  20. langtrace_python_sdk/instrumentation/anthropic/patch.py +68 -96
  21. langtrace_python_sdk/instrumentation/chroma/patch.py +29 -29
  22. langtrace_python_sdk/instrumentation/cohere/patch.py +143 -242
  23. langtrace_python_sdk/instrumentation/gemini/__init__.py +3 -0
  24. langtrace_python_sdk/instrumentation/gemini/instrumentation.py +36 -0
  25. langtrace_python_sdk/instrumentation/gemini/patch.py +186 -0
  26. langtrace_python_sdk/instrumentation/groq/patch.py +82 -125
  27. langtrace_python_sdk/instrumentation/ollama/patch.py +62 -65
  28. langtrace_python_sdk/instrumentation/openai/patch.py +190 -494
  29. langtrace_python_sdk/instrumentation/qdrant/patch.py +6 -6
  30. langtrace_python_sdk/instrumentation/vertexai/__init__.py +3 -0
  31. langtrace_python_sdk/instrumentation/vertexai/instrumentation.py +33 -0
  32. langtrace_python_sdk/instrumentation/vertexai/patch.py +131 -0
  33. langtrace_python_sdk/langtrace.py +5 -0
  34. langtrace_python_sdk/utils/__init__.py +14 -3
  35. langtrace_python_sdk/utils/llm.py +311 -6
  36. langtrace_python_sdk/version.py +1 -1
  37. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.1.dist-info}/METADATA +26 -19
  38. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.1.dist-info}/RECORD +55 -36
  39. tests/anthropic/test_anthropic.py +28 -27
  40. tests/cohere/test_cohere_chat.py +36 -36
  41. tests/cohere/test_cohere_embed.py +12 -9
  42. tests/cohere/test_cohere_rerank.py +18 -11
  43. tests/groq/cassettes/test_async_chat_completion.yaml +113 -0
  44. tests/groq/cassettes/test_async_chat_completion_streaming.yaml +2232 -0
  45. tests/groq/cassettes/test_chat_completion.yaml +114 -0
  46. tests/groq/cassettes/test_chat_completion_streaming.yaml +2512 -0
  47. tests/groq/conftest.py +33 -0
  48. tests/groq/test_groq.py +142 -0
  49. tests/openai/cassettes/test_async_chat_completion_streaming.yaml +28 -28
  50. tests/openai/test_chat_completion.py +53 -67
  51. tests/openai/test_image_generation.py +47 -24
  52. tests/utils.py +40 -5
  53. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.1.dist-info}/WHEEL +0 -0
  54. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.1.dist-info}/entry_points.txt +0 -0
  55. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.1.dist-info}/licenses/LICENSE +0 -0
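At a glance, this release adds Gemini and Vertex AI instrumentations, Groq tests, and consolidates per-provider span-attribute plumbing into shared helpers in utils/llm.py (+311 lines). As a hedged usage sketch only (the init arguments below are illustrative; the SDK also reads configuration from the environment), the new instrumentations are picked up through the usual entry point:

# Hypothetical usage sketch: langtrace.init() wires up all registered
# instrumentations, now including Gemini (see langtrace.py +5 above).
from langtrace_python_sdk import langtrace
import google.generativeai as genai  # consumer library assumed installed

langtrace.init()  # illustrative; API key typically comes from the environment

model = genai.GenerativeModel("gemini-pro")
response = model.generate_content("Say hello")  # traced via patch_gemini below
print(response.text)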
langtrace_python_sdk/instrumentation/gemini/patch.py
@@ -0,0 +1,186 @@
+ from langtrace.trace_attributes import LLMSpanAttributes, SpanAttributes
+ from opentelemetry import trace
+ from opentelemetry.trace import Span, SpanKind, Tracer
+ from opentelemetry.trace.propagation import set_span_in_context
+ from opentelemetry.trace.status import Status, StatusCode
+
+ from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
+ from langtrace_python_sdk.utils.llm import (
+     get_extra_attributes,
+     get_langtrace_attributes,
+     get_llm_request_attributes,
+     get_llm_url,
+     is_streaming,
+     set_event_completion,
+     set_event_completion_chunk,
+     set_span_attributes,
+     set_usage_attributes,
+ )
+
+
+ def patch_gemini(name, version, tracer: Tracer):
+     def traced_method(wrapped, instance, args, kwargs):
+         service_provider = SERVICE_PROVIDERS["GEMINI"]
+         prompts = serialize_prompts(args, kwargs, instance)
+         span_attributes = {
+             **get_langtrace_attributes(version, service_provider),
+             **get_llm_request_attributes(
+                 kwargs,
+                 prompts=prompts,
+                 model=get_llm_model(instance),
+             ),
+             **get_llm_url(instance),
+             SpanAttributes.LLM_PATH: "",
+             **get_extra_attributes(),
+         }
+         attributes = LLMSpanAttributes(**span_attributes)
+         span = tracer.start_span(
+             name=name,
+             kind=SpanKind.CLIENT,
+             context=set_span_in_context(trace.get_current_span()),
+         )
+
+         try:
+             set_span_attributes(span, attributes)
+             result = wrapped(*args, **kwargs)
+             if is_streaming(kwargs):
+                 return build_streaming_response(span, result)
+
+             else:
+                 set_response_attributes(span, result)
+                 span.end()
+                 return result
+         except Exception as error:
+             span.record_exception(error)
+             span.set_status(Status(StatusCode.ERROR, str(error)))
+             span.end()
+             raise
+
+     return traced_method
+
+
+ def apatch_gemini(name, version, tracer: Tracer):
+     async def traced_method(wrapped, instance, args, kwargs):
+         service_provider = SERVICE_PROVIDERS["GEMINI"]
+         prompts = serialize_prompts(args, kwargs, instance)
+         span_attributes = {
+             **get_langtrace_attributes(version, service_provider),
+             **get_llm_request_attributes(
+                 kwargs,
+                 prompts=prompts,
+                 model=get_llm_model(instance),
+             ),
+             **get_llm_url(instance),
+             SpanAttributes.LLM_PATH: "",
+             **get_extra_attributes(),
+         }
+         attributes = LLMSpanAttributes(**span_attributes)
+         span = tracer.start_span(
+             name=name,
+             kind=SpanKind.CLIENT,
+             context=set_span_in_context(trace.get_current_span()),
+         )
+
+         try:
+             set_span_attributes(span, attributes)
+             result = await wrapped(*args, **kwargs)
+             if is_streaming(kwargs):
+                 return abuild_streaming_response(span, result)
+             else:
+                 set_response_attributes(span, result)
+                 span.end()
+                 return result
+         except Exception as error:
+             span.record_exception(error)
+             span.set_status(Status(StatusCode.ERROR, str(error)))
+             span.end()
+             raise
+
+     return traced_method
+
+
+ def get_llm_model(instance):
+     llm_model = "unknown"
+     if hasattr(instance, "_model_id"):
+         llm_model = instance._model_id
+     if hasattr(instance, "_model_name"):
+         llm_model = instance._model_name.replace("models/", "")
+     return llm_model
+
+
+ def serialize_prompts(args, kwargs, instance):
+     prompts = []
+     if hasattr(instance, "_system_instruction") and instance._system_instruction is not None:
+         system_prompt = {
+             "role": "system",
+             "content": instance._system_instruction.__dict__["_pb"].parts[0].text,
+         }
+         prompts.append(system_prompt)
+
+     if args is not None and len(args) > 0:
+         content = ""
+         for arg in args:
+             if isinstance(arg, str):
+                 content = f"{content}{arg}\n"
+             elif isinstance(arg, list):
+                 for subarg in arg:
+                     content = f"{content}{subarg}\n"
+         prompts.append({"role": "user", "content": content})
+     return prompts
+
+
+ def set_response_attributes(
+     span: Span,
+     result,
+ ):
+     span.set_status(Status(StatusCode.OK))
+     if hasattr(result, "text"):
+         set_event_completion(span, [{"role": "assistant", "content": result.text}])
+
+     if hasattr(result, "usage_metadata"):
+         usage = result.usage_metadata
+         input_tokens = usage.prompt_token_count
+         output_tokens = usage.candidates_token_count
+         set_usage_attributes(
+             span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+         )
+
+
+ def build_streaming_response(span, response):
+     complete_response = ""
+     for item in response:
+         item_to_yield = item
+         complete_response += str(item.text)
+         yield item_to_yield
+         set_event_completion_chunk(span, item.text)
+         if hasattr(item, "usage_metadata"):
+             usage = item.usage_metadata
+             input_tokens = usage.prompt_token_count
+             output_tokens = usage.candidates_token_count
+             set_usage_attributes(
+                 span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+             )
+
+     set_response_attributes(span, response)
+     span.set_status(Status(StatusCode.OK))
+     span.end()
+
+
+ async def abuild_streaming_response(span, response):
+     complete_response = ""
+     async for item in response:
+         item_to_yield = item
+         complete_response += str(item.text)
+         yield item_to_yield
+         set_event_completion_chunk(span, item.text)
+         if hasattr(item, "usage_metadata"):
+             usage = item.usage_metadata
+             input_tokens = usage.prompt_token_count
+             output_tokens = usage.candidates_token_count
+             set_usage_attributes(
+                 span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
+             )
+
+     set_response_attributes(span, response)
+     span.set_status(Status(StatusCode.OK))
+     span.end()
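The companion instrumentation.py (+36 lines in the file list) is not shown in this section; a hedged sketch of the typical wrapt-based wiring for these wrappers follows. The module path, wrapped method, and version bound are assumptions, not taken from the diff.

# Hypothetical wiring sketch: the real class lives in
# langtrace_python_sdk/instrumentation/gemini/instrumentation.py.
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.trace import get_tracer
from wrapt import wrap_function_wrapper

from langtrace_python_sdk.instrumentation.gemini.patch import patch_gemini


class GeminiInstrumentation(BaseInstrumentor):
    def instrumentation_dependencies(self):
        return ["google-generativeai >= 0.5.0"]  # assumed version bound

    def _instrument(self, **kwargs):
        tracer = get_tracer(__name__, "", kwargs.get("tracer_provider"))
        # Assumed target; the diff does not show which methods are wrapped.
        wrap_function_wrapper(
            "google.generativeai.generative_models",
            "GenerativeModel.generate_content",
            patch_gemini("gemini.generate_content", "0.5.0", tracer),
        )

    def _uninstrument(self, **kwargs):
        pass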
langtrace_python_sdk/instrumentation/groq/patch.py
@@ -17,11 +17,21 @@ limitations under the License.
  import json

  from langtrace.trace_attributes import Event, LLMSpanAttributes
+ from langtrace_python_sdk.utils import set_span_attribute
  from opentelemetry import baggage, trace
  from opentelemetry.trace.propagation import set_span_in_context
  from opentelemetry.trace import SpanKind
  from opentelemetry.trace.status import Status, StatusCode

+ from langtrace_python_sdk.utils.llm import (
+     get_base_url,
+     get_extra_attributes,
+     get_llm_request_attributes,
+     get_llm_url,
+     get_langtrace_attributes,
+     set_event_completion,
+     set_usage_attributes,
+ )
  from langtrace_python_sdk.constants.instrumentation.common import (
      LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY,
      SERVICE_PROVIDERS,
@@ -31,26 +41,20 @@ from langtrace_python_sdk.utils.llm import calculate_prompt_tokens, estimate_tok
  from importlib_metadata import version as v

  from langtrace_python_sdk.constants import LANGTRACE_SDK_NAME
+ from langtrace.trace_attributes import SpanAttributes


  def chat_completions_create(original_method, version, tracer):
      """Wrap the `create` method of the `ChatCompletion` class to trace it."""

      def traced_method(wrapped, instance, args, kwargs):
-         base_url = (
-             str(instance._client._base_url)
-             if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-             else ""
-         )
          service_provider = SERVICE_PROVIDERS["GROQ"]
          # If base url contains perplexity or azure, set the service provider accordingly
-         if "perplexity" in base_url:
+         if "perplexity" in get_base_url(instance):
              service_provider = SERVICE_PROVIDERS["PPLX"]
-         elif "azure" in base_url:
+         elif "azure" in get_base_url(instance):
              service_provider = SERVICE_PROVIDERS["AZURE"]

-         extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
          # handle tool calls in the kwargs
          llm_prompts = []
          for item in kwargs.get("messages", []):
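The inline base_url extraction that both wrappers used to carry is now a shared get_base_url helper. Its body is not shown in this diff; a plausible reconstruction, inferred directly from the removed lines above:

# Inferred from the removed inline logic; the actual helper in
# utils/llm.py may differ.
def get_base_url(instance) -> str:
    return (
        str(instance._client._base_url)
        if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
        else ""
    )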
@@ -80,27 +84,16 @@ def chat_completions_create(original_method, version, tracer):
              llm_prompts.append(item)

          span_attributes = {
-             "langtrace.sdk.name": "langtrace-python-sdk",
-             "langtrace.service.name": service_provider,
-             "langtrace.service.type": "llm",
-             "langtrace.service.version": version,
-             "langtrace.version": v(LANGTRACE_SDK_NAME),
-             "url.full": base_url,
-             "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-             "llm.prompts": json.dumps(llm_prompts),
-             "llm.stream": kwargs.get("stream"),
-             **(extra_attributes if extra_attributes is not None else {}),
+             **get_langtrace_attributes(version, service_provider),
+             **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+             **get_llm_url(instance),
+             SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+             **get_extra_attributes(),
          }

          attributes = LLMSpanAttributes(**span_attributes)

          tools = []
-         if kwargs.get("temperature") is not None:
-             attributes.llm_temperature = kwargs.get("temperature")
-         if kwargs.get("top_p") is not None:
-             attributes.llm_top_p = kwargs.get("top_p")
-         if kwargs.get("user") is not None:
-             attributes.llm_user = kwargs.get("user")
          if kwargs.get("functions") is not None:
              for function in kwargs.get("functions"):
                  tools.append(json.dumps({"type": "function", "function": function}))
@@ -111,20 +104,21 @@

          # TODO(Karthik): Gotta figure out how to handle streaming with context
          # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-         # kind=SpanKind.CLIENT) as span:
+         # kind=SpanKind.CLIENT.value) as span:
          span = tracer.start_span(
              APIS["CHAT_COMPLETION"]["METHOD"],
-             kind=SpanKind.CLIENT,
+             kind=SpanKind.CLIENT.value,
              context=set_span_in_context(trace.get_current_span()),
          )
          for field, value in attributes.model_dump(by_alias=True).items():
-             if value is not None:
-                 span.set_attribute(field, value)
+             set_span_attribute(span, field, value)
          try:
              # Attempt to call the original method
              result = wrapped(*args, **kwargs)
              if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                 span.set_attribute("llm.model", result.model)
+                 set_span_attribute(
+                     span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                 )
                  if hasattr(result, "choices") and result.choices is not None:
                      responses = [
                          {
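set_span_attribute folds the repeated "if value is not None" guard into one call. A plausible reconstruction of the helper (the real one is in langtrace_python_sdk/utils/__init__.py, +14 lines in the file list):

# Inferred from the removed None-check; the shipped helper may also
# skip empty strings or coerce types.
def set_span_attribute(span, name, value):
    if value is not None:
        span.set_attribute(name, value)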
@@ -146,27 +140,23 @@
                          }
                          for choice in result.choices
                      ]
-                     span.set_attribute("llm.responses", json.dumps(responses))
-                 else:
-                     responses = []
-                     span.set_attribute("llm.responses", json.dumps(responses))
+                     set_event_completion(span, responses)
+
                  if (
                      hasattr(result, "system_fingerprint")
                      and result.system_fingerprint is not None
                  ):
-                     span.set_attribute(
-                         "llm.system.fingerprint", result.system_fingerprint
+                     set_span_attribute(
+                         span,
+                         SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                         result.system_fingerprint,
                      )
+
                  # Get the usage
                  if hasattr(result, "usage") and result.usage is not None:
                      usage = result.usage
-                     if usage is not None:
-                         usage_dict = {
-                             "input_tokens": result.usage.prompt_tokens,
-                             "output_tokens": usage.completion_tokens,
-                             "total_tokens": usage.total_tokens,
-                         }
-                         span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                     set_usage_attributes(span, dict(usage))
+
                  span.set_status(StatusCode.OK)
                  span.end()
                  return result
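set_usage_attributes(span, dict(usage)) replaces the hand-rolled llm.token.counts JSON blob. The helper's body lives in utils/llm.py and is not shown here; a hedged sketch that covers both key shapes visible in this file (prompt_tokens/completion_tokens from dict(usage), input_tokens/output_tokens from the streaming paths). The SpanAttributes.LLM_USAGE_* names are assumptions:

# Sketch only: normalize the two usage-dict shapes seen in this patch.
def set_usage_attributes(span, usage: dict):
    input_tokens = usage.get("input_tokens", usage.get("prompt_tokens", 0))
    output_tokens = usage.get("output_tokens", usage.get("completion_tokens", 0))
    span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
    span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens)
    span.set_attribute(
        SpanAttributes.LLM_USAGE_TOTAL_TOKENS, input_tokens + output_tokens
    )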
@@ -255,7 +245,7 @@
                      span.add_event(
                          Event.STREAM_OUTPUT.value,
                          {
-                             "response": (
+                             SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: (
                                  "".join(content)
                                  if len(content) > 0 and content[0] is not None
                                  else ""
@@ -267,27 +257,14 @@
          finally:
              # Finalize span after processing all chunks
              span.add_event(Event.STREAM_END.value)
-             span.set_attribute(
-                 "llm.token.counts",
-                 json.dumps(
-                     {
-                         "input_tokens": prompt_tokens,
-                         "output_tokens": completion_tokens,
-                         "total_tokens": prompt_tokens + completion_tokens,
-                     }
-                 ),
+             set_usage_attributes(
+                 span,
+                 {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
              )
-             span.set_attribute(
-                 "llm.responses",
-                 json.dumps(
-                     [
-                         {
-                             "role": "assistant",
-                             "content": "".join(result_content),
-                         }
-                     ]
-                 ),
+             set_event_completion(
+                 span, [{"role": "assistant", "content": "".join(result_content)}]
              )
+
              span.set_status(StatusCode.OK)
              span.end()

@@ -299,20 +276,13 @@ def async_chat_completions_create(original_method, version, tracer):
      """Wrap the `create` method of the `ChatCompletion` class to trace it."""

      async def traced_method(wrapped, instance, args, kwargs):
-         base_url = (
-             str(instance._client._base_url)
-             if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-             else ""
-         )
          service_provider = SERVICE_PROVIDERS["GROQ"]
          # If base url contains perplexity or azure, set the service provider accordingly
-         if "perplexity" in base_url:
+         if "perplexity" in get_base_url(instance):
              service_provider = SERVICE_PROVIDERS["PPLX"]
-         elif "azure" in base_url:
+         elif "azure" in get_base_url(instance):
              service_provider = SERVICE_PROVIDERS["AZURE"]

-         extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
          # handle tool calls in the kwargs
          llm_prompts = []
          for item in kwargs.get("messages", []):
@@ -342,27 +312,17 @@
              llm_prompts.append(item)

          span_attributes = {
-             "langtrace.sdk.name": "langtrace-python-sdk",
-             "langtrace.service.name": service_provider,
-             "langtrace.service.type": "llm",
-             "langtrace.service.version": version,
-             "langtrace.version": v(LANGTRACE_SDK_NAME),
-             "url.full": base_url,
-             "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-             "llm.prompts": json.dumps(llm_prompts),
-             "llm.stream": kwargs.get("stream"),
-             **(extra_attributes if extra_attributes is not None else {}),
+             **get_langtrace_attributes(version, service_provider),
+             **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+             **get_llm_url(instance),
+             SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+             **get_extra_attributes(),
          }

          attributes = LLMSpanAttributes(**span_attributes)

          tools = []
-         if kwargs.get("temperature") is not None:
-             attributes.llm_temperature = kwargs.get("temperature")
-         if kwargs.get("top_p") is not None:
-             attributes.llm_top_p = kwargs.get("top_p")
-         if kwargs.get("user") is not None:
-             attributes.llm_user = kwargs.get("user")
+
          if kwargs.get("functions") is not None:
              for function in kwargs.get("functions"):
                  tools.append(json.dumps({"type": "function", "function": function}))
@@ -373,18 +333,19 @@

          # TODO(Karthik): Gotta figure out how to handle streaming with context
          # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-         # kind=SpanKind.CLIENT) as span:
+         # kind=SpanKind.CLIENT.value) as span:
          span = tracer.start_span(
-             APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT
+             APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT.value
          )
          for field, value in attributes.model_dump(by_alias=True).items():
-             if value is not None:
-                 span.set_attribute(field, value)
+             set_span_attribute(span, field, value)
          try:
              # Attempt to call the original method
              result = await wrapped(*args, **kwargs)
              if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                 span.set_attribute("llm.model", result.model)
+                 set_span_attribute(
+                     span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                 )
                  if hasattr(result, "choices") and result.choices is not None:
                      responses = [
                          {
@@ -406,27 +367,25 @@
                          }
                          for choice in result.choices
                      ]
-                     span.set_attribute("llm.responses", json.dumps(responses))
-                 else:
-                     responses = []
-                     span.set_attribute("llm.responses", json.dumps(responses))
+
+                     set_event_completion(span, responses)
+
                  if (
                      hasattr(result, "system_fingerprint")
                      and result.system_fingerprint is not None
                  ):
-                     span.set_attribute(
-                         "llm.system.fingerprint", result.system_fingerprint
+                     set_span_attribute(
+                         span,
+                         SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                         result.system_fingerprint,
                      )
+
                  # Get the usage
                  if hasattr(result, "usage") and result.usage is not None:
                      usage = result.usage
                      if usage is not None:
-                         usage_dict = {
-                             "input_tokens": result.usage.prompt_tokens,
-                             "output_tokens": usage.completion_tokens,
-                             "total_tokens": usage.total_tokens,
-                         }
-                         span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                         set_usage_attributes(span, dict(usage))
+
                  span.set_status(StatusCode.OK)
                  span.end()
                  return result
@@ -469,6 +428,9 @@
              try:
                  async for chunk in result:
                      if hasattr(chunk, "model") and chunk.model is not None:
+                         set_span_attribute(
+                             span, SpanAttributes.LLM_RESPONSE_MODEL, chunk.model
+                         )
                          span.set_attribute("llm.model", chunk.model)
                      if hasattr(chunk, "choices") and chunk.choices is not None:
                          if not function_call and not tool_calls:
@@ -513,9 +475,9 @@
                      else:
                          content = []
                      span.add_event(
-                         Event.STREAM_OUTPUT.value,
+                         Event.RESPONSE.value,
                          {
-                             "response": (
+                             SpanAttributes.LLM_COMPLETIONS: (
                                  "".join(content)
                                  if len(content) > 0 and content[0] is not None
                                  else ""
@@ -527,27 +489,22 @@
          finally:
              # Finalize span after processing all chunks
              span.add_event(Event.STREAM_END.value)
-             span.set_attribute(
-                 "llm.token.counts",
-                 json.dumps(
+
+             set_usage_attributes(
+                 span,
+                 {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
+             )
+
+             set_event_completion(
+                 span,
+                 [
                      {
-                         "input_tokens": prompt_tokens,
-                         "output_tokens": completion_tokens,
-                         "total_tokens": prompt_tokens + completion_tokens,
+                         "role": "assistant",
+                         "content": "".join(result_content),
                      }
-                 ),
-             )
-             span.set_attribute(
-                 "llm.responses",
-                 json.dumps(
-                     [
-                         {
-                             "role": "assistant",
-                             "content": "".join(result_content),
-                         }
-                     ]
-                 ),
+                 ],
              )
+
              span.set_status(StatusCode.OK)
              span.end()

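set_event_completion likewise moves final completions from a span attribute (llm.responses) to a span event. A hedged sketch of the pattern, reusing the Event.RESPONSE and SpanAttributes.LLM_COMPLETIONS names that do appear in the hunks above; the exact payload encoding is an assumption:

# Sketch only: emit completions as a structured span event instead of
# a flat attribute; the actual implementation lives in utils/llm.py.
import json

def set_event_completion(span, completions: list):
    span.add_event(
        Event.RESPONSE.value,
        {SpanAttributes.LLM_COMPLETIONS: json.dumps(completions)},
    )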