langtrace-python-sdk 2.1.29__py3-none-any.whl → 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. examples/cohere_example/chat.py +1 -0
  2. examples/cohere_example/chat_stream.py +3 -0
  3. examples/dspy_example/math_problems_cot_parallel.py +59 -0
  4. examples/gemini_example/__init__.py +6 -0
  5. examples/gemini_example/function_tools.py +62 -0
  6. examples/gemini_example/main.py +91 -0
  7. examples/langchain_example/__init__.py +8 -0
  8. examples/langchain_example/groq_example.py +28 -15
  9. examples/ollama_example/basic.py +1 -0
  10. examples/openai_example/__init__.py +1 -0
  11. examples/openai_example/async_tool_calling_nonstreaming.py +1 -1
  12. examples/openai_example/chat_completion.py +1 -1
  13. examples/openai_example/embeddings_create.py +1 -0
  14. examples/openai_example/images_edit.py +2 -2
  15. examples/vertexai_example/__init__.py +6 -0
  16. examples/vertexai_example/main.py +214 -0
  17. langtrace_python_sdk/constants/instrumentation/common.py +2 -0
  18. langtrace_python_sdk/constants/instrumentation/gemini.py +12 -0
  19. langtrace_python_sdk/constants/instrumentation/vertexai.py +42 -0
  20. langtrace_python_sdk/instrumentation/__init__.py +4 -0
  21. langtrace_python_sdk/instrumentation/anthropic/patch.py +68 -96
  22. langtrace_python_sdk/instrumentation/chroma/patch.py +29 -29
  23. langtrace_python_sdk/instrumentation/cohere/patch.py +143 -242
  24. langtrace_python_sdk/instrumentation/dspy/instrumentation.py +2 -2
  25. langtrace_python_sdk/instrumentation/dspy/patch.py +36 -36
  26. langtrace_python_sdk/instrumentation/gemini/__init__.py +3 -0
  27. langtrace_python_sdk/instrumentation/gemini/instrumentation.py +36 -0
  28. langtrace_python_sdk/instrumentation/gemini/patch.py +186 -0
  29. langtrace_python_sdk/instrumentation/groq/patch.py +82 -125
  30. langtrace_python_sdk/instrumentation/ollama/patch.py +62 -65
  31. langtrace_python_sdk/instrumentation/openai/patch.py +190 -494
  32. langtrace_python_sdk/instrumentation/qdrant/patch.py +6 -6
  33. langtrace_python_sdk/instrumentation/vertexai/__init__.py +3 -0
  34. langtrace_python_sdk/instrumentation/vertexai/instrumentation.py +33 -0
  35. langtrace_python_sdk/instrumentation/vertexai/patch.py +131 -0
  36. langtrace_python_sdk/langtrace.py +5 -0
  37. langtrace_python_sdk/utils/__init__.py +14 -3
  38. langtrace_python_sdk/utils/llm.py +311 -6
  39. langtrace_python_sdk/version.py +1 -1
  40. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.2.dist-info}/METADATA +26 -19
  41. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.2.dist-info}/RECORD +58 -38
  42. tests/anthropic/test_anthropic.py +28 -27
  43. tests/cohere/test_cohere_chat.py +36 -36
  44. tests/cohere/test_cohere_embed.py +12 -9
  45. tests/cohere/test_cohere_rerank.py +18 -11
  46. tests/groq/cassettes/test_async_chat_completion.yaml +113 -0
  47. tests/groq/cassettes/test_async_chat_completion_streaming.yaml +2232 -0
  48. tests/groq/cassettes/test_chat_completion.yaml +114 -0
  49. tests/groq/cassettes/test_chat_completion_streaming.yaml +2512 -0
  50. tests/groq/conftest.py +33 -0
  51. tests/groq/test_groq.py +142 -0
  52. tests/openai/cassettes/test_async_chat_completion_streaming.yaml +28 -28
  53. tests/openai/test_chat_completion.py +53 -67
  54. tests/openai/test_image_generation.py +47 -24
  55. tests/utils.py +40 -5
  56. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.2.dist-info}/WHEEL +0 -0
  57. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.2.dist-info}/entry_points.txt +0 -0
  58. {langtrace_python_sdk-2.1.29.dist-info → langtrace_python_sdk-2.2.2.dist-info}/licenses/LICENSE +0 -0
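The headline changes in this release are the new Gemini and Vertex AI instrumentations and the consolidation of per-integration span-attribute boilerplate into shared helpers in langtrace_python_sdk/utils/llm.py, as the diffs below show. As orientation, a minimal sketch of picking the new instrumentations up; `langtrace.init()` is the SDK's documented entry point, and auto-patching of installed gemini/vertexai clients is an assumption based on the new files listed above:

# Minimal sketch, not verbatim from this release. The api_key value is a
# placeholder; init() is assumed to enable all installed instrumentations,
# including the new gemini/ and vertexai/ packages added in 2.2.x.
from langtrace_python_sdk import langtrace

langtrace.init(api_key="<your-langtrace-api-key>")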
langtrace_python_sdk/instrumentation/groq/patch.py
@@ -17,11 +17,21 @@ limitations under the License.
 import json
 
 from langtrace.trace_attributes import Event, LLMSpanAttributes
+from langtrace_python_sdk.utils import set_span_attribute
 from opentelemetry import baggage, trace
 from opentelemetry.trace.propagation import set_span_in_context
 from opentelemetry.trace import SpanKind
 from opentelemetry.trace.status import Status, StatusCode
 
+from langtrace_python_sdk.utils.llm import (
+    get_base_url,
+    get_extra_attributes,
+    get_llm_request_attributes,
+    get_llm_url,
+    get_langtrace_attributes,
+    set_event_completion,
+    set_usage_attributes,
+)
 from langtrace_python_sdk.constants.instrumentation.common import (
     LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY,
     SERVICE_PROVIDERS,
@@ -31,26 +41,20 @@ from langtrace_python_sdk.utils.llm import calculate_prompt_tokens, estimate_tok
 from importlib_metadata import version as v
 
 from langtrace_python_sdk.constants import LANGTRACE_SDK_NAME
+from langtrace.trace_attributes import SpanAttributes
 
 
 def chat_completions_create(original_method, version, tracer):
     """Wrap the `create` method of the `ChatCompletion` class to trace it."""
 
     def traced_method(wrapped, instance, args, kwargs):
-        base_url = (
-            str(instance._client._base_url)
-            if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-            else ""
-        )
         service_provider = SERVICE_PROVIDERS["GROQ"]
         # If base url contains perplexity or azure, set the service provider accordingly
-        if "perplexity" in base_url:
+        if "perplexity" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["PPLX"]
-        elif "azure" in base_url:
+        elif "azure" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["AZURE"]
 
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
         # handle tool calls in the kwargs
         llm_prompts = []
         for item in kwargs.get("messages", []):
@@ -80,27 +84,16 @@ def chat_completions_create(original_method, version, tracer):
                 llm_prompts.append(item)
 
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "url.full": base_url,
-            "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-            "llm.prompts": json.dumps(llm_prompts),
-            "llm.stream": kwargs.get("stream"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
         tools = []
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("top_p") is not None:
-            attributes.llm_top_p = kwargs.get("top_p")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
         if kwargs.get("functions") is not None:
             for function in kwargs.get("functions"):
                 tools.append(json.dumps({"type": "function", "function": function}))
@@ -111,20 +104,21 @@ def chat_completions_create(original_method, version, tracer):
 
         # TODO(Karthik): Gotta figure out how to handle streaming with context
         # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-        # kind=SpanKind.CLIENT) as span:
+        # kind=SpanKind.CLIENT.value) as span:
         span = tracer.start_span(
             APIS["CHAT_COMPLETION"]["METHOD"],
-            kind=SpanKind.CLIENT,
+            kind=SpanKind.CLIENT.value,
             context=set_span_in_context(trace.get_current_span()),
         )
         for field, value in attributes.model_dump(by_alias=True).items():
-            if value is not None:
-                span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = wrapped(*args, **kwargs)
             if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                span.set_attribute("llm.model", result.model)
+                set_span_attribute(
+                    span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                )
                 if hasattr(result, "choices") and result.choices is not None:
                     responses = [
                         {
@@ -146,27 +140,23 @@ def chat_completions_create(original_method, version, tracer):
                         }
                         for choice in result.choices
                     ]
-                    span.set_attribute("llm.responses", json.dumps(responses))
-                else:
-                    responses = []
-                    span.set_attribute("llm.responses", json.dumps(responses))
+                    set_event_completion(span, responses)
+
                 if (
                     hasattr(result, "system_fingerprint")
                     and result.system_fingerprint is not None
                 ):
-                    span.set_attribute(
-                        "llm.system.fingerprint", result.system_fingerprint
+                    set_span_attribute(
+                        span,
+                        SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                        result.system_fingerprint,
                     )
+
                 # Get the usage
                 if hasattr(result, "usage") and result.usage is not None:
                     usage = result.usage
-                    if usage is not None:
-                        usage_dict = {
-                            "input_tokens": result.usage.prompt_tokens,
-                            "output_tokens": usage.completion_tokens,
-                            "total_tokens": usage.total_tokens,
-                        }
-                        span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                    set_usage_attributes(span, dict(usage))
+
                 span.set_status(StatusCode.OK)
                 span.end()
                 return result
@@ -255,7 +245,7 @@ def chat_completions_create(original_method, version, tracer):
                     span.add_event(
                         Event.STREAM_OUTPUT.value,
                         {
-                            "response": (
+                            SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: (
                                 "".join(content)
                                 if len(content) > 0 and content[0] is not None
                                 else ""
@@ -267,27 +257,14 @@ def chat_completions_create(original_method, version, tracer):
         finally:
             # Finalize span after processing all chunks
             span.add_event(Event.STREAM_END.value)
-            span.set_attribute(
-                "llm.token.counts",
-                json.dumps(
-                    {
-                        "input_tokens": prompt_tokens,
-                        "output_tokens": completion_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens,
-                    }
-                ),
+            set_usage_attributes(
+                span,
+                {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
            )
-            span.set_attribute(
-                "llm.responses",
-                json.dumps(
-                    [
-                        {
-                            "role": "assistant",
-                            "content": "".join(result_content),
-                        }
-                    ]
-                ),
+            set_event_completion(
+                span, [{"role": "assistant", "content": "".join(result_content)}]
            )
+
             span.set_status(StatusCode.OK)
             span.end()
 
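The hunk above replaces two hand-assembled JSON attributes ("llm.token.counts" and "llm.responses") with two helper calls. A minimal sketch of the finalization pattern the sync path (and the async path further down) now shares; the helper signatures are inferred from this diff, not copied from the SDK source:

# Sketch of the shared stream-finalization pattern (illustrative only).
from langtrace_python_sdk.utils.llm import set_event_completion, set_usage_attributes

def finalize_stream(span, prompt_tokens, completion_tokens, result_content):
    # Usage is recorded as discrete token attributes; the helper is assumed
    # to derive the total from input + output itself.
    set_usage_attributes(
        span,
        {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
    )
    # The assembled assistant message is attached as a completion event
    # instead of a JSON-encoded "llm.responses" attribute.
    set_event_completion(
        span, [{"role": "assistant", "content": "".join(result_content)}]
    )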
@@ -299,20 +276,13 @@ def async_chat_completions_create(original_method, version, tracer):
     """Wrap the `create` method of the `ChatCompletion` class to trace it."""
 
     async def traced_method(wrapped, instance, args, kwargs):
-        base_url = (
-            str(instance._client._base_url)
-            if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-            else ""
-        )
         service_provider = SERVICE_PROVIDERS["GROQ"]
         # If base url contains perplexity or azure, set the service provider accordingly
-        if "perplexity" in base_url:
+        if "perplexity" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["PPLX"]
-        elif "azure" in base_url:
+        elif "azure" in get_base_url(instance):
             service_provider = SERVICE_PROVIDERS["AZURE"]
 
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
-
         # handle tool calls in the kwargs
         llm_prompts = []
         for item in kwargs.get("messages", []):
@@ -342,27 +312,17 @@ def async_chat_completions_create(original_method, version, tracer):
                 llm_prompts.append(item)
 
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "url.full": base_url,
-            "llm.api": APIS["CHAT_COMPLETION"]["ENDPOINT"],
-            "llm.prompts": json.dumps(llm_prompts),
-            "llm.stream": kwargs.get("stream"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: APIS["CHAT_COMPLETION"]["ENDPOINT"],
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
 
         tools = []
-        if kwargs.get("temperature") is not None:
-            attributes.llm_temperature = kwargs.get("temperature")
-        if kwargs.get("top_p") is not None:
-            attributes.llm_top_p = kwargs.get("top_p")
-        if kwargs.get("user") is not None:
-            attributes.llm_user = kwargs.get("user")
+
         if kwargs.get("functions") is not None:
             for function in kwargs.get("functions"):
                 tools.append(json.dumps({"type": "function", "function": function}))
@@ -373,18 +333,19 @@ def async_chat_completions_create(original_method, version, tracer):
 
         # TODO(Karthik): Gotta figure out how to handle streaming with context
         # with tracer.start_as_current_span(APIS["CHAT_COMPLETION"]["METHOD"],
-        # kind=SpanKind.CLIENT) as span:
+        # kind=SpanKind.CLIENT.value) as span:
         span = tracer.start_span(
-            APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT
+            APIS["CHAT_COMPLETION"]["METHOD"], kind=SpanKind.CLIENT.value
         )
         for field, value in attributes.model_dump(by_alias=True).items():
-            if value is not None:
-                span.set_attribute(field, value)
+            set_span_attribute(span, field, value)
         try:
             # Attempt to call the original method
             result = await wrapped(*args, **kwargs)
             if kwargs.get("stream") is False or kwargs.get("stream") is None:
-                span.set_attribute("llm.model", result.model)
+                set_span_attribute(
+                    span, SpanAttributes.LLM_RESPONSE_MODEL, result.model
+                )
                 if hasattr(result, "choices") and result.choices is not None:
                     responses = [
                         {
@@ -406,27 +367,25 @@ def async_chat_completions_create(original_method, version, tracer):
                         }
                         for choice in result.choices
                     ]
-                    span.set_attribute("llm.responses", json.dumps(responses))
-                else:
-                    responses = []
-                    span.set_attribute("llm.responses", json.dumps(responses))
+
+                    set_event_completion(span, responses)
+
                 if (
                     hasattr(result, "system_fingerprint")
                     and result.system_fingerprint is not None
                 ):
-                    span.set_attribute(
-                        "llm.system.fingerprint", result.system_fingerprint
+                    set_span_attribute(
+                        span,
+                        SpanAttributes.LLM_SYSTEM_FINGERPRINT,
+                        result.system_fingerprint,
                     )
+
                 # Get the usage
                 if hasattr(result, "usage") and result.usage is not None:
                     usage = result.usage
                     if usage is not None:
-                        usage_dict = {
-                            "input_tokens": result.usage.prompt_tokens,
-                            "output_tokens": usage.completion_tokens,
-                            "total_tokens": usage.total_tokens,
-                        }
-                        span.set_attribute("llm.token.counts", json.dumps(usage_dict))
+                        set_usage_attributes(span, dict(usage))
+
                 span.set_status(StatusCode.OK)
                 span.end()
                 return result
@@ -469,6 +428,9 @@ def async_chat_completions_create(original_method, version, tracer):
             try:
                 async for chunk in result:
                     if hasattr(chunk, "model") and chunk.model is not None:
+                        set_span_attribute(
+                            span, SpanAttributes.LLM_RESPONSE_MODEL, chunk.model
+                        )
                         span.set_attribute("llm.model", chunk.model)
                     if hasattr(chunk, "choices") and chunk.choices is not None:
                         if not function_call and not tool_calls:
@@ -513,9 +475,9 @@ def async_chat_completions_create(original_method, version, tracer):
                     else:
                         content = []
                     span.add_event(
-                        Event.STREAM_OUTPUT.value,
+                        Event.RESPONSE.value,
                         {
-                            "response": (
+                            SpanAttributes.LLM_COMPLETIONS: (
                                 "".join(content)
                                 if len(content) > 0 and content[0] is not None
                                 else ""
@@ -527,27 +489,22 @@ def async_chat_completions_create(original_method, version, tracer):
             finally:
                 # Finalize span after processing all chunks
                 span.add_event(Event.STREAM_END.value)
-                span.set_attribute(
-                    "llm.token.counts",
-                    json.dumps(
+
+                set_usage_attributes(
+                    span,
+                    {"input_tokens": prompt_tokens, "output_tokens": completion_tokens},
+                )
+
+                set_event_completion(
+                    span,
+                    [
                         {
-                            "input_tokens": prompt_tokens,
-                            "output_tokens": completion_tokens,
-                            "total_tokens": prompt_tokens + completion_tokens,
+                            "role": "assistant",
+                            "content": "".join(result_content),
                         }
-                    ),
-                )
-                span.set_attribute(
-                    "llm.responses",
-                    json.dumps(
-                        [
-                            {
-                                "role": "assistant",
-                                "content": "".join(result_content),
-                            }
-                        ]
-                    ),
+                    ],
                 )
+
                 span.set_status(StatusCode.OK)
                 span.end()
 
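That is the whole groq/patch.py change: provider detection goes through `get_base_url(instance)`, and the span-attribute dict is assembled from shared helpers instead of being rebuilt in every integration. A sketch of the consolidated pattern, with helper behavior inferred from how the hunks above use them (the comments are assumptions, not documented semantics):

# Sketch of the helper-based span setup groq/patch.py now uses.
from langtrace.trace_attributes import LLMSpanAttributes, SpanAttributes
from langtrace_python_sdk.utils import set_span_attribute
from langtrace_python_sdk.utils.llm import (
    get_extra_attributes,        # assumed: baggage-propagated user attributes
    get_langtrace_attributes,    # assumed: sdk name/version, service type/version
    get_llm_request_attributes,  # assumed: model, stream, temperature, prompts, ...
    get_llm_url,                 # assumed: url.full derived from instance._client
)

def apply_span_attributes(span, version, service_provider, instance, kwargs, endpoint):
    span_attributes = {
        **get_langtrace_attributes(version, service_provider),
        **get_llm_request_attributes(kwargs, prompts=kwargs.get("messages")),
        **get_llm_url(instance),
        SpanAttributes.LLM_PATH: endpoint,
        **get_extra_attributes(),
    }
    # LLMSpanAttributes validates the dict; set_span_attribute is assumed to
    # skip None values, replacing the old `if value is not None` guard.
    attributes = LLMSpanAttributes(**span_attributes)
    for field, value in attributes.model_dump(by_alias=True).items():
        set_span_attribute(span, field, value)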
langtrace_python_sdk/instrumentation/ollama/patch.py
@@ -1,41 +1,35 @@
 from langtrace_python_sdk.constants.instrumentation.ollama import APIS
-from importlib_metadata import version as v
-from langtrace_python_sdk.constants import LANGTRACE_SDK_NAME
 from langtrace_python_sdk.utils import set_span_attribute
-from langtrace_python_sdk.utils.silently_fail import silently_fail
-from langtrace_python_sdk.constants.instrumentation.common import (
-    LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY,
-    SERVICE_PROVIDERS,
+from langtrace_python_sdk.utils.llm import (
+    get_extra_attributes,
+    get_langtrace_attributes,
+    get_llm_request_attributes,
+    get_llm_url,
+    set_event_completion,
 )
-from opentelemetry import baggage
+from langtrace_python_sdk.utils.silently_fail import silently_fail
+from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
 from langtrace.trace_attributes import LLMSpanAttributes, Event
 from opentelemetry.trace import SpanKind
 import json
 from opentelemetry.trace.status import Status, StatusCode
+from langtrace.trace_attributes import SpanAttributes
 
 
 def generic_patch(operation_name, version, tracer):
     def traced_method(wrapped, instance, args, kwargs):
-        base_url = (
-            str(instance._client._base_url)
-            if hasattr(instance, "_client") and hasattr(instance._client, "_base_url")
-            else ""
-        )
         api = APIS[operation_name]
         service_provider = SERVICE_PROVIDERS["OLLAMA"]
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "llm.model": kwargs.get("model"),
-            "llm.stream": kwargs.get("stream"),
-            "url.full": base_url,
-            "llm.api": api["ENDPOINT"],
-            "llm.response_format": kwargs.get("format"),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(
+                kwargs,
+                prompts=kwargs.get("messages", None),
+            ),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: api["ENDPOINT"],
+            SpanAttributes.LLM_RESPONSE_FORMAT: kwargs.get("format"),
+            **get_extra_attributes(),
         }
 
         attributes = LLMSpanAttributes(**span_attributes)
@@ -77,24 +71,14 @@ def ageneric_patch(operation_name, version, tracer):
     async def traced_method(wrapped, instance, args, kwargs):
         api = APIS[operation_name]
         service_provider = SERVICE_PROVIDERS["OLLAMA"]
-        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
         span_attributes = {
-            "langtrace.sdk.name": "langtrace-python-sdk",
-            "langtrace.service.name": service_provider,
-            "url.full": "",
-            "llm.api": "",
-            "langtrace.service.type": "llm",
-            "langtrace.service.version": version,
-            "langtrace.version": v(LANGTRACE_SDK_NAME),
-            "llm.model": kwargs.get("model"),
-            "llm.stream": kwargs.get("stream"),
-            "llm.response_format": kwargs.get("format"),
-            "http.timeout": (
-                kwargs.get("keep_alive") if "keep_alive" in kwargs else None
-            ),
-            **(extra_attributes if extra_attributes is not None else {}),
+            **get_langtrace_attributes(version, service_provider),
+            **get_llm_request_attributes(kwargs),
+            **get_llm_url(instance),
+            SpanAttributes.LLM_PATH: api["ENDPOINT"],
+            SpanAttributes.LLM_RESPONSE_FORMAT: kwargs.get("format"),
+            **get_extra_attributes(),
         }
-
         attributes = LLMSpanAttributes(**span_attributes)
         with tracer.start_as_current_span(api["METHOD"], kind=SpanKind.CLIENT) as span:
             _set_input_attributes(span, kwargs, attributes)
@@ -130,23 +114,25 @@ def _set_response_attributes(span, response):
     input_tokens = response.get("prompt_eval_count") or 0
     output_tokens = response.get("eval_count") or 0
     total_tokens = input_tokens + output_tokens
-    usage_dict = {
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-        "total_tokens": total_tokens,
-    }
 
     if total_tokens > 0:
-        set_span_attribute(span, "llm.token.counts", json.dumps(usage_dict))
-    set_span_attribute(span, "llm.finish_reason", response.get("done_reason"))
+        set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
+        set_span_attribute(
+            span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens
+        )
+        set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)
+
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_RESPONSE_FINISH_REASON,
+        response.get("done_reason"),
+    )
     if "message" in response:
-        set_span_attribute(span, "llm.responses", json.dumps([response.get("message")]))
+        set_event_completion(span, [response.get("message")])
 
     if "response" in response:
-        set_span_attribute(
-            span,
-            "llm.responses",
-            json.dumps([{"role": "assistant", "content": response.get("response")}]),
+        set_event_completion(
+            span, [{"role": "assistant", "content": response.get("response")}]
        )
 
@@ -156,26 +142,23 @@ def _set_input_attributes(span, kwargs, attributes):
 
     for field, value in attributes.model_dump(by_alias=True).items():
         set_span_attribute(span, field, value)
-    if "messages" in kwargs:
+
+    if "options" in kwargs:
         set_span_attribute(
             span,
-            "llm.prompts",
-            json.dumps(kwargs.get("messages", [])),
+            SpanAttributes.LLM_REQUEST_TEMPERATURE,
+            options.get("temperature"),
         )
-    if "prompt" in kwargs:
+        set_span_attribute(span, SpanAttributes.LLM_REQUEST_TOP_P, options.get("top_p"))
         set_span_attribute(
             span,
-            "llm.prompts",
-            json.dumps([{"role": "user", "content": kwargs.get("prompt", "")}]),
+            SpanAttributes.LLM_FREQUENCY_PENALTY,
+            options.get("frequency_penalty"),
         )
-    if "options" in kwargs:
-        set_span_attribute(span, "llm.temperature", options.get("temperature"))
-        set_span_attribute(span, "llm.top_p", options.get("top_p"))
         set_span_attribute(
-            span, "llm.frequency_penalty", options.get("frequency_penalty")
-        )
-        set_span_attribute(
-            span, "llm.presence_penalty", options.get("presence_penalty")
+            span,
+            SpanAttributes.LLM_PRESENCE_PENALTY,
+            options.get("presence_penalty"),
         )
 
 
@@ -194,6 +177,14 @@ def _handle_streaming_response(span, response, api):
             if api == "generate":
                 accumulated_tokens["response"] += chunk["response"]
 
+            span.add_event(
+                Event.STREAM_OUTPUT.value,
+                {
+                    SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: chunk.get("response")
+                    or chunk.get("message").get("content"),
+                },
+            )
+
             _set_response_attributes(span, chunk | accumulated_tokens)
     finally:
         # Finalize span after processing all chunks
@@ -220,6 +211,12 @@ async def _ahandle_streaming_response(span, response, api):
             if api == "generate":
                 accumulated_tokens["response"] += chunk["response"]
 
+            span.add_event(
+                Event.STREAM_OUTPUT.value,
+                {
+                    SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: json.dumps(chunk),
+                },
+            )
             _set_response_attributes(span, chunk | accumulated_tokens)
     finally:
         # Finalize span after processing all chunks
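Both ollama streaming handlers now also emit a per-chunk STREAM_OUTPUT event before updating the response attributes. A minimal sketch of that event shape, assuming the chunk dicts carry either a "response" field (generate) or a "message" field (chat) as the hunks above suggest:

# Sketch of the per-chunk event the ollama handlers now emit
# (chunk shape assumed from the diff; illustrative only).
from langtrace.trace_attributes import Event, SpanAttributes

def emit_chunk_event(span, chunk: dict):
    # Prefer the generate-style "response" text, else the chat-style message content.
    content = chunk.get("response") or chunk.get("message", {}).get("content", "")
    span.add_event(
        Event.STREAM_OUTPUT.value,
        {SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK: content},
    )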