langtrace-python-sdk 3.3.21__py3-none-any.whl → 3.3.22__py3-none-any.whl

--- a/examples/awsbedrock_examples/__init__.py
+++ b/examples/awsbedrock_examples/__init__.py
@@ -1,10 +1,19 @@
-from examples.awsbedrock_examples.converse import use_converse
+from examples.awsbedrock_examples.converse import (
+    use_converse_stream,
+    use_converse,
+    use_invoke_model_anthropic,
+    use_invoke_model_cohere,
+    use_invoke_model_amazon,
+)
 from langtrace_python_sdk import langtrace, with_langtrace_root_span
 
-langtrace.init()
-
 
 class AWSBedrockRunner:
     @with_langtrace_root_span("AWS_Bedrock")
     def run(self):
+
+        use_converse_stream()
         use_converse()
+        use_invoke_model_anthropic()
+        use_invoke_model_cohere()
+        use_invoke_model_amazon()
--- a/examples/awsbedrock_examples/converse.py
+++ b/examples/awsbedrock_examples/converse.py
@@ -1,34 +1,174 @@
-import os
 import boto3
+import json
 from langtrace_python_sdk import langtrace
+from dotenv import load_dotenv
+import botocore
+
+load_dotenv()
+langtrace.init(write_spans_to_console=False)
+
+brt = boto3.client("bedrock-runtime", region_name="us-east-1")
+brc = boto3.client("bedrock", region_name="us-east-1")
+
+
+def use_converse_stream():
+    model_id = "anthropic.claude-3-haiku-20240307-v1:0"
+    conversation = [
+        {
+            "role": "user",
+            "content": [{"text": "what is the capital of France?"}],
+        }
+    ]
+
+    try:
+        response = brt.converse_stream(
+            modelId=model_id,
+            messages=conversation,
+            inferenceConfig={"maxTokens": 4096, "temperature": 0},
+            additionalModelRequestFields={"top_k": 250},
+        )
+        # response_text = response["output"]["message"]["content"][0]["text"]
+        print(response)
+
+    except Exception as e:
+        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
+        exit(1)
 
-langtrace.init(api_key=os.environ["LANGTRACE_API_KEY"])
 
 def use_converse():
    model_id = "anthropic.claude-3-haiku-20240307-v1:0"
-    client = boto3.client(
-        "bedrock-runtime",
-        region_name="us-east-1",
-        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-    )
    conversation = [
        {
            "role": "user",
-            "content": [{"text": "Write a story about a magic backpack."}],
+            "content": [{"text": "what is the capital of France?"}],
        }
    ]
 
    try:
-        response = client.converse(
+        response = brt.converse(
            modelId=model_id,
            messages=conversation,
-            inferenceConfig={"maxTokens":4096,"temperature":0},
-            additionalModelRequestFields={"top_k":250}
+            inferenceConfig={"maxTokens": 4096, "temperature": 0},
+            additionalModelRequestFields={"top_k": 250},
        )
        response_text = response["output"]["message"]["content"][0]["text"]
        print(response_text)
 
-    except (Exception) as e:
+    except Exception as e:
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
-        exit(1)
+        exit(1)
+
+
+def get_foundation_models():
+    for model in brc.list_foundation_models()["modelSummaries"]:
+        print(model["modelId"])
+
+
+# Invoke Model API
+# Amazon Titan Models
+def use_invoke_model_titan(stream=False):
+    try:
+        prompt_data = "what's the capital of France?"
+        body = json.dumps(
+            {
+                "inputText": prompt_data,
+                "textGenerationConfig": {
+                    "maxTokenCount": 1024,
+                    "topP": 0.95,
+                    "temperature": 0.2,
+                },
+            }
+        )
+        modelId = "amazon.titan-text-express-v1"  # "amazon.titan-tg1-large"
+        accept = "application/json"
+        contentType = "application/json"
+
+        if stream:
+
+            response = brt.invoke_model_with_response_stream(
+                body=body, modelId=modelId, accept=accept, contentType=contentType
+            )
+        else:
+            response = brt.invoke_model(
+                body=body, modelId=modelId, accept=accept, contentType=contentType
+            )
+            response_body = json.loads(response.get("body").read())
+
+    except botocore.exceptions.ClientError as error:
+
+        if error.response["Error"]["Code"] == "AccessDeniedException":
+            print(
+                f"\x1b[41m{error.response['Error']['Message']}\
+                \nTo troubeshoot this issue please refer to the following resources.\
+                \nhttps://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_access-denied.html\
+                \nhttps://docs.aws.amazon.com/bedrock/latest/userguide/security-iam.html\x1b[0m\n"
+            )
+
+        else:
+            raise error
+
+
+# Anthropic Models
+def use_invoke_model_anthropic(stream=False):
+    body = json.dumps(
+        {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": 1024,
+            "temperature": 0.1,
+            "top_p": 0.9,
+            "messages": [{"role": "user", "content": "Hello, Claude"}],
+        }
+    )
+    modelId = "anthropic.claude-v2"
+    accept = "application/json"
+    contentType = "application/json"
+
+    if stream:
+        response = brt.invoke_model_with_response_stream(body=body, modelId=modelId)
+        stream_response = response.get("body")
+        if stream_response:
+            for event in stream_response:
+                chunk = event.get("chunk")
+                if chunk:
+                    print(json.loads(chunk.get("bytes").decode()))
+
+    else:
+        response = brt.invoke_model(
+            body=body, modelId=modelId, accept=accept, contentType=contentType
+        )
+        response_body = json.loads(response.get("body").read())
+        # text
+        print(response_body.get("completion"))
+
+
+def use_invoke_model_llama():
+    model_id = "meta.llama3-8b-instruct-v1:0"
+    prompt = "What is the capital of France?"
+    max_gen_len = 128
+    temperature = 0.1
+    top_p = 0.9
+
+    # Create request body.
+    body = json.dumps(
+        {
+            "prompt": prompt,
+            "max_gen_len": max_gen_len,
+            "temperature": temperature,
+            "top_p": top_p,
+        }
+    )
+    response = brt.invoke_model(body=body, modelId=model_id)
+
+    response_body = json.loads(response.get("body").read())
+
+    return response_body
+
+
+# print(get_foundation_models())
+def use_invoke_model_cohere():
+    model_id = "cohere.command-r-plus-v1"
+    prompt = "What is the capital of France?"
+    body = json.dumps({"prompt": prompt, "max_tokens": 1024, "temperature": 0.1})
+    response = brt.invoke_model(body=body, modelId=model_id)
+    response_body = json.loads(response.get("body").read())
+    print(response_body)
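
The rewritten examples initialize Langtrace and the two boto3 clients at import time and pick up AWS credentials from the default provider chain (seeded from a .env file via load_dotenv()) instead of reading AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY by hand. A minimal sketch of exercising them, assuming credentials and a Langtrace API key are present in the environment or .env:

    # Sketch only: importing the module runs load_dotenv() and langtrace.init().
    from examples.awsbedrock_examples.converse import (
        use_converse,
        use_invoke_model_anthropic,
    )

    use_converse()                # traced through the Converse API
    use_invoke_model_anthropic()  # traced through the InvokeModel API

One caveat for this sketch: as diffed, the package __init__.py imports use_invoke_model_amazon, while converse.py defines the Titan helper as use_invoke_model_titan, so importing through the package would fail until those names agree.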
--- a/langtrace_python_sdk/constants/instrumentation/aws_bedrock.py
+++ b/langtrace_python_sdk/constants/instrumentation/aws_bedrock.py
@@ -1,6 +1,10 @@
 from langtrace.trace_attributes import AWSBedrockMethods
 
 APIS = {
+    "INVOKE_MODEL": {
+        "METHOD": "aws_bedrock.invoke_model",
+        "ENDPOINT": "/invoke-model",
+    },
     "CONVERSE": {
         "METHOD": AWSBedrockMethods.CONVERSE.value,
         "ENDPOINT": "/converse",
--- /dev/null
+++ b/langtrace_python_sdk/instrumentation/aws_bedrock/bedrock_streaming_wrapper.py
@@ -0,0 +1,43 @@
+import json
+from wrapt import ObjectProxy
+
+
+class StreamingWrapper(ObjectProxy):
+    def __init__(
+        self,
+        response,
+        stream_done_callback=None,
+    ):
+        super().__init__(response)
+
+        self._stream_done_callback = stream_done_callback
+        self._accumulating_body = {}
+
+    def __iter__(self):
+        for event in self.__wrapped__:
+            self._process_event(event)
+            yield event
+
+    def _process_event(self, event):
+        chunk = event.get("chunk")
+        if not chunk:
+            return
+
+        decoded_chunk = json.loads(chunk.get("bytes").decode())
+        type = decoded_chunk.get("type")
+
+        if type == "message_start":
+            self._accumulating_body = decoded_chunk.get("message")
+        elif type == "content_block_start":
+            self._accumulating_body["content"].append(
+                decoded_chunk.get("content_block")
+            )
+        elif type == "content_block_delta":
+            self._accumulating_body["content"][-1]["text"] += decoded_chunk.get(
+                "delta"
+            ).get("text")
+        elif type == "message_stop" and self._stream_done_callback:
+            self._accumulating_body["invocation_metrics"] = decoded_chunk.get(
+                "amazon-bedrock-invocationMetrics"
+            )
+            self._stream_done_callback(self._accumulating_body)
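
StreamingWrapper proxies a streaming response object: it re-yields every event untouched while folding Anthropic-style chunks (message_start, content_block_start, content_block_delta, message_stop) into one accumulated message. A hypothetical driver, with trace_stream and on_done invented for this sketch:

    def trace_stream(response):
        def on_done(message):
            # Called once at message_stop, with the assembled content and the
            # "amazon-bedrock-invocationMetrics" captured from the final chunk.
            print(message.get("content"), message.get("invocation_metrics"))

        for event in StreamingWrapper(response.get("body"), stream_done_callback=on_done):
            yield event  # callers still see the raw event stream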
--- a/langtrace_python_sdk/instrumentation/aws_bedrock/instrumentation.py
+++ b/langtrace_python_sdk/instrumentation/aws_bedrock/instrumentation.py
@@ -22,22 +22,13 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.trace import get_tracer
 from wrapt import wrap_function_wrapper as _W
 
-from langtrace_python_sdk.instrumentation.aws_bedrock.patch import (
-    converse, converse_stream
-)
+from langtrace_python_sdk.instrumentation.aws_bedrock.patch import patch_aws_bedrock
 
 logging.basicConfig(level=logging.FATAL)
 
-def _patch_client(client, version: str, tracer) -> None:
-
-    # Store original methods
-    original_converse = client.converse
-
-    # Replace with wrapped versions
-    client.converse = converse("aws_bedrock.converse", version, tracer)(original_converse)
 
 class AWSBedrockInstrumentation(BaseInstrumentor):
-
+
     def instrumentation_dependencies(self) -> Collection[str]:
         return ["boto3 >= 1.35.31"]
 
@@ -46,13 +37,11 @@ class AWSBedrockInstrumentation(BaseInstrumentor):
         tracer = get_tracer(__name__, "", tracer_provider)
         version = importlib.metadata.version("boto3")
 
-        def wrap_create_client(wrapped, instance, args, kwargs):
-            result = wrapped(*args, **kwargs)
-            if args and args[0] == 'bedrock-runtime':
-                _patch_client(result, version, tracer)
-            return result
-
-        _W("boto3", "client", wrap_create_client)
+        _W(
+            module="boto3",
+            name="client",
+            wrapper=patch_aws_bedrock(tracer, version),
+        )
 
     def _uninstrument(self, **kwargs):
-        pass
+        pass
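
With this change the instrumentation no longer swaps methods inside a locally defined wrapper; wrap_function_wrapper routes every boto3.client(...) call through patch_aws_bedrock, which returns non-runtime clients unchanged. A rough sketch of the observable effect, assuming langtrace.init() activates AWSBedrockInstrumentation as part of the SDK's standard setup:

    import boto3
    from langtrace_python_sdk import langtrace

    langtrace.init()  # assumed to register AWSBedrockInstrumentation

    brt = boto3.client("bedrock-runtime", region_name="us-east-1")
    # brt.invoke_model, brt.converse and brt.converse_stream are now wrapped
    s3 = boto3.client("s3")  # early return in patch_aws_bedrock leaves this untouched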
--- a/langtrace_python_sdk/instrumentation/aws_bedrock/patch.py
+++ b/langtrace_python_sdk/instrumentation/aws_bedrock/patch.py
@@ -15,8 +15,12 @@ limitations under the License.
 """
 
 import json
-from functools import wraps
 
+from langtrace_python_sdk.instrumentation.aws_bedrock.bedrock_streaming_wrapper import (
+    StreamingWrapper,
+)
+from .stream_body_wrapper import BufferedStreamBody
+from functools import wraps
 from langtrace.trace_attributes import (
     LLMSpanAttributes,
     SpanAttributes,
@@ -39,73 +43,16 @@ from langtrace_python_sdk.utils.llm import (
     get_span_name,
     set_event_completion,
     set_span_attributes,
+    set_usage_attributes,
 )
 
 
-def traced_aws_bedrock_call(api_name: str, operation_name: str):
-    def decorator(method_name: str, version: str, tracer):
-        def wrapper(original_method):
-            @wraps(original_method)
-            def wrapped_method(*args, **kwargs):
-                service_provider = SERVICE_PROVIDERS["AWS_BEDROCK"]
-
-                input_content = [
-                    {
-                        'role': message.get('role', 'user'),
-                        'content': message.get('content', [])[0].get('text', "")
-                    }
-                    for message in kwargs.get('messages', [])
-                ]
-
-                span_attributes = {
-                    **get_langtrace_attributes(version, service_provider, vendor_type="framework"),
-                    **get_llm_request_attributes(kwargs, operation_name=operation_name, prompts=input_content),
-                    **get_llm_url(args[0] if args else None),
-                    SpanAttributes.LLM_PATH: APIS[api_name]["ENDPOINT"],
-                    **get_extra_attributes(),
-                }
-
-                if api_name == "CONVERSE":
-                    span_attributes.update({
-                        SpanAttributes.LLM_REQUEST_MODEL: kwargs.get('modelId'),
-                        SpanAttributes.LLM_REQUEST_MAX_TOKENS: kwargs.get('inferenceConfig', {}).get('maxTokens'),
-                        SpanAttributes.LLM_REQUEST_TEMPERATURE: kwargs.get('inferenceConfig', {}).get('temperature'),
-                        SpanAttributes.LLM_REQUEST_TOP_P: kwargs.get('inferenceConfig', {}).get('top_p'),
-                    })
-
-                attributes = LLMSpanAttributes(**span_attributes)
-
-                with tracer.start_as_current_span(
-                    name=get_span_name(APIS[api_name]["METHOD"]),
-                    kind=SpanKind.CLIENT,
-                    context=set_span_in_context(trace.get_current_span()),
-                ) as span:
-                    set_span_attributes(span, attributes)
-                    try:
-                        result = original_method(*args, **kwargs)
-                        _set_response_attributes(span, kwargs, result)
-                        span.set_status(StatusCode.OK)
-                        return result
-                    except Exception as err:
-                        span.record_exception(err)
-                        span.set_status(Status(StatusCode.ERROR, str(err)))
-                        raise err
-
-            return wrapped_method
-        return wrapper
-    return decorator
-
-
-converse = traced_aws_bedrock_call("CONVERSE", "converse")
-
-
 def converse_stream(original_method, version, tracer):
     def traced_method(wrapped, instance, args, kwargs):
         service_provider = SERVICE_PROVIDERS["AWS_BEDROCK"]
-
+
         span_attributes = {
-            **get_langtrace_attributes
-            (version, service_provider, vendor_type="llm"),
+            **get_langtrace_attributes(version, service_provider, vendor_type="llm"),
             **get_llm_request_attributes(kwargs),
             **get_llm_url(instance),
             SpanAttributes.LLM_PATH: APIS["CONVERSE_STREAM"]["ENDPOINT"],
@@ -129,29 +76,321 @@ def converse_stream(original_method, version, tracer):
             span.record_exception(err)
             span.set_status(Status(StatusCode.ERROR, str(err)))
             raise err
-
+
+    return traced_method
+
+
+def patch_aws_bedrock(tracer, version):
+    def traced_method(wrapped, instance, args, kwargs):
+        if args and args[0] != "bedrock-runtime":
+            return wrapped(*args, **kwargs)
+
+        client = wrapped(*args, **kwargs)
+        client.invoke_model = patch_invoke_model(client.invoke_model, tracer, version)
+
+        client.converse = patch_converse(client.converse, tracer, version)
+        client.converse_stream = patch_converse_stream(
+            client.converse_stream, tracer, version
+        )
+
+        return client
+
+    return traced_method
+
+
+def patch_converse_stream(original_method, tracer, version):
+    def traced_method(*args, **kwargs):
+        modelId = kwargs.get("modelId")
+        (vendor, _) = modelId.split(".")
+        input_content = [
+            {
+                "role": message.get("role", "user"),
+                "content": message.get("content", [])[0].get("text", ""),
+            }
+            for message in kwargs.get("messages", [])
+        ]
+
+        span_attributes = {
+            **get_langtrace_attributes(version, vendor, vendor_type="framework"),
+            **get_llm_request_attributes(kwargs, model=modelId, prompts=input_content),
+            **get_llm_url(args[0] if args else None),
+            **get_extra_attributes(),
+        }
+        with tracer.start_as_current_span(
+            name=get_span_name("aws_bedrock.converse"),
+            kind=SpanKind.CLIENT,
+            context=set_span_in_context(trace.get_current_span()),
+        ) as span:
+            set_span_attributes(span, span_attributes)
+            response = original_method(*args, **kwargs)
+
+            if span.is_recording():
+                set_span_streaming_response(span, response)
+            return response
+
+    return traced_method
+
+
+def patch_converse(original_method, tracer, version):
+    def traced_method(*args, **kwargs):
+        modelId = kwargs.get("modelId")
+        (vendor, _) = modelId.split(".")
+        input_content = [
+            {
+                "role": message.get("role", "user"),
+                "content": message.get("content", [])[0].get("text", ""),
+            }
+            for message in kwargs.get("messages", [])
+        ]
+
+        span_attributes = {
+            **get_langtrace_attributes(version, vendor, vendor_type="framework"),
+            **get_llm_request_attributes(kwargs, model=modelId, prompts=input_content),
+            **get_llm_url(args[0] if args else None),
+            **get_extra_attributes(),
+        }
+        with tracer.start_as_current_span(
+            name=get_span_name("aws_bedrock.converse"),
+            kind=SpanKind.CLIENT,
+            context=set_span_in_context(trace.get_current_span()),
+        ) as span:
+            set_span_attributes(span, span_attributes)
+            response = original_method(*args, **kwargs)
+
+            if span.is_recording():
+                _set_response_attributes(span, kwargs, response)
+            return response
+
+    return traced_method
+
+
+def patch_invoke_model(original_method, tracer, version):
+    def traced_method(*args, **kwargs):
+        modelId = kwargs.get("modelId")
+        (vendor, _) = modelId.split(".")
+        span_attributes = {
+            **get_langtrace_attributes(version, vendor, vendor_type="framework"),
+            **get_extra_attributes(),
+        }
+        with tracer.start_as_current_span(
+            name=get_span_name("aws_bedrock.invoke_model"),
+            kind=SpanKind.CLIENT,
+            context=set_span_in_context(trace.get_current_span()),
+        ) as span:
+            set_span_attributes(span, span_attributes)
+            response = original_method(*args, **kwargs)
+            if span.is_recording():
+                handle_call(span, kwargs, response)
+            return response
+
     return traced_method
 
 
+def handle_call(span, kwargs, response):
+    modelId = kwargs.get("modelId")
+    (vendor, model_name) = modelId.split(".")
+    response["body"] = BufferedStreamBody(
+        response["body"]._raw_stream, response["body"]._content_length
+    )
+    request_body = json.loads(kwargs.get("body"))
+    response_body = json.loads(response.get("body").read())
+
+    set_span_attribute(span, SpanAttributes.LLM_SYSTEM, vendor)
+    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, modelId)
+    set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, modelId)
+
+    if vendor == "amazon":
+        set_amazon_attributes(span, request_body, response_body)
+
+    if vendor == "anthropic":
+        if "prompt" in request_body:
+            set_anthropic_completions_attributes(span, request_body, response_body)
+        elif "messages" in request_body:
+            set_anthropic_messages_attributes(span, request_body, response_body)
+
+    if vendor == "meta":
+        set_llama_meta_attributes(span, request_body, response_body)
+
+
+def set_llama_meta_attributes(span, request_body, response_body):
+    set_span_attribute(
+        span, SpanAttributes.LLM_REQUEST_TOP_P, request_body.get("top_p")
+    )
+    set_span_attribute(
+        span, SpanAttributes.LLM_REQUEST_TEMPERATURE, request_body.get("temperature")
+    )
+    set_span_attribute(
+        span, SpanAttributes.LLM_REQUEST_MAX_TOKENS, request_body.get("max_gen_len")
+    )
+
+    set_usage_attributes(
+        span,
+        {
+            "input_tokens": response_body.get("prompt_token_count"),
+            "output_tokens": response_body.get("generation_token_count"),
+        },
+    )
+
+    prompts = [
+        {
+            "role": "user",
+            "content": request_body.get("prompt"),
+        }
+    ]
+
+    completions = [
+        {
+            "role": "assistant",
+            "content": response_body.get("generation"),
+        }
+    ]
+    set_span_attribute(span, SpanAttributes.LLM_PROMPTS, json.dumps(prompts))
+    print(completions)
+    set_event_completion(span, completions)
+
+
+def set_amazon_attributes(span, request_body, response_body):
+    config = request_body.get("textGenerationConfig", {})
+    prompts = [
+        {
+            "role": "user",
+            "content": request_body.get("inputText"),
+        }
+    ]
+    completions = [
+        {
+            "role": "assistant",
+            "content": result.get("outputText"),
+        }
+        for result in response_body.get("results")
+    ]
+    set_span_attribute(
+        span, SpanAttributes.LLM_REQUEST_MAX_TOKENS, config.get("maxTokenCount")
+    )
+    set_span_attribute(
+        span, SpanAttributes.LLM_REQUEST_TEMPERATURE, config.get("temperature")
+    )
+    set_span_attribute(span, SpanAttributes.LLM_REQUEST_TOP_P, config.get("topP"))
+    set_span_attribute(span, SpanAttributes.LLM_PROMPTS, json.dumps(prompts))
+    set_usage_attributes(
+        span,
+        {
+            "input_tokens": response_body.get("inputTextTokenCount"),
+            "output_tokens": sum(
+                int(result.get("tokenCount")) for result in response_body.get("results")
+            ),
+        },
+    )
+    set_event_completion(span, completions)
+
+
+def set_anthropic_completions_attributes(span, request_body, response_body):
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_MAX_TOKENS,
+        request_body.get("max_tokens_to_sample"),
+    )
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_TEMPERATURE,
+        str(request_body.get("temperature")),
+    )
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_TOP_P,
+        str(request_body.get("top_p")),
+    )
+    prompts = [
+        {
+            "role": "user",
+            "content": request_body.get("prompt"),
+        }
+    ]
+    completions = [
+        {
+            "role": "assistant",
+            "content": response_body.get("completion"),
+        }
+    ]
+    set_span_attribute(span, SpanAttributes.LLM_PROMPTS, json.dumps(prompts))
+    set_event_completion(span, completions)
+
+
+def set_anthropic_messages_attributes(span, request_body, response_body):
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_MAX_TOKENS,
+        request_body.get("max_tokens_to_sample"),
+    )
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_TEMPERATURE,
+        str(request_body.get("temperature")),
+    )
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_TOP_P,
+        str(request_body.get("top_p")),
+    )
+    set_span_attribute(
+        span, SpanAttributes.LLM_PROMPTS, json.dumps(request_body.get("messages"))
+    )
+    set_event_completion(span, response_body.get("content"))
+    set_usage_attributes(span, response_body.get("usage"))
+
+
 @silently_fail
 def _set_response_attributes(span, kwargs, result):
-    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, kwargs.get('modelId'))
-    set_span_attribute(span, SpanAttributes.LLM_TOP_K, kwargs.get('additionalModelRequestFields', {}).get('top_k'))
-    content = result.get('output', {}).get('message', {}).get('content', [])
+    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, kwargs.get("modelId"))
+    set_span_attribute(
+        span,
+        SpanAttributes.LLM_TOP_K,
+        kwargs.get("additionalModelRequestFields", {}).get("top_k"),
+    )
+    content = result.get("output", {}).get("message", {}).get("content", [])
     if len(content) > 0:
-        role = result.get('output', {}).get('message', {}).get('role', "assistant")
-        responses = [
-            {"role": role, "content": c.get('text', "")}
-            for c in content
-        ]
+        role = result.get("output", {}).get("message", {}).get("role", "assistant")
+        responses = [{"role": role, "content": c.get("text", "")} for c in content]
         set_event_completion(span, responses)
 
-    if 'usage' in result:
+    if "usage" in result:
         set_span_attributes(
             span,
             {
-                SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: result['usage'].get('outputTokens'),
-                SpanAttributes.LLM_USAGE_PROMPT_TOKENS: result['usage'].get('inputTokens'),
-                SpanAttributes.LLM_USAGE_TOTAL_TOKENS: result['usage'].get('totalTokens'),
-            }
+                SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: result["usage"].get(
+                    "outputTokens"
+                ),
+                SpanAttributes.LLM_USAGE_PROMPT_TOKENS: result["usage"].get(
+                    "inputTokens"
+                ),
+                SpanAttributes.LLM_USAGE_TOTAL_TOKENS: result["usage"].get(
+                    "totalTokens"
+                ),
+            },
+        )
+
+
+def set_span_streaming_response(span, response):
+    streaming_response = ""
+    role = None
+    for event in response["stream"]:
+        if "messageStart" in event:
+            role = event["messageStart"]["role"]
+        elif "contentBlockDelta" in event:
+            delta = event["contentBlockDelta"]["delta"]
+            if "text" in delta:
+                streaming_response += delta["text"]
+        elif "metadata" in event and "usage" in event["metadata"]:
+            usage = event["metadata"]["usage"]
+            set_usage_attributes(
+                span,
+                {
+                    "input_tokens": usage.get("inputTokens"),
+                    "output_tokens": usage.get("outputTokens"),
+                },
+            )
+
+    if streaming_response:
+        set_event_completion(
+            span, [{"role": role or "assistant", "content": streaming_response}]
        )
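
handle_call derives the vendor from the model ID prefix (modelId.split(".")) and dispatches to the matching attribute setter. For example, an Anthropic messages-style request like the following (sketch only; brt is assumed to be an instrumented bedrock-runtime client) would land in set_anthropic_messages_attributes, because the body carries a "messages" key rather than "prompt":

    import json

    body = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1024,
            "messages": [{"role": "user", "content": "Hello, Claude"}],
        }
    )
    # vendor = "anthropic.claude-v2".split(".")[0] -> "anthropic"
    response = brt.invoke_model(body=body, modelId="anthropic.claude-v2")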
--- /dev/null
+++ b/langtrace_python_sdk/instrumentation/aws_bedrock/stream_body_wrapper.py
@@ -0,0 +1,41 @@
+from botocore.response import StreamingBody
+from botocore.exceptions import (
+    ReadTimeoutError,
+    ResponseStreamingError,
+)
+from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
+from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
+
+
+class BufferedStreamBody(StreamingBody):
+    def __init__(self, raw_stream, content_length):
+        super().__init__(raw_stream, content_length)
+        self._buffer = None
+        self._buffer_cursor = 0
+
+    def read(self, amt=None):
+        """Read at most amt bytes from the stream.
+
+        If the amt argument is omitted, read all data.
+        """
+        if self._buffer is None:
+            try:
+                self._buffer = self._raw_stream.read()
+            except URLLib3ReadTimeoutError as e:
+                # TODO: the url will be None as urllib3 isn't setting it yet
+                raise ReadTimeoutError(endpoint_url=e.url, error=e)
+            except URLLib3ProtocolError as e:
+                raise ResponseStreamingError(error=e)
+
+        self._amount_read += len(self._buffer)
+        if amt is None or (not self._buffer and amt > 0):
+            # If the server sends empty contents or
+            # we ask to read all of the contents, then we know
+            # we need to verify the content length.
+            self._verify_content_length()
+
+        if amt is None:
+            return self._buffer[self._buffer_cursor :]
+        else:
+            self._buffer_cursor += amt
+            return self._buffer[self._buffer_cursor - amt : self._buffer_cursor]
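
A botocore StreamingBody is normally consumed by its first read(); handle_call above swaps it for BufferedStreamBody so the instrumentation can read the payload for span attributes while later reads are served from the in-memory buffer rather than the drained socket. A small illustration (hypothetical, using io.BytesIO in place of a real HTTP stream):

    import io
    from langtrace_python_sdk.instrumentation.aws_bedrock.stream_body_wrapper import (
        BufferedStreamBody,
    )

    payload = b'{"completion": "Paris"}'
    body = BufferedStreamBody(io.BytesIO(payload), content_length=len(payload))
    print(body.read(5))  # b'{"com' -- first read buffers the whole raw stream
    print(body.read(5))  # b'pleti' -- served from the buffer, cursor advances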
--- a/langtrace_python_sdk/version.py
+++ b/langtrace_python_sdk/version.py
@@ -1 +1 @@
-__version__ = "3.3.21"
+__version__ = "3.3.22"
--- a/langtrace_python_sdk-3.3.21.dist-info/METADATA
+++ b/langtrace_python_sdk-3.3.22.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langtrace-python-sdk
-Version: 3.3.21
+Version: 3.3.22
 Summary: Python SDK for LangTrace
 Project-URL: Homepage, https://github.com/Scale3-Labs/langtrace-python-sdk
 Author-email: Scale3 Labs <engineering@scale3labs.com>
--- a/langtrace_python_sdk-3.3.21.dist-info/RECORD
+++ b/langtrace_python_sdk-3.3.22.dist-info/RECORD
@@ -3,8 +3,8 @@ examples/anthropic_example/__init__.py,sha256=03us1YuvAJR6fqXX8NH2kROBfTmyz7KzFV
 examples/anthropic_example/completion.py,sha256=3_YEZrt0BLVNJT_RbLXg6JGP2bweuc_HPC2MWR73tOM,713
 examples/autogen_example/__init__.py,sha256=UJgpzL2yOmzir-DAiGFR1PB1Zz3YcQvYcq5bCN8nl0A,158
 examples/autogen_example/main.py,sha256=6OJ73VCdHgVrqnekF1S1nK8mXCUABLbUUkQtr7wOCdw,2312
-examples/awsbedrock_examples/__init__.py,sha256=MMaW1756Hqv8rRX6do_O_-SIfauLzoYxRgBemR9KL6g,263
-examples/awsbedrock_examples/converse.py,sha256=vra4yfXYynWyFenoO8wdUnksPx_o481BQlpuWkddLZY,1024
+examples/awsbedrock_examples/__init__.py,sha256=lVm2bgPNb4h83xRW2JbTsYf0DrZjrUr29g7-QleO4iQ,505
+examples/awsbedrock_examples/converse.py,sha256=-J0Hz6OSnIpO4idHol9nxL_Ri9GWwq95aqqi3wlCTRI,5317
 examples/azureopenai_example/__init__.py,sha256=PaZM90r6VN4eSOXxb6wGsyhf9-RJCNqBypzk1Xa2GJI,271
 examples/azureopenai_example/completion.py,sha256=K_GeU0TfJ9lLDfW5VI0Lmm8_I0JXf1x9Qi83ImJ350c,668
 examples/cerebras_example/__init__.py,sha256=ydfNi0DjFMGVcfo79XVG3VEbzIrHo5wYBgSJzl_asNA,295
@@ -109,12 +109,12 @@ examples/weaviate_example/__init__.py,sha256=8JMDBsRSEV10HfTd-YC7xb4txBjD3la56sn
 examples/weaviate_example/query_text.py,sha256=wPHQTc_58kPoKTZMygVjTj-2ZcdrIuaausJfMxNQnQc,127162
 langtrace_python_sdk/__init__.py,sha256=VZM6i71NR7pBQK6XvJWRelknuTYUhqwqE7PlicKa5Wg,1166
 langtrace_python_sdk/langtrace.py,sha256=nvPaJc426Iww3ildrhsSacXtLdzsZIa94_rlK2giyVM,13153
-langtrace_python_sdk/version.py,sha256=FgIp-K9CuZgCkI8xMNisdW4atuWh6YalE8E2kUmjXx4,23
+langtrace_python_sdk/version.py,sha256=RJrshlvNont5yxrTElCH9PRO5xojg3nwHJKaU-kz-pc,23
 langtrace_python_sdk/constants/__init__.py,sha256=3CNYkWMdd1DrkGqzLUgNZXjdAlM6UFMlf_F-odAToyc,146
 langtrace_python_sdk/constants/exporter/langtrace_exporter.py,sha256=EVCrouYCpY98f0KSaKr4PzNxPULTZZO6dSA_crEOyJU,106
 langtrace_python_sdk/constants/instrumentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langtrace_python_sdk/constants/instrumentation/anthropic.py,sha256=YX3llt3zwDY6XrYk3CB8WEVqgrzRXEw_ffyk56JoF3k,126
-langtrace_python_sdk/constants/instrumentation/aws_bedrock.py,sha256=f9eukqoxrPgPeaBJX2gpBUz1uu0dZIPahOpvoudfbH8,310
+langtrace_python_sdk/constants/instrumentation/aws_bedrock.py,sha256=QwKtO4NBarOZoGkt5cFCcpxAw3zvZxcMMWBbzPPGv-g,422
 langtrace_python_sdk/constants/instrumentation/chroma.py,sha256=hiPGYdHS0Yj4Kh3eaYBbuCAl_swqIygu80yFqkOgdak,955
 langtrace_python_sdk/constants/instrumentation/cohere.py,sha256=9yD133VdrYZ5BoJR4nJHlj67gHEImB9-KsD-NkzHW1I,1159
 langtrace_python_sdk/constants/instrumentation/common.py,sha256=DPDX8icb0Tj3OrgpbL9WeiIaMG7Si2IKiSL8YRwwor4,1203
@@ -143,8 +143,10 @@ langtrace_python_sdk/instrumentation/autogen/__init__.py,sha256=unDhpqWQIdHFw24l
 langtrace_python_sdk/instrumentation/autogen/instrumentation.py,sha256=MVDUCBi6XzLQYmZd6myAounI0HeM8QWX5leuul5Hj0Q,1262
 langtrace_python_sdk/instrumentation/autogen/patch.py,sha256=7Sq3C8Q5tT27UkWXd1SZgnLC1pbQf_tpTrxoBIYsDw4,5273
 langtrace_python_sdk/instrumentation/aws_bedrock/__init__.py,sha256=IHqPgR1kdDvcoV1nUb-B21PaJ_qbQB0jc011Udi1ioU,96
-langtrace_python_sdk/instrumentation/aws_bedrock/instrumentation.py,sha256=2l-WiyWYUEoGre92rmylq2jPZ5w4jcxTXmCTuQNC1RU,1911
-langtrace_python_sdk/instrumentation/aws_bedrock/patch.py,sha256=VAroMezSGKT2jQ5tggbdiMRIPr9mtLItGJJgZ-xoGls,6296
+langtrace_python_sdk/instrumentation/aws_bedrock/bedrock_streaming_wrapper.py,sha256=_EMlxfBhjNsB-2TrP0ek-vx1CK5bpgjXPmQWNT9eBcE,1370
+langtrace_python_sdk/instrumentation/aws_bedrock/instrumentation.py,sha256=M7Dyw1tG18ptD0ctCT9yHzO4UyKCcS8JkHg9WVaz9Ck,1473
+langtrace_python_sdk/instrumentation/aws_bedrock/patch.py,sha256=cZ20LS0lNCKoEeeJLVEG5aduuGrapu_u4v8Cl-z6gLg,13406
+langtrace_python_sdk/instrumentation/aws_bedrock/stream_body_wrapper.py,sha256=ENdhRVHBhdkIlJIc_tkf8ASijUzZdVZM-oonNLdNM48,1584
 langtrace_python_sdk/instrumentation/cerebras/__init__.py,sha256=9rHNg7PWcZ7a9jExQZlqwWPkvLGcPT-DGWot0_6Bx9k,92
 langtrace_python_sdk/instrumentation/cerebras/instrumentation.py,sha256=WPsaYxHanYnoxGjDk7fILGJSnSRUs_zoQ30JCyPBMII,1927
 langtrace_python_sdk/instrumentation/cerebras/patch.py,sha256=HR4slOrE3pMp0ABafnlYeTK61G-EnGhOgq3pd9A_G88,4697
@@ -270,8 +272,8 @@ tests/pinecone/cassettes/test_query.yaml,sha256=b5v9G3ssUy00oG63PlFUR3JErF2Js-5A
 tests/pinecone/cassettes/test_upsert.yaml,sha256=neWmQ1v3d03V8WoLl8FoFeeCYImb8pxlJBWnFd_lITU,38607
 tests/qdrant/conftest.py,sha256=9n0uHxxIjWk9fbYc4bx-uP8lSAgLBVx-cV9UjnsyCHM,381
 tests/qdrant/test_qdrant.py,sha256=pzjAjVY2kmsmGfrI2Gs2xrolfuaNHz7l1fqGQCjp5_o,3353
-langtrace_python_sdk-3.3.21.dist-info/METADATA,sha256=nXKNrJYvlgb66g57jWoUtOqOldTmw7X2BQ1nQtHp3NA,15676
-langtrace_python_sdk-3.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langtrace_python_sdk-3.3.21.dist-info/entry_points.txt,sha256=1_b9-qvf2fE7uQNZcbUei9vLpFZBbbh9LrtGw95ssAo,70
-langtrace_python_sdk-3.3.21.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-langtrace_python_sdk-3.3.21.dist-info/RECORD,,
+langtrace_python_sdk-3.3.22.dist-info/METADATA,sha256=U0Uj5LhWApEzItJxDzJOIawCkV5_Xx_1gJU4kS05QjU,15676
+langtrace_python_sdk-3.3.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langtrace_python_sdk-3.3.22.dist-info/entry_points.txt,sha256=1_b9-qvf2fE7uQNZcbUei9vLpFZBbbh9LrtGw95ssAo,70
+langtrace_python_sdk-3.3.22.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+langtrace_python_sdk-3.3.22.dist-info/RECORD,,