monocle-apptrace 0.1.1__py3-none-any.whl → 0.3.0b1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of monocle-apptrace might be problematic.
Files changed (47):
  1. monocle_apptrace/botocore/__init__.py +9 -0
  2. monocle_apptrace/constants.py +18 -4
  3. monocle_apptrace/exporters/aws/s3_exporter.py +158 -0
  4. monocle_apptrace/exporters/azure/blob_exporter.py +125 -0
  5. monocle_apptrace/exporters/base_exporter.py +48 -0
  6. monocle_apptrace/exporters/exporter_processor.py +19 -0
  7. monocle_apptrace/exporters/monocle_exporters.py +27 -0
  8. monocle_apptrace/exporters/okahu/okahu_exporter.py +115 -0
  9. monocle_apptrace/haystack/__init__.py +4 -4
  10. monocle_apptrace/haystack/wrap_pipeline.py +3 -2
  11. monocle_apptrace/instrumentor.py +14 -17
  12. monocle_apptrace/langchain/__init__.py +6 -3
  13. monocle_apptrace/llamaindex/__init__.py +8 -7
  14. monocle_apptrace/message_processing.py +80 -0
  15. monocle_apptrace/metamodel/entities/README.md +33 -10
  16. monocle_apptrace/metamodel/entities/app_hosting_types.json +29 -0
  17. monocle_apptrace/metamodel/entities/entities.json +49 -0
  18. monocle_apptrace/metamodel/entities/inference_types.json +33 -0
  19. monocle_apptrace/metamodel/entities/model_types.json +41 -0
  20. monocle_apptrace/metamodel/entities/vector_store_types.json +25 -0
  21. monocle_apptrace/metamodel/entities/workflow_types.json +22 -0
  22. monocle_apptrace/metamodel/maps/attributes/inference/botocore_entities.json +27 -0
  23. monocle_apptrace/metamodel/maps/attributes/inference/haystack_entities.json +57 -0
  24. monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json +57 -0
  25. monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json +57 -0
  26. monocle_apptrace/metamodel/maps/attributes/retrieval/haystack_entities.json +31 -0
  27. monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json +31 -0
  28. monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json +31 -0
  29. monocle_apptrace/metamodel/maps/botocore_methods.json +13 -0
  30. monocle_apptrace/metamodel/maps/haystack_methods.json +26 -6
  31. monocle_apptrace/metamodel/maps/{lang_chain_methods.json → langchain_methods.json} +31 -8
  32. monocle_apptrace/metamodel/maps/{llama_index_methods.json → llamaindex_methods.json} +30 -8
  33. monocle_apptrace/metamodel/spans/span_example.json +1 -1
  34. monocle_apptrace/metamodel/spans/span_types.json +16 -0
  35. monocle_apptrace/utils.py +179 -20
  36. monocle_apptrace/wrap_common.py +350 -150
  37. monocle_apptrace/wrapper.py +5 -2
  38. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/METADATA +8 -3
  39. monocle_apptrace-0.3.0b1.dist-info/RECORD +48 -0
  40. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/WHEEL +1 -1
  41. monocle_apptrace/haystack/wrap_node.py +0 -27
  42. monocle_apptrace/haystack/wrap_openai.py +0 -44
  43. monocle_apptrace/metamodel/entities/entity_types.json +0 -157
  44. monocle_apptrace/metamodel/entities/entity_types.py +0 -51
  45. monocle_apptrace-0.1.1.dist-info/RECORD +0 -29
  46. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/licenses/LICENSE +0 -0
  47. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/licenses/NOTICE +0 -0
monocle_apptrace/wrap_common.py:

```diff
@@ -1,27 +1,33 @@
-#pylint: disable=protected-access
+# pylint: disable=protected-access
 import logging
 import os
+import inspect
+from importlib.metadata import version
 from urllib.parse import urlparse
-
-from opentelemetry.trace import Span, Tracer
-from monocle_apptrace.utils import resolve_from_alias, update_span_with_infra_name, with_tracer_wrapper, get_embedding_model
+from opentelemetry.trace import Tracer
+from opentelemetry.sdk.trace import Span
+from monocle_apptrace.utils import resolve_from_alias, with_tracer_wrapper, get_embedding_model, get_attribute, get_workflow_name, set_embedding_model, set_app_hosting_identifier_attribute
+from monocle_apptrace.utils import set_attribute, get_vectorstore_deployment
+from monocle_apptrace.utils import get_fully_qualified_class_name, get_nested_value
+from monocle_apptrace.message_processing import extract_messages, extract_assistant_message
+from functools import wraps
 
 logger = logging.getLogger(__name__)
 WORKFLOW_TYPE_KEY = "workflow_type"
-CONTEXT_INPUT_KEY = "context_input"
-CONTEXT_OUTPUT_KEY = "context_output"
-PROMPT_INPUT_KEY = "input"
-PROMPT_OUTPUT_KEY = "output"
-QUERY = "question"
+DATA_INPUT_KEY = "data.input"
+DATA_OUTPUT_KEY = "data.output"
+PROMPT_INPUT_KEY = "data.input"
+PROMPT_OUTPUT_KEY = "data.output"
+QUERY = "input"
 RESPONSE = "response"
-TAGS = "tags"
 SESSION_PROPERTIES_KEY = "session"
 INFRA_SERVICE_KEY = "infra_service_name"
+
 TYPE = "type"
 PROVIDER = "provider_name"
 EMBEDDING_MODEL = "embedding_model"
 VECTOR_STORE = 'vector_store'
-
+META_DATA = 'metadata'
 
 WORKFLOW_TYPE_MAP = {
     "llama_index": "workflow.llamaindex",
@@ -29,23 +35,18 @@ WORKFLOW_TYPE_MAP = {
     "haystack": "workflow.haystack"
 }
 
-framework_vector_store_mapping = {
-    'langchain_core.retrievers': lambda instance: {
-        'provider': instance.tags[0],
-        'embedding_model': instance.tags[1],
-        'type': VECTOR_STORE,
-    },
-    'llama_index.core.indices.base_retriever': lambda instance: {
-        'provider': type(instance._vector_store).__name__,
-        'embedding_model': instance._embed_model.model_name,
-        'type': VECTOR_STORE,
-    },
-    'haystack.components.retrievers': lambda instance: {
-        'provider': instance.__dict__.get("document_store").__class__.__name__,
-        'embedding_model': get_embedding_model(),
-        'type': VECTOR_STORE,
-    },
-}
+
+def get_embedding_model_haystack(instance):
+    try:
+        if hasattr(instance, 'get_component'):
+            text_embedder = instance.get_component('text_embedder')
+            if text_embedder and hasattr(text_embedder, 'model'):
+                # Set the embedding model attribute
+                return text_embedder.model
+    except:
+        pass
+
+    return None
 
 @with_tracer_wrapper
 def task_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
```
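Note: the new get_embedding_model_haystack helper reads the model name off the pipeline component registered as 'text_embedder'. A minimal sketch of the shape it expects, assuming Haystack 2.x; the embedder class and model name here are illustrative:

```python
# A minimal sketch, assuming Haystack 2.x; embedder and model name are illustrative.
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder

pipeline = Pipeline()
# The helper looks up the component named 'text_embedder' via
# pipeline.get_component('text_embedder') and reads its .model attribute.
pipeline.add_component(
    "text_embedder",
    SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
)

# get_embedding_model_haystack(pipeline) -> "sentence-transformers/all-MiniLM-L6-v2"
```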
```diff
@@ -60,38 +61,162 @@ def task_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
+
+    if 'haystack.core.pipeline.pipeline' in to_wrap['package']:
+        embedding_model = get_embedding_model_haystack(instance)
+        set_embedding_model(embedding_model)
+        inputs = set()
+        workflow_input = get_workflow_input(args, inputs)
+        set_attribute(DATA_INPUT_KEY, workflow_input)
+
+    if to_wrap.get('skip_span'):
+        return_value = wrapped(*args, **kwargs)
+        botocore_processor(tracer, to_wrap, wrapped, instance, args, kwargs, return_value)
+        return return_value
 
     with tracer.start_as_current_span(name) as span:
         pre_task_processing(to_wrap, instance, args, span)
         return_value = wrapped(*args, **kwargs)
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
         post_task_processing(to_wrap, span, return_value)
 
     return return_value
 
-def post_task_processing(to_wrap, span, return_value):
-    update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
+def botocore_processor(tracer, to_wrap, wrapped, instance, args, kwargs, return_value):
+    if kwargs.get("service_name") == "sagemaker-runtime":
+        return_value.invoke_endpoint = _instrumented_endpoint_invoke(to_wrap, return_value, return_value.invoke_endpoint, tracer)
 
-    if is_root_span(span):
-        workflow_name = span.resource.attributes.get("service.name")
-        span.set_attribute("workflow_name", workflow_name)
-        update_span_with_prompt_output(to_wrap=to_wrap, wrapped_args=return_value, span=span)
-        update_workflow_type(to_wrap, span)
+def _instrumented_endpoint_invoke(to_wrap, instance, fn, tracer):
+    @wraps(fn)
+    def with_instrumentation(*args, **kwargs):
 
-def pre_task_processing(to_wrap, instance, args, span):
+        with tracer.start_as_current_span("botocore-sagemaker-invoke-endpoint") as span:
+            response = fn(*args, **kwargs)
+            process_span(to_wrap, span, instance=instance, args=args, kwargs=kwargs, return_value=response)
+            return response
+
+    return with_instrumentation
+
+def get_workflow_input(args, inputs):
+    if args is not None and len(args) > 0:
+        for value in args[0].values():
+            for text in value.values():
+                inputs.add(text)
+
+    workflow_input: str = ""
+
+    if inputs is not None and len(inputs) > 0:
+        for input_str in inputs:
+            workflow_input = workflow_input + input_str
+    return workflow_input
+
+def process_span(to_wrap, span, instance, args, kwargs, return_value):
+    # Check if the output_processor is a valid JSON (in Python, that means it's a dictionary)
+    instance_args = {}
+    set_provider_name(instance, instance_args)
+    span_index = 0
     if is_root_span(span):
-        update_span_with_prompt_input(to_wrap=to_wrap, wrapped_args=args, span=span)
+        span_index += set_workflow_attributes(to_wrap, span, span_index+1)
+        span_index += set_app_hosting_identifier_attribute(span, span_index+1)
+    if 'output_processor' in to_wrap:
+        output_processor = to_wrap['output_processor']
+        if isinstance(output_processor, dict) and len(output_processor) > 0:
+            if 'type' in output_processor:
+                span.set_attribute("span.type", output_processor['type'])
+            else:
+                logger.warning("type of span not found or incorrect written in entity json")
+            if 'attributes' in output_processor:
+                for processors in output_processor["attributes"]:
+                    for processor in processors:
+                        attribute = processor.get('attribute')
+                        accessor = processor.get('accessor')
+
+                        if attribute and accessor:
+                            attribute_name = f"entity.{span_index+1}.{attribute}"
+                            try:
+                                arguments = {"instance": instance, "args": args, "kwargs": kwargs, "output": return_value}
+                                result = eval(accessor)(arguments)
+                                if result and isinstance(result, str):
+                                    span.set_attribute(attribute_name, result)
+                            except Exception as e:
+                                logger.error(f"Error processing accessor: {e}")
+                        else:
+                            logger.warning(f"{' and '.join([key for key in ['attribute', 'accessor'] if not processor.get(key)])} not found or incorrect in entity JSON")
+                    span_index += 1
+            else:
+                logger.warning("attributes not found or incorrect written in entity json")
+            if 'events' in output_processor:
+                events = output_processor['events']
+                arguments = {"instance": instance, "args": args, "kwargs": kwargs, "output": return_value}
+                accessor_mapping = {
+                    "arguments": arguments,
+                    "response": return_value
+                }
+                for event in events:
+                    event_name = event.get("name")
+                    event_attributes = {}
+                    attributes = event.get("attributes", [])
+                    for attribute in attributes:
+                        attribute_key = attribute.get("attribute")
+                        accessor = attribute.get("accessor")
+                        if accessor:
+                            try:
+                                accessor_function = eval(accessor)
+                                for keyword, value in accessor_mapping.items():
+                                    if keyword in accessor:
+                                        evaluated_val = accessor_function(value)
+                                        if isinstance(evaluated_val, list):
+                                            evaluated_val = [str(d) for d in evaluated_val]
+                                        event_attributes[attribute_key] = evaluated_val
+                            except Exception as e:
+                                logger.error(f"Error evaluating accessor for attribute '{attribute_key}': {e}")
+                    span.add_event(name=event_name, attributes=event_attributes)
 
-    update_span_with_infra_name(span, INFRA_SERVICE_KEY)
+        else:
+            logger.warning("empty or entities json is not in correct format")
+    if span_index > 0:
+        span.set_attribute("entity.count", span_index)
+
+def set_workflow_attributes(to_wrap, span: Span, span_index):
+    return_value = 1
+    workflow_name = get_workflow_name(span=span)
+    if workflow_name:
+        span.set_attribute("span.type", "workflow")
+        span.set_attribute(f"entity.{span_index}.name", workflow_name)
+    # workflow type
+    package_name = to_wrap.get('package')
+    workflow_type_set = False
+    for (package, workflow_type) in WORKFLOW_TYPE_MAP.items():
+        if (package_name is not None and package in package_name):
+            span.set_attribute(f"entity.{span_index}.type", workflow_type)
+            workflow_type_set = True
+    if not workflow_type_set:
+        span.set_attribute(f"entity.{span_index}.type", "workflow.generic")
+    return return_value
 
-    #capture the tags attribute of the instance if present, else ignore
+def post_task_processing(to_wrap, span, return_value):
     try:
-        update_tags(instance, span)
-        update_vectorstore_attributes(to_wrap, instance, span)
-    except AttributeError:
-        pass
-    update_span_with_context_input(to_wrap=to_wrap, wrapped_args=args, span=span)
+        update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
 
+        if is_root_span(span):
+            update_span_with_prompt_output(to_wrap=to_wrap, wrapped_args=return_value, span=span)
+    except:
+        logger.exception("exception in post_task_processing")
+
+
+def pre_task_processing(to_wrap, instance, args, span):
+    try:
+        if is_root_span(span):
+            try:
+                sdk_version = version("monocle_apptrace")
+                span.set_attribute("monocle_apptrace.version", sdk_version)
+            except:
+                logger.warning(f"Exception finding monocle-apptrace version.")
+            update_span_with_prompt_input(to_wrap=to_wrap, wrapped_args=args, span=span)
+        update_span_with_context_input(to_wrap=to_wrap, wrapped_args=args, span=span)
+    except:
+        logger.exception("exception in pre_task_processing")
 
 
 @with_tracer_wrapper
```
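Note: process_span is the engine behind the metamodel/maps/attributes/*.json files added in this release (see the file list above). Each accessor is a string that is eval()'d into a callable and handed the wrapped call's instance, args, kwargs, and output. A hedged sketch of the shape it consumes; the attribute names and lambda bodies below are illustrative, not copied from the shipped maps:

```python
# Illustrative output_processor; the real ones ship as metamodel/maps/attributes/*.json.
output_processor = {
    "type": "inference",  # becomes span.set_attribute("span.type", ...)
    "attributes": [
        [  # each inner list yields one entity.N.* attribute group on the span
            {
                "attribute": "name",
                # accessor strings are eval()'d, then called with
                # {"instance": ..., "args": ..., "kwargs": ..., "output": ...}
                "accessor": "lambda arguments: arguments['instance'].model_name",
            },
        ],
    ],
    "events": [
        {
            "name": "data.output",
            "attributes": [
                {
                    "attribute": "response",
                    # the substring 'response' routes the raw return value in
                    "accessor": "lambda response: str(response)",
                },
            ],
        },
    ],
}
# A to_wrap entry carrying this dict would get span.type, entity.N.* attributes,
# an entity.count attribute, and a data.output event.
```

Note that the accessor strings are executed with eval(), so these attribute maps are effectively code.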
```diff
@@ -107,14 +232,23 @@ async def atask_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
+    if 'haystack.core.pipeline.pipeline' in to_wrap['package']:
+        embedding_model = get_embedding_model_haystack(instance)
+        set_embedding_model(embedding_model)
+        inputs = set()
+        workflow_input = get_workflow_input(args, inputs)
+        set_attribute(DATA_INPUT_KEY, workflow_input)
+
     with tracer.start_as_current_span(name) as span:
         pre_task_processing(to_wrap, instance, args, span)
         return_value = await wrapped(*args, **kwargs)
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
         post_task_processing(to_wrap, span, return_value)
 
     return return_value
 
+
 @with_tracer_wrapper
 async def allm_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
     # Some Langchain objects are wrapped elsewhere, so we ignore them here
@@ -129,18 +263,19 @@ async def allm_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
     with tracer.start_as_current_span(name) as span:
-        update_llm_endpoint(curr_span= span, instance=instance)
-
+        provider_name, inference_endpoint = get_provider_name(instance)
         return_value = await wrapped(*args, **kwargs)
-        update_span_from_llm_response(response = return_value, span = span)
+        kwargs.update({"provider_name": provider_name, "inference_endpoint": inference_endpoint or getattr(instance, 'endpoint', None)})
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
+        update_span_from_llm_response(response=return_value, span=span, instance=instance)
 
     return return_value
 
+
 @with_tracer_wrapper
 def llm_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
-
     # Some Langchain objects are wrapped elsewhere, so we ignore them here
     if instance.__class__.__name__ in ("AgentExecutor"):
         return wrapped(*args, **kwargs)
```
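Note: both LLM wrappers now stash provider_name and inference_endpoint into kwargs before calling process_span, so an inference attribute map can read them back through an accessor. An illustrative entry in the style of the inference entity maps; the exact strings shipped in metamodel/maps/attributes/inference/*.json may differ:

```python
# Hypothetical map entry: kwargs carries the values injected by llm_wrapper/allm_wrapper.
{
    "attribute": "provider_name",
    "accessor": "lambda arguments: arguments['kwargs'].get('provider_name')",
}
```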
```diff
@@ -153,88 +288,157 @@ def llm_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
-    with tracer.start_as_current_span(name) as span:
-        if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            update_vectorstore_attributes(to_wrap, instance, span)
-        update_llm_endpoint(curr_span= span, instance=instance)
+        name = get_fully_qualified_class_name(instance)
 
+    with tracer.start_as_current_span(name) as span:
+        provider_name, inference_endpoint = get_provider_name(instance)
         return_value = wrapped(*args, **kwargs)
-        update_span_from_llm_response(response = return_value, span = span)
+        kwargs.update({"provider_name": provider_name, "inference_endpoint": inference_endpoint or getattr(instance, 'endpoint', None)})
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
+        update_span_from_llm_response(response=return_value, span=span, instance=instance)
 
     return return_value
 
+
 def update_llm_endpoint(curr_span: Span, instance):
+    # Lambda to set attributes if values are not None
+    __set_span_attribute_if_not_none = lambda span, **kwargs: [
+        span.set_attribute(k, v) for k, v in kwargs.items() if v is not None
+    ]
+
     triton_llm_endpoint = os.environ.get("TRITON_LLM_ENDPOINT")
     if triton_llm_endpoint is not None and len(triton_llm_endpoint) > 0:
         curr_span.set_attribute("server_url", triton_llm_endpoint)
     else:
-        if 'temperature' in instance.__dict__:
-            temp_val = instance.__dict__.get("temperature")
-            curr_span.set_attribute("temperature", temp_val)
-        # handling for model name
-        model_name = resolve_from_alias(instance.__dict__, ["model", "model_name"])
-        curr_span.set_attribute("model_name", model_name)
-        set_provider_name(curr_span, instance)
-        # handling AzureOpenAI deployment
-        deployment_name = resolve_from_alias(instance.__dict__, ["engine", "azure_deployment",
-                                             "deployment_name", "deployment_id", "deployment"])
-        curr_span.set_attribute("az_openai_deployment", deployment_name)
-        # handling the inference endpoint
-        inference_ep = resolve_from_alias(instance.__dict__, ["azure_endpoint", "api_base"])
-        curr_span.set_attribute("inference_endpoint", inference_ep)
-
-def set_provider_name(curr_span, instance):
+        # Get temperature if present
+        temp_val = instance.__dict__.get("temperature")
+
+        # Resolve values for model name, deployment, and inference endpoint
+        model_name = resolve_from_alias(instance.__dict__, ["model", "model_name"])
+        deployment_name = resolve_from_alias(instance.__dict__,
+                                             ["engine", "azure_deployment", "deployment_name", "deployment_id",
+                                              "deployment"])
+        inference_ep = resolve_from_alias(instance.__dict__, ["azure_endpoint", "api_base"])
+
+        # Use the lambda to set attributes conditionally
+        __set_span_attribute_if_not_none(
+            curr_span,
+            temperature=temp_val,
+            model_name=model_name,
+            az_openai_deployment=deployment_name,
+            inference_endpoint=inference_ep
+        )
+
+
+def get_provider_name(instance):
     provider_url = ""
+    inference_endpoint = ""
+    parsed_provider_url = ""
+    try:
+        base_url = getattr(instance.client._client, "base_url", None)
+        if base_url:
+            if isinstance(getattr(base_url, "host", None), str):
+                provider_url = base_url.host
+            inference_endpoint = base_url if isinstance(base_url, str) else str(base_url)
+    except:
+        pass
 
-    try:
-        if isinstance(instance.client._client.base_url.host, str):
-            provider_url = instance.client._client.base_url.host
+    try:
+        if isinstance(instance.client.meta.endpoint_url, str):
+            inference_endpoint = instance.client.meta.endpoint_url
     except:
         pass
 
-    try:
+    api_base = getattr(instance, "api_base", None)
+    if isinstance(api_base, str):
+        provider_url = api_base
+
+    # Handle inference endpoint for Mistral AI (llamaindex)
+    sdk_config = getattr(instance, "_client", None)
+    if sdk_config and hasattr(sdk_config, "sdk_configuration"):
+        inference_endpoint = getattr(sdk_config.sdk_configuration, "server_url", inference_endpoint)
+
+    if provider_url:
+        try:
+            parsed_provider_url = urlparse(provider_url)
+        except:
+            pass
+
+    return parsed_provider_url.hostname if parsed_provider_url else provider_url, inference_endpoint
+
+
+def set_provider_name(instance, instance_args: dict):
+    provider_url = ""
+    parsed_provider_url = ""
+    try:
+        if isinstance(instance.client._client.base_url.host, str):
+            provider_url = instance.client._client.base_url.host
+    except:
+        pass
+
+    try:
         if isinstance(instance.api_base, str):
             provider_url = instance.api_base
     except:
         pass
-
-    try:
+    try:
         if len(provider_url) > 0:
-            parsed_provider_url = urlparse(provider_url)
-            curr_span.set_attribute("provider_name", parsed_provider_url.hostname or provider_url)
+            parsed_provider_url = urlparse(provider_url).hostname
     except:
         pass
+    if parsed_provider_url or provider_url:
+        instance_args[PROVIDER] = parsed_provider_url or provider_url
+
 
 def is_root_span(curr_span: Span) -> bool:
     return curr_span.parent is None
 
+
 def get_input_from_args(chain_args):
     if len(chain_args) > 0 and isinstance(chain_args[0], str):
         return chain_args[0]
     return ""
 
-def update_span_from_llm_response(response, span: Span):
-    # extract token usage from langchain openai
-    if (response is not None and hasattr(response, "response_metadata")):
-        response_metadata = response.response_metadata
-        token_usage = response_metadata.get("token_usage")
+
+def update_span_from_llm_response(response, span: Span, instance):
+    if (response is not None and isinstance(response, dict) and "meta" in response) or (
+            response is not None and hasattr(response, "response_metadata")):
+        token_usage = None
+        if (response is not None and isinstance(response, dict) and "meta" in response):  # haystack
+            token_usage = response["meta"][0]["usage"]
+
+        if (response is not None and hasattr(response, "response_metadata")):
+            if hasattr(response, "usage_metadata") and response.usage_metadata is not None:
+                token_usage = response.usage_metadata
+            else:
+                response_metadata = response.response_metadata
+                token_usage = response_metadata.get("token_usage")
+
+        meta_dict = {}
         if token_usage is not None:
-            span.set_attribute("completion_tokens", token_usage.get("completion_tokens"))
-            span.set_attribute("prompt_tokens", token_usage.get("prompt_tokens"))
-            span.set_attribute("total_tokens", token_usage.get("total_tokens"))
+            temperature = instance.__dict__.get("temperature", None)
+            meta_dict.update({"temperature": temperature})
+            meta_dict.update({"completion_tokens": token_usage.get("completion_tokens") or token_usage.get("output_tokens")})
+            meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens") or token_usage.get("input_tokens")})
+            meta_dict.update({"total_tokens": token_usage.get("total_tokens")})
+            span.add_event(META_DATA, meta_dict)
     # extract token usage from llamaindex openai
-    if(response is not None and hasattr(response, "raw")):
+    if (response is not None and hasattr(response, "raw")):
         try:
+            meta_dict = {}
             if response.raw is not None:
-                token_usage = response.raw.get("usage") if isinstance(response.raw, dict) else getattr(response.raw, "usage", None)
+                token_usage = response.raw.get("usage") if isinstance(response.raw, dict) else getattr(response.raw,
+                                                                                                       "usage", None)
                 if token_usage is not None:
+                    temperature = instance.__dict__.get("temperature", None)
+                    meta_dict.update({"temperature": temperature})
                     if getattr(token_usage, "completion_tokens", None):
-                        span.set_attribute("completion_tokens", getattr(token_usage, "completion_tokens"))
+                        meta_dict.update({"completion_tokens": getattr(token_usage, "completion_tokens")})
                     if getattr(token_usage, "prompt_tokens", None):
-                        span.set_attribute("prompt_tokens", getattr(token_usage, "prompt_tokens"))
+                        meta_dict.update({"prompt_tokens": getattr(token_usage, "prompt_tokens")})
                     if getattr(token_usage, "total_tokens", None):
-                        span.set_attribute("total_tokens", getattr(token_usage, "total_tokens"))
+                        meta_dict.update({"total_tokens": getattr(token_usage, "total_tokens")})
+                    span.add_event(META_DATA, meta_dict)
         except AttributeError:
            token_usage = None
```
```diff
@@ -243,69 +447,65 @@ def update_workflow_type(to_wrap, span: Span):
     package_name = to_wrap.get('package')
 
     for (package, workflow_type) in WORKFLOW_TYPE_MAP.items():
-        if(package_name is not None and package in package_name):
+        if (package_name is not None and package in package_name):
             span.set_attribute(WORKFLOW_TYPE_KEY, workflow_type)
 
-def update_span_with_context_input(to_wrap, wrapped_args, span: Span):
-    package_name: str = to_wrap.get('package')
-    if "langchain_core.retrievers" in package_name:
-        input_arg_text = wrapped_args[0]
-        span.add_event(CONTEXT_INPUT_KEY, {QUERY: input_arg_text})
-    if "llama_index.core.indices.base_retriever" in package_name:
-        input_arg_text = wrapped_args[0].query_str
-        span.add_event(CONTEXT_INPUT_KEY, {QUERY: input_arg_text})
 
-def update_span_with_context_output(to_wrap, return_value, span: Span):
+def update_span_with_context_input(to_wrap, wrapped_args, span: Span):
     package_name: str = to_wrap.get('package')
-    if "llama_index.core.indices.base_retriever" in package_name:
-        output_arg_text = return_value[0].text
-        span.add_event(CONTEXT_OUTPUT_KEY, {RESPONSE: output_arg_text})
-
-def update_span_with_prompt_input(to_wrap, wrapped_args, span: Span):
+    input_arg_text = ""
+    if "langchain_core.retrievers" in package_name and len(wrapped_args) > 0:
+        input_arg_text += wrapped_args[0]
+    if "llama_index.core.indices.base_retriever" in package_name and len(wrapped_args) > 0:
+        input_arg_text += wrapped_args[0].query_str
+    if "haystack.components.retrievers.in_memory" in package_name:
+        input_arg_text += get_attribute(DATA_INPUT_KEY)
+    if input_arg_text:
+        span.add_event(DATA_INPUT_KEY, {QUERY: input_arg_text})
+
+
+def update_span_with_context_output(to_wrap, return_value, span: Span):
+    package_name: str = to_wrap.get('package')
+    output_arg_text = ""
+    if "langchain_core.retrievers" in package_name:
+        output_arg_text += " ".join([doc.page_content for doc in return_value if hasattr(doc, 'page_content')])
+        if len(output_arg_text) > 100:
+            output_arg_text = output_arg_text[:100] + "..."
+    if "llama_index.core.indices.base_retriever" in package_name and len(return_value) > 0:
+        output_arg_text += return_value[0].text
+    if "haystack.components.retrievers.in_memory" in package_name:
+        output_arg_text += " ".join([doc.content for doc in return_value['documents']])
+        if len(output_arg_text) > 100:
+            output_arg_text = output_arg_text[:100] + "..."
+    if output_arg_text:
+        span.add_event(DATA_OUTPUT_KEY, {RESPONSE: output_arg_text})
+
+
+def update_span_with_prompt_input(to_wrap, wrapped_args, span: Span):
     input_arg_text = wrapped_args[0]
 
-    if isinstance(input_arg_text, dict):
-        span.add_event(PROMPT_INPUT_KEY, input_arg_text)
+    prompt_inputs = get_nested_value(input_arg_text, ['prompt_builder', 'question'])
+    if prompt_inputs is not None:  # haystack
+        span.add_event(PROMPT_INPUT_KEY, {QUERY: prompt_inputs})
+    elif isinstance(input_arg_text, dict):
+        span.add_event(PROMPT_INPUT_KEY, {QUERY: input_arg_text['input']})
     else:
-        span.add_event(PROMPT_INPUT_KEY, {QUERY: input_arg_text})
+        span.add_event(PROMPT_INPUT_KEY, {QUERY: input_arg_text})
 
-def update_span_with_prompt_output(to_wrap, wrapped_args, span: Span):
-    package_name: str = to_wrap.get('package')
-    if isinstance(wrapped_args, str):
-        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args})
-    if "llama_index.core.base.base_query_engine" in package_name:
-        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args.response})
 
-def update_tags(instance, span):
-    try:
-        # copy tags as is from langchain
-        span.set_attribute(TAGS, getattr(instance, TAGS))
-    except:
-        pass
-    try:
-        # extract embed model and vector store names for llamaindex
-        model_name = instance.retriever._embed_model.model_name
-        vector_store_name = type(instance.retriever._vector_store).__name__
-        span.set_attribute(TAGS, [model_name, vector_store_name])
-    except:
-        pass
-
-
-def update_vectorstore_attributes(to_wrap, instance, span):
-    """
-    Updates the telemetry span attributes for vector store retrieval tasks.
-    """
-    try:
-        package = to_wrap.get('package')
-        if package in framework_vector_store_mapping:
-            attributes = framework_vector_store_mapping[package](instance)
-            span._attributes.update({
-                TYPE: attributes['type'],
-                PROVIDER: attributes['provider'],
-                EMBEDDING_MODEL: attributes['embedding_model']
-            })
-        else:
-            logger.warning(f"Package '{package}' not recognized for vector store telemetry.")
+def update_span_with_prompt_output(to_wrap, wrapped_args, span: Span):
+    package_name: str = to_wrap.get('package')
 
-    except Exception as e:
-        logger.error(f"Error updating span attributes: {e}")
+    if "llama_index.core.base.base_query_engine" in package_name:
+        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args.response})
+    elif "haystack.core.pipeline.pipeline" in package_name:
+        resp = get_nested_value(wrapped_args, ['llm', 'replies'])
+        if resp is not None:
+            if isinstance(resp, list) and hasattr(resp[0], 'content'):
+                span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: resp[0].content})
+            else:
+                span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: resp[0]})
+    elif isinstance(wrapped_args, str):
+        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args})
+    elif isinstance(wrapped_args, dict):
+        span.add_event(PROMPT_OUTPUT_KEY, wrapped_args)
```
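Note: the Haystack branches above depend on get_nested_value from monocle_apptrace.utils, which is not shown in this diff. The call sites assume a safe walk of nested dicts by key path; a sketch of that assumed behavior, not the library's actual implementation:

```python
# Assumed behavior of monocle_apptrace.utils.get_nested_value; illustrative only.
def get_nested_value(data, keys):
    for key in keys:
        if not isinstance(data, dict) or key not in data:
            return None
        data = data[key]
    return data

# e.g. a Haystack pipeline result shaped like {"llm": {"replies": ["final answer"]}}:
print(get_nested_value({"llm": {"replies": ["final answer"]}}, ["llm", "replies"]))
# ['final answer']
```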
monocle_apptrace/wrapper.py:

```diff
@@ -1,8 +1,8 @@
 
-
 from monocle_apptrace.haystack import HAYSTACK_METHODS
 from monocle_apptrace.langchain import LANGCHAIN_METHODS
 from monocle_apptrace.llamaindex import LLAMAINDEX_METHODS
+from monocle_apptrace.botocore import BOTOCORE_METHODS
 from monocle_apptrace.wrap_common import task_wrapper
 
 # pylint: disable=too-few-public-methods
@@ -13,12 +13,15 @@ class WrapperMethod:
             object_name: str,
             method: str,
             span_name: str = None,
+            output_processor: list[str] = None,
             wrapper = task_wrapper
             ):
         self.package = package
         self.object = object_name
         self.method = method
         self.span_name = span_name
+        self.output_processor = output_processor
+
         self.wrapper = wrapper
 
-INBUILT_METHODS_LIST = LANGCHAIN_METHODS + LLAMAINDEX_METHODS + HAYSTACK_METHODS
+INBUILT_METHODS_LIST = LANGCHAIN_METHODS + LLAMAINDEX_METHODS + HAYSTACK_METHODS + BOTOCORE_METHODS
```
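Note: with the new output_processor parameter, a user-supplied WrapperMethod can carry its own attribute map. A hedged sketch, assuming the setup_monocle_telemetry entry point from monocle_apptrace.instrumentor; the target package, class, and method are hypothetical. (The parameter is annotated list[str] in the diff, but process_span consumes the dict shape shown earlier; annotations are not enforced at runtime.)

```python
from monocle_apptrace.instrumentor import setup_monocle_telemetry
from monocle_apptrace.wrapper import WrapperMethod

setup_monocle_telemetry(
    workflow_name="my_app",
    wrapper_methods=[
        WrapperMethod(
            package="my_llm_client",       # hypothetical module
            object_name="MyLLMClient",     # hypothetical class
            method="invoke",               # hypothetical method
            span_name="my_llm_client.invoke",
            output_processor={             # same shape process_span consumes
                "type": "inference",
                "attributes": [[{
                    "attribute": "name",
                    "accessor": "lambda arguments: arguments['instance'].model",
                }]],
            },
        ),
    ],
)
```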