monocle-apptrace 0.2.0__py3-none-any.whl → 0.3.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of monocle-apptrace might be problematic.

Files changed (27)
  1. monocle_apptrace/botocore/__init__.py +9 -0
  2. monocle_apptrace/constants.py +18 -4
  3. monocle_apptrace/exporters/aws/s3_exporter.py +16 -16
  4. monocle_apptrace/exporters/azure/blob_exporter.py +2 -5
  5. monocle_apptrace/exporters/base_exporter.py +19 -18
  6. monocle_apptrace/instrumentor.py +3 -3
  7. monocle_apptrace/message_processing.py +80 -0
  8. monocle_apptrace/metamodel/maps/attributes/inference/botocore_entities.json +27 -0
  9. monocle_apptrace/metamodel/maps/attributes/inference/haystack_entities.json +57 -0
  10. monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json +28 -6
  11. monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json +28 -6
  12. monocle_apptrace/metamodel/maps/attributes/retrieval/haystack_entities.json +31 -0
  13. monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json +8 -4
  14. monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json +8 -4
  15. monocle_apptrace/metamodel/maps/botocore_methods.json +13 -0
  16. monocle_apptrace/metamodel/maps/haystack_methods.json +26 -6
  17. monocle_apptrace/metamodel/maps/llamaindex_methods.json +18 -0
  18. monocle_apptrace/utils.py +92 -12
  19. monocle_apptrace/wrap_common.py +208 -114
  20. monocle_apptrace/wrapper.py +2 -1
  21. {monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/METADATA +5 -2
  22. {monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/RECORD +25 -21
  23. {monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/WHEEL +1 -1
  24. monocle_apptrace/haystack/wrap_node.py +0 -27
  25. monocle_apptrace/haystack/wrap_openai.py +0 -44
  26. {monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/licenses/LICENSE +0 -0
  27. {monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/licenses/NOTICE +0 -0
monocle_apptrace/metamodel/maps/llamaindex_methods.json CHANGED
@@ -69,6 +69,24 @@
       "wrapper_package": "wrap_common",
       "wrapper_method": "allm_wrapper",
       "output_processor": ["metamodel/maps/attributes/inference/llamaindex_entities.json"]
+    },
+    {
+      "package": "llama_index.llms.mistralai.base",
+      "object": "MistralAI",
+      "method": "chat",
+      "span_name": "llamaindex.mistralai",
+      "wrapper_package": "wrap_common",
+      "wrapper_method": "llm_wrapper",
+      "output_processor": ["metamodel/maps/attributes/inference/llamaindex_entities.json"]
+    },
+    {
+      "package": "llama_index.llms.mistralai.base",
+      "object": "MistralAI",
+      "method": "achat",
+      "span_name": "llamaindex.mistralai",
+      "wrapper_package": "wrap_common",
+      "wrapper_method": "allm_wrapper",
+      "output_processor": ["metamodel/maps/attributes/inference/llamaindex_entities.json"]
     }
   ]
 }
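Note: each entry in this map is resolved to its wrapper at instrumentation time by importing wrapper_package under the monocle_apptrace namespace and looking up wrapper_method (see get_wrapper_method in utils.py below). A minimal sketch of that resolution, assuming an entry dict shaped like the ones above:

    from importlib import import_module

    def resolve_wrapper(entry):
        # "wrap_common" -> monocle_apptrace.wrap_common; "llm_wrapper" -> attribute lookup
        module = import_module("monocle_apptrace." + entry["wrapper_package"])
        return getattr(module, entry["wrapper_method"])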
monocle_apptrace/utils.py CHANGED
@@ -2,10 +2,13 @@ import logging
 import json
 from importlib import import_module
 import os
-from opentelemetry.trace import Span
+from opentelemetry.trace import NonRecordingSpan,Span
+from opentelemetry.trace.propagation import _SPAN_KEY
+from opentelemetry.context import (attach, detach,get_current)
 from opentelemetry.context import attach, set_value, get_value
-from monocle_apptrace.constants import azure_service_map, aws_service_map
+from monocle_apptrace.constants import service_name_map, service_type_map
 from json.decoder import JSONDecodeError
+
 logger = logging.getLogger(__name__)
 
 embedding_model_context = {}
@@ -39,8 +42,25 @@ def with_tracer_wrapper(func):
 
     def _with_tracer(tracer, to_wrap):
         def wrapper(wrapped, instance, args, kwargs):
-            return func(tracer, to_wrap, wrapped, instance, args, kwargs)
-
+            token = None
+            try:
+                _parent_span_context = get_current()
+                if _parent_span_context is not None and _parent_span_context.get(_SPAN_KEY, None):
+                    parent_span: Span = _parent_span_context.get(_SPAN_KEY, None)
+                    is_invalid_span = isinstance(parent_span, NonRecordingSpan)
+                    if is_invalid_span:
+                        token = attach(context={})
+            except Exception as e:
+                logger.error("Exception in attaching parent context: %s", e)
+
+            val = func(tracer, to_wrap, wrapped, instance, args, kwargs)
+            # Detach the token if it was set
+            if token:
+                try:
+                    detach(token=token)
+                except Exception as e:
+                    logger.error("Exception in detaching parent context: %s", e)
+            return val
         return wrapper
 
     return _with_tracer
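The attach/detach added above guards against a non-recording parent span (for example a sampled-out remote context): attaching a fresh empty context lets the wrapped call start its own local trace, and the token restores the previous context afterwards. A standalone sketch of that OpenTelemetry pattern, independent of Monocle:

    from opentelemetry.context import Context, attach, detach

    token = attach(Context())  # spans started now no longer see the old parent
    try:
        ...                    # run the wrapped call here
    finally:
        detach(token)          # restore the previous context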
@@ -118,13 +138,16 @@ def get_wrapper_method(package_name: str, method_name: str):
     wrapper_module = import_module("monocle_apptrace." + package_name)
     return getattr(wrapper_module, method_name)
 
-def update_span_with_infra_name(span: Span, span_key: str):
-    for key, val in azure_service_map.items():
-        if key in os.environ:
-            span.set_attribute(span_key, val)
-    for key, val in aws_service_map.items():
-        if key in os.environ:
-            span.set_attribute(span_key, val)
+def set_app_hosting_identifier_attribute(span, span_index):
+    return_value = 0
+    # Search env to indentify the infra service type, if found check env for service name if possible
+    for type_env, type_name in service_type_map.items():
+        if type_env in os.environ:
+            return_value = 1
+            span.set_attribute(f"entity.{span_index}.type", f"app_hosting.{type_name}")
+            entity_name_env = service_name_map.get(type_name, "unknown")
+            span.set_attribute(f"entity.{span_index}.name", os.environ.get(entity_name_env, "generic"))
+    return return_value
 
 def set_embedding_model(model_name: str):
     """
@@ -164,9 +187,66 @@ def get_attribute(key: str) -> str:
     """
     return get_value(key)
 
+def flatten_dict(d, parent_key='', sep='_'):
+    items = []
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_dict(v, new_key, sep=sep).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+def get_fully_qualified_class_name(instance):
+    if instance is None:
+        return None
+    module_name = instance.__class__.__module__
+    qualname = instance.__class__.__qualname__
+    return f"{module_name}.{qualname}"
+
+# returns json path like key probe in a dictionary
+def get_nested_value(data, keys):
+    for key in keys:
+        if isinstance(data, dict) and key in data:
+            data = data[key]
+        elif hasattr(data, key):
+            data = getattr(data, key)
+        else:
+            return None
+    return data
+
 def get_workflow_name(span: Span) -> str:
     try:
         return get_value("workflow_name") or span.resource.attributes.get("service.name")
     except Exception as e:
         logger.exception(f"Error getting workflow name: {e}")
-        return None
+        return None
+
+def get_vectorstore_deployment(my_map):
+    if isinstance(my_map, dict):
+        if '_client_settings' in my_map:
+            client = my_map['_client_settings'].__dict__
+            host, port = get_keys_as_tuple(client, 'host', 'port')
+            if host:
+                return f"{host}:{port}" if port else host
+        keys_to_check = ['client', '_client']
+        host = get_host_from_map(my_map, keys_to_check)
+        if host:
+            return host
+    else:
+        if hasattr(my_map, 'client') and '_endpoint' in my_map.client.__dict__:
+            return my_map.client.__dict__['_endpoint']
+        host, port = get_keys_as_tuple(my_map.__dict__, 'host', 'port')
+        if host:
+            return f"{host}:{port}" if port else host
+    return None
+
+def get_keys_as_tuple(dictionary, *keys):
+    return tuple(next((value for key, value in dictionary.items() if key.endswith(k) and value is not None), None) for k in keys)
+
+def get_host_from_map(my_map, keys_to_check):
+    for key in keys_to_check:
+        seed_connections = get_nested_value(my_map, [key, 'transport', 'seed_connections'])
+        if seed_connections and 'host' in seed_connections[0].__dict__:
+            return seed_connections[0].__dict__['host']
+    return None
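A quick usage sketch of the new dict helpers above (sample data invented for illustration):

    d = {"llm": {"replies": ["hi"]}, "meta": {"usage": {"total_tokens": 7}}}

    flatten_dict(d)
    # {'llm_replies': ['hi'], 'meta_usage_total_tokens': 7}

    get_nested_value(d, ["meta", "usage", "total_tokens"])   # 7
    get_nested_value(d, ["llm", "missing"])                  # None (no KeyError)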
monocle_apptrace/wrap_common.py CHANGED
@@ -2,18 +2,23 @@
 import logging
 import os
 import inspect
+from importlib.metadata import version
 from urllib.parse import urlparse
-from opentelemetry.trace import Span, Tracer
-from monocle_apptrace.utils import resolve_from_alias, update_span_with_infra_name, with_tracer_wrapper, get_embedding_model, get_attribute, get_workflow_name
-from monocle_apptrace.utils import set_attribute
-from opentelemetry.context import get_value, attach, set_value
+from opentelemetry.trace import Tracer
+from opentelemetry.sdk.trace import Span
+from monocle_apptrace.utils import resolve_from_alias, with_tracer_wrapper, get_embedding_model, get_attribute, get_workflow_name, set_embedding_model, set_app_hosting_identifier_attribute
+from monocle_apptrace.utils import set_attribute, get_vectorstore_deployment
+from monocle_apptrace.utils import get_fully_qualified_class_name, get_nested_value
+from monocle_apptrace.message_processing import extract_messages, extract_assistant_message
+from functools import wraps
+
 logger = logging.getLogger(__name__)
 WORKFLOW_TYPE_KEY = "workflow_type"
 DATA_INPUT_KEY = "data.input"
 DATA_OUTPUT_KEY = "data.output"
 PROMPT_INPUT_KEY = "data.input"
 PROMPT_OUTPUT_KEY = "data.output"
-QUERY = "question"
+QUERY = "input"
 RESPONSE = "response"
 SESSION_PROPERTIES_KEY = "session"
 INFRA_SERVICE_KEY = "infra_service_name"
@@ -30,52 +35,18 @@ WORKFLOW_TYPE_MAP = {
     "haystack": "workflow.haystack"
 }
 
-def get_embedding_model_for_vectorstore(instance):
-    # Handle Langchain or other frameworks where vectorstore exists
-    if hasattr(instance, 'vectorstore'):
-        vectorstore_dict = instance.vectorstore.__dict__
-
-        # Use inspect to check if the embedding function is from Sagemaker
-        if 'embedding_func' in vectorstore_dict:
-            embedding_func = vectorstore_dict['embedding_func']
-            class_name = embedding_func.__class__.__name__
-            file_location = inspect.getfile(embedding_func.__class__)
-
-            # Check if the class is SagemakerEndpointEmbeddings
-            if class_name == 'SagemakerEndpointEmbeddings' and 'langchain_community' in file_location:
-                # Set embedding_model as endpoint_name if it's Sagemaker
-                if hasattr(embedding_func, 'endpoint_name'):
-                    return embedding_func.endpoint_name
-
-        # Default to the regular embedding model if not Sagemaker
-        return instance.vectorstore.embeddings.model
-
-    # Handle llama_index where _embed_model is present
-    if hasattr(instance, '_embed_model') and hasattr(instance._embed_model, 'model_name'):
-        return instance._embed_model.model_name
-
-    # Fallback if no specific model is found
-    return "Unknown Embedding Model"
-
-
-framework_vector_store_mapping = {
-    'langchain_core.retrievers': lambda instance: {
-        'provider': type(instance.vectorstore).__name__,
-        'embedding_model': get_embedding_model_for_vectorstore(instance),
-        'type': VECTOR_STORE,
-    },
-    'llama_index.core.indices.base_retriever': lambda instance: {
-        'provider': type(instance._vector_store).__name__,
-        'embedding_model': get_embedding_model_for_vectorstore(instance),
-        'type': VECTOR_STORE,
-    },
-    'haystack.components.retrievers.in_memory': lambda instance: {
-        'provider': instance.__dict__.get("document_store").__class__.__name__,
-        'embedding_model': get_embedding_model(),
-        'type': VECTOR_STORE,
-    },
-}
 
+def get_embedding_model_haystack(instance):
+    try:
+        if hasattr(instance, 'get_component'):
+            text_embedder = instance.get_component('text_embedder')
+            if text_embedder and hasattr(text_embedder, 'model'):
+                # Set the embedding model attribute
+                return text_embedder.model
+    except:
+        pass
+
+    return None
 
 @with_tracer_wrapper
 def task_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
@@ -90,30 +61,64 @@ def task_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
+
+    if 'haystack.core.pipeline.pipeline' in to_wrap['package']:
+        embedding_model = get_embedding_model_haystack(instance)
+        set_embedding_model(embedding_model)
+        inputs = set()
+        workflow_input = get_workflow_input(args, inputs)
+        set_attribute(DATA_INPUT_KEY, workflow_input)
+
+    if to_wrap.get('skip_span'):
+        return_value = wrapped(*args, **kwargs)
+        botocore_processor(tracer, to_wrap, wrapped, instance, args, kwargs, return_value)
+        return return_value
 
     with tracer.start_as_current_span(name) as span:
-        process_span(to_wrap, span, instance, args)
         pre_task_processing(to_wrap, instance, args, span)
         return_value = wrapped(*args, **kwargs)
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
        post_task_processing(to_wrap, span, return_value)
 
     return return_value
 
+def botocore_processor(tracer, to_wrap, wrapped, instance, args, kwargs,return_value):
+    if kwargs.get("service_name") == "sagemaker-runtime":
+        return_value.invoke_endpoint = _instrumented_endpoint_invoke(to_wrap,return_value,return_value.invoke_endpoint,tracer)
+
+def _instrumented_endpoint_invoke(to_wrap, instance, fn, tracer):
+    @wraps(fn)
+    def with_instrumentation(*args, **kwargs):
+
+        with tracer.start_as_current_span("botocore-sagemaker-invoke-endpoint") as span:
+            response = fn(*args, **kwargs)
+            process_span(to_wrap, span, instance=instance,args=args, kwargs=kwargs, return_value=response)
+            return response
+
+    return with_instrumentation
+
+def get_workflow_input(args, inputs):
+    if args is not None and len(args) > 0:
+        for value in args[0].values():
+            for text in value.values():
+                inputs.add(text)
+
+    workflow_input: str = ""
+
+    if inputs is not None and len(inputs) > 0:
+        for input_str in inputs:
+            workflow_input = workflow_input + input_str
+    return workflow_input
 
-def process_span(to_wrap, span, instance, args):
+def process_span(to_wrap, span, instance, args, kwargs, return_value):
     # Check if the output_processor is a valid JSON (in Python, that means it's a dictionary)
-    span_index = 1
+    instance_args = {}
+    set_provider_name(instance, instance_args)
+    span_index = 0
     if is_root_span(span):
-        workflow_name = get_workflow_name(span)
-        if workflow_name:
-            span.set_attribute(f"entity.{span_index}.name", workflow_name)
-        # workflow type
-        package_name = to_wrap.get('package')
-        for (package, workflow_type) in WORKFLOW_TYPE_MAP.items():
-            if (package_name is not None and package in package_name):
-                span.set_attribute(f"entity.{span_index}.type", workflow_type)
-        span_index += 1
+        span_index += set_workflow_attributes(to_wrap, span, span_index+1)
+        span_index += set_app_hosting_identifier_attribute(span, span_index+1)
     if 'output_processor' in to_wrap:
         output_processor=to_wrap['output_processor']
         if isinstance(output_processor, dict) and len(output_processor) > 0:
@@ -121,20 +126,17 @@ def process_span(to_wrap, span, instance, args):
             span.set_attribute("span.type", output_processor['type'])
         else:
             logger.warning("type of span not found or incorrect written in entity json")
-        count = 0
         if 'attributes' in output_processor:
-            count = len(output_processor["attributes"])
-            span.set_attribute("entity.count", count)
-            span_index = 1
             for processors in output_processor["attributes"]:
                 for processor in processors:
                     attribute = processor.get('attribute')
                    accessor = processor.get('accessor')
 
                     if attribute and accessor:
-                        attribute_name = f"entity.{span_index}.{attribute}"
+                        attribute_name = f"entity.{span_index+1}.{attribute}"
                         try:
-                            result = eval(accessor)(instance, args)
+                            arguments = {"instance":instance, "args":args, "kwargs":kwargs, "output":return_value}
+                            result = eval(accessor)(arguments)
                             if result and isinstance(result, str):
                                 span.set_attribute(attribute_name, result)
                         except Exception as e:
@@ -144,11 +146,54 @@
                 span_index += 1
         else:
             logger.warning("attributes not found or incorrect written in entity json")
-        span.set_attribute("span.count", count)
+        if 'events' in output_processor:
+            events = output_processor['events']
+            arguments = {"instance": instance, "args": args, "kwargs": kwargs, "output": return_value}
+            accessor_mapping = {
+                "arguments": arguments,
+                "response": return_value
+            }
+            for event in events:
+                event_name = event.get("name")
+                event_attributes = {}
+                attributes = event.get("attributes", [])
+                for attribute in attributes:
+                    attribute_key = attribute.get("attribute")
+                    accessor = attribute.get("accessor")
+                    if accessor:
+                        try:
+                            accessor_function = eval(accessor)
+                            for keyword, value in accessor_mapping.items():
+                                if keyword in accessor:
+                                    evaluated_val = accessor_function(value)
+                                    if isinstance(evaluated_val, list):
+                                        evaluated_val = [str(d) for d in evaluated_val]
+                                    event_attributes[attribute_key] = evaluated_val
+                        except Exception as e:
+                            logger.error(f"Error evaluating accessor for attribute '{attribute_key}': {e}")
+                span.add_event(name=event_name, attributes=event_attributes)
 
     else:
         logger.warning("empty or entities json is not in correct format")
-
+    if span_index > 0:
+        span.set_attribute("entity.count", span_index)
+
+def set_workflow_attributes(to_wrap, span: Span, span_index):
+    return_value = 1
+    workflow_name = get_workflow_name(span=span)
+    if workflow_name:
+        span.set_attribute("span.type", "workflow")
+        span.set_attribute(f"entity.{span_index}.name", workflow_name)
+    # workflow type
+    package_name = to_wrap.get('package')
+    workflow_type_set = False
+    for (package, workflow_type) in WORKFLOW_TYPE_MAP.items():
+        if (package_name is not None and package in package_name):
+            span.set_attribute(f"entity.{span_index}.type", workflow_type)
+            workflow_type_set = True
+    if not workflow_type_set:
+        span.set_attribute(f"entity.{span_index}.type", "workflow.generic")
+    return return_value
 
 def post_task_processing(to_wrap, span, return_value):
     try:
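Both the attribute and event paths above eval accessor strings from the entity JSON maps into callables and apply them to a single arguments dict. A sketch of that contract; the accessor string here is illustrative, not copied from the shipped JSON files:

    # Illustrative output_processor fragment (not from the shipped maps):
    processor = {
        "attribute": "inference_endpoint",
        "accessor": "lambda arguments: arguments['kwargs'].get('inference_endpoint')",
    }
    arguments = {"instance": None, "args": (), "kwargs": {"inference_endpoint": "https://api.example.com"}, "output": None}
    result = eval(processor["accessor"])(arguments)  # "https://api.example.com"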
@@ -163,9 +208,12 @@ def post_task_processing(to_wrap, span, return_value):
 def pre_task_processing(to_wrap, instance, args, span):
     try:
         if is_root_span(span):
+            try:
+                sdk_version = version("monocle_apptrace")
+                span.set_attribute("monocle_apptrace.version", sdk_version)
+            except:
+                logger.warning(f"Exception finding monocle-apptrace version.")
             update_span_with_prompt_input(to_wrap=to_wrap, wrapped_args=args, span=span)
-            update_span_with_infra_name(span, INFRA_SERVICE_KEY)
-
             update_span_with_context_input(to_wrap=to_wrap, wrapped_args=args, span=span)
     except:
         logger.exception("exception in pre_task_processing")
@@ -184,11 +232,18 @@ async def atask_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
+    if 'haystack.core.pipeline.pipeline' in to_wrap['package']:
+        embedding_model = get_embedding_model_haystack(instance)
+        set_embedding_model(embedding_model)
+        inputs = set()
+        workflow_input = get_workflow_input(args, inputs)
+        set_attribute(DATA_INPUT_KEY, workflow_input)
+
     with tracer.start_as_current_span(name) as span:
-        process_span(to_wrap, span, instance, args)
         pre_task_processing(to_wrap, instance, args, span)
         return_value = await wrapped(*args, **kwargs)
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
         post_task_processing(to_wrap, span, return_value)
 
     return return_value
@@ -208,19 +263,12 @@ async def allm_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
     with tracer.start_as_current_span(name) as span:
-        if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            input_arg_text = get_attribute(DATA_INPUT_KEY)
-            span.add_event(DATA_INPUT_KEY, {QUERY: input_arg_text})
         provider_name, inference_endpoint = get_provider_name(instance)
-        instance_args = {"provider_name": provider_name, "inference_endpoint": inference_endpoint}
-
-        process_span(to_wrap, span, instance, instance_args)
-
         return_value = await wrapped(*args, **kwargs)
-        if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
+        kwargs.update({"provider_name": provider_name, "inference_endpoint": inference_endpoint or getattr(instance, 'endpoint', None)})
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
         update_span_from_llm_response(response=return_value, span=span, instance=instance)
 
     return return_value
@@ -240,20 +288,13 @@ def llm_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
     elif to_wrap.get("span_name"):
         name = to_wrap.get("span_name")
     else:
-        name = f"langchain.task.{instance.__class__.__name__}"
+        name = get_fully_qualified_class_name(instance)
 
     with tracer.start_as_current_span(name) as span:
-        if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            input_arg_text = get_attribute(DATA_INPUT_KEY)
-            span.add_event(DATA_INPUT_KEY, {QUERY: input_arg_text})
         provider_name, inference_endpoint = get_provider_name(instance)
-        instance_args = {"provider_name": provider_name, "inference_endpoint": inference_endpoint}
-
-        process_span(to_wrap, span, instance, instance_args)
-
         return_value = wrapped(*args, **kwargs)
-        if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
+        kwargs.update({"provider_name": provider_name, "inference_endpoint": inference_endpoint or getattr(instance, 'endpoint', None)})
+        process_span(to_wrap, span, instance, args, kwargs, return_value)
         update_span_from_llm_response(response=return_value, span=span, instance=instance)
 
     return return_value
@@ -292,13 +333,46 @@ def update_llm_endpoint(curr_span: Span, instance):
 def get_provider_name(instance):
     provider_url = ""
     inference_endpoint = ""
+    parsed_provider_url = ""
+    try:
+        base_url = getattr(instance.client._client, "base_url", None)
+        if base_url:
+            if isinstance(getattr(base_url, "host", None), str):
+                provider_url = base_url.host
+            inference_endpoint = base_url if isinstance(base_url, str) else str(base_url)
+    except:
+        pass
+
+    try:
+        if isinstance(instance.client.meta.endpoint_url, str):
+            inference_endpoint = instance.client.meta.endpoint_url
+    except:
+        pass
+
+    api_base = getattr(instance, "api_base", None)
+    if isinstance(api_base, str):
+        provider_url = api_base
+
+    # Handle inference endpoint for Mistral AI (llamaindex)
+    sdk_config = getattr(instance, "_client", None)
+    if sdk_config and hasattr(sdk_config, "sdk_configuration"):
+        inference_endpoint = getattr(sdk_config.sdk_configuration, "server_url", inference_endpoint)
+
+    if provider_url:
+        try:
+            parsed_provider_url = urlparse(provider_url)
+        except:
+            pass
+
+    return parsed_provider_url.hostname if parsed_provider_url else provider_url, inference_endpoint
+
+
+def set_provider_name(instance, instance_args: dict):
+    provider_url = ""
+    parsed_provider_url = ""
     try:
         if isinstance(instance.client._client.base_url.host, str):
             provider_url = instance.client._client.base_url.host
-        if isinstance(instance.client._client.base_url, str):
-            inference_endpoint = instance.client._client.base_url
-        else:
-            inference_endpoint = str(instance.client._client.base_url)
     except:
         pass
 
@@ -307,13 +381,13 @@ def get_provider_name(instance):
             provider_url = instance.api_base
     except:
         pass
-
     try:
         if len(provider_url) > 0:
-            parsed_provider_url = urlparse(provider_url)
+            parsed_provider_url = urlparse(provider_url).hostname
     except:
         pass
-    return parsed_provider_url.hostname or provider_url,inference_endpoint
+    if parsed_provider_url or provider_url:
+        instance_args[PROVIDER] = parsed_provider_url or provider_url
 
 
 def is_root_span(curr_span: Span) -> bool:
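Both helpers above prefer the parsed hostname over the raw URL string; urlparse only yields a hostname when a scheme is present, which is why set_provider_name keeps the fallback to the raw string:

    from urllib.parse import urlparse

    urlparse("https://api.mistral.ai/v1").hostname  # 'api.mistral.ai'
    urlparse("api.mistral.ai").hostname             # None, so the raw provider_url is used instead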
@@ -327,16 +401,25 @@ def get_input_from_args(chain_args):
 
 
 def update_span_from_llm_response(response, span: Span, instance):
-    # extract token uasge from langchain openai
-    if (response is not None and hasattr(response, "response_metadata")):
-        response_metadata = response.response_metadata
-        token_usage = response_metadata.get("token_usage")
+    if (response is not None and isinstance(response, dict) and "meta" in response) or (
+            response is not None and hasattr(response, "response_metadata")):
+        token_usage = None
+        if (response is not None and isinstance(response, dict) and "meta" in response): # haystack
+            token_usage = response["meta"][0]["usage"]
+
+        if (response is not None and hasattr(response, "response_metadata")):
+            if hasattr(response, "usage_metadata") and response.usage_metadata is not None:
+                token_usage = response.usage_metadata
+            else:
+                response_metadata = response.response_metadata
+                token_usage = response_metadata.get("token_usage")
+
         meta_dict = {}
         if token_usage is not None:
             temperature = instance.__dict__.get("temperature", None)
             meta_dict.update({"temperature": temperature})
-            meta_dict.update({"completion_tokens": token_usage.get("completion_tokens")})
-            meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens")})
+            meta_dict.update({"completion_tokens": token_usage.get("completion_tokens") or token_usage.get("output_tokens")})
+            meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens") or token_usage.get("input_tokens")})
             meta_dict.update({"total_tokens": token_usage.get("total_tokens")})
             span.add_event(META_DATA, meta_dict)
     # extract token usage from llamaindex openai
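The branch order above handles two response shapes: Haystack pipelines return a plain dict whose meta[0]['usage'] holds the counts, while LangChain-style objects carry usage_metadata (input_tokens/output_tokens) or response_metadata['token_usage']. An illustrative Haystack-shaped dict, with invented values:

    response = {
        "replies": ["..."],
        "meta": [{"usage": {"prompt_tokens": 12, "completion_tokens": 5, "total_tokens": 17}}],
    }
    token_usage = response["meta"][0]["usage"]
    token_usage.get("completion_tokens") or token_usage.get("output_tokens")  # 5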
@@ -401,17 +484,28 @@ def update_span_with_context_output(to_wrap, return_value, span: Span):
 def update_span_with_prompt_input(to_wrap, wrapped_args, span: Span):
     input_arg_text = wrapped_args[0]
 
-    if isinstance(input_arg_text, dict):
-        span.add_event(PROMPT_INPUT_KEY, input_arg_text)
+    prompt_inputs = get_nested_value(input_arg_text, ['prompt_builder', 'question'])
+    if prompt_inputs is not None: # haystack
+        span.add_event(PROMPT_INPUT_KEY, {QUERY: prompt_inputs})
+    elif isinstance(input_arg_text, dict):
+        span.add_event(PROMPT_INPUT_KEY, {QUERY: input_arg_text['input']})
     else:
         span.add_event(PROMPT_INPUT_KEY, {QUERY: input_arg_text})
 
 
 def update_span_with_prompt_output(to_wrap, wrapped_args, span: Span):
     package_name: str = to_wrap.get('package')
-    if isinstance(wrapped_args, str):
-        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args})
-    if isinstance(wrapped_args, dict):
-        span.add_event(PROMPT_OUTPUT_KEY, wrapped_args)
+
     if "llama_index.core.base.base_query_engine" in package_name:
         span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args.response})
+    elif "haystack.core.pipeline.pipeline" in package_name:
+        resp = get_nested_value(wrapped_args, ['llm', 'replies'])
+        if resp is not None:
+            if isinstance(resp, list) and hasattr(resp[0], 'content'):
+                span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: resp[0].content})
+            else:
+                span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: resp[0]})
+    elif isinstance(wrapped_args, str):
+        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args})
+    elif isinstance(wrapped_args, dict):
+        span.add_event(PROMPT_OUTPUT_KEY, wrapped_args)
monocle_apptrace/wrapper.py CHANGED
@@ -2,6 +2,7 @@
 from monocle_apptrace.haystack import HAYSTACK_METHODS
 from monocle_apptrace.langchain import LANGCHAIN_METHODS
 from monocle_apptrace.llamaindex import LLAMAINDEX_METHODS
+from monocle_apptrace.botocore import BOTOCORE_METHODS
 from monocle_apptrace.wrap_common import task_wrapper
 
 # pylint: disable=too-few-public-methods
@@ -23,4 +24,4 @@ class WrapperMethod:
 
         self.wrapper = wrapper
 
-INBUILT_METHODS_LIST = LANGCHAIN_METHODS + LLAMAINDEX_METHODS + HAYSTACK_METHODS
+INBUILT_METHODS_LIST = LANGCHAIN_METHODS + LLAMAINDEX_METHODS + HAYSTACK_METHODS + BOTOCORE_METHODS
{monocle_apptrace-0.2.0.dist-info → monocle_apptrace-0.3.0b2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: monocle_apptrace
-Version: 0.2.0
+Version: 0.3.0b2
 Summary: package with monocle genAI tracing
 Project-URL: Homepage, https://github.com/monocle2ai/monocle
 Project-URL: Issues, https://github.com/monocle2ai/monocle/issues
@@ -25,11 +25,14 @@ Requires-Dist: faiss-cpu==1.8.0; extra == 'dev'
 Requires-Dist: instructorembedding==1.0.1; extra == 'dev'
 Requires-Dist: langchain-chroma==0.1.1; extra == 'dev'
 Requires-Dist: langchain-community==0.2.5; extra == 'dev'
+Requires-Dist: langchain-mistralai==0.1.13; extra == 'dev'
 Requires-Dist: langchain-openai==0.1.8; extra == 'dev'
 Requires-Dist: langchain==0.2.5; extra == 'dev'
 Requires-Dist: llama-index-embeddings-huggingface==0.2.0; extra == 'dev'
+Requires-Dist: llama-index-llms-mistralai==0.1.20; extra == 'dev'
 Requires-Dist: llama-index-vector-stores-chroma==0.1.9; extra == 'dev'
 Requires-Dist: llama-index==0.10.30; extra == 'dev'
+Requires-Dist: mistral-haystack==0.0.2; extra == 'dev'
 Requires-Dist: numpy==1.26.4; extra == 'dev'
 Requires-Dist: parameterized==0.9.0; extra == 'dev'
 Requires-Dist: pytest==8.0.0; extra == 'dev'