ragaai-catalyst 2.1.7.5b5__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -142,6 +142,7 @@ class Tracer(AgenticTracing):
142
142
  self.start_time = datetime.datetime.now().astimezone().isoformat()
143
143
  self.model_cost_dict = model_cost
144
144
  self.user_context = "" # Initialize user_context to store context from add_context
145
+ self.user_gt = "" # Initialize user_gt to store gt from add_gt
145
146
  self.file_tracker = TrackName()
146
147
  self.post_processor = None
147
148
  self.max_upload_workers = max_upload_workers
@@ -178,22 +179,21 @@ class Tracer(AgenticTracing):
178
179
  logger.error(f"Failed to retrieve projects list: {e}")
179
180
  raise
180
181
 
181
- if tracer_type == "langchain":
182
- instrumentors = []
183
- from openinference.instrumentation.langchain import LangChainInstrumentor
184
- instrumentors += [(LangChainInstrumentor, [])]
185
- self._setup_agentic_tracer(instrumentors)
186
- elif tracer_type == "llamaindex":
187
- self._upload_task = None
188
- self.llamaindex_tracer = None
189
- elif tracer_type == "rag/langchain":
190
- instrumentors = []
191
- from openinference.instrumentation.langchain import LangChainInstrumentor
192
- instrumentors += [(LangChainInstrumentor, [])]
193
- self._setup_agentic_tracer(instrumentors)
182
+ # if tracer_type == "langchain":
183
+ # instrumentors = []
184
+ # from openinference.instrumentation.langchain import LangChainInstrumentor
185
+ # instrumentors += [(LangChainInstrumentor, [])]
186
+ # self._setup_agentic_tracer(instrumentors)
187
+ # elif tracer_type == "llamaindex":
188
+ # self._upload_task = None
189
+ # self.llamaindex_tracer = None
190
+ # elif tracer_type == "rag/langchain":
191
+ # instrumentors = []
192
+ # from openinference.instrumentation.langchain import LangChainInstrumentor
193
+ # instrumentors += [(LangChainInstrumentor, [])]
194
+ # self._setup_agentic_tracer(instrumentors)
194
195
  # Handle agentic tracers
195
- elif tracer_type == "agentic" or tracer_type.startswith("agentic/"):
196
-
196
+ if tracer_type == "agentic" or tracer_type.startswith("agentic/") or tracer_type == "langchain":
197
197
  # Setup instrumentors based on tracer type
198
198
  instrumentors = []
199
199
 
@@ -308,11 +308,11 @@ class Tracer(AgenticTracing):
308
308
  return
309
309
 
310
310
  # Handle specific framework instrumentation
311
- elif tracer_type == "agentic/llamaindex":
311
+ elif tracer_type == "agentic/llamaindex" or tracer_type == "llamaindex":
312
312
  from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
313
313
  instrumentors += [(LlamaIndexInstrumentor, [])]
314
314
 
315
- elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
315
+ elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph" or tracer_type == "langchain":
316
316
  from openinference.instrumentation.langchain import LangChainInstrumentor
317
317
  instrumentors += [(LangChainInstrumentor, [])]
318
318
 
@@ -378,6 +378,9 @@ class Tracer(AgenticTracing):
378
378
  "input_cost_per_token": float(cost_config["input_cost_per_million_token"])/ 1000000,
379
379
  "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) /1000000
380
380
  }
381
+ self.dynamic_exporter.custom_model_cost = self.model_custom_cost
382
+ logger.info(f"Updated custom model cost for {model_name}: {self.model_custom_cost[model_name]}")
383
+
381
384
 
382
385
  def register_masking_function(self, masking_func):
383
386
  """
@@ -401,66 +404,27 @@ class Tracer(AgenticTracing):
401
404
 
402
405
  def recursive_mask_values(obj, parent_key=None):
403
406
  """Apply masking to all values in nested structure."""
404
- if isinstance(obj, dict):
405
- if self.tracer_type == "langchain":
406
- # Special handling for LangChain data
407
- if isinstance(obj, dict):
408
- if obj.get("name", "") == "retrieve_documents.langchain.workflow":
409
- prompt_structured_data = {
410
- "traceloop.entity.input": json.dumps({
411
- "kwargs": {
412
- "input": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.input", "")).get("kwargs", {}).get("input", "")),
413
- }
414
- })
415
- }
416
- prompt_data = {
417
- "name": "retrieve_documents.langchain.workflow",
418
- "attributes": prompt_structured_data,
419
- }
420
- return prompt_data
421
- elif obj.get("name", "") == "PromptTemplate.langchain.task":
422
- context_structured_data = {
423
- "traceloop.entity.input": json.dumps({
424
- "kwargs": {
425
- "context": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.input", "")).get("kwargs", {}).get("context", "")),
426
- }
427
- }),
428
- "traceloop.entity.output": json.dumps({
429
- "kwargs": {
430
- "text": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.output", "")).get("kwargs", {}).get("text", "")),
431
- }
432
- })
433
- }
434
- context_data = {
435
- "name": "PromptTemplate.langchain.task",
436
- "attributes": context_structured_data,
437
- }
438
- return context_data
439
- elif obj.get("name", "") == "ChatOpenAI.langchain.task":
440
- response_structured_data = {"gen_ai.completion.0.content": masking_func(obj.get("attributes", {}).get("gen_ai.completion.0.content", "")),
441
- "gen_ai.prompt.0.content": masking_func(obj.get("attributes", {}).get("gen_ai.prompt.0.content", ""))}
442
- response_data = {
443
- "name": "ChatOpenAI.langchain.task",
444
- "attributes" : response_structured_data
445
- }
446
- return response_data
447
- else:
407
+ try:
408
+ if isinstance(obj, dict):
448
409
  return {k: recursive_mask_values(v, k) for k, v in obj.items()}
449
- elif isinstance(obj, list):
450
- return [recursive_mask_values(item, parent_key) for item in obj]
451
- elif isinstance(obj, str):
452
- # List of keys that should NOT be masked
453
- excluded_keys = {
454
- 'start_time', 'end_time', 'name', 'id',
455
- 'hash_id', 'parent_id', 'source_hash_id',
456
- 'cost', 'type', 'feedback', 'error', 'ctx','telemetry.sdk.version',
457
- 'telemetry.sdk.language','service.name'
458
- }
459
- # Apply masking only if the key is NOT in the excluded list
460
- if parent_key and parent_key.lower() not in excluded_keys:
461
- return masking_func(obj)
462
- return obj
463
- else:
410
+ elif isinstance(obj, list):
411
+ return [recursive_mask_values(item, parent_key) for item in obj]
412
+ elif isinstance(obj, str):
413
+ # List of keys that should NOT be masked
414
+ excluded_keys = {
415
+ 'start_time', 'end_time', 'name', 'id',
416
+ 'hash_id', 'parent_id', 'source_hash_id',
417
+ 'cost', 'type', 'feedback', 'error', 'ctx','telemetry.sdk.version',
418
+ 'telemetry.sdk.language','service.name'
419
+ }
420
+ # Apply masking only if the key is NOT in the excluded list
421
+ if parent_key and parent_key.lower() not in excluded_keys:
422
+ return masking_func(obj)
423
+ return obj
424
+ else:
425
+ return obj
426
+ except Exception as e:
427
+ logger.error(f"Error masking value: {e}")
464
428
  return obj
465
429
 
466
430
  def file_post_processor(original_trace_json_path: os.PathLike) -> os.PathLike:
@@ -535,20 +499,8 @@ class Tracer(AgenticTracing):
535
499
  'max_upload_workers': self.max_upload_workers
536
500
  }
537
501
 
538
- # Save the model_custom_cost before reinitialization
539
- saved_model_custom_cost = self.model_custom_cost.copy()
540
-
541
- # Reinitialize self with new external_id and stored parameters
542
- self.__init__(
543
- external_id=external_id,
544
- **current_params
545
- )
546
-
547
- # Restore the model_custom_cost after reinitialization
548
- self.model_custom_cost = saved_model_custom_cost
549
- self.dynamic_exporter.custom_model_cost = self.model_custom_cost
550
-
551
-
502
+ self.dynamic_exporter.external_id = external_id
503
+ logger.debug(f"Updated external_id to {external_id}")
552
504
 
553
505
  def set_dataset_name(self, dataset_name):
554
506
  """
@@ -646,8 +598,11 @@ class Tracer(AgenticTracing):
646
598
  super().start()
647
599
  return self
648
600
  elif self.tracer_type == "llamaindex":
649
- self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
650
- return self.llamaindex_tracer.start()
601
+ super().start()
602
+ return self
603
+
604
+ # self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
605
+ # return self.llamaindex_tracer.start()
651
606
  elif self.tracer_type == "rag/langchain":
652
607
  super().start()
653
608
  return self
@@ -661,35 +616,39 @@ class Tracer(AgenticTracing):
661
616
  super().stop()
662
617
  return self
663
618
  elif self.tracer_type == "llamaindex":
664
- if self.llamaindex_tracer is None:
665
- raise ValueError("LlamaIndex tracer was not started")
666
-
667
- user_detail = self._pass_user_data()
668
- converted_back_to_callback = self.llamaindex_tracer.stop()
619
+ super().stop()
620
+ return self
669
621
 
670
- filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
671
- with open(filepath_3, 'w') as f:
672
- json.dump(converted_back_to_callback, f, default=str, indent=2)
673
622
 
674
- # Apply post-processor if registered
675
- if self.post_processor is not None:
676
- try:
677
- final_trace_filepath = self.post_processor(filepath_3)
678
- logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
679
- except Exception as e:
680
- logger.error(f"Error in post-processing: {e}")
681
- else:
682
- final_trace_filepath = filepath_3
683
-
684
- if converted_back_to_callback:
685
- UploadTraces(json_file_path=final_trace_filepath,
686
- project_name=self.project_name,
687
- project_id=self.project_id,
688
- dataset_name=self.dataset_name,
689
- user_detail=user_detail,
690
- base_url=self.base_url
691
- ).upload_traces()
692
- return
623
+ # if self.llamaindex_tracer is None:
624
+ # raise ValueError("LlamaIndex tracer was not started")
625
+
626
+ # user_detail = self._pass_user_data()
627
+ # converted_back_to_callback = self.llamaindex_tracer.stop()
628
+
629
+ # filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
630
+ # with open(filepath_3, 'w') as f:
631
+ # json.dump(converted_back_to_callback, f, default=str, indent=2)
632
+
633
+ # # Apply post-processor if registered
634
+ # if self.post_processor is not None:
635
+ # try:
636
+ # final_trace_filepath = self.post_processor(filepath_3)
637
+ # logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
638
+ # except Exception as e:
639
+ # logger.error(f"Error in post-processing: {e}")
640
+ # else:
641
+ # final_trace_filepath = filepath_3
642
+
643
+ # if converted_back_to_callback:
644
+ # UploadTraces(json_file_path=final_trace_filepath,
645
+ # project_name=self.project_name,
646
+ # project_id=self.project_id,
647
+ # dataset_name=self.dataset_name,
648
+ # user_detail=user_detail,
649
+ # base_url=self.base_url
650
+ # ).upload_traces()
651
+ # return
693
652
  elif self.tracer_type == "rag/langchain":
694
653
  super().stop()
695
654
  else:
@@ -697,7 +656,7 @@ class Tracer(AgenticTracing):
697
656
 
698
657
  def get_upload_status(self):
699
658
  """Check the status of the trace upload."""
700
- if self.tracer_type == "langchain":
659
+ if self.tracer_type == "langchain" or self.tracer_type == "llamaindex":
701
660
  if self._upload_task is None:
702
661
  return "No upload task in progress."
703
662
  if self._upload_task.done():
@@ -861,6 +820,7 @@ class Tracer(AgenticTracing):
861
820
  post_processor= self.post_processor,
862
821
  max_upload_workers = self.max_upload_workers,
863
822
  user_context = self.user_context,
823
+ user_gt = self.user_gt,
864
824
  external_id=self.external_id
865
825
  )
866
826
 
@@ -904,33 +864,44 @@ class Tracer(AgenticTracing):
904
864
 
905
865
  Args:
906
866
  context: Additional context information to be added to the trace. Can be a string.
907
-
908
- Raises:
909
- ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
910
867
  """
911
868
  if self.tracer_type not in ["langchain", "llamaindex"]:
912
- raise ValueError("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
869
+ logger.warning("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
870
+ return
913
871
 
914
872
  # Convert string context to string if needed
915
873
  if isinstance(context, str):
916
874
  self.dynamic_exporter.user_context = context
917
875
  self.user_context = context
918
876
  else:
919
- raise TypeError("context must be a string")
877
+ logger.warning("context must be a string")
920
878
 
921
- def add_metadata(self, metadata):
879
+ def add_gt(self, gt):
922
880
  """
923
- Add metadata information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
881
+ Add gt information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
924
882
 
925
883
  Args:
926
- metadata: Additional metadata information to be added to the trace. Can be a dictionary.
927
-
928
- Raises:
929
- ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
884
+ gt: gt information to be added to the trace. Can be a string.
930
885
  """
931
886
  if self.tracer_type not in ["langchain", "llamaindex"]:
932
- raise ValueError("add_metadata is only supported for 'langchain' and 'llamaindex' tracer types")
887
+ logger.warning("add_gt is only supported for 'langchain' and 'llamaindex' tracer types")
888
+ return
933
889
 
890
+ # Convert string gt to string if needed
891
+ if isinstance(gt, str):
892
+ self.dynamic_exporter.user_gt = gt
893
+ self.user_gt = gt
894
+ else:
895
+ logger.warning("gt must be a string")
896
+
897
+ def add_metadata(self, metadata):
898
+ """
899
+ Add metadata information to the trace. If metadata is a dictionary, it will be merged with existing metadata.
900
+ Non-dictionary metadata or keys not present in the existing metadata will be logged as warnings.
901
+
902
+ Args:
903
+ metadata: Additional metadata information to be added to the trace. Should be a dictionary.
904
+ """
934
905
  # Convert string metadata to string if needed
935
906
  user_details = self.user_details
936
907
  user_metadata = user_details["trace_user_detail"]["metadata"]
@@ -939,8 +910,8 @@ class Tracer(AgenticTracing):
939
910
  if key in user_metadata:
940
911
  user_metadata[key] = value
941
912
  else:
942
- raise ValueError(f"Key '{key}' not found in metadata")
913
+ logger.warning(f"Key '{key}' not found in metadata")
943
914
  self.dynamic_exporter.user_details = user_details
944
915
  self.metadata = user_metadata
945
916
  else:
946
- raise TypeError("metadata must be a dictionary")
917
+ logger.warning("metadata must be a dictionary")
@@ -0,0 +1,205 @@
1
import json
import logging

logger = logging.getLogger(__name__)


def rag_trace_json_converter(input_trace):
    """Extract the prompt, response, context and ground truth from a RAG trace.

    Extraction is best-effort: problems with an individual span or attribute
    are logged and skipped rather than raised.

    Args:
        input_trace: Mapping with a ``"tracer_type"`` entry (``"langchain"``
            and ``"llamaindex"`` are the recognised values) and a ``"data"``
            list whose first element carries the ``"spans"`` to inspect. Each
            span is read as a mapping with ``"name"`` and ``"attributes"``
            entries. A missing, ``None`` or empty ``"data"`` entry is treated
            as a trace without spans.

    Returns:
        tuple: ``(prompt, response, context, gt)``.

        * ``prompt`` / ``response``: the extracted value; ``""`` when nothing
          is found in a ``"langchain"`` trace; ``None`` when nothing is found
          for any other tracer type or when extraction fails unexpectedly.
        * ``context`` / ``gt``: the extracted value, or ``""`` when not found
          or when extraction fails unexpectedly.
    """
    tracer_type = input_trace.get("tracer_type")
    # A trace without data entries yields no spans instead of an IndexError,
    # in line with the best-effort behaviour of the extractors below.
    data = input_trace.get("data") or []
    trace_spans = data[0].get("spans", []) if data else []

    def get_prompt(spans):
        """Return the user prompt found in ``spans``."""
        try:
            if tracer_type == "langchain":
                # First pass: look at the LLM attributes of every span.
                for span in spans:
                    try:
                        attributes = span.get("attributes", {})
                        if not attributes:
                            continue

                        # Prefer the content of the message whose role is "user".
                        for key, value in attributes.items():
                            try:
                                if (
                                    key.startswith("llm.input_messages.")
                                    and key.endswith(".message.role")
                                    and value == "user"
                                ):
                                    message_num = key.split(".")[2]
                                    content_key = f"llm.input_messages.{message_num}.message.content"
                                    if content_key in attributes:
                                        return attributes.get(content_key)
                            except Exception as e:
                                logger.warning(f"Error processing attribute key-value pair: {str(e)}")
                                continue

                        # Otherwise use the text following "Human:" in a prompt list,
                        # or the whole list when no such marker is present.
                        for key, value in attributes.items():
                            try:
                                if key.startswith("llm.prompts") and isinstance(value, list):
                                    human_message = None
                                    for message in value:
                                        if isinstance(message, str):
                                            human_index = message.find("Human:")
                                            if human_index != -1:
                                                human_message = message[human_index:].replace("Human:", "")
                                                break
                                    return human_message if human_message else value
                            except Exception as e:
                                logger.warning(f"Error processing attribute key-value pair for prompt: {str(e)}")
                                continue
                    except Exception as e:
                        logger.warning(f"Error processing span for prompt extraction: {str(e)}")
                        continue

                # Second pass: fall back to the input of well-known chain spans.
                for span in spans:
                    try:
                        span_name = span["name"]
                        if span_name == "LLMChain":
                            try:
                                input_value = span["attributes"].get("input.value", "{}")
                                return json.loads(input_value).get("question", "")
                            except json.JSONDecodeError:
                                logger.warning(f"Invalid JSON in LLMChain input.value: {input_value}")
                                continue
                        elif span_name in ("RetrievalQA", "VectorStoreRetriever"):
                            return span["attributes"].get("input.value", "")
                    except Exception as e:
                        logger.warning(f"Error processing span for fallback prompt extraction: {str(e)}")
                        continue

                logger.warning("No user message found in any span")
                logger.warning("Returning empty string for prompt.")
                return ""
            elif tracer_type == "llamaindex":
                for span in spans:
                    if span["name"] == "BaseQueryEngine.query":
                        return span["attributes"]["input.value"]
                    elif "query_bundle" in span["attributes"].get("input.value", ""):
                        try:
                            query_data = json.loads(span["attributes"]["input.value"])
                            if "query_bundle" in query_data:
                                return query_data["query_bundle"]["query_str"]
                        except json.JSONDecodeError:
                            logger.error("Failed to parse query_bundle JSON")
            logger.error("Prompt not found in the trace")
            return None
        except Exception as e:
            logger.error(f"Error while extracting prompt from trace: {str(e)}")
            return None

    def get_response(spans):
        """Return the model response found in ``spans``."""
        try:
            if tracer_type == "langchain":
                # First pass: look at the LLM attributes of every span.
                for span in spans:
                    try:
                        attributes = span.get("attributes", {})
                        if not attributes:
                            continue

                        # Prefer the content of an output message.
                        for key, value in attributes.items():
                            try:
                                if key.startswith("llm.output_messages.") and key.endswith(".message.content"):
                                    return value
                            except Exception as e:
                                logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
                                continue

                        # Otherwise read the text of the first generation in output.value.
                        for key, value in attributes.items():
                            try:
                                if key.startswith("output.value"):
                                    try:
                                        output_json = json.loads(value)
                                        generations = (
                                            output_json.get("generations")
                                            if "generations" in output_json
                                            else None
                                        )
                                        if isinstance(generations, list) and len(generations) > 0:
                                            first_batch = generations[0]
                                            if isinstance(first_batch, list) and len(first_batch) > 0:
                                                first_generation = first_batch[0]
                                                if "text" in first_generation:
                                                    return first_generation["text"]
                                    except json.JSONDecodeError:
                                        logger.warning(f"Invalid JSON in output.value: {value}")
                                        continue
                            except Exception as e:
                                logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
                                continue
                    except Exception as e:
                        logger.warning(f"Error processing span for response extraction: {str(e)}")
                        continue

                # Second pass: fall back to the output of well-known chain spans.
                for span in spans:
                    try:
                        span_name = span["name"]
                        if span_name == "LLMChain":
                            try:
                                output_value = span["attributes"].get("output.value", "")
                                if output_value:
                                    # NOTE(review): this returns the parsed JSON object,
                                    # not a text field of it - confirm callers expect that.
                                    return json.loads(output_value)
                                return ""
                            except json.JSONDecodeError:
                                logger.warning(f"Invalid JSON in LLMChain output.value: {output_value}")
                                continue
                        elif span_name in ("RetrievalQA", "VectorStoreRetriever"):
                            return span["attributes"].get("output.value", "")
                    except Exception as e:
                        logger.warning(f"Error processing span for fallback response extraction: {str(e)}")
                        continue

                logger.warning("No response found in any span")
                return ""
            elif tracer_type == "llamaindex":
                for span in spans:
                    if span["name"] == "BaseQueryEngine.query":
                        return span["attributes"]["output.value"]
            logger.error("Response not found in the trace")
            return None
        except Exception as e:
            logger.error(f"Error while extracting response from trace: {str(e)}")
            return None

    def get_context(spans):
        """Return the retrieval context found in ``spans``, or ``""``."""
        # NOTE(review): both branches read "retrieval.documents.1.document.content".
        # If the document attributes are numbered from 0 this skips the first
        # retrieved document - confirm against the exporter that writes them.
        try:
            if tracer_type == "langchain":
                for span in spans:
                    try:
                        if span["name"] == "CustomContextSpan":
                            return span["attributes"].get("input.value", "")
                        elif span["name"] == "VectorStoreRetriever":
                            return span["attributes"].get("retrieval.documents.1.document.content", "")
                    except Exception as e:
                        logger.warning(f"Error processing span for context extraction: {str(e)}")
                        continue
            elif tracer_type == "llamaindex":
                for span in spans:
                    try:
                        if span["name"] == "CustomContextSpan":
                            return span["attributes"].get("input.value", "")
                        elif span["name"] == "BaseRetriever.retrieve":
                            # A missing key raises here and is logged below, so the
                            # search moves on to the next span.
                            return span["attributes"]["retrieval.documents.1.document.content"]
                    except Exception as e:
                        logger.warning(f"Error processing span for context extraction: {str(e)}")
                        continue
            logger.warning("Context not found in the trace")
            return ""
        except Exception as e:
            logger.error(f"Error while extracting context from trace: {str(e)}")
            return ""

    def get_gt(spans):
        """Return the ground truth found in ``spans``, or ``""``."""
        try:
            # Both supported tracer types store the ground truth the same way.
            if tracer_type in ("langchain", "llamaindex"):
                for span in spans:
                    try:
                        if span["name"] == "CustomGroundTruthSpan":
                            return span["attributes"].get("input.value", "")
                    except Exception as e:
                        logger.warning(f"Error processing span for ground truth extraction: {str(e)}")
                        continue
            logger.warning("Ground truth not found in the trace")
            return ""
        except Exception as e:
            logger.error(f"Error while extracting ground truth from trace: {str(e)}")
            return ""

    prompt = get_prompt(trace_spans)
    response = get_response(trace_spans)
    context = get_context(trace_spans)
    gt = get_gt(trace_spans)

    return prompt, response, context, gt