ragaai-catalyst 2.1.7.5b5__py3-none-any.whl → 2.2b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +3 -13
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +18 -17
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +12 -44
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +20 -5
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +22 -1
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +28 -97
- ragaai_catalyst/tracers/tracer.py +85 -75
- ragaai_catalyst/tracers/utils/rag_extraction_logic_final.py +205 -0
- ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +223 -189
- ragaai_catalyst/tracers/utils/trace_json_converter.py +118 -200
- {ragaai_catalyst-2.1.7.5b5.dist-info → ragaai_catalyst-2.2b0.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.1.7.5b5.dist-info → ragaai_catalyst-2.2b0.dist-info}/RECORD +15 -15
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +0 -114
- {ragaai_catalyst-2.1.7.5b5.dist-info → ragaai_catalyst-2.2b0.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.7.5b5.dist-info → ragaai_catalyst-2.2b0.dist-info}/licenses/LICENSE +0 -0
- {ragaai_catalyst-2.1.7.5b5.dist-info → ragaai_catalyst-2.2b0.dist-info}/top_level.txt +0 -0
@@ -142,6 +142,7 @@ class Tracer(AgenticTracing):
|
|
142
142
|
self.start_time = datetime.datetime.now().astimezone().isoformat()
|
143
143
|
self.model_cost_dict = model_cost
|
144
144
|
self.user_context = "" # Initialize user_context to store context from add_context
|
145
|
+
self.user_gt = "" # Initialize user_gt to store gt from add_gt
|
145
146
|
self.file_tracker = TrackName()
|
146
147
|
self.post_processor = None
|
147
148
|
self.max_upload_workers = max_upload_workers
|
@@ -178,22 +179,21 @@ class Tracer(AgenticTracing):
|
|
178
179
|
logger.error(f"Failed to retrieve projects list: {e}")
|
179
180
|
raise
|
180
181
|
|
181
|
-
if tracer_type == "langchain":
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
elif tracer_type == "llamaindex":
|
187
|
-
|
188
|
-
|
189
|
-
elif tracer_type == "rag/langchain":
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
182
|
+
# if tracer_type == "langchain":
|
183
|
+
# instrumentors = []
|
184
|
+
# from openinference.instrumentation.langchain import LangChainInstrumentor
|
185
|
+
# instrumentors += [(LangChainInstrumentor, [])]
|
186
|
+
# self._setup_agentic_tracer(instrumentors)
|
187
|
+
# elif tracer_type == "llamaindex":
|
188
|
+
# self._upload_task = None
|
189
|
+
# self.llamaindex_tracer = None
|
190
|
+
# elif tracer_type == "rag/langchain":
|
191
|
+
# instrumentors = []
|
192
|
+
# from openinference.instrumentation.langchain import LangChainInstrumentor
|
193
|
+
# instrumentors += [(LangChainInstrumentor, [])]
|
194
|
+
# self._setup_agentic_tracer(instrumentors)
|
194
195
|
# Handle agentic tracers
|
195
|
-
|
196
|
-
|
196
|
+
if tracer_type == "agentic" or tracer_type.startswith("agentic/") or tracer_type == "langchain":
|
197
197
|
# Setup instrumentors based on tracer type
|
198
198
|
instrumentors = []
|
199
199
|
|
@@ -308,11 +308,11 @@ class Tracer(AgenticTracing):
|
|
308
308
|
return
|
309
309
|
|
310
310
|
# Handle specific framework instrumentation
|
311
|
-
elif tracer_type == "agentic/llamaindex":
|
311
|
+
elif tracer_type == "agentic/llamaindex" or tracer_type == "llamaindex":
|
312
312
|
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
|
313
313
|
instrumentors += [(LlamaIndexInstrumentor, [])]
|
314
314
|
|
315
|
-
elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
|
315
|
+
elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph" or tracer_type == "langchain":
|
316
316
|
from openinference.instrumentation.langchain import LangChainInstrumentor
|
317
317
|
instrumentors += [(LangChainInstrumentor, [])]
|
318
318
|
|
@@ -378,6 +378,9 @@ class Tracer(AgenticTracing):
|
|
378
378
|
"input_cost_per_token": float(cost_config["input_cost_per_million_token"])/ 1000000,
|
379
379
|
"output_cost_per_token": float(cost_config["output_cost_per_million_token"]) /1000000
|
380
380
|
}
|
381
|
+
self.dynamic_exporter.custom_model_cost = self.model_custom_cost
|
382
|
+
logger.info(f"Updated custom model cost for {model_name}: {self.model_custom_cost[model_name]}")
|
383
|
+
|
381
384
|
|
382
385
|
def register_masking_function(self, masking_func):
|
383
386
|
"""
|
@@ -535,20 +538,8 @@ class Tracer(AgenticTracing):
|
|
535
538
|
'max_upload_workers': self.max_upload_workers
|
536
539
|
}
|
537
540
|
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
# Reinitialize self with new external_id and stored parameters
|
542
|
-
self.__init__(
|
543
|
-
external_id=external_id,
|
544
|
-
**current_params
|
545
|
-
)
|
546
|
-
|
547
|
-
# Restore the model_custom_cost after reinitialization
|
548
|
-
self.model_custom_cost = saved_model_custom_cost
|
549
|
-
self.dynamic_exporter.custom_model_cost = self.model_custom_cost
|
550
|
-
|
551
|
-
|
541
|
+
self.dynamic_exporter.external_id = external_id
|
542
|
+
logger.debug(f"Updated external_id to {external_id}")
|
552
543
|
|
553
544
|
def set_dataset_name(self, dataset_name):
|
554
545
|
"""
|
@@ -646,8 +637,11 @@ class Tracer(AgenticTracing):
|
|
646
637
|
super().start()
|
647
638
|
return self
|
648
639
|
elif self.tracer_type == "llamaindex":
|
649
|
-
|
650
|
-
return self
|
640
|
+
super().start()
|
641
|
+
return self
|
642
|
+
|
643
|
+
# self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
|
644
|
+
# return self.llamaindex_tracer.start()
|
651
645
|
elif self.tracer_type == "rag/langchain":
|
652
646
|
super().start()
|
653
647
|
return self
|
@@ -661,35 +655,39 @@ class Tracer(AgenticTracing):
|
|
661
655
|
super().stop()
|
662
656
|
return self
|
663
657
|
elif self.tracer_type == "llamaindex":
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
user_detail = self._pass_user_data()
|
668
|
-
converted_back_to_callback = self.llamaindex_tracer.stop()
|
658
|
+
super().stop()
|
659
|
+
return self
|
669
660
|
|
670
|
-
filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
|
671
|
-
with open(filepath_3, 'w') as f:
|
672
|
-
json.dump(converted_back_to_callback, f, default=str, indent=2)
|
673
661
|
|
674
|
-
#
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
if
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
662
|
+
# if self.llamaindex_tracer is None:
|
663
|
+
# raise ValueError("LlamaIndex tracer was not started")
|
664
|
+
|
665
|
+
# user_detail = self._pass_user_data()
|
666
|
+
# converted_back_to_callback = self.llamaindex_tracer.stop()
|
667
|
+
|
668
|
+
# filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
|
669
|
+
# with open(filepath_3, 'w') as f:
|
670
|
+
# json.dump(converted_back_to_callback, f, default=str, indent=2)
|
671
|
+
|
672
|
+
# # Apply post-processor if registered
|
673
|
+
# if self.post_processor is not None:
|
674
|
+
# try:
|
675
|
+
# final_trace_filepath = self.post_processor(filepath_3)
|
676
|
+
# logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
|
677
|
+
# except Exception as e:
|
678
|
+
# logger.error(f"Error in post-processing: {e}")
|
679
|
+
# else:
|
680
|
+
# final_trace_filepath = filepath_3
|
681
|
+
|
682
|
+
# if converted_back_to_callback:
|
683
|
+
# UploadTraces(json_file_path=final_trace_filepath,
|
684
|
+
# project_name=self.project_name,
|
685
|
+
# project_id=self.project_id,
|
686
|
+
# dataset_name=self.dataset_name,
|
687
|
+
# user_detail=user_detail,
|
688
|
+
# base_url=self.base_url
|
689
|
+
# ).upload_traces()
|
690
|
+
# return
|
693
691
|
elif self.tracer_type == "rag/langchain":
|
694
692
|
super().stop()
|
695
693
|
else:
|
@@ -697,7 +695,7 @@ class Tracer(AgenticTracing):
|
|
697
695
|
|
698
696
|
def get_upload_status(self):
|
699
697
|
"""Check the status of the trace upload."""
|
700
|
-
if self.tracer_type == "langchain":
|
698
|
+
if self.tracer_type == "langchain" or self.tracer_type == "llamaindex":
|
701
699
|
if self._upload_task is None:
|
702
700
|
return "No upload task in progress."
|
703
701
|
if self._upload_task.done():
|
@@ -861,6 +859,7 @@ class Tracer(AgenticTracing):
|
|
861
859
|
post_processor= self.post_processor,
|
862
860
|
max_upload_workers = self.max_upload_workers,
|
863
861
|
user_context = self.user_context,
|
862
|
+
user_gt = self.user_gt,
|
864
863
|
external_id=self.external_id
|
865
864
|
)
|
866
865
|
|
@@ -904,33 +903,44 @@ class Tracer(AgenticTracing):
|
|
904
903
|
|
905
904
|
Args:
|
906
905
|
context: Additional context information to be added to the trace. Can be a string.
|
907
|
-
|
908
|
-
Raises:
|
909
|
-
ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
|
910
906
|
"""
|
911
907
|
if self.tracer_type not in ["langchain", "llamaindex"]:
|
912
|
-
|
908
|
+
logger.warning("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
|
909
|
+
return
|
913
910
|
|
914
911
|
# Convert string context to string if needed
|
915
912
|
if isinstance(context, str):
|
916
913
|
self.dynamic_exporter.user_context = context
|
917
914
|
self.user_context = context
|
918
915
|
else:
|
919
|
-
|
916
|
+
logger.warning("context must be a string")
|
920
917
|
|
921
|
-
def
|
918
|
+
def add_gt(self, gt):
|
922
919
|
"""
|
923
|
-
Add
|
920
|
+
Add gt information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
|
924
921
|
|
925
922
|
Args:
|
926
|
-
|
927
|
-
|
928
|
-
Raises:
|
929
|
-
ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
|
923
|
+
gt: gt information to be added to the trace. Can be a string.
|
930
924
|
"""
|
931
925
|
if self.tracer_type not in ["langchain", "llamaindex"]:
|
932
|
-
|
926
|
+
logger.warning("add_gt is only supported for 'langchain' and 'llamaindex' tracer types")
|
927
|
+
return
|
933
928
|
|
929
|
+
# Convert string gt to string if needed
|
930
|
+
if isinstance(gt, str):
|
931
|
+
self.dynamic_exporter.user_gt = gt
|
932
|
+
self.user_gt = gt
|
933
|
+
else:
|
934
|
+
logger.warning("gt must be a string")
|
935
|
+
|
936
|
+
def add_metadata(self, metadata):
|
937
|
+
"""
|
938
|
+
Add metadata information to the trace. If metadata is a dictionary, it will be merged with existing metadata.
|
939
|
+
Non-dictionary metadata or keys not present in the existing metadata will be logged as warnings.
|
940
|
+
|
941
|
+
Args:
|
942
|
+
metadata: Additional metadata information to be added to the trace. Should be a dictionary.
|
943
|
+
"""
|
934
944
|
# Convert string metadata to string if needed
|
935
945
|
user_details = self.user_details
|
936
946
|
user_metadata = user_details["trace_user_detail"]["metadata"]
|
@@ -939,8 +949,8 @@ class Tracer(AgenticTracing):
|
|
939
949
|
if key in user_metadata:
|
940
950
|
user_metadata[key] = value
|
941
951
|
else:
|
942
|
-
|
952
|
+
logger.warning(f"Key '{key}' not found in metadata")
|
943
953
|
self.dynamic_exporter.user_details = user_details
|
944
954
|
self.metadata = user_metadata
|
945
955
|
else:
|
946
|
-
|
956
|
+
logger.warning("metadata must be a dictionary")
|
@@ -0,0 +1,205 @@
|
|
1
|
+
"""Pull the prompt, response, context and ground truth out of a RAG trace."""
import json
import logging

logger = logging.getLogger(__name__)

# Tracer types whose spans this module knows how to read.
_LANGCHAIN = "langchain"
_LLAMAINDEX = "llamaindex"


def _langchain_prompt(spans):
    """Return the user prompt found in langchain spans, or "" when absent.

    Three sources are tried, in order:
      1. the content of an ``llm.input_messages.<n>`` entry whose role is "user";
      2. the text following "Human:" in an ``llm.prompts`` list (the raw list
         is returned when no such marker produces a non-empty string);
      3. the ``input.value`` of an LLMChain / RetrievalQA /
         VectorStoreRetriever span.
    """
    for span in spans:
        try:
            attributes = span.get("attributes", {})
            if not attributes:
                continue

            for key, value in attributes.items():
                try:
                    is_user_role = (
                        key.startswith("llm.input_messages.")
                        and key.endswith(".message.role")
                        and value == "user"
                    )
                    if is_user_role:
                        # Key layout: llm.input_messages.<n>.message.role
                        message_num = key.split(".")[2]
                        content_key = f"llm.input_messages.{message_num}.message.content"
                        if content_key in attributes:
                            return attributes.get(content_key)
                except Exception as e:
                    logger.warning("Error processing attribute key-value pair: %s", e)
                    continue

            for key, value in attributes.items():
                try:
                    if key.startswith("llm.prompts") and isinstance(value, list):
                        human_message = None
                        for message in value:
                            if isinstance(message, str):
                                human_index = message.find("Human:")
                                if human_index != -1:
                                    human_message = message[human_index:].replace("Human:", "")
                                    break
                        return human_message if human_message else value
                except Exception as e:
                    logger.warning("Error processing attribute key-value pair for prompt: %s", e)
                    continue
        except Exception as e:
            logger.warning("Error processing span for prompt extraction: %s", e)
            continue

    # Fallback: read the chain / retriever input directly.
    for span in spans:
        try:
            if span["name"] == "LLMChain":
                input_value = span["attributes"].get("input.value", "{}")
                try:
                    return json.loads(input_value).get("question", "")
                except json.JSONDecodeError:
                    logger.warning("Invalid JSON in LLMChain input.value: %s", input_value)
                    continue
            elif span["name"] in ("RetrievalQA", "VectorStoreRetriever"):
                return span["attributes"].get("input.value", "")
        except Exception as e:
            logger.warning("Error processing span for fallback prompt extraction: %s", e)
            continue

    logger.warning("No user message found in any span")
    logger.warning("Returning empty string for prompt.")
    return ""


def _llamaindex_prompt(spans):
    """Return the query string found in llamaindex spans, or None when absent.

    Each span is handled independently, so a span lacking ``name`` or
    ``attributes`` is skipped instead of aborting the whole search.
    """
    for span in spans:
        try:
            if span["name"] == "BaseQueryEngine.query":
                return span["attributes"]["input.value"]

            raw_input = span["attributes"].get("input.value", "")
            if "query_bundle" not in raw_input:
                continue
            try:
                query_data = json.loads(raw_input)
            except json.JSONDecodeError:
                logger.error("Failed to parse query_bundle JSON")
                continue
            # Only index into the payload when it decoded to a mapping.
            if isinstance(query_data, dict) and "query_bundle" in query_data:
                return query_data["query_bundle"]["query_str"]
        except Exception as e:
            logger.warning("Error processing span for prompt extraction: %s", e)
            continue
    return None


def _extract_prompt(spans, tracer_type):
    """Return the prompt for ``tracer_type``; None on failure or unknown type."""
    try:
        if tracer_type == _LANGCHAIN:
            return _langchain_prompt(spans)

        prompt = _llamaindex_prompt(spans) if tracer_type == _LLAMAINDEX else None
        if prompt is None:
            logger.error("Prompt not found in the trace")
        return prompt
    except Exception as e:
        logger.error("Error while extracting prompt from trace: %s", e)
        return None


def _langchain_response(spans):
    """Return the model response found in langchain spans, or "" when absent.

    Tries ``llm.output_messages.<n>.message.content`` first, then the first
    generation's ``text`` inside a JSON ``output.value``, and finally the
    ``output.value`` of an LLMChain / RetrievalQA / VectorStoreRetriever span.
    """
    for span in spans:
        try:
            attributes = span.get("attributes", {})
            if not attributes:
                continue

            for key, value in attributes.items():
                try:
                    if key.startswith("llm.output_messages.") and key.endswith(".message.content"):
                        return value
                except Exception as e:
                    logger.warning("Error processing attribute key-value pair for response: %s", e)
                    continue

            for key, value in attributes.items():
                try:
                    if not key.startswith("output.value"):
                        continue
                    try:
                        output_json = json.loads(value)
                    except json.JSONDecodeError:
                        logger.warning("Invalid JSON in output.value: %s", value)
                        continue
                    generations = output_json.get("generations") if "generations" in output_json else None
                    # Expected layout: {"generations": [[{"text": ...}, ...], ...]}
                    if (
                        isinstance(generations, list)
                        and generations
                        and isinstance(generations[0], list)
                        and generations[0]
                    ):
                        first_generation = generations[0][0]
                        if "text" in first_generation:
                            return first_generation["text"]
                except Exception as e:
                    logger.warning("Error processing attribute key-value pair for response: %s", e)
                    continue
        except Exception as e:
            logger.warning("Error processing span for response extraction: %s", e)
            continue

    # Fallback: read the chain / retriever output directly.
    for span in spans:
        try:
            if span["name"] == "LLMChain":
                output_value = span["attributes"].get("output.value", "")
                try:
                    if output_value:
                        return json.loads(output_value)
                    return ""
                except json.JSONDecodeError:
                    logger.warning("Invalid JSON in LLMChain output.value: %s", output_value)
                    continue
            elif span["name"] in ("RetrievalQA", "VectorStoreRetriever"):
                return span["attributes"].get("output.value", "")
        except Exception as e:
            logger.warning("Error processing span for fallback response extraction: %s", e)
            continue

    logger.warning("No response found in any span")
    return ""


def _llamaindex_response(spans):
    """Return the ``output.value`` of the query-engine span, or None when absent."""
    for span in spans:
        try:
            if span["name"] == "BaseQueryEngine.query":
                return span["attributes"]["output.value"]
        except Exception as e:
            # Skip the malformed span; a later one may still hold the response.
            logger.warning("Error processing span for response extraction: %s", e)
            continue
    return None


def _extract_response(spans, tracer_type):
    """Return the response for ``tracer_type``; None on failure or unknown type."""
    try:
        if tracer_type == _LANGCHAIN:
            return _langchain_response(spans)

        response = _llamaindex_response(spans) if tracer_type == _LLAMAINDEX else None
        if response is None:
            logger.error("Response not found in the trace")
        return response
    except Exception as e:
        logger.error("Error while extracting response from trace: %s", e)
        return None


def _extract_context(spans, tracer_type):
    """Return the retrieval context for ``tracer_type``, or "" when absent.

    A "CustomContextSpan" is read through its ``input.value``; otherwise the
    framework's retriever span supplies
    ``retrieval.documents.1.document.content``.
    """
    document_key = "retrieval.documents.1.document.content"
    try:
        if tracer_type == _LANGCHAIN:
            for span in spans:
                try:
                    if span["name"] == "CustomContextSpan":
                        return span["attributes"].get("input.value", "")
                    elif span["name"] == "VectorStoreRetriever":
                        return span["attributes"].get(document_key, "")
                except Exception as e:
                    logger.warning("Error processing span for context extraction: %s", e)
                    continue
        elif tracer_type == _LLAMAINDEX:
            for span in spans:
                try:
                    if span["name"] == "CustomContextSpan":
                        return span["attributes"].get("input.value", "")
                    elif span["name"] == "BaseRetriever.retrieve":
                        # A retriever span without the document key raises
                        # KeyError here and is skipped by the handler below.
                        return span["attributes"][document_key]
                except Exception as e:
                    logger.warning("Error processing span for context extraction: %s", e)
                    continue
        logger.warning("Context not found in the trace")
        return ""
    except Exception as e:
        logger.error("Error while extracting context from trace: %s", e)
        return ""


def _extract_ground_truth(spans, tracer_type):
    """Return the ``input.value`` of a "CustomGroundTruthSpan", or "" when absent."""
    try:
        # Both supported tracer types store ground truth the same way.
        if tracer_type in (_LANGCHAIN, _LLAMAINDEX):
            for span in spans:
                try:
                    if span["name"] == "CustomGroundTruthSpan":
                        return span["attributes"].get("input.value", "")
                except Exception as e:
                    logger.warning("Error processing span for ground truth extraction: %s", e)
                    continue
        logger.warning("Ground truth not found in the trace")
        return ""
    except Exception as e:
        logger.error("Error while extracting ground truth from trace: %s", e)
        return ""


def rag_trace_json_converter(input_trace):
    """Extract the RAG fields from a trace dictionary.

    Args:
        input_trace: Mapping with a ``tracer_type`` entry ("langchain" or
            "llamaindex") and a ``data`` list whose first element carries the
            ``spans`` list to inspect.

    Returns:
        A ``(prompt, response, context, gt)`` tuple. ``context`` and ``gt``
        fall back to ``""``. ``prompt`` and ``response`` fall back to ``""``
        for langchain traces and to ``None`` for llamaindex or unrecognised
        tracer types.
    """
    tracer_type = input_trace.get("tracer_type")

    # A missing or empty "data" list is treated as a trace without spans
    # rather than failing on the [0] lookup.
    data = input_trace.get("data") or []
    if data:
        spans = data[0].get("spans", [])
    else:
        logger.warning("Trace contains no data; no spans to extract from")
        spans = []

    prompt = _extract_prompt(spans, tracer_type)
    response = _extract_response(spans, tracer_type)
    context = _extract_context(spans, tracer_type)
    gt = _extract_ground_truth(spans, tracer_type)

    return prompt, response, context, gt
|