ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,269 @@
|
|
1
|
+
import json
|
2
|
+
import sys
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import final
|
5
|
+
import pytz
|
6
|
+
import uuid
|
7
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.llm_utils import calculate_llm_cost, get_model_cost
|
8
|
+
|
9
|
+
def convert_time_format(original_time_str, target_timezone_str="Asia/Kolkata"):
    """
    Convert a UTC timestamp string into the target timezone's format.

    Args:
        original_time_str (str): UTC time string, e.g. "2025-02-28T22:05:57.945146Z".
        target_timezone_str (str): Destination timezone name (default "Asia/Kolkata").

    Returns:
        str: Timestamp like "2025-03-01T03:35:57.945146+05:30" (offset has a colon).
    """
    parsed_utc = datetime.strptime(original_time_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
        tzinfo=pytz.UTC
    )
    localized = parsed_utc.astimezone(pytz.timezone(target_timezone_str))
    stamp = localized.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
    # strftime emits the offset as "+0530"; insert a colon for readability ("+05:30").
    return f"{stamp[:-2]}:{stamp[-2:]}"
|
32
|
+
|
33
|
+
|
34
|
+
def get_uuid(name):
    """
    Return a deterministic UUIDv5 string derived from *name*.

    Uses the DNS namespace, so the same name always maps to the same UUID.
    (The previous docstring incorrectly described this as a random,
    name-independent UUID.)
    """
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, name))
|
37
|
+
|
38
|
+
def _json_or_raw(attributes, key):
    """Best-effort JSON decode of attributes[key]; fall back to the raw value."""
    try:
        return json.loads(attributes[key])
    except Exception:
        return attributes[key]


def get_spans(input_trace, custom_model_cost):
    """
    Convert raw OpenInference span dicts into the Catalyst UI span format.

    Args:
        input_trace (list[dict]): Raw spans as exported by the tracer.
        custom_model_cost (dict | None): Per-model cost overrides forwarded to
            ``calculate_llm_cost``.

    Returns:
        list[dict]: Converted spans with data/info/extra_info/cost populated.
    """
    data = []
    span_type_mapping = {"AGENT": "agent", "LLM": "llm", "TOOL": "tool"}
    span_name_occurrence = {}
    # The model-cost table does not depend on the span, so load it once,
    # best effort -- fall back to a zero-cost default on any failure.
    model_costs = {
        "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0}
    }
    try:
        model_costs = get_model_cost()
    except Exception:
        pass

    for span in input_trace:
        attributes = span["attributes"]
        final_span = {}
        span_type = span_type_mapping.get(
            attributes["openinference.span.kind"], "custom"
        )
        final_span["id"] = span["context"]["span_id"]
        # Disambiguate repeated span names with a ".<occurrence>" suffix
        # (first occurrence gets ".0").
        if span["name"] not in span_name_occurrence:
            span_name_occurrence[span["name"]] = 0
        else:
            span_name_occurrence[span["name"]] += 1
        final_span["name"] = span["name"] + "." + str(span_name_occurrence[span["name"]])
        final_span["hash_id"] = get_uuid(final_span["name"])
        final_span["source_hash_id"] = None
        final_span["type"] = span_type
        final_span["start_time"] = convert_time_format(span["start_time"])
        final_span["end_time"] = convert_time_format(span["end_time"])
        final_span["parent_id"] = span["parent_id"]
        # Forward the whole status object when the span errored.
        # ToDo: Find final trace format for sending error description
        if span["status"]["status_code"].lower() == "error":
            final_span["error"] = span["status"]
        else:
            final_span["error"] = None
        final_span["feedback"] = None
        final_span["metrics"] = []
        final_span["data"] = {}
        final_span["info"] = {}
        final_span["extra_info"] = {}

        if span_type == "agent":
            final_span["data"]["input"] = (
                _json_or_raw(attributes, "input.value") if "input.value" in attributes else ""
            )
            final_span["data"]["output"] = (
                _json_or_raw(attributes, "output.value") if "output.value" in attributes else ""
            )
        elif span_type == "tool":
            final_span["data"]["input"] = (
                _json_or_raw(attributes, "input.value") if "input.value" in attributes else ""
            )
            final_span["data"]["output"] = (
                _json_or_raw(attributes, "output.value") if "output.value" in attributes else ""
            )
            # Surface every tool-related attribute in the info section.
            final_span["info"].update(
                {key: attributes.get(key) for key in attributes if "tool" in key}
            )
        elif span_type == "llm":
            # Collect all non-mime-type input.* attributes, JSON-decoded when possible.
            input_data = {}
            for key in attributes:
                if "input" in key and "mime_type" not in key:
                    try:
                        input_data[key] = json.loads(attributes[key])
                    except json.JSONDecodeError:
                        input_data[key] = attributes.get(key)
            final_span["data"]["input"] = input_data

            # Same for output.* attributes, nested under a single "content" dict.
            output_data = {"content": {}}
            for key in attributes:
                if "output" in key and "mime_type" not in key:
                    try:
                        output_data["content"][key] = json.loads(attributes[key])
                    except json.JSONDecodeError:
                        output_data["content"][key] = attributes.get(key)
            final_span["data"]["output"] = [output_data]

            final_span["info"]["model"] = attributes.get("llm.model_name")
            if "llm.invocation_parameters" in attributes:
                raw_params = attributes["llm.invocation_parameters"]
                try:
                    final_span["info"].update(**json.loads(raw_params))
                except json.JSONDecodeError as e:
                    print(f"Error in parsing: {e}")
                try:
                    final_span["extra_info"]["llm_parameters"] = json.loads(raw_params)
                except json.JSONDecodeError:
                    final_span["extra_info"]["llm_parameters"] = raw_params
            else:
                final_span["extra_info"]["llm_parameters"] = None
        else:
            # Custom spans: only set input/output when present (no "" default).
            if "input.value" in attributes:
                final_span["data"]["input"] = _json_or_raw(attributes, "input.value")
            if "output.value" in attributes:
                final_span["data"]["output"] = _json_or_raw(attributes, "output.value")

        final_span["info"]["cost"] = {}
        final_span["info"]["tokens"] = {}
        # BUG FIX: previously model_name was only bound inside an
        # 'if "model" in info' branch, so a non-LLM span carrying token
        # counts raised NameError in the cost computation below.
        model_name = final_span["info"].get("model")

        if "resource" in span:
            final_span["info"].update(span["resource"])
        if "llm.token_count.prompt" in attributes:
            final_span["info"]["tokens"]["prompt_tokens"] = attributes["llm.token_count.prompt"]
        if "llm.token_count.completion" in attributes:
            final_span["info"]["tokens"]["completion_tokens"] = attributes["llm.token_count.completion"]
        if "llm.token_count.total" in attributes:
            final_span["info"]["tokens"]["total_tokens"] = attributes["llm.token_count.total"]

        # BUG FIX: use .get() so a span reporting prompt tokens but missing
        # completion/total counts no longer raises KeyError.
        tokens = final_span["info"]["tokens"]
        if "prompt_tokens" in tokens:
            token_usage = {
                "prompt_tokens": tokens["prompt_tokens"],
                "completion_tokens": tokens.get("completion_tokens", 0),
                "total_tokens": tokens.get("total_tokens", 0),
            }
            final_span["info"]["cost"] = calculate_llm_cost(
                token_usage=token_usage,
                model_name=model_name,
                model_costs=model_costs,
                model_custom_cost=custom_model_cost,
            )
        data.append(final_span)
    return data
|
194
|
+
|
195
|
+
def convert_json_format(input_trace, custom_model_cost=None):
    """
    Convert a list of raw OpenInference spans into the Catalyst UI trace format.

    Args:
        input_trace (list[dict]): Raw span dicts; must be non-empty (the trace id
            is taken from the first span).
        custom_model_cost (dict | None): Optional per-model cost overrides,
            forwarded to ``get_spans``. Defaults to None (no overrides) so the
            function can be called with a single argument, as the script entry
            point below does.

    Returns:
        dict: The converted trace.

    Raises:
        Exception: If span conversion fails; the original error is chained as
            the cause.
    """
    final_trace = {
        "id": input_trace[0]["context"]["trace_id"],
        "trace_name": "",
        "project_name": "",
        # The trace window spans the earliest start and latest end of all spans.
        "start_time": convert_time_format(min(item["start_time"] for item in input_trace)),
        "end_time": convert_time_format(max(item["end_time"] for item in input_trace)),
    }
    final_trace["metadata"] = {
        "tokens": {
            "prompt_tokens": 0.0,
            "completion_tokens": 0.0,
            "total_tokens": 0.0,
        },
        "cost": {
            "input_cost": 0.0,
            "output_cost": 0.0,
            "total_cost": 0.0,
        },
    }
    final_trace["replays"] = {"source": None}
    final_trace["data"] = [{}]
    try:
        final_trace["data"][0]["spans"] = get_spans(input_trace, custom_model_cost)
    except Exception as e:
        # Chain the original exception so the root cause is not lost.
        raise Exception(f"Error in get_spans function: {e}") from e
    final_trace["network_calls"] = []
    final_trace["interactions"] = []

    # Aggregate per-span token counts and costs into the trace metadata.
    for itr in final_trace["data"][0]["spans"]:
        if itr["type"] == "llm" and "tokens" in itr["info"]:
            span_tokens = itr["info"]["tokens"]
            span_cost = itr["info"]["cost"]
            if "prompt_tokens" in span_tokens:
                final_trace["metadata"]["tokens"]["prompt_tokens"] += span_tokens.get("prompt_tokens", 0.0)
                final_trace["metadata"]["cost"]["input_cost"] += span_cost.get("input_cost", 0.0)
            if "completion_tokens" in span_tokens:
                final_trace["metadata"]["tokens"]["completion_tokens"] += span_tokens.get("completion_tokens", 0.0)
                final_trace["metadata"]["cost"]["output_cost"] += span_cost.get("output_cost", 0.0)
            if "total_tokens" in span_tokens:
                final_trace["metadata"]["tokens"]["total_tokens"] += span_tokens.get("total_tokens", 0.0)
                final_trace["metadata"]["cost"]["total_cost"] += span_cost.get("total_cost", 0.0)

    # Mirror the totals at the top level of metadata for convenient access.
    final_trace["metadata"]["total_cost"] = final_trace["metadata"]["cost"]["total_cost"]
    final_trace["metadata"]["total_tokens"] = final_trace["metadata"]["tokens"]["total_tokens"]

    return final_trace
|
252
|
+
|
253
|
+
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python convert.py <input_openinference_trace_path> <output_trace_path>")
        print("Example: python convert.py sample_openinference_trace/test.json output.json")
        sys.exit(1)
    input_file_path = sys.argv[1]
    output_file_path = sys.argv[2]
    # The input file is JSON Lines: one span object per line.
    with open(input_file_path, "r") as fin:
        input_trace = [json.loads(line) for line in fin]
    # BUG FIX: convert_json_format previously required a second positional
    # argument (custom_model_cost), so this single-argument call raised
    # TypeError. Pass None explicitly: no custom cost overrides.
    payload = convert_json_format(input_trace, None)
    print(payload)
    with open(output_file_path, "w") as fout:
        json.dump(payload, fout)
        fout.write("\n")
|