ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,269 @@
1
+ import json
2
+ import sys
3
+ from datetime import datetime
4
+ from typing import final
5
+ import pytz
6
+ import uuid
7
+ from ragaai_catalyst.tracers.agentic_tracing.utils.llm_utils import calculate_llm_cost, get_model_cost
8
+
9
def convert_time_format(original_time_str, target_timezone_str="Asia/Kolkata"):
    """Convert a UTC ISO-8601 time string to the target timezone's format.

    Args:
        original_time_str (str): UTC time string such as
            "2025-02-28T22:05:57.945146Z". The fractional-seconds part is
            optional (some producers emit "2025-02-28T22:05:57Z").
        target_timezone_str (str): IANA timezone name to convert into
            (default "Asia/Kolkata").

    Returns:
        str: The converted time, e.g. "2025-03-01T03:35:57.945146+05:30".

    Raises:
        ValueError: if the input matches neither accepted UTC format.
    """
    try:
        utc_time = datetime.strptime(original_time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        # BUG FIX: accept timestamps without microseconds instead of crashing.
        utc_time = datetime.strptime(original_time_str, "%Y-%m-%dT%H:%M:%SZ")
    # Mark the parsed (naive) datetime as UTC, then shift to the target zone.
    utc_time = utc_time.replace(tzinfo=pytz.UTC)
    target_time = utc_time.astimezone(pytz.timezone(target_timezone_str))
    formatted_time = target_time.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
    # strftime's %z yields "+0530"; insert a colon for readability -> "+05:30".
    return formatted_time[:-2] + ':' + formatted_time[-2:]
32
+
33
+
34
def get_uuid(name):
    """Return a deterministic, name-based UUID string for *name*.

    Uses uuid.uuid5 (SHA-1, DNS namespace), so the same name always maps to
    the same UUID — this gives spans stable hash ids across runs. The old
    docstring claimed the UUID was random; it is not.
    """
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, name))
37
+
38
def _attr_or_json(span, key, default=""):
    """Return attribute *key* parsed as JSON when possible, the raw value otherwise."""
    if key not in span["attributes"]:
        return default
    raw = span["attributes"][key]
    try:
        return json.loads(raw)
    except Exception:
        return raw


def get_spans(input_trace, custom_model_cost):
    """Convert raw OpenInference spans into UI-format span dicts.

    Args:
        input_trace (list[dict]): Parsed OpenInference spans.
        custom_model_cost (dict): Per-model cost overrides forwarded to
            calculate_llm_cost.

    Returns:
        list[dict]: One UI-format span dict per input span, in input order.
    """
    data = []
    span_type_mapping = {"AGENT": "agent", "LLM": "llm", "TOOL": "tool"}
    span_name_occurrence = {}
    for span in input_trace:
        final_span = {}
        span_type = span_type_mapping.get(
            span["attributes"]["openinference.span.kind"], "custom"
        )
        final_span["id"] = span["context"]["span_id"]
        # Disambiguate repeated span names with a 0-based occurrence suffix.
        if span["name"] not in span_name_occurrence:
            span_name_occurrence[span["name"]] = 0
        else:
            span_name_occurrence[span["name"]] += 1
        final_span["name"] = span["name"] + "." + str(span_name_occurrence[span["name"]])
        final_span["hash_id"] = get_uuid(final_span["name"])
        final_span["source_hash_id"] = None
        final_span["type"] = span_type
        final_span["start_time"] = convert_time_format(span["start_time"])
        final_span["end_time"] = convert_time_format(span["end_time"])
        final_span["parent_id"] = span["parent_id"]
        # Surface the whole status payload only for failed spans.
        if span["status"]["status_code"].lower() == "error":
            final_span["error"] = span["status"]
        else:
            final_span["error"] = None
        # ToDo: Find final trace format for sending error description
        final_span["metrics"] = []
        final_span["feedback"] = None
        final_span["data"] = {}
        final_span["info"] = {}
        final_span["extra_info"] = {}

        if span_type == "agent":
            # Missing input/output default to "" for agent spans.
            final_span["data"]["input"] = _attr_or_json(span, "input.value")
            final_span["data"]["output"] = _attr_or_json(span, "output.value")

        elif span_type == "tool":
            final_span["data"]["input"] = _attr_or_json(span, "input.value")
            final_span["data"]["output"] = _attr_or_json(span, "output.value")
            # Copy every tool-related attribute into info verbatim.
            tool_fields = [key for key in span["attributes"] if "tool" in key]
            final_span["info"].update(
                {key: span["attributes"].get(key, None) for key in tool_fields}
            )

        elif span_type == "llm":
            attributes = span["attributes"]
            # Collect all input-ish attributes (skipping mime-type markers),
            # JSON-decoding where the value is valid JSON.
            input_data = {}
            for key in attributes:
                if "input" in key and "mime_type" not in key:
                    try:
                        input_data[key] = json.loads(attributes[key])
                    except json.JSONDecodeError:
                        input_data[key] = attributes.get(key, None)
            final_span["data"]["input"] = input_data

            output_data = {"content": {}}
            for key in attributes:
                if "output" in key and "mime_type" not in key:
                    try:
                        output_data["content"][key] = json.loads(attributes[key])
                    except json.JSONDecodeError:
                        output_data["content"][key] = attributes.get(key, None)
            final_span["data"]["output"] = [output_data]

            final_span["info"]["model"] = attributes.get("llm.model_name")
            if "llm.invocation_parameters" in attributes:
                try:
                    final_span["info"].update(
                        **json.loads(attributes["llm.invocation_parameters"])
                    )
                except json.JSONDecodeError as e:
                    print(f"Error in parsing: {e}")
                try:
                    final_span["extra_info"]["llm_parameters"] = json.loads(
                        attributes["llm.invocation_parameters"]
                    )
                except json.JSONDecodeError:
                    # Keep the raw string when it is not valid JSON.
                    final_span["extra_info"]["llm_parameters"] = attributes[
                        "llm.invocation_parameters"
                    ]
            else:
                final_span["extra_info"]["llm_parameters"] = None

        else:
            # Custom spans: only set the keys that are actually present.
            if "input.value" in span["attributes"]:
                final_span["data"]["input"] = _attr_or_json(span, "input.value")
            if "output.value" in span["attributes"]:
                final_span["data"]["output"] = _attr_or_json(span, "output.value")

        final_span["info"]["cost"] = {}
        final_span["info"]["tokens"] = {}

        # BUG FIX: model_name was previously only bound when the span carried
        # a model, so a later span without one could read a stale value from a
        # prior iteration (or hit NameError). Default to None explicitly.
        model_name = final_span["info"].get("model")

        # Best effort: fall back to zero-cost defaults if the table is
        # unavailable.
        model_costs = {
            "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0}
        }
        try:
            model_costs = get_model_cost()
        except Exception:
            pass

        if "resource" in span:
            final_span["info"].update(span["resource"])
        if "llm.token_count.prompt" in span["attributes"]:
            final_span["info"]["tokens"]["prompt_tokens"] = span["attributes"][
                "llm.token_count.prompt"
            ]
        if "llm.token_count.completion" in span["attributes"]:
            final_span["info"]["tokens"]["completion_tokens"] = span["attributes"][
                "llm.token_count.completion"
            ]
        if "llm.token_count.total" in span["attributes"]:
            final_span["info"]["tokens"]["total_tokens"] = span["attributes"][
                "llm.token_count.total"
            ]

        if "prompt_tokens" in final_span["info"]["tokens"]:
            # BUG FIX: completion/total were read unconditionally and raised
            # KeyError when the producer reported only prompt tokens.
            token_usage = {
                "prompt_tokens": final_span["info"]["tokens"]["prompt_tokens"],
                "completion_tokens": final_span["info"]["tokens"].get(
                    "completion_tokens", 0
                ),
                "total_tokens": final_span["info"]["tokens"].get("total_tokens", 0),
            }
            final_span["info"]["cost"] = calculate_llm_cost(
                token_usage=token_usage,
                model_name=model_name,
                model_costs=model_costs,
                model_custom_cost=custom_model_cost,
            )
        data.append(final_span)
    return data
194
+
195
def convert_json_format(input_trace, custom_model_cost):
    """Convert a list of OpenInference span dicts into the UI trace format.

    Args:
        input_trace (list[dict]): Parsed OpenInference spans (one dict per
            span). NOTE: this is a list of dicts, not a JSON string.
        custom_model_cost (dict): Per-model cost overrides forwarded to the
            span-level cost calculation.

    Returns:
        dict: The converted trace in UI format.

    Raises:
        Exception: wraps (and chains) any error raised while converting spans.
    """
    final_trace = {
        "id": input_trace[0]["context"]["trace_id"],
        "trace_name": "",
        "project_name": "",
        # Trace window spans the earliest start and latest end of all spans.
        "start_time": convert_time_format(min(item["start_time"] for item in input_trace)),
        "end_time": convert_time_format(max(item["end_time"] for item in input_trace)),
    }
    final_trace["metadata"] = {
        "tokens": {
            "prompt_tokens": 0.0,
            "completion_tokens": 0.0,
            "total_tokens": 0.0,
        },
        "cost": {
            "input_cost": 0.0,
            "output_cost": 0.0,
            "total_cost": 0.0,
        },
    }
    final_trace["replays"] = {"source": None}
    final_trace["data"] = [{}]
    try:
        final_trace["data"][0]["spans"] = get_spans(input_trace, custom_model_cost)
    except Exception as e:
        # BUG FIX: chain the original exception so the traceback is preserved.
        raise Exception(f"Error in get_spans function: {e}") from e
    final_trace["network_calls"] = []
    final_trace["interactions"] = []

    # Aggregate token and cost totals across all llm spans.
    for itr in final_trace["data"][0]["spans"]:
        if itr["type"] == "llm":
            tokens = itr["info"].get("tokens", {})
            cost = itr["info"].get("cost", {})
            if "prompt_tokens" in tokens:
                final_trace["metadata"]["tokens"]["prompt_tokens"] += tokens.get("prompt_tokens", 0.0)
                final_trace["metadata"]["cost"]["input_cost"] += cost.get("input_cost", 0.0)
            if "completion_tokens" in tokens:
                final_trace["metadata"]["tokens"]["completion_tokens"] += tokens.get("completion_tokens", 0.0)
                final_trace["metadata"]["cost"]["output_cost"] += cost.get("output_cost", 0.0)
            if "total_tokens" in tokens:
                final_trace["metadata"]["tokens"]["total_tokens"] += tokens.get("total_tokens", 0.0)
                final_trace["metadata"]["cost"]["total_cost"] += cost.get("total_cost", 0.0)

    # Flatten the grand totals for quick access.
    final_trace["metadata"]["total_cost"] = final_trace["metadata"]["cost"]["total_cost"]
    final_trace["metadata"]["total_tokens"] = final_trace["metadata"]["tokens"]["total_tokens"]

    return final_trace
252
+
253
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python convert.py <input_openinference_trace_path> <output_trace_path>")
        print("Example: python convert.py sample_openinference_trace/test.json output.json")
        sys.exit(1)
    input_file_path = sys.argv[1]
    output_file_path = sys.argv[2]
    # Input is JSON-lines: one OpenInference span object per line.
    with open(input_file_path, 'r') as fin:
        input_trace = [json.loads(line) for line in fin]
    # BUG FIX: convert_json_format requires a custom_model_cost argument; the
    # old call passed only input_trace and raised TypeError. The CLI has no
    # custom costs, so pass an empty override dict.
    payload = convert_json_format(input_trace, {})
    print(payload)
    with open(output_file_path, "w") as fout:
        json.dump(payload, fout)
        fout.write("\n")