ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +217 -106
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +27 -41
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +127 -21
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +88 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +258 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
  23. ragaai_catalyst/tracers/tracer.py +6 -2
  24. ragaai_catalyst/tracers/upload_traces.py +46 -57
  25. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/METADATA +6 -2
  26. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/RECORD +28 -22
  27. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  28. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +0 -0
  29. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
@@ -19,8 +19,13 @@ from .data_structure import (
19
19
  )
20
20
 
21
21
  from ..upload_traces import UploadTraces
22
+ from .upload_agentic_traces import UploadAgenticTraces
23
+ from .upload_code import upload_code
22
24
  from ...ragaai_catalyst import RagaAICatalyst
23
25
 
26
+ from .file_name_tracker import TrackName
27
+ from .zip_list_of_unique_files import zip_list_of_unique_files
28
+
24
29
  class TracerJSONEncoder(json.JSONEncoder):
25
30
  def default(self, obj):
26
31
  if isinstance(obj, datetime):
@@ -50,13 +55,10 @@ class BaseTracer:
50
55
  self.project_id = self.user_details['project_id'] # Access the project_id
51
56
 
52
57
  # Initialize trace data
53
- self.trace_id = str(uuid.uuid4())
54
- self.start_time = datetime.now().isoformat()
58
+ self.trace_id = None
59
+ self.start_time = None
55
60
  self.components: List[Component] = []
56
- self.data_key = [{"start_time": self.start_time,
57
- "end_time": "",
58
- "spans": self.components
59
- }]
61
+ self.file_tracker = TrackName()
60
62
 
61
63
  def _get_system_info(self) -> SystemInfo:
62
64
  # Get OS info
@@ -146,11 +148,22 @@ class BaseTracer:
146
148
  system_info=self._get_system_info(),
147
149
  resources=self._get_resources()
148
150
  )
151
+
152
+ # Generate a unique trace ID, when trace starts
153
+ self.trace_id = str(uuid.uuid4())
154
+
155
+ # Get the start time
156
+ self.start_time = datetime.now().isoformat()
157
+
158
+ self.data_key = [{"start_time": datetime.now().isoformat(),
159
+ "end_time": "",
160
+ "spans": self.components
161
+ }]
149
162
 
150
163
  self.trace = Trace(
151
164
  id=self.trace_id,
152
165
  project_name=self.project_name,
153
- start_time=self.start_time,
166
+ start_time=datetime.now().isoformat(),
154
167
  end_time="", # Will be set when trace is stopped
155
168
  metadata=metadata,
156
169
  data=self.data_key,
@@ -165,7 +178,7 @@ class BaseTracer:
165
178
 
166
179
  # Change span ids to int
167
180
  self.trace = self._change_span_ids_to_int(self.trace)
168
- self.trace = self._change_agent_intput_output(self.trace)
181
+ self.trace = self._change_agent_input_output(self.trace)
169
182
  self.trace = self._extract_cost_tokens(self.trace)
170
183
 
171
184
  # Create traces directory if it doesn't exist
@@ -173,21 +186,30 @@ class BaseTracer:
173
186
  self.traces_dir.mkdir(exist_ok=True)
174
187
  filename = self.trace.id + ".json"
175
188
  filepath = self.traces_dir / filename
176
-
177
- # Save to JSON file using custom encoder
189
+
190
+ #get unique files and zip it. Generate a unique hash ID for the contents of the files
191
+ list_of_unique_files = self.file_tracker.get_unique_files()
192
+ hash_id, zip_path = zip_list_of_unique_files(list_of_unique_files)
193
+
194
+ #replace source code with zip_path
195
+ self.trace.metadata.system_info.source_code = hash_id
196
+
197
+ # Clean up trace_data before saving
198
+ trace_data = self.trace.__dict__
199
+ cleaned_trace_data = self._clean_trace(trace_data)
200
+
178
201
  with open(filepath, 'w') as f:
179
- json.dump(self.trace.__dict__, f, cls=TracerJSONEncoder, indent=2)
202
+ json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)
180
203
 
181
204
  print(f"Trace saved to {filepath}")
182
- # import pdb; pdb.set_trace()
183
205
  # Upload traces
184
206
  json_file_path = str(filepath)
185
207
  project_name = self.project_name
186
- project_id = self.project_id # TODO: Replace with actual project ID
208
+ project_id = self.project_id
187
209
  dataset_name = self.dataset_name
188
210
  user_detail = self.user_details
189
211
  base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
190
- upload_traces = UploadTraces(
212
+ upload_traces = UploadAgenticTraces(
191
213
  json_file_path=json_file_path,
192
214
  project_name=project_name,
193
215
  project_id=project_id,
@@ -195,7 +217,20 @@ class BaseTracer:
195
217
  user_detail=user_detail,
196
218
  base_url=base_url
197
219
  )
198
- upload_traces.upload_traces()
220
+ upload_traces.upload_agentic_traces()
221
+
222
+ #Upload Codehash
223
+ response = upload_code(
224
+ hash_id=hash_id,
225
+ zip_path=zip_path,
226
+ project_name=project_name,
227
+ dataset_name=dataset_name
228
+ )
229
+ print(response)
230
+
231
+ # Cleanup
232
+ self.components = []
233
+ self.file_tracker = TrackName()
199
234
 
200
235
  def add_component(self, component: Component):
201
236
  """Add a component to the trace"""
@@ -209,7 +244,6 @@ class BaseTracer:
209
244
  self.stop()
210
245
 
211
246
  def _change_span_ids_to_int(self, trace):
212
- # import pdb; pdb.set_trace()
213
247
  id, parent_id = 1, 0
214
248
  for span in trace.data[0]["spans"]:
215
249
  span.id = id
@@ -222,14 +256,32 @@ class BaseTracer:
222
256
  id += 1
223
257
  return trace
224
258
 
225
- def _change_agent_intput_output(self, trace):
259
+ def _change_agent_input_output(self, trace):
226
260
  for span in trace.data[0]["spans"]:
227
261
  if span.type == "agent":
228
- # import pdb; pdb.set_trace()
229
262
  childrens = span.data["children"]
230
- if childrens != []:
231
- span.data["input"] = childrens[0]["data"]["input"]
232
- span.data["output"] = childrens[-1]["data"]["output"]
263
+ span.data["input"] = None
264
+ span.data["output"] = None
265
+ if childrens:
266
+ # Find first non-null input going forward
267
+ for child in childrens:
268
+ if "data" not in child:
269
+ continue
270
+ input_data = child["data"].get("input")
271
+
272
+ if input_data:
273
+ span.data["input"] = input_data['args'] if hasattr(input_data, 'args') else input_data
274
+ break
275
+
276
+ # Find first non-null output going backward
277
+ for child in reversed(childrens):
278
+ if "data" not in child:
279
+ continue
280
+ output_data = child["data"].get("output")
281
+
282
+ if output_data and output_data != "" and output_data != "None":
283
+ span.data["output"] = output_data
284
+ break
233
285
  return trace
234
286
 
235
287
  def _extract_cost_tokens(self, trace):
@@ -251,6 +303,8 @@ class BaseTracer:
251
303
  tokens[key] += value
252
304
  if span.type == "agent":
253
305
  for children in span.data["children"]:
306
+ if 'type' not in children:
307
+ continue
254
308
  if children["type"] != "llm":
255
309
  continue
256
310
  info = children["info"]
@@ -267,4 +321,56 @@ class BaseTracer:
267
321
  tokens[key] += value
268
322
  trace.metadata.cost = cost
269
323
  trace.metadata.tokens = tokens
324
+ return trace
325
+
326
+ def _clean_trace(self, trace):
327
+ # Convert span to dict if it has to_dict method
328
+ def _to_dict_if_needed(obj):
329
+ if hasattr(obj, 'to_dict'):
330
+ return obj.to_dict()
331
+ return obj
332
+
333
+ def deduplicate_spans(spans):
334
+ seen_llm_spans = {} # Dictionary to track unique LLM spans
335
+ unique_spans = []
336
+
337
+ for span in spans:
338
+ # Convert span to dictionary if needed
339
+ span_dict = _to_dict_if_needed(span)
340
+
341
+ # Skip spans without hash_id
342
+ if 'hash_id' not in span_dict:
343
+ continue
344
+
345
+ if span_dict.get('type') == 'llm':
346
+ # Create a unique key based on hash_id, input, and output
347
+ span_key = (
348
+ span_dict.get('hash_id'),
349
+ str(span_dict.get('data', {}).get('input')),
350
+ str(span_dict.get('data', {}).get('output'))
351
+ )
352
+
353
+ if span_key not in seen_llm_spans:
354
+ seen_llm_spans[span_key] = True
355
+ unique_spans.append(span)
356
+ else:
357
+ # For non-LLM spans, process their children if they exist
358
+ if 'data' in span_dict and 'children' in span_dict['data']:
359
+ children = span_dict['data']['children']
360
+ # Filter and deduplicate children
361
+ filtered_children = deduplicate_spans(children)
362
+ if isinstance(span, dict):
363
+ span['data']['children'] = filtered_children
364
+ else:
365
+ span.data['children'] = filtered_children
366
+ unique_spans.append(span)
367
+
368
+ return unique_spans
369
+
370
+ # Remove any spans without hash ids
371
+ for data in trace.get('data', []):
372
+ if 'spans' in data:
373
+ # First filter out spans without hash_ids, then deduplicate
374
+ data['spans'] = deduplicate_spans(data['spans'])
375
+
270
376
  return trace
@@ -1,6 +1,7 @@
1
1
  from dataclasses import dataclass
2
- from typing import List, Dict, Optional, Any
2
+ from typing import List, Dict, Optional, Any, Union
3
3
  from datetime import datetime
4
+ import uuid
4
5
 
5
6
  @dataclass
6
7
  class OSInfo:
@@ -99,12 +100,20 @@ class NetworkCall:
99
100
  request: Dict[str, Any]
100
101
  response: Dict[str, Any]
101
102
 
102
- @dataclass
103
103
  class Interaction:
104
- id: str
105
- interaction_type: str
106
- content: Optional[str]
107
- timestamp: str
104
+ def __init__(self, id, type: str, content: str, timestamp: str):
105
+ self.id = id
106
+ self.type = type
107
+ self.content = content
108
+ self.timestamp = timestamp
109
+
110
+ def to_dict(self):
111
+ return {
112
+ "id": self.id,
113
+ "interaction_type": self.type,
114
+ "content": self.content,
115
+ "timestamp": self.timestamp
116
+ }
108
117
 
109
118
  @dataclass
110
119
  class Error:
@@ -150,54 +159,60 @@ class ToolInfo:
150
159
  version: str
151
160
  memory_used: int
152
161
 
153
- @dataclass
154
- class LLMComponent:
155
- id: str
156
- hash_id: str
157
- source_hash_id: Optional[str]
158
- type: str = "llm"
159
- name: str = ""
160
- start_time: str = ""
161
- end_time: str = ""
162
- error: Optional[Error] = None
163
- parent_id: Optional[str] = None
164
- info: LLMInfo = None
165
- data: Dict[str, Any] = None
166
- network_calls: List[NetworkCall] = None
167
- interactions: List[Interaction] = None
168
-
169
- @dataclass
170
- class AgentComponent:
171
- id: str
172
- hash_id: str
173
- source_hash_id: Optional[str]
174
- type: str = "agent"
175
- name: str = ""
176
- start_time: str = ""
177
- end_time: str = ""
178
- error: Optional[Error] = None
179
- parent_id: Optional[str] = None
180
- info: AgentInfo = None
181
- data: Dict[str, Any] = None
182
- network_calls: List[NetworkCall] = None
183
- interactions: List[Interaction] = None
184
- # children: List['Component'] = None
185
-
186
- @dataclass
187
- class ToolComponent:
188
- id: str
189
- hash_id: str
190
- source_hash_id: Optional[str]
191
- type: str = "tool"
192
- name: str = ""
193
- start_time: str = ""
194
- end_time: str = ""
195
- error: Optional[Error] = None
196
- parent_id: Optional[str] = None
197
- info: ToolInfo = None
198
- data: Dict[str, Any] = None
199
- network_calls: List[NetworkCall] = None
200
- interactions: List[Interaction] = None
162
+ class Component:
163
+ def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Error] = None):
164
+ self.id = id
165
+ self.error = error
166
+ self.hash_id = hash_id
167
+ self.type = type
168
+ self.name = name
169
+ self.start_time = start_time
170
+ self.end_time = end_time
171
+ self.parent_id = parent_id
172
+ self.info = info
173
+ self.data = data
174
+ self.network_calls = network_calls or []
175
+ self.interactions = []
176
+ if interactions:
177
+ for interaction in interactions:
178
+ if isinstance(interaction, dict):
179
+ self.interactions.append(
180
+ Interaction(
181
+ id=interaction.get("id", str(uuid.uuid4())),
182
+ type=interaction.get("interaction_type", ""),
183
+ content=str(interaction.get("content", "")),
184
+ timestamp=interaction.get("timestamp", datetime.utcnow().isoformat())
185
+ )
186
+ )
187
+ else:
188
+ self.interactions.append(interaction)
189
+
190
+ def to_dict(self):
191
+ return {
192
+ "id": self.id,
193
+ "hash_id": self.hash_id,
194
+ "type": self.type,
195
+ "name": self.name,
196
+ "start_time": self.start_time,
197
+ "end_time": self.end_time,
198
+ "parent_id": self.parent_id,
199
+ "info": self.info,
200
+ "data": self.data,
201
+ "network_calls": [call.to_dict() if hasattr(call, 'to_dict') else call for call in self.network_calls],
202
+ "interactions": self.interactions
203
+ }
204
+
205
+ class LLMComponent(Component):
206
+ def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
207
+ super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
208
+
209
+ class AgentComponent(Component):
210
+ def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
211
+ super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
212
+
213
+ class ToolComponent(Component):
214
+ def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
215
+ super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
201
216
 
202
217
  @dataclass
203
218
  class ComponentInfo:
@@ -211,29 +226,23 @@ class ComponentInfo:
211
226
  token_usage: Optional[Dict[str, int]] = None
212
227
  cost: Optional[Dict[str, float]] = None
213
228
 
214
- @dataclass
215
- class Component:
216
- id: str
217
- hash_id: str
218
- source_hash_id: Optional[str]
219
- type: str
220
- name: str
221
- start_time: str
222
- end_time: str
223
- error: Optional[Error]
224
- parent_id: Optional[str]
225
- info: ComponentInfo
226
- data: Dict[str, Any]
227
- network_calls: List[NetworkCall]
228
- interactions: List[Interaction]
229
- children: Optional[List['Component']] = None
230
-
231
- @dataclass
232
229
  class Trace:
233
- id: str
234
- project_name: str
235
- start_time: str
236
- end_time: str
237
- metadata: Metadata
238
- data: List[Dict[str, Any]]
239
- replays: Optional[Dict[str, Any]]
230
+ def __init__(self, id: str, project_name: str, start_time: str, end_time: str, metadata: Optional[Metadata] = None, data: Optional[List[Dict[str, Any]]] = None, replays: Optional[Dict[str, Any]] = None):
231
+ self.id = id
232
+ self.project_name = project_name
233
+ self.start_time = start_time
234
+ self.end_time = end_time
235
+ self.metadata = metadata or Metadata()
236
+ self.data = data or []
237
+ self.replays = replays
238
+
239
+ def to_dict(self):
240
+ return {
241
+ "id": self.id,
242
+ "project_name": self.project_name,
243
+ "start_time": self.start_time,
244
+ "end_time": self.end_time,
245
+ "metadata": self.metadata.to_dict() if self.metadata else None,
246
+ "data": self.data,
247
+ "replays": self.replays,
248
+ }