ragaai-catalyst 2.0.7.2b0__py3-none-any.whl → 2.1__py3-none-any.whl

This diff shows the changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (27)
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +231 -74
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +32 -42
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +132 -30
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +91 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +262 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +186 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/tracer.py +6 -2
  23. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/METADATA +8 -4
  24. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/RECORD +26 -20
  25. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/WHEEL +1 -1
  26. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  27. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/base.py

@@ -1,25 +1,27 @@
  import json
  import os
  import platform
- import re
  import psutil
  import pkg_resources
  from datetime import datetime
  from pathlib import Path
- from typing import Optional, Dict, Any, List
+ from typing import List
  import uuid
  import sys
+ import tempfile

  from .data_structure import (
      Trace, Metadata, SystemInfo, OSInfo, EnvironmentInfo,
      Resources, CPUResource, MemoryResource, DiskResource, NetworkResource,
      ResourceInfo, MemoryInfo, DiskInfo, NetworkInfo,
-     Component, LLMComponent, AgentComponent, ToolComponent,
-     NetworkCall, Interaction, Error
+     Component,
  )

- from ..upload_traces import UploadTraces
- from ...ragaai_catalyst import RagaAICatalyst
+ from .upload_agentic_traces import UploadAgenticTraces
+ from .upload_code import upload_code
+
+ from .file_name_tracker import TrackName
+ from .zip_list_of_unique_files import zip_list_of_unique_files

  class TracerJSONEncoder(json.JSONEncoder):
      def default(self, obj):
@@ -50,13 +52,10 @@ class BaseTracer:
          self.project_id = self.user_details['project_id'] # Access the project_id

          # Initialize trace data
-         self.trace_id = str(uuid.uuid4())
-         self.start_time = datetime.now().isoformat()
+         self.trace_id = None
+         self.start_time = None
          self.components: List[Component] = []
-         self.data_key = [{"start_time": self.start_time,
-                           "end_time": "",
-                           "spans": self.components
-                           }]
+         self.file_tracker = TrackName()

      def _get_system_info(self) -> SystemInfo:
          # Get OS info
@@ -146,11 +145,22 @@ class BaseTracer:
              system_info=self._get_system_info(),
              resources=self._get_resources()
          )
+
+         # Generate a unique trace ID when the trace starts
+         self.trace_id = str(uuid.uuid4())
+
+         # Get the start time
+         self.start_time = datetime.now().isoformat()
+
+         self.data_key = [{"start_time": datetime.now().isoformat(),
+                           "end_time": "",
+                           "spans": self.components
+                           }]

          self.trace = Trace(
              id=self.trace_id,
              project_name=self.project_name,
-             start_time=self.start_time,
+             start_time=datetime.now().isoformat(),
              end_time="", # Will be set when trace is stopped
              metadata=metadata,
              data=self.data_key,
@@ -165,29 +175,37 @@ class BaseTracer:

          # Change span ids to int
          self.trace = self._change_span_ids_to_int(self.trace)
-         self.trace = self._change_agent_intput_output(self.trace)
+         self.trace = self._change_agent_input_output(self.trace)
          self.trace = self._extract_cost_tokens(self.trace)

          # Create traces directory if it doesn't exist
-         self.traces_dir = Path("traces")
-         self.traces_dir.mkdir(exist_ok=True)
+         self.traces_dir = tempfile.gettempdir()
          filename = self.trace.id + ".json"
-         filepath = self.traces_dir / filename
-
-         # Save to JSON file using custom encoder
+         filepath = f"{self.traces_dir}/{filename}"
+
+         # get unique files and zip it. Generate a unique hash ID for the contents of the files
+         list_of_unique_files = self.file_tracker.get_unique_files()
+         hash_id, zip_path = zip_list_of_unique_files(list_of_unique_files, output_dir=self.traces_dir)
+
+         # replace source code with zip_path
+         self.trace.metadata.system_info.source_code = hash_id
+
+         # Clean up trace_data before saving
+         trace_data = self.trace.__dict__
+         cleaned_trace_data = self._clean_trace(trace_data)
+
          with open(filepath, 'w') as f:
-             json.dump(self.trace.__dict__, f, cls=TracerJSONEncoder, indent=2)
+             json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)

          print(f"Trace saved to {filepath}")
-         # import pdb; pdb.set_trace()
          # Upload traces
          json_file_path = str(filepath)
          project_name = self.project_name
-         project_id = self.project_id # TODO: Replace with actual project ID
+         project_id = self.project_id
          dataset_name = self.dataset_name
          user_detail = self.user_details
          base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
-         upload_traces = UploadTraces(
+         upload_traces = UploadAgenticTraces(
              json_file_path=json_file_path,
              project_name=project_name,
              project_id=project_id,
@@ -195,7 +213,20 @@ class BaseTracer:
              user_detail=user_detail,
              base_url=base_url
          )
-         upload_traces.upload_traces()
+         upload_traces.upload_agentic_traces()
+
+         # Upload Codehash
+         response = upload_code(
+             hash_id=hash_id,
+             zip_path=zip_path,
+             project_name=project_name,
+             dataset_name=dataset_name
+         )
+         print(response)
+
+         # Cleanup
+         self.components = []
+         self.file_tracker.reset()

      def add_component(self, component: Component):
          """Add a component to the trace"""
@@ -209,7 +240,6 @@ class BaseTracer:
              self.stop()

      def _change_span_ids_to_int(self, trace):
-         # import pdb; pdb.set_trace()
          id, parent_id = 1, 0
          for span in trace.data[0]["spans"]:
              span.id = id
@@ -222,14 +252,32 @@ class BaseTracer:
                  id += 1
          return trace

-     def _change_agent_intput_output(self, trace):
+     def _change_agent_input_output(self, trace):
          for span in trace.data[0]["spans"]:
              if span.type == "agent":
-                 # import pdb; pdb.set_trace()
                  childrens = span.data["children"]
-                 if childrens != []:
-                     span.data["input"] = childrens[0]["data"]["input"]
-                     span.data["output"] = childrens[-1]["data"]["output"]
+                 span.data["input"] = None
+                 span.data["output"] = None
+                 if childrens:
+                     # Find first non-null input going forward
+                     for child in childrens:
+                         if "data" not in child:
+                             continue
+                         input_data = child["data"].get("input")
+
+                         if input_data:
+                             span.data["input"] = input_data['args'] if hasattr(input_data, 'args') else input_data
+                             break
+
+                     # Find first non-null output going backward
+                     for child in reversed(childrens):
+                         if "data" not in child:
+                             continue
+                         output_data = child["data"].get("output")
+
+                         if output_data and output_data != "" and output_data != "None":
+                             span.data["output"] = output_data
+                             break
          return trace

      def _extract_cost_tokens(self, trace):
@@ -251,6 +299,8 @@ class BaseTracer:
                          tokens[key] += value
              if span.type == "agent":
                  for children in span.data["children"]:
+                     if 'type' not in children:
+                         continue
                      if children["type"] != "llm":
                          continue
                      info = children["info"]
@@ -267,4 +317,56 @@ class BaseTracer:
                              tokens[key] += value
          trace.metadata.cost = cost
          trace.metadata.tokens = tokens
+         return trace
+
+     def _clean_trace(self, trace):
+         # Convert span to dict if it has to_dict method
+         def _to_dict_if_needed(obj):
+             if hasattr(obj, 'to_dict'):
+                 return obj.to_dict()
+             return obj
+
+         def deduplicate_spans(spans):
+             seen_llm_spans = {}  # Dictionary to track unique LLM spans
+             unique_spans = []
+
+             for span in spans:
+                 # Convert span to dictionary if needed
+                 span_dict = _to_dict_if_needed(span)
+
+                 # Skip spans without hash_id
+                 if 'hash_id' not in span_dict:
+                     continue
+
+                 if span_dict.get('type') == 'llm':
+                     # Create a unique key based on hash_id, input, and output
+                     span_key = (
+                         span_dict.get('hash_id'),
+                         str(span_dict.get('data', {}).get('input')),
+                         str(span_dict.get('data', {}).get('output'))
+                     )
+
+                     if span_key not in seen_llm_spans:
+                         seen_llm_spans[span_key] = True
+                         unique_spans.append(span)
+                 else:
+                     # For non-LLM spans, process their children if they exist
+                     if 'data' in span_dict and 'children' in span_dict['data']:
+                         children = span_dict['data']['children']
+                         # Filter and deduplicate children
+                         filtered_children = deduplicate_spans(children)
+                         if isinstance(span, dict):
+                             span['data']['children'] = filtered_children
+                         else:
+                             span.data['children'] = filtered_children
+                     unique_spans.append(span)
+
+             return unique_spans
+
+         # Remove any spans without hash ids
+         for data in trace.get('data', []):
+             if 'spans' in data:
+                 # First filter out spans without hash_ids, then deduplicate
+                 data['spans'] = deduplicate_spans(data['spans'])
+
          return trace
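The new `_clean_trace` step is the most algorithmic change in base.py: LLM spans are deduplicated by (hash_id, input, output), spans without a hash_id are dropped, and agent children are cleaned recursively. The sketch below is illustrative only, not code from the package: it replays the same keying on plain dictionaries, and the helper name `dedupe_llm_spans` and the sample spans are invented.

    # Minimal sketch of the LLM-span deduplication idea from _clean_trace,
    # applied to plain dicts (sample data is made up for illustration).
    def dedupe_llm_spans(spans):
        seen = set()
        unique = []
        for span in spans:
            if 'hash_id' not in span:          # spans without a hash_id are dropped
                continue
            if span.get('type') == 'llm':
                key = (span['hash_id'],
                       str(span.get('data', {}).get('input')),
                       str(span.get('data', {}).get('output')))
                if key in seen:                # identical LLM call already recorded
                    continue
                seen.add(key)
            unique.append(span)
        return unique

    spans = [
        {"hash_id": "a1", "type": "llm", "data": {"input": "hi", "output": "hello"}},
        {"hash_id": "a1", "type": "llm", "data": {"input": "hi", "output": "hello"}},  # duplicate
        {"type": "tool", "data": {}},  # no hash_id, dropped
    ]
    print(len(dedupe_llm_spans(spans)))  # 1

Running this prints 1: the duplicate LLM call is collapsed and the span without a hash_id is discarded, which is the behavior the hunk above applies (recursively) before the trace JSON is written.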
ragaai_catalyst/tracers/agentic_tracing/data_structure.py

@@ -1,6 +1,7 @@
  from dataclasses import dataclass
- from typing import List, Dict, Optional, Any
+ from typing import List, Dict, Optional, Any, Union
  from datetime import datetime
+ import uuid

  @dataclass
  class OSInfo:
@@ -99,12 +100,20 @@ class NetworkCall:
      request: Dict[str, Any]
      response: Dict[str, Any]

- @dataclass
  class Interaction:
-     id: str
-     interaction_type: str
-     content: Optional[str]
-     timestamp: str
+     def __init__(self, id, type: str, content: str, timestamp: str):
+         self.id = id
+         self.type = type
+         self.content = content
+         self.timestamp = timestamp
+
+     def to_dict(self):
+         return {
+             "id": self.id,
+             "interaction_type": self.type,
+             "content": self.content,
+             "timestamp": self.timestamp
+         }

  @dataclass
  class Error:
@@ -150,54 +159,63 @@ class ToolInfo:
      version: str
      memory_used: int

- @dataclass
- class LLMComponent:
-     id: str
-     hash_id: str
-     source_hash_id: Optional[str]
-     type: str = "llm"
-     name: str = ""
-     start_time: str = ""
-     end_time: str = ""
-     error: Optional[Error] = None
-     parent_id: Optional[str] = None
-     info: LLMInfo = None
-     data: Dict[str, Any] = None
-     network_calls: List[NetworkCall] = None
-     interactions: List[Interaction] = None
-
- @dataclass
- class AgentComponent:
-     id: str
-     hash_id: str
-     source_hash_id: Optional[str]
-     type: str = "agent"
-     name: str = ""
-     start_time: str = ""
-     end_time: str = ""
-     error: Optional[Error] = None
-     parent_id: Optional[str] = None
-     info: AgentInfo = None
-     data: Dict[str, Any] = None
-     network_calls: List[NetworkCall] = None
-     interactions: List[Interaction] = None
-     # children: List['Component'] = None
-
- @dataclass
- class ToolComponent:
-     id: str
-     hash_id: str
-     source_hash_id: Optional[str]
-     type: str = "tool"
-     name: str = ""
-     start_time: str = ""
-     end_time: str = ""
-     error: Optional[Error] = None
-     parent_id: Optional[str] = None
-     info: ToolInfo = None
-     data: Dict[str, Any] = None
-     network_calls: List[NetworkCall] = None
-     interactions: List[Interaction] = None
+ class Component:
+     def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
+         self.id = id
+         self.hash_id = hash_id
+         self.type = type
+         self.name = name
+         self.start_time = start_time
+         self.end_time = end_time
+         self.parent_id = parent_id
+         self.info = info
+         self.data = data
+         self.error = error
+         self.network_calls = network_calls or []
+         self.interactions = []
+         self.error = error
+         if interactions:
+             for interaction in interactions:
+                 if isinstance(interaction, dict):
+                     self.interactions.append(
+                         Interaction(
+                             id=interaction.get("id", str(uuid.uuid4())),
+                             type=interaction.get("interaction_type", ""),
+                             content=str(interaction.get("content", "")),
+                             timestamp=interaction.get("timestamp", datetime.utcnow().isoformat())
+                         )
+                     )
+                 else:
+                     self.interactions.append(interaction)
+
+     def to_dict(self):
+         return {
+             "id": self.id,
+             "hash_id": self.hash_id,
+             "type": self.type,
+             "name": self.name,
+             "start_time": self.start_time,
+             "end_time": self.end_time,
+             "parent_id": self.parent_id,
+             "info": self.info,
+             "error": self.error,
+             "data": self.data,
+             "error": self.error,
+             "network_calls": [call.to_dict() if hasattr(call, 'to_dict') else call for call in self.network_calls],
+             "interactions": self.interactions
+         }
+
+ class LLMComponent(Component):
+     def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
+         super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)
+
+ class AgentComponent(Component):
+     def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
+         super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)
+
+ class ToolComponent(Component):
+     def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
+         super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)

  @dataclass
  class ComponentInfo:
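For a sense of how the rewritten component classes behave, here is a hedged sketch (not from the package) that constructs an `LLMComponent` with a dict-style interaction and serializes it. Only the constructor and `to_dict` signatures come from the hunk above; the import path is taken from the files-changed list, and every field value is a placeholder.

    from ragaai_catalyst.tracers.agentic_tracing.data_structure import LLMComponent

    # Placeholder values; only the signature mirrors the class definition above.
    comp = LLMComponent(
        id="1",
        hash_id="abc123",
        type="llm",
        name="openai.chat",
        start_time="2024-01-01T00:00:00",
        end_time="2024-01-01T00:00:01",
        parent_id=0,
        info={"model": "gpt-4o-mini"},
        data={"input": "hi", "output": "hello"},
        interactions=[{"interaction_type": "output", "content": "hello"}],  # dict is normalized to an Interaction
    )
    print(comp.to_dict()["type"])                     # llm
    print(comp.interactions[0].to_dict()["content"])  # hello

The point of the change is visible here: components are no longer rigid dataclasses, dict interactions are normalized into `Interaction` objects, and `to_dict()` gives the serializable form used by `_clean_trace` in base.py. The final hunk applies the same treatment to `Trace`.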
@@ -211,29 +229,23 @@ class ComponentInfo:
      token_usage: Optional[Dict[str, int]] = None
      cost: Optional[Dict[str, float]] = None

- @dataclass
- class Component:
-     id: str
-     hash_id: str
-     source_hash_id: Optional[str]
-     type: str
-     name: str
-     start_time: str
-     end_time: str
-     error: Optional[Error]
-     parent_id: Optional[str]
-     info: ComponentInfo
-     data: Dict[str, Any]
-     network_calls: List[NetworkCall]
-     interactions: List[Interaction]
-     children: Optional[List['Component']] = None
-
- @dataclass
  class Trace:
-     id: str
-     project_name: str
-     start_time: str
-     end_time: str
-     metadata: Metadata
-     data: List[Dict[str, Any]]
-     replays: Optional[Dict[str, Any]]
+     def __init__(self, id: str, project_name: str, start_time: str, end_time: str, metadata: Optional[Metadata] = None, data: Optional[List[Dict[str, Any]]] = None, replays: Optional[Dict[str, Any]] = None):
+         self.id = id
+         self.project_name = project_name
+         self.start_time = start_time
+         self.end_time = end_time
+         self.metadata = metadata or Metadata()
+         self.data = data or []
+         self.replays = replays
+
+     def to_dict(self):
+         return {
+             "id": self.id,
+             "project_name": self.project_name,
+             "start_time": self.start_time,
+             "end_time": self.end_time,
+             "metadata": self.metadata.to_dict() if self.metadata else None,
+             "data": self.data,
+             "replays": self.replays,
+         }
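Putting the two files together, a saved trace file should roughly reduce to the JSON sketched below: the top-level keys follow `Trace.to_dict()`, `data[0]` follows the `data_key` structure built in base.py, and each span follows `Component.to_dict()`. This is an assumed shape for illustration only; all values are placeholders and the metadata is trimmed to the cost/tokens fields touched in this diff.

    # Hedged sketch of what a saved trace JSON roughly looks like after stop().
    import json

    example_trace = {
        "id": "3f1c0b7e-0000-0000-0000-000000000000",   # normally str(uuid.uuid4()) assigned in start()
        "project_name": "demo-project",
        "start_time": "2024-01-01T00:00:00",
        "end_time": "2024-01-01T00:00:05",
        "metadata": {"cost": {}, "tokens": {}},          # aggregated by _extract_cost_tokens
        "data": [{
            "start_time": "2024-01-01T00:00:00",
            "end_time": "",
            "spans": [{
                "id": 1,                                 # ids rewritten to ints by _change_span_ids_to_int
                "hash_id": "abc123",
                "type": "llm",
                "name": "openai.chat",
                "start_time": "2024-01-01T00:00:01",
                "end_time": "2024-01-01T00:00:02",
                "parent_id": 0,
                "info": {},
                "error": None,
                "data": {"input": "hi", "output": "hello"},
                "network_calls": [],
                "interactions": [],
            }],
        }],
        "replays": None,
    }
    print(json.dumps(example_trace, indent=2))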