ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +0 -3
- ragaai_catalyst/evaluation.py +1 -2
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +217 -106
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +27 -41
- ragaai_catalyst/tracers/agentic_tracing/base.py +127 -21
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +88 -79
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +258 -356
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
- ragaai_catalyst/tracers/tracer.py +6 -2
- ragaai_catalyst/tracers/upload_traces.py +46 -57
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/METADATA +6 -2
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/RECORD +28 -22
- ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
@@ -19,8 +19,13 @@ from .data_structure import (
|
|
19
19
|
)
|
20
20
|
|
21
21
|
from ..upload_traces import UploadTraces
|
22
|
+
from .upload_agentic_traces import UploadAgenticTraces
|
23
|
+
from .upload_code import upload_code
|
22
24
|
from ...ragaai_catalyst import RagaAICatalyst
|
23
25
|
|
26
|
+
from .file_name_tracker import TrackName
|
27
|
+
from .zip_list_of_unique_files import zip_list_of_unique_files
|
28
|
+
|
24
29
|
class TracerJSONEncoder(json.JSONEncoder):
|
25
30
|
def default(self, obj):
|
26
31
|
if isinstance(obj, datetime):
|
@@ -50,13 +55,10 @@ class BaseTracer:
|
|
50
55
|
self.project_id = self.user_details['project_id'] # Access the project_id
|
51
56
|
|
52
57
|
# Initialize trace data
|
53
|
-
self.trace_id =
|
54
|
-
self.start_time =
|
58
|
+
self.trace_id = None
|
59
|
+
self.start_time = None
|
55
60
|
self.components: List[Component] = []
|
56
|
-
self.
|
57
|
-
"end_time": "",
|
58
|
-
"spans": self.components
|
59
|
-
}]
|
61
|
+
self.file_tracker = TrackName()
|
60
62
|
|
61
63
|
def _get_system_info(self) -> SystemInfo:
|
62
64
|
# Get OS info
|
@@ -146,11 +148,22 @@ class BaseTracer:
|
|
146
148
|
system_info=self._get_system_info(),
|
147
149
|
resources=self._get_resources()
|
148
150
|
)
|
151
|
+
|
152
|
+
# Generate a unique trace ID, when trace starts
|
153
|
+
self.trace_id = str(uuid.uuid4())
|
154
|
+
|
155
|
+
# Get the start time
|
156
|
+
self.start_time = datetime.now().isoformat()
|
157
|
+
|
158
|
+
self.data_key = [{"start_time": datetime.now().isoformat(),
|
159
|
+
"end_time": "",
|
160
|
+
"spans": self.components
|
161
|
+
}]
|
149
162
|
|
150
163
|
self.trace = Trace(
|
151
164
|
id=self.trace_id,
|
152
165
|
project_name=self.project_name,
|
153
|
-
start_time=
|
166
|
+
start_time=datetime.now().isoformat(),
|
154
167
|
end_time="", # Will be set when trace is stopped
|
155
168
|
metadata=metadata,
|
156
169
|
data=self.data_key,
|
@@ -165,7 +178,7 @@ class BaseTracer:
|
|
165
178
|
|
166
179
|
# Change span ids to int
|
167
180
|
self.trace = self._change_span_ids_to_int(self.trace)
|
168
|
-
self.trace = self.
|
181
|
+
self.trace = self._change_agent_input_output(self.trace)
|
169
182
|
self.trace = self._extract_cost_tokens(self.trace)
|
170
183
|
|
171
184
|
# Create traces directory if it doesn't exist
|
@@ -173,21 +186,30 @@ class BaseTracer:
|
|
173
186
|
self.traces_dir.mkdir(exist_ok=True)
|
174
187
|
filename = self.trace.id + ".json"
|
175
188
|
filepath = self.traces_dir / filename
|
176
|
-
|
177
|
-
#
|
189
|
+
|
190
|
+
#get unique files and zip it. Generate a unique hash ID for the contents of the files
|
191
|
+
list_of_unique_files = self.file_tracker.get_unique_files()
|
192
|
+
hash_id, zip_path = zip_list_of_unique_files(list_of_unique_files)
|
193
|
+
|
194
|
+
#replace source code with zip_path
|
195
|
+
self.trace.metadata.system_info.source_code = hash_id
|
196
|
+
|
197
|
+
# Clean up trace_data before saving
|
198
|
+
trace_data = self.trace.__dict__
|
199
|
+
cleaned_trace_data = self._clean_trace(trace_data)
|
200
|
+
|
178
201
|
with open(filepath, 'w') as f:
|
179
|
-
json.dump(
|
202
|
+
json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)
|
180
203
|
|
181
204
|
print(f"Trace saved to {filepath}")
|
182
|
-
# import pdb; pdb.set_trace()
|
183
205
|
# Upload traces
|
184
206
|
json_file_path = str(filepath)
|
185
207
|
project_name = self.project_name
|
186
|
-
project_id = self.project_id
|
208
|
+
project_id = self.project_id
|
187
209
|
dataset_name = self.dataset_name
|
188
210
|
user_detail = self.user_details
|
189
211
|
base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
|
190
|
-
upload_traces =
|
212
|
+
upload_traces = UploadAgenticTraces(
|
191
213
|
json_file_path=json_file_path,
|
192
214
|
project_name=project_name,
|
193
215
|
project_id=project_id,
|
@@ -195,7 +217,20 @@ class BaseTracer:
|
|
195
217
|
user_detail=user_detail,
|
196
218
|
base_url=base_url
|
197
219
|
)
|
198
|
-
upload_traces.
|
220
|
+
upload_traces.upload_agentic_traces()
|
221
|
+
|
222
|
+
#Upload Codehash
|
223
|
+
response = upload_code(
|
224
|
+
hash_id=hash_id,
|
225
|
+
zip_path=zip_path,
|
226
|
+
project_name=project_name,
|
227
|
+
dataset_name=dataset_name
|
228
|
+
)
|
229
|
+
print(response)
|
230
|
+
|
231
|
+
# Cleanup
|
232
|
+
self.components = []
|
233
|
+
self.file_tracker = TrackName()
|
199
234
|
|
200
235
|
def add_component(self, component: Component):
|
201
236
|
"""Add a component to the trace"""
|
@@ -209,7 +244,6 @@ class BaseTracer:
|
|
209
244
|
self.stop()
|
210
245
|
|
211
246
|
def _change_span_ids_to_int(self, trace):
|
212
|
-
# import pdb; pdb.set_trace()
|
213
247
|
id, parent_id = 1, 0
|
214
248
|
for span in trace.data[0]["spans"]:
|
215
249
|
span.id = id
|
@@ -222,14 +256,32 @@ class BaseTracer:
|
|
222
256
|
id += 1
|
223
257
|
return trace
|
224
258
|
|
225
|
-
def
|
259
|
+
def _change_agent_input_output(self, trace):
|
226
260
|
for span in trace.data[0]["spans"]:
|
227
261
|
if span.type == "agent":
|
228
|
-
# import pdb; pdb.set_trace()
|
229
262
|
childrens = span.data["children"]
|
230
|
-
|
231
|
-
|
232
|
-
|
263
|
+
span.data["input"] = None
|
264
|
+
span.data["output"] = None
|
265
|
+
if childrens:
|
266
|
+
# Find first non-null input going forward
|
267
|
+
for child in childrens:
|
268
|
+
if "data" not in child:
|
269
|
+
continue
|
270
|
+
input_data = child["data"].get("input")
|
271
|
+
|
272
|
+
if input_data:
|
273
|
+
span.data["input"] = input_data['args'] if hasattr(input_data, 'args') else input_data
|
274
|
+
break
|
275
|
+
|
276
|
+
# Find first non-null output going backward
|
277
|
+
for child in reversed(childrens):
|
278
|
+
if "data" not in child:
|
279
|
+
continue
|
280
|
+
output_data = child["data"].get("output")
|
281
|
+
|
282
|
+
if output_data and output_data != "" and output_data != "None":
|
283
|
+
span.data["output"] = output_data
|
284
|
+
break
|
233
285
|
return trace
|
234
286
|
|
235
287
|
def _extract_cost_tokens(self, trace):
|
@@ -251,6 +303,8 @@ class BaseTracer:
|
|
251
303
|
tokens[key] += value
|
252
304
|
if span.type == "agent":
|
253
305
|
for children in span.data["children"]:
|
306
|
+
if 'type' not in children:
|
307
|
+
continue
|
254
308
|
if children["type"] != "llm":
|
255
309
|
continue
|
256
310
|
info = children["info"]
|
@@ -267,4 +321,56 @@ class BaseTracer:
|
|
267
321
|
tokens[key] += value
|
268
322
|
trace.metadata.cost = cost
|
269
323
|
trace.metadata.tokens = tokens
|
324
|
+
return trace
|
325
|
+
|
326
|
+
def _clean_trace(self, trace):
|
327
|
+
# Convert span to dict if it has to_dict method
|
328
|
+
def _to_dict_if_needed(obj):
|
329
|
+
if hasattr(obj, 'to_dict'):
|
330
|
+
return obj.to_dict()
|
331
|
+
return obj
|
332
|
+
|
333
|
+
def deduplicate_spans(spans):
|
334
|
+
seen_llm_spans = {} # Dictionary to track unique LLM spans
|
335
|
+
unique_spans = []
|
336
|
+
|
337
|
+
for span in spans:
|
338
|
+
# Convert span to dictionary if needed
|
339
|
+
span_dict = _to_dict_if_needed(span)
|
340
|
+
|
341
|
+
# Skip spans without hash_id
|
342
|
+
if 'hash_id' not in span_dict:
|
343
|
+
continue
|
344
|
+
|
345
|
+
if span_dict.get('type') == 'llm':
|
346
|
+
# Create a unique key based on hash_id, input, and output
|
347
|
+
span_key = (
|
348
|
+
span_dict.get('hash_id'),
|
349
|
+
str(span_dict.get('data', {}).get('input')),
|
350
|
+
str(span_dict.get('data', {}).get('output'))
|
351
|
+
)
|
352
|
+
|
353
|
+
if span_key not in seen_llm_spans:
|
354
|
+
seen_llm_spans[span_key] = True
|
355
|
+
unique_spans.append(span)
|
356
|
+
else:
|
357
|
+
# For non-LLM spans, process their children if they exist
|
358
|
+
if 'data' in span_dict and 'children' in span_dict['data']:
|
359
|
+
children = span_dict['data']['children']
|
360
|
+
# Filter and deduplicate children
|
361
|
+
filtered_children = deduplicate_spans(children)
|
362
|
+
if isinstance(span, dict):
|
363
|
+
span['data']['children'] = filtered_children
|
364
|
+
else:
|
365
|
+
span.data['children'] = filtered_children
|
366
|
+
unique_spans.append(span)
|
367
|
+
|
368
|
+
return unique_spans
|
369
|
+
|
370
|
+
# Remove any spans without hash ids
|
371
|
+
for data in trace.get('data', []):
|
372
|
+
if 'spans' in data:
|
373
|
+
# First filter out spans without hash_ids, then deduplicate
|
374
|
+
data['spans'] = deduplicate_spans(data['spans'])
|
375
|
+
|
270
376
|
return trace
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
|
-
from typing import List, Dict, Optional, Any
|
2
|
+
from typing import List, Dict, Optional, Any, Union
|
3
3
|
from datetime import datetime
|
4
|
+
import uuid
|
4
5
|
|
5
6
|
@dataclass
|
6
7
|
class OSInfo:
|
@@ -99,12 +100,20 @@ class NetworkCall:
|
|
99
100
|
request: Dict[str, Any]
|
100
101
|
response: Dict[str, Any]
|
101
102
|
|
102
|
-
@dataclass
|
103
103
|
class Interaction:
|
104
|
-
id: str
|
105
|
-
|
106
|
-
|
107
|
-
|
104
|
+
def __init__(self, id, type: str, content: str, timestamp: str):
|
105
|
+
self.id = id
|
106
|
+
self.type = type
|
107
|
+
self.content = content
|
108
|
+
self.timestamp = timestamp
|
109
|
+
|
110
|
+
def to_dict(self):
|
111
|
+
return {
|
112
|
+
"id": self.id,
|
113
|
+
"interaction_type": self.type,
|
114
|
+
"content": self.content,
|
115
|
+
"timestamp": self.timestamp
|
116
|
+
}
|
108
117
|
|
109
118
|
@dataclass
|
110
119
|
class Error:
|
@@ -150,54 +159,60 @@ class ToolInfo:
|
|
150
159
|
version: str
|
151
160
|
memory_used: int
|
152
161
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
info:
|
198
|
-
|
199
|
-
|
200
|
-
|
162
|
+
class Component:
|
163
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Error] = None):
|
164
|
+
self.id = id
|
165
|
+
self.error = error
|
166
|
+
self.hash_id = hash_id
|
167
|
+
self.type = type
|
168
|
+
self.name = name
|
169
|
+
self.start_time = start_time
|
170
|
+
self.end_time = end_time
|
171
|
+
self.parent_id = parent_id
|
172
|
+
self.info = info
|
173
|
+
self.data = data
|
174
|
+
self.network_calls = network_calls or []
|
175
|
+
self.interactions = []
|
176
|
+
if interactions:
|
177
|
+
for interaction in interactions:
|
178
|
+
if isinstance(interaction, dict):
|
179
|
+
self.interactions.append(
|
180
|
+
Interaction(
|
181
|
+
id=interaction.get("id", str(uuid.uuid4())),
|
182
|
+
type=interaction.get("interaction_type", ""),
|
183
|
+
content=str(interaction.get("content", "")),
|
184
|
+
timestamp=interaction.get("timestamp", datetime.utcnow().isoformat())
|
185
|
+
)
|
186
|
+
)
|
187
|
+
else:
|
188
|
+
self.interactions.append(interaction)
|
189
|
+
|
190
|
+
def to_dict(self):
|
191
|
+
return {
|
192
|
+
"id": self.id,
|
193
|
+
"hash_id": self.hash_id,
|
194
|
+
"type": self.type,
|
195
|
+
"name": self.name,
|
196
|
+
"start_time": self.start_time,
|
197
|
+
"end_time": self.end_time,
|
198
|
+
"parent_id": self.parent_id,
|
199
|
+
"info": self.info,
|
200
|
+
"data": self.data,
|
201
|
+
"network_calls": [call.to_dict() if hasattr(call, 'to_dict') else call for call in self.network_calls],
|
202
|
+
"interactions": self.interactions
|
203
|
+
}
|
204
|
+
|
205
|
+
class LLMComponent(Component):
|
206
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
|
207
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
|
208
|
+
|
209
|
+
class AgentComponent(Component):
|
210
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
|
211
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
|
212
|
+
|
213
|
+
class ToolComponent(Component):
|
214
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None):
|
215
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions)
|
201
216
|
|
202
217
|
@dataclass
|
203
218
|
class ComponentInfo:
|
@@ -211,29 +226,23 @@ class ComponentInfo:
|
|
211
226
|
token_usage: Optional[Dict[str, int]] = None
|
212
227
|
cost: Optional[Dict[str, float]] = None
|
213
228
|
|
214
|
-
@dataclass
|
215
|
-
class Component:
|
216
|
-
id: str
|
217
|
-
hash_id: str
|
218
|
-
source_hash_id: Optional[str]
|
219
|
-
type: str
|
220
|
-
name: str
|
221
|
-
start_time: str
|
222
|
-
end_time: str
|
223
|
-
error: Optional[Error]
|
224
|
-
parent_id: Optional[str]
|
225
|
-
info: ComponentInfo
|
226
|
-
data: Dict[str, Any]
|
227
|
-
network_calls: List[NetworkCall]
|
228
|
-
interactions: List[Interaction]
|
229
|
-
children: Optional[List['Component']] = None
|
230
|
-
|
231
|
-
@dataclass
|
232
229
|
class Trace:
|
233
|
-
id: str
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
230
|
+
def __init__(self, id: str, project_name: str, start_time: str, end_time: str, metadata: Optional[Metadata] = None, data: Optional[List[Dict[str, Any]]] = None, replays: Optional[Dict[str, Any]] = None):
|
231
|
+
self.id = id
|
232
|
+
self.project_name = project_name
|
233
|
+
self.start_time = start_time
|
234
|
+
self.end_time = end_time
|
235
|
+
self.metadata = metadata or Metadata()
|
236
|
+
self.data = data or []
|
237
|
+
self.replays = replays
|
238
|
+
|
239
|
+
def to_dict(self):
|
240
|
+
return {
|
241
|
+
"id": self.id,
|
242
|
+
"project_name": self.project_name,
|
243
|
+
"start_time": self.start_time,
|
244
|
+
"end_time": self.end_time,
|
245
|
+
"metadata": self.metadata.to_dict() if self.metadata else None,
|
246
|
+
"data": self.data,
|
247
|
+
"replays": self.replays,
|
248
|
+
}
|