ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +0 -3
- ragaai_catalyst/evaluation.py +1 -2
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +231 -74
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +32 -42
- ragaai_catalyst/tracers/agentic_tracing/base.py +132 -30
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +91 -79
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +262 -356
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +186 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
- ragaai_catalyst/tracers/tracer.py +6 -2
- ragaai_catalyst/tracers/upload_traces.py +46 -57
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/METADATA +8 -4
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/RECORD +28 -22
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/WHEEL +1 -1
- ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,27 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
import platform
|
4
|
-
import re
|
5
4
|
import psutil
|
6
5
|
import pkg_resources
|
7
6
|
from datetime import datetime
|
8
7
|
from pathlib import Path
|
9
|
-
from typing import
|
8
|
+
from typing import List
|
10
9
|
import uuid
|
11
10
|
import sys
|
11
|
+
import tempfile
|
12
12
|
|
13
13
|
from .data_structure import (
|
14
14
|
Trace, Metadata, SystemInfo, OSInfo, EnvironmentInfo,
|
15
15
|
Resources, CPUResource, MemoryResource, DiskResource, NetworkResource,
|
16
16
|
ResourceInfo, MemoryInfo, DiskInfo, NetworkInfo,
|
17
|
-
Component,
|
18
|
-
NetworkCall, Interaction, Error
|
17
|
+
Component,
|
19
18
|
)
|
20
19
|
|
21
|
-
from
|
22
|
-
from
|
20
|
+
from .upload_agentic_traces import UploadAgenticTraces
|
21
|
+
from .upload_code import upload_code
|
22
|
+
|
23
|
+
from .file_name_tracker import TrackName
|
24
|
+
from .zip_list_of_unique_files import zip_list_of_unique_files
|
23
25
|
|
24
26
|
class TracerJSONEncoder(json.JSONEncoder):
|
25
27
|
def default(self, obj):
|
@@ -50,13 +52,10 @@ class BaseTracer:
|
|
50
52
|
self.project_id = self.user_details['project_id'] # Access the project_id
|
51
53
|
|
52
54
|
# Initialize trace data
|
53
|
-
self.trace_id =
|
54
|
-
self.start_time =
|
55
|
+
self.trace_id = None
|
56
|
+
self.start_time = None
|
55
57
|
self.components: List[Component] = []
|
56
|
-
self.
|
57
|
-
"end_time": "",
|
58
|
-
"spans": self.components
|
59
|
-
}]
|
58
|
+
self.file_tracker = TrackName()
|
60
59
|
|
61
60
|
def _get_system_info(self) -> SystemInfo:
|
62
61
|
# Get OS info
|
@@ -146,11 +145,22 @@ class BaseTracer:
|
|
146
145
|
system_info=self._get_system_info(),
|
147
146
|
resources=self._get_resources()
|
148
147
|
)
|
148
|
+
|
149
|
+
# Generate a unique trace ID, when trace starts
|
150
|
+
self.trace_id = str(uuid.uuid4())
|
151
|
+
|
152
|
+
# Get the start time
|
153
|
+
self.start_time = datetime.now().isoformat()
|
154
|
+
|
155
|
+
self.data_key = [{"start_time": datetime.now().isoformat(),
|
156
|
+
"end_time": "",
|
157
|
+
"spans": self.components
|
158
|
+
}]
|
149
159
|
|
150
160
|
self.trace = Trace(
|
151
161
|
id=self.trace_id,
|
152
162
|
project_name=self.project_name,
|
153
|
-
start_time=
|
163
|
+
start_time=datetime.now().isoformat(),
|
154
164
|
end_time="", # Will be set when trace is stopped
|
155
165
|
metadata=metadata,
|
156
166
|
data=self.data_key,
|
@@ -165,29 +175,37 @@ class BaseTracer:
|
|
165
175
|
|
166
176
|
# Change span ids to int
|
167
177
|
self.trace = self._change_span_ids_to_int(self.trace)
|
168
|
-
self.trace = self.
|
178
|
+
self.trace = self._change_agent_input_output(self.trace)
|
169
179
|
self.trace = self._extract_cost_tokens(self.trace)
|
170
180
|
|
171
181
|
# Create traces directory if it doesn't exist
|
172
|
-
self.traces_dir =
|
173
|
-
self.traces_dir.mkdir(exist_ok=True)
|
182
|
+
self.traces_dir = tempfile.gettempdir()
|
174
183
|
filename = self.trace.id + ".json"
|
175
|
-
filepath = self.traces_dir
|
176
|
-
|
177
|
-
#
|
184
|
+
filepath = f"{self.traces_dir}/{filename}"
|
185
|
+
|
186
|
+
#get unique files and zip it. Generate a unique hash ID for the contents of the files
|
187
|
+
list_of_unique_files = self.file_tracker.get_unique_files()
|
188
|
+
hash_id, zip_path = zip_list_of_unique_files(list_of_unique_files, output_dir=self.traces_dir)
|
189
|
+
|
190
|
+
#replace source code with zip_path
|
191
|
+
self.trace.metadata.system_info.source_code = hash_id
|
192
|
+
|
193
|
+
# Clean up trace_data before saving
|
194
|
+
trace_data = self.trace.__dict__
|
195
|
+
cleaned_trace_data = self._clean_trace(trace_data)
|
196
|
+
|
178
197
|
with open(filepath, 'w') as f:
|
179
|
-
json.dump(
|
198
|
+
json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)
|
180
199
|
|
181
200
|
print(f"Trace saved to {filepath}")
|
182
|
-
# import pdb; pdb.set_trace()
|
183
201
|
# Upload traces
|
184
202
|
json_file_path = str(filepath)
|
185
203
|
project_name = self.project_name
|
186
|
-
project_id = self.project_id
|
204
|
+
project_id = self.project_id
|
187
205
|
dataset_name = self.dataset_name
|
188
206
|
user_detail = self.user_details
|
189
207
|
base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
|
190
|
-
upload_traces =
|
208
|
+
upload_traces = UploadAgenticTraces(
|
191
209
|
json_file_path=json_file_path,
|
192
210
|
project_name=project_name,
|
193
211
|
project_id=project_id,
|
@@ -195,7 +213,20 @@ class BaseTracer:
|
|
195
213
|
user_detail=user_detail,
|
196
214
|
base_url=base_url
|
197
215
|
)
|
198
|
-
upload_traces.
|
216
|
+
upload_traces.upload_agentic_traces()
|
217
|
+
|
218
|
+
#Upload Codehash
|
219
|
+
response = upload_code(
|
220
|
+
hash_id=hash_id,
|
221
|
+
zip_path=zip_path,
|
222
|
+
project_name=project_name,
|
223
|
+
dataset_name=dataset_name
|
224
|
+
)
|
225
|
+
print(response)
|
226
|
+
|
227
|
+
# Cleanup
|
228
|
+
self.components = []
|
229
|
+
self.file_tracker.reset()
|
199
230
|
|
200
231
|
def add_component(self, component: Component):
|
201
232
|
"""Add a component to the trace"""
|
@@ -209,7 +240,6 @@ class BaseTracer:
|
|
209
240
|
self.stop()
|
210
241
|
|
211
242
|
def _change_span_ids_to_int(self, trace):
|
212
|
-
# import pdb; pdb.set_trace()
|
213
243
|
id, parent_id = 1, 0
|
214
244
|
for span in trace.data[0]["spans"]:
|
215
245
|
span.id = id
|
@@ -222,14 +252,32 @@ class BaseTracer:
|
|
222
252
|
id += 1
|
223
253
|
return trace
|
224
254
|
|
225
|
-
def
|
255
|
+
def _change_agent_input_output(self, trace):
|
226
256
|
for span in trace.data[0]["spans"]:
|
227
257
|
if span.type == "agent":
|
228
|
-
# import pdb; pdb.set_trace()
|
229
258
|
childrens = span.data["children"]
|
230
|
-
|
231
|
-
|
232
|
-
|
259
|
+
span.data["input"] = None
|
260
|
+
span.data["output"] = None
|
261
|
+
if childrens:
|
262
|
+
# Find first non-null input going forward
|
263
|
+
for child in childrens:
|
264
|
+
if "data" not in child:
|
265
|
+
continue
|
266
|
+
input_data = child["data"].get("input")
|
267
|
+
|
268
|
+
if input_data:
|
269
|
+
span.data["input"] = input_data['args'] if hasattr(input_data, 'args') else input_data
|
270
|
+
break
|
271
|
+
|
272
|
+
# Find first non-null output going backward
|
273
|
+
for child in reversed(childrens):
|
274
|
+
if "data" not in child:
|
275
|
+
continue
|
276
|
+
output_data = child["data"].get("output")
|
277
|
+
|
278
|
+
if output_data and output_data != "" and output_data != "None":
|
279
|
+
span.data["output"] = output_data
|
280
|
+
break
|
233
281
|
return trace
|
234
282
|
|
235
283
|
def _extract_cost_tokens(self, trace):
|
@@ -251,6 +299,8 @@ class BaseTracer:
|
|
251
299
|
tokens[key] += value
|
252
300
|
if span.type == "agent":
|
253
301
|
for children in span.data["children"]:
|
302
|
+
if 'type' not in children:
|
303
|
+
continue
|
254
304
|
if children["type"] != "llm":
|
255
305
|
continue
|
256
306
|
info = children["info"]
|
@@ -267,4 +317,56 @@ class BaseTracer:
|
|
267
317
|
tokens[key] += value
|
268
318
|
trace.metadata.cost = cost
|
269
319
|
trace.metadata.tokens = tokens
|
320
|
+
return trace
|
321
|
+
|
322
|
+
def _clean_trace(self, trace):
|
323
|
+
# Convert span to dict if it has to_dict method
|
324
|
+
def _to_dict_if_needed(obj):
|
325
|
+
if hasattr(obj, 'to_dict'):
|
326
|
+
return obj.to_dict()
|
327
|
+
return obj
|
328
|
+
|
329
|
+
def deduplicate_spans(spans):
|
330
|
+
seen_llm_spans = {} # Dictionary to track unique LLM spans
|
331
|
+
unique_spans = []
|
332
|
+
|
333
|
+
for span in spans:
|
334
|
+
# Convert span to dictionary if needed
|
335
|
+
span_dict = _to_dict_if_needed(span)
|
336
|
+
|
337
|
+
# Skip spans without hash_id
|
338
|
+
if 'hash_id' not in span_dict:
|
339
|
+
continue
|
340
|
+
|
341
|
+
if span_dict.get('type') == 'llm':
|
342
|
+
# Create a unique key based on hash_id, input, and output
|
343
|
+
span_key = (
|
344
|
+
span_dict.get('hash_id'),
|
345
|
+
str(span_dict.get('data', {}).get('input')),
|
346
|
+
str(span_dict.get('data', {}).get('output'))
|
347
|
+
)
|
348
|
+
|
349
|
+
if span_key not in seen_llm_spans:
|
350
|
+
seen_llm_spans[span_key] = True
|
351
|
+
unique_spans.append(span)
|
352
|
+
else:
|
353
|
+
# For non-LLM spans, process their children if they exist
|
354
|
+
if 'data' in span_dict and 'children' in span_dict['data']:
|
355
|
+
children = span_dict['data']['children']
|
356
|
+
# Filter and deduplicate children
|
357
|
+
filtered_children = deduplicate_spans(children)
|
358
|
+
if isinstance(span, dict):
|
359
|
+
span['data']['children'] = filtered_children
|
360
|
+
else:
|
361
|
+
span.data['children'] = filtered_children
|
362
|
+
unique_spans.append(span)
|
363
|
+
|
364
|
+
return unique_spans
|
365
|
+
|
366
|
+
# Remove any spans without hash ids
|
367
|
+
for data in trace.get('data', []):
|
368
|
+
if 'spans' in data:
|
369
|
+
# First filter out spans without hash_ids, then deduplicate
|
370
|
+
data['spans'] = deduplicate_spans(data['spans'])
|
371
|
+
|
270
372
|
return trace
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
|
-
from typing import List, Dict, Optional, Any
|
2
|
+
from typing import List, Dict, Optional, Any, Union
|
3
3
|
from datetime import datetime
|
4
|
+
import uuid
|
4
5
|
|
5
6
|
@dataclass
|
6
7
|
class OSInfo:
|
@@ -99,12 +100,20 @@ class NetworkCall:
|
|
99
100
|
request: Dict[str, Any]
|
100
101
|
response: Dict[str, Any]
|
101
102
|
|
102
|
-
@dataclass
|
103
103
|
class Interaction:
|
104
|
-
id: str
|
105
|
-
|
106
|
-
|
107
|
-
|
104
|
+
def __init__(self, id, type: str, content: str, timestamp: str):
|
105
|
+
self.id = id
|
106
|
+
self.type = type
|
107
|
+
self.content = content
|
108
|
+
self.timestamp = timestamp
|
109
|
+
|
110
|
+
def to_dict(self):
|
111
|
+
return {
|
112
|
+
"id": self.id,
|
113
|
+
"interaction_type": self.type,
|
114
|
+
"content": self.content,
|
115
|
+
"timestamp": self.timestamp
|
116
|
+
}
|
108
117
|
|
109
118
|
@dataclass
|
110
119
|
class Error:
|
@@ -150,54 +159,63 @@ class ToolInfo:
|
|
150
159
|
version: str
|
151
160
|
memory_used: int
|
152
161
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
interactions: List[Interaction] = None
|
162
|
+
class Component:
|
163
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
|
164
|
+
self.id = id
|
165
|
+
self.hash_id = hash_id
|
166
|
+
self.type = type
|
167
|
+
self.name = name
|
168
|
+
self.start_time = start_time
|
169
|
+
self.end_time = end_time
|
170
|
+
self.parent_id = parent_id
|
171
|
+
self.info = info
|
172
|
+
self.data = data
|
173
|
+
self.error = error
|
174
|
+
self.network_calls = network_calls or []
|
175
|
+
self.interactions = []
|
176
|
+
self.error = error
|
177
|
+
if interactions:
|
178
|
+
for interaction in interactions:
|
179
|
+
if isinstance(interaction, dict):
|
180
|
+
self.interactions.append(
|
181
|
+
Interaction(
|
182
|
+
id=interaction.get("id", str(uuid.uuid4())),
|
183
|
+
type=interaction.get("interaction_type", ""),
|
184
|
+
content=str(interaction.get("content", "")),
|
185
|
+
timestamp=interaction.get("timestamp", datetime.utcnow().isoformat())
|
186
|
+
)
|
187
|
+
)
|
188
|
+
else:
|
189
|
+
self.interactions.append(interaction)
|
190
|
+
|
191
|
+
def to_dict(self):
|
192
|
+
return {
|
193
|
+
"id": self.id,
|
194
|
+
"hash_id": self.hash_id,
|
195
|
+
"type": self.type,
|
196
|
+
"name": self.name,
|
197
|
+
"start_time": self.start_time,
|
198
|
+
"end_time": self.end_time,
|
199
|
+
"parent_id": self.parent_id,
|
200
|
+
"info": self.info,
|
201
|
+
"error": self.error,
|
202
|
+
"data": self.data,
|
203
|
+
"error": self.error,
|
204
|
+
"network_calls": [call.to_dict() if hasattr(call, 'to_dict') else call for call in self.network_calls],
|
205
|
+
"interactions": self.interactions
|
206
|
+
}
|
207
|
+
|
208
|
+
class LLMComponent(Component):
|
209
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
|
210
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)
|
211
|
+
|
212
|
+
class AgentComponent(Component):
|
213
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
|
214
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)
|
215
|
+
|
216
|
+
class ToolComponent(Component):
|
217
|
+
def __init__(self, id: str, hash_id: str, type: str, name: str, start_time: str, end_time: str, parent_id: int, info: Dict[str, Any], data: Dict[str, Any], network_calls: Optional[List[NetworkCall]] = None, interactions: Optional[List[Union[Interaction, Dict]]] = None, error: Optional[Dict[str, Any]] = None):
|
218
|
+
super().__init__(id, hash_id, type, name, start_time, end_time, parent_id, info, data, network_calls, interactions, error)
|
201
219
|
|
202
220
|
@dataclass
|
203
221
|
class ComponentInfo:
|
@@ -211,29 +229,23 @@ class ComponentInfo:
|
|
211
229
|
token_usage: Optional[Dict[str, int]] = None
|
212
230
|
cost: Optional[Dict[str, float]] = None
|
213
231
|
|
214
|
-
@dataclass
|
215
|
-
class Component:
|
216
|
-
id: str
|
217
|
-
hash_id: str
|
218
|
-
source_hash_id: Optional[str]
|
219
|
-
type: str
|
220
|
-
name: str
|
221
|
-
start_time: str
|
222
|
-
end_time: str
|
223
|
-
error: Optional[Error]
|
224
|
-
parent_id: Optional[str]
|
225
|
-
info: ComponentInfo
|
226
|
-
data: Dict[str, Any]
|
227
|
-
network_calls: List[NetworkCall]
|
228
|
-
interactions: List[Interaction]
|
229
|
-
children: Optional[List['Component']] = None
|
230
|
-
|
231
|
-
@dataclass
|
232
232
|
class Trace:
|
233
|
-
id: str
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
233
|
+
def __init__(self, id: str, project_name: str, start_time: str, end_time: str, metadata: Optional[Metadata] = None, data: Optional[List[Dict[str, Any]]] = None, replays: Optional[Dict[str, Any]] = None):
|
234
|
+
self.id = id
|
235
|
+
self.project_name = project_name
|
236
|
+
self.start_time = start_time
|
237
|
+
self.end_time = end_time
|
238
|
+
self.metadata = metadata or Metadata()
|
239
|
+
self.data = data or []
|
240
|
+
self.replays = replays
|
241
|
+
|
242
|
+
def to_dict(self):
|
243
|
+
return {
|
244
|
+
"id": self.id,
|
245
|
+
"project_name": self.project_name,
|
246
|
+
"start_time": self.start_time,
|
247
|
+
"end_time": self.end_time,
|
248
|
+
"metadata": self.metadata.to_dict() if self.metadata else None,
|
249
|
+
"data": self.data,
|
250
|
+
"replays": self.replays,
|
251
|
+
}
|