ragaai-catalyst 2.0.7.2__py3-none-any.whl → 2.0.7.2b0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- ragaai_catalyst/evaluation.py +107 -153
- ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +660 -0
- ragaai_catalyst/tracers/agentic_tracing/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +311 -0
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +212 -0
- ragaai_catalyst/tracers/agentic_tracing/base.py +270 -0
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +239 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +906 -0
- ragaai_catalyst/tracers/agentic_tracing/network_tracer.py +286 -0
- ragaai_catalyst/tracers/agentic_tracing/sample.py +197 -0
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +235 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +221 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator_test.py +172 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +67 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +18 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/data_classes.py +61 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +32 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +181 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +5946 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +74 -0
- ragaai_catalyst/tracers/tracer.py +26 -4
- ragaai_catalyst/tracers/upload_traces.py +127 -0
- ragaai_catalyst-2.0.7.2b0.dist-info/METADATA +39 -0
- ragaai_catalyst-2.0.7.2b0.dist-info/RECORD +50 -0
- ragaai_catalyst-2.0.7.2.dist-info/METADATA +0 -386
- ragaai_catalyst-2.0.7.2.dist-info/RECORD +0 -29
- {ragaai_catalyst-2.0.7.2.dist-info → ragaai_catalyst-2.0.7.2b0.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.0.7.2.dist-info → ragaai_catalyst-2.0.7.2b0.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/base.py
@@ -0,0 +1,270 @@
import json
import os
import platform
import re
import psutil
import pkg_resources
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List
import uuid
import sys

from .data_structure import (
    Trace, Metadata, SystemInfo, OSInfo, EnvironmentInfo,
    Resources, CPUResource, MemoryResource, DiskResource, NetworkResource,
    ResourceInfo, MemoryInfo, DiskInfo, NetworkInfo,
    Component, LLMComponent, AgentComponent, ToolComponent,
    NetworkCall, Interaction, Error
)

from ..upload_traces import UploadTraces
from ...ragaai_catalyst import RagaAICatalyst

class TracerJSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, bytes):
            try:
                return obj.decode('utf-8')
            except UnicodeDecodeError:
                return str(obj)  # Fallback to string representation
        if hasattr(obj, 'to_dict'):  # Handle objects with to_dict method
            return obj.to_dict()
        if hasattr(obj, '__dict__'):
            # Filter out None values and handle nested serialization
            return {k: v for k, v in obj.__dict__.items()
                    if v is not None and not k.startswith('_')}
        try:
            # Try to convert to a basic type
            return str(obj)
        except:
            return None  # Last resort: return None instead of failing

class BaseTracer:
    def __init__(self, user_details):
        self.user_details = user_details
        self.project_name = self.user_details['project_name']  # Access the project_name
        self.dataset_name = self.user_details['dataset_name']  # Access the dataset_name
        self.project_id = self.user_details['project_id']  # Access the project_id

        # Initialize trace data
        self.trace_id = str(uuid.uuid4())
        self.start_time = datetime.now().isoformat()
        self.components: List[Component] = []
        self.data_key = [{"start_time": self.start_time,
                          "end_time": "",
                          "spans": self.components
                          }]

    def _get_system_info(self) -> SystemInfo:
        # Get OS info
        os_info = OSInfo(
            name=platform.system(),
            version=platform.version(),
            platform=platform.machine(),
            kernel_version=platform.release()
        )

        # Get Python environment info
        installed_packages = [f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set]
        env_info = EnvironmentInfo(
            name="Python",
            version=platform.python_version(),
            packages=installed_packages,
            env_path=sys.prefix,
            command_to_run=f"python {sys.argv[0]}"
        )

        return SystemInfo(
            id=f"sys_{self.trace_id}",
            os=os_info,
            environment=env_info,
            source_code="Path to the source code .zip file in format hashid.zip"  # TODO: Implement source code archiving
        )

    def _get_resources(self) -> Resources:
        # CPU info
        cpu_info = ResourceInfo(
            name=platform.processor(),
            cores=psutil.cpu_count(logical=False),
            threads=psutil.cpu_count(logical=True)
        )
        cpu = CPUResource(
            info=cpu_info,
            interval="5s",
            values=[psutil.cpu_percent()]
        )

        # Memory info
        memory = psutil.virtual_memory()
        mem_info = MemoryInfo(
            total=memory.total / (1024**3),  # Convert to GB
            free=memory.available / (1024**3)
        )
        mem = MemoryResource(
            info=mem_info,
            interval="5s",
            values=[memory.percent]
        )

        # Disk info
        disk = psutil.disk_usage('/')
        disk_info = DiskInfo(
            total=disk.total / (1024**3),
            free=disk.free / (1024**3)
        )
        disk_io = psutil.disk_io_counters()
        disk_resource = DiskResource(
            info=disk_info,
            interval="5s",
            read=[disk_io.read_bytes / (1024**2)],  # MB
            write=[disk_io.write_bytes / (1024**2)]
        )

        # Network info
        net_io = psutil.net_io_counters()
        net_info = NetworkInfo(
            upload_speed=net_io.bytes_sent / (1024**2),  # MB
            download_speed=net_io.bytes_recv / (1024**2)
        )
        net = NetworkResource(
            info=net_info,
            interval="5s",
            uploads=[net_io.bytes_sent / (1024**2)],
            downloads=[net_io.bytes_recv / (1024**2)]
        )

        return Resources(cpu=cpu, memory=mem, disk=disk_resource, network=net)

    def start(self):
        """Initialize a new trace"""
        metadata = Metadata(
            cost={},
            tokens={},
            system_info=self._get_system_info(),
            resources=self._get_resources()
        )

        self.trace = Trace(
            id=self.trace_id,
            project_name=self.project_name,
            start_time=self.start_time,
            end_time="",  # Will be set when trace is stopped
            metadata=metadata,
            data=self.data_key,
            replays={"source": None}
        )

    def stop(self):
        """Stop the trace and save to JSON file"""
        if hasattr(self, 'trace'):
            self.trace.data[0]["end_time"] = datetime.now().isoformat()
            self.trace.end_time = datetime.now().isoformat()

            # Change span ids to int
            self.trace = self._change_span_ids_to_int(self.trace)
            self.trace = self._change_agent_intput_output(self.trace)
            self.trace = self._extract_cost_tokens(self.trace)

            # Create traces directory if it doesn't exist
            self.traces_dir = Path("traces")
            self.traces_dir.mkdir(exist_ok=True)
            filename = self.trace.id + ".json"
            filepath = self.traces_dir / filename

            # Save to JSON file using custom encoder
            with open(filepath, 'w') as f:
                json.dump(self.trace.__dict__, f, cls=TracerJSONEncoder, indent=2)

            print(f"Trace saved to {filepath}")
            # import pdb; pdb.set_trace()
            # Upload traces
            json_file_path = str(filepath)
            project_name = self.project_name
            project_id = self.project_id  # TODO: Replace with actual project ID
            dataset_name = self.dataset_name
            user_detail = self.user_details
            base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
            upload_traces = UploadTraces(
                json_file_path=json_file_path,
                project_name=project_name,
                project_id=project_id,
                dataset_name=dataset_name,
                user_detail=user_detail,
                base_url=base_url
            )
            upload_traces.upload_traces()

    def add_component(self, component: Component):
        """Add a component to the trace"""
        self.components.append(component)

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

    def _change_span_ids_to_int(self, trace):
        # import pdb; pdb.set_trace()
        id, parent_id = 1, 0
        for span in trace.data[0]["spans"]:
            span.id = id
            span.parent_id = parent_id
            id += 1
            if span.type == "agent":
                for children in span.data["children"]:
                    children["id"] = id
                    children["parent_id"] = span.id
                    id += 1
        return trace

    def _change_agent_intput_output(self, trace):
        for span in trace.data[0]["spans"]:
            if span.type == "agent":
                # import pdb; pdb.set_trace()
                childrens = span.data["children"]
                if childrens != []:
                    span.data["input"] = childrens[0]["data"]["input"]
                    span.data["output"] = childrens[-1]["data"]["output"]
        return trace

    def _extract_cost_tokens(self, trace):
        cost = {}
        tokens = {}
        for span in trace.data[0]["spans"]:
            if span.type == "llm":
                info = span.info
                if isinstance(info, dict):
                    cost_info = info.get('cost', {})
                    for key, value in cost_info.items():
                        if key not in cost:
                            cost[key] = 0
                        cost[key] += value
                    token_info = info.get('tokens', {})
                    for key, value in token_info.items():
                        if key not in tokens:
                            tokens[key] = 0
                        tokens[key] += value
            if span.type == "agent":
                for children in span.data["children"]:
                    if children["type"] != "llm":
                        continue
                    info = children["info"]
                    if isinstance(info, dict):
                        cost_info = info.get('cost', {})
                        for key, value in cost_info.items():
                            if key not in cost:
                                cost[key] = 0
                            cost[key] += value
                        token_info = info.get('tokens', {})
                        for key, value in token_info.items():
                            if key not in tokens:
                                tokens[key] = 0
                            tokens[key] += value
        trace.metadata.cost = cost
        trace.metadata.tokens = tokens
        return trace
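
Editor's note, not part of the diff: a minimal sketch of how the new BaseTracer appears intended to be driven, based only on the code above. The user_details keys, the LLMComponent fields, and the dict-shaped info consumed by _extract_cost_tokens() come from this diff; the project names, ids, and numeric values below are invented for illustration, and the final upload step in stop() will only succeed if RAGAAI_CATALYST_BASE_URL and Catalyst credentials are configured.

# Illustrative sketch; values are hypothetical.
from ragaai_catalyst.tracers.agentic_tracing.base import BaseTracer
from ragaai_catalyst.tracers.agentic_tracing.data_structure import LLMComponent

user_details = {
    "project_name": "demo-project",   # hypothetical
    "dataset_name": "demo-dataset",   # hypothetical
    "project_id": "1234",             # hypothetical
}

with BaseTracer(user_details) as tracer:   # __enter__ -> start() builds the Trace with system/resource metadata
    tracer.add_component(LLMComponent(
        id="temp",                         # rewritten to an int by _change_span_ids_to_int()
        hash_id="abc123",
        source_hash_id=None,
        name="chat_completion",
        info={"cost": {"total_cost": 0.0021},      # dict info is what _extract_cost_tokens() aggregates
              "tokens": {"total_tokens": 512}},
        data={"input": "hello", "output": "hi"},
        network_calls=[],
        interactions=[],
    ))
# __exit__ -> stop() writes traces/<trace_id>.json and then attempts an upload via UploadTraces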
ragaai_catalyst/tracers/agentic_tracing/data_structure.py
@@ -0,0 +1,239 @@
from dataclasses import dataclass
from typing import List, Dict, Optional, Any
from datetime import datetime

@dataclass
class OSInfo:
    name: str
    version: str
    platform: str
    kernel_version: str

@dataclass
class EnvironmentInfo:
    name: str
    version: str
    packages: List[str]
    env_path: str
    command_to_run: str

@dataclass
class SystemInfo:
    id: str
    os: OSInfo
    environment: EnvironmentInfo
    source_code: str

@dataclass
class ResourceInfo:
    name: str
    cores: int
    threads: int

@dataclass
class CPUResource:
    info: ResourceInfo
    interval: str
    values: List[float]

@dataclass
class MemoryInfo:
    total: float
    free: float

@dataclass
class MemoryResource:
    info: MemoryInfo
    interval: str
    values: List[float]

@dataclass
class DiskInfo:
    total: float
    free: float

@dataclass
class DiskResource:
    info: DiskInfo
    interval: str
    read: List[float]
    write: List[float]

@dataclass
class NetworkInfo:
    upload_speed: float
    download_speed: float

@dataclass
class NetworkResource:
    info: NetworkInfo
    interval: str
    uploads: List[float]
    downloads: List[float]

@dataclass
class Resources:
    cpu: CPUResource
    memory: MemoryResource
    disk: DiskResource
    network: NetworkResource

@dataclass
class Metadata:
    cost: Dict[str, Any]
    tokens: Dict[str, Any]
    system_info: SystemInfo
    resources: Resources

@dataclass
class NetworkCall:
    url: str
    method: str
    status_code: int
    response_time: float
    bytes_sent: int
    bytes_received: int
    protocol: str
    connection_id: str
    parent_id: str
    request: Dict[str, Any]
    response: Dict[str, Any]

@dataclass
class Interaction:
    id: str
    interaction_type: str
    content: Optional[str]
    timestamp: str

@dataclass
class Error:
    code: int
    type: str
    message: str
    details: Dict[str, Any]

@dataclass
class LLMParameters:
    temperature: float
    top_p: float
    max_tokens: int

@dataclass
class TokenUsage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

@dataclass
class Cost:
    prompt_cost: float
    completion_cost: float
    total_cost: float

@dataclass
class LLMInfo:
    model: str
    parameters: LLMParameters
    token_usage: TokenUsage
    cost: Cost

@dataclass
class AgentInfo:
    agent_type: str
    version: str
    capabilities: List[str]

@dataclass
class ToolInfo:
    tool_type: str
    version: str
    memory_used: int

@dataclass
class LLMComponent:
    id: str
    hash_id: str
    source_hash_id: Optional[str]
    type: str = "llm"
    name: str = ""
    start_time: str = ""
    end_time: str = ""
    error: Optional[Error] = None
    parent_id: Optional[str] = None
    info: LLMInfo = None
    data: Dict[str, Any] = None
    network_calls: List[NetworkCall] = None
    interactions: List[Interaction] = None

@dataclass
class AgentComponent:
    id: str
    hash_id: str
    source_hash_id: Optional[str]
    type: str = "agent"
    name: str = ""
    start_time: str = ""
    end_time: str = ""
    error: Optional[Error] = None
    parent_id: Optional[str] = None
    info: AgentInfo = None
    data: Dict[str, Any] = None
    network_calls: List[NetworkCall] = None
    interactions: List[Interaction] = None
    # children: List['Component'] = None

@dataclass
class ToolComponent:
    id: str
    hash_id: str
    source_hash_id: Optional[str]
    type: str = "tool"
    name: str = ""
    start_time: str = ""
    end_time: str = ""
    error: Optional[Error] = None
    parent_id: Optional[str] = None
    info: ToolInfo = None
    data: Dict[str, Any] = None
    network_calls: List[NetworkCall] = None
    interactions: List[Interaction] = None

@dataclass
class ComponentInfo:
    tool_type: Optional[str] = None
    agent_type: Optional[str] = None
    version: str = ""
    capabilities: Optional[List[str]] = None
    memory_used: Optional[int] = None
    model: Optional[str] = None
    parameters: Optional[Dict[str, Any]] = None
    token_usage: Optional[Dict[str, int]] = None
    cost: Optional[Dict[str, float]] = None

@dataclass
class Component:
    id: str
    hash_id: str
    source_hash_id: Optional[str]
    type: str
    name: str
    start_time: str
    end_time: str
    error: Optional[Error]
    parent_id: Optional[str]
    info: ComponentInfo
    data: Dict[str, Any]
    network_calls: List[NetworkCall]
    interactions: List[Interaction]
    children: Optional[List['Component']] = None

@dataclass
class Trace:
    id: str
    project_name: str
    start_time: str
    end_time: str
    metadata: Metadata
    data: List[Dict[str, Any]]
    replays: Optional[Dict[str, Any]]
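
Editor's note, not part of the diff: these dataclasses define no to_dict() helpers, so when BaseTracer.stop() dumps the trace they are serialized by the __dict__ branch of TracerJSONEncoder in base.py, which drops None values and underscore-prefixed attributes. A small standalone check of that behaviour, with the encoder logic copied from the diff so it runs without installing the package; the ToolInfo values are made up.

import json
from dataclasses import dataclass

# Mirrors the __dict__ branch of TracerJSONEncoder from base.py above.
class TracerJSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if hasattr(obj, '__dict__'):
            return {k: v for k, v in obj.__dict__.items()
                    if v is not None and not k.startswith('_')}
        return str(obj)

@dataclass
class ToolInfo:          # same shape as in data_structure.py
    tool_type: str
    version: str
    memory_used: int

print(json.dumps(ToolInfo("retriever", "1.0", 2048), cls=TracerJSONEncoder, indent=2))
# -> {"tool_type": "retriever", "version": "1.0", "memory_used": 2048}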