ragaai-catalyst 2.0.7.2__py3-none-any.whl → 2.0.7.2b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. ragaai_catalyst/evaluation.py +107 -153
  2. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +660 -0
  3. ragaai_catalyst/tracers/agentic_tracing/__init__.py +3 -0
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +311 -0
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +212 -0
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +270 -0
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +239 -0
  8. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +906 -0
  9. ragaai_catalyst/tracers/agentic_tracing/network_tracer.py +286 -0
  10. ragaai_catalyst/tracers/agentic_tracing/sample.py +197 -0
  11. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +235 -0
  12. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +221 -0
  13. ragaai_catalyst/tracers/agentic_tracing/unique_decorator_test.py +172 -0
  14. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +67 -0
  15. ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +3 -0
  16. ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +18 -0
  17. ragaai_catalyst/tracers/agentic_tracing/utils/data_classes.py +61 -0
  18. ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +32 -0
  19. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +181 -0
  20. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +5946 -0
  21. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +74 -0
  22. ragaai_catalyst/tracers/tracer.py +26 -4
  23. ragaai_catalyst/tracers/upload_traces.py +127 -0
  24. ragaai_catalyst-2.0.7.2b0.dist-info/METADATA +39 -0
  25. ragaai_catalyst-2.0.7.2b0.dist-info/RECORD +50 -0
  26. ragaai_catalyst-2.0.7.2.dist-info/METADATA +0 -386
  27. ragaai_catalyst-2.0.7.2.dist-info/RECORD +0 -29
  28. {ragaai_catalyst-2.0.7.2.dist-info → ragaai_catalyst-2.0.7.2b0.dist-info}/WHEEL +0 -0
  29. {ragaai_catalyst-2.0.7.2.dist-info → ragaai_catalyst-2.0.7.2b0.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/base.py
@@ -0,0 +1,270 @@
+ import json
+ import os
+ import platform
+ import re
+ import psutil
+ import pkg_resources
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional, Dict, Any, List
+ import uuid
+ import sys
+
+ from .data_structure import (
+     Trace, Metadata, SystemInfo, OSInfo, EnvironmentInfo,
+     Resources, CPUResource, MemoryResource, DiskResource, NetworkResource,
+     ResourceInfo, MemoryInfo, DiskInfo, NetworkInfo,
+     Component, LLMComponent, AgentComponent, ToolComponent,
+     NetworkCall, Interaction, Error
+ )
+
+ from ..upload_traces import UploadTraces
+ from ...ragaai_catalyst import RagaAICatalyst
+
+ class TracerJSONEncoder(json.JSONEncoder):
+     def default(self, obj):
+         if isinstance(obj, datetime):
+             return obj.isoformat()
+         if isinstance(obj, bytes):
+             try:
+                 return obj.decode('utf-8')
+             except UnicodeDecodeError:
+                 return str(obj)  # Fallback to string representation
+         if hasattr(obj, 'to_dict'):  # Handle objects with to_dict method
+             return obj.to_dict()
+         if hasattr(obj, '__dict__'):
+             # Filter out None values and handle nested serialization
+             return {k: v for k, v in obj.__dict__.items()
+                     if v is not None and not k.startswith('_')}
+         try:
+             # Try to convert to a basic type
+             return str(obj)
+         except:
+             return None  # Last resort: return None instead of failing
+
+ class BaseTracer:
+     def __init__(self, user_details):
+         self.user_details = user_details
+         self.project_name = self.user_details['project_name']  # Access the project_name
+         self.dataset_name = self.user_details['dataset_name']  # Access the dataset_name
+         self.project_id = self.user_details['project_id']  # Access the project_id
+
+         # Initialize trace data
+         self.trace_id = str(uuid.uuid4())
+         self.start_time = datetime.now().isoformat()
+         self.components: List[Component] = []
+         self.data_key = [{"start_time": self.start_time,
+                           "end_time": "",
+                           "spans": self.components
+                           }]
+
+     def _get_system_info(self) -> SystemInfo:
+         # Get OS info
+         os_info = OSInfo(
+             name=platform.system(),
+             version=platform.version(),
+             platform=platform.machine(),
+             kernel_version=platform.release()
+         )
+
+         # Get Python environment info
+         installed_packages = [f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set]
+         env_info = EnvironmentInfo(
+             name="Python",
+             version=platform.python_version(),
+             packages=installed_packages,
+             env_path=sys.prefix,
+             command_to_run=f"python {sys.argv[0]}"
+         )
+
+         return SystemInfo(
+             id=f"sys_{self.trace_id}",
+             os=os_info,
+             environment=env_info,
+             source_code="Path to the source code .zip file in format hashid.zip"  # TODO: Implement source code archiving
+         )
+
+     def _get_resources(self) -> Resources:
+         # CPU info
+         cpu_info = ResourceInfo(
+             name=platform.processor(),
+             cores=psutil.cpu_count(logical=False),
+             threads=psutil.cpu_count(logical=True)
+         )
+         cpu = CPUResource(
+             info=cpu_info,
+             interval="5s",
+             values=[psutil.cpu_percent()]
+         )
+
+         # Memory info
+         memory = psutil.virtual_memory()
+         mem_info = MemoryInfo(
+             total=memory.total / (1024**3),  # Convert to GB
+             free=memory.available / (1024**3)
+         )
+         mem = MemoryResource(
+             info=mem_info,
+             interval="5s",
+             values=[memory.percent]
+         )
+
+         # Disk info
+         disk = psutil.disk_usage('/')
+         disk_info = DiskInfo(
+             total=disk.total / (1024**3),
+             free=disk.free / (1024**3)
+         )
+         disk_io = psutil.disk_io_counters()
+         disk_resource = DiskResource(
+             info=disk_info,
+             interval="5s",
+             read=[disk_io.read_bytes / (1024**2)],  # MB
+             write=[disk_io.write_bytes / (1024**2)]
+         )
+
+         # Network info
+         net_io = psutil.net_io_counters()
+         net_info = NetworkInfo(
+             upload_speed=net_io.bytes_sent / (1024**2),  # MB
+             download_speed=net_io.bytes_recv / (1024**2)
+         )
+         net = NetworkResource(
+             info=net_info,
+             interval="5s",
+             uploads=[net_io.bytes_sent / (1024**2)],
+             downloads=[net_io.bytes_recv / (1024**2)]
+         )
+
+         return Resources(cpu=cpu, memory=mem, disk=disk_resource, network=net)
+
+     def start(self):
+         """Initialize a new trace"""
+         metadata = Metadata(
+             cost={},
+             tokens={},
+             system_info=self._get_system_info(),
+             resources=self._get_resources()
+         )
+
+         self.trace = Trace(
+             id=self.trace_id,
+             project_name=self.project_name,
+             start_time=self.start_time,
+             end_time="",  # Will be set when trace is stopped
+             metadata=metadata,
+             data=self.data_key,
+             replays={"source": None}
+         )
+
+     def stop(self):
+         """Stop the trace and save to JSON file"""
+         if hasattr(self, 'trace'):
+             self.trace.data[0]["end_time"] = datetime.now().isoformat()
+             self.trace.end_time = datetime.now().isoformat()
+
+             # Change span ids to int
+             self.trace = self._change_span_ids_to_int(self.trace)
+             self.trace = self._change_agent_intput_output(self.trace)
+             self.trace = self._extract_cost_tokens(self.trace)
+
+             # Create traces directory if it doesn't exist
+             self.traces_dir = Path("traces")
+             self.traces_dir.mkdir(exist_ok=True)
+             filename = self.trace.id + ".json"
+             filepath = self.traces_dir / filename
+
+             # Save to JSON file using custom encoder
+             with open(filepath, 'w') as f:
+                 json.dump(self.trace.__dict__, f, cls=TracerJSONEncoder, indent=2)
+
+             print(f"Trace saved to {filepath}")
+             # import pdb; pdb.set_trace()
+             # Upload traces
+             json_file_path = str(filepath)
+             project_name = self.project_name
+             project_id = self.project_id  # TODO: Replace with actual project ID
+             dataset_name = self.dataset_name
+             user_detail = self.user_details
+             base_url = os.getenv('RAGAAI_CATALYST_BASE_URL')
+             upload_traces = UploadTraces(
+                 json_file_path=json_file_path,
+                 project_name=project_name,
+                 project_id=project_id,
+                 dataset_name=dataset_name,
+                 user_detail=user_detail,
+                 base_url=base_url
+             )
+             upload_traces.upload_traces()
+
+     def add_component(self, component: Component):
+         """Add a component to the trace"""
+         self.components.append(component)
+
+     def __enter__(self):
+         self.start()
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.stop()
+
+     def _change_span_ids_to_int(self, trace):
+         # import pdb; pdb.set_trace()
+         id, parent_id = 1, 0
+         for span in trace.data[0]["spans"]:
+             span.id = id
+             span.parent_id = parent_id
+             id += 1
+             if span.type=="agent":
+                 for children in span.data["children"]:
+                     children["id"] = id
+                     children["parent_id"] = span.id
+                     id += 1
+         return trace
+
+     def _change_agent_intput_output(self, trace):
+         for span in trace.data[0]["spans"]:
+             if span.type == "agent":
+                 # import pdb; pdb.set_trace()
+                 childrens = span.data["children"]
+                 if childrens != []:
+                     span.data["input"] = childrens[0]["data"]["input"]
+                     span.data["output"] = childrens[-1]["data"]["output"]
+         return trace
+
+     def _extract_cost_tokens(self, trace):
+         cost = {}
+         tokens = {}
+         for span in trace.data[0]["spans"]:
+             if span.type == "llm":
+                 info = span.info
+                 if isinstance(info, dict):
+                     cost_info = info.get('cost', {})
+                     for key, value in cost_info.items():
+                         if key not in cost:
+                             cost[key] = 0
+                         cost[key] += value
+                     token_info = info.get('tokens', {})
+                     for key, value in token_info.items():
+                         if key not in tokens:
+                             tokens[key] = 0
+                         tokens[key] += value
+             if span.type == "agent":
+                 for children in span.data["children"]:
+                     if children["type"] != "llm":
+                         continue
+                     info = children["info"]
+                     if isinstance(info, dict):
+                         cost_info = info.get('cost', {})
+                         for key, value in cost_info.items():
+                             if key not in cost:
+                                 cost[key] = 0
+                             cost[key] += value
+                         token_info = info.get('tokens', {})
+                         for key, value in token_info.items():
+                             if key not in tokens:
+                                 tokens[key] = 0
+                             tokens[key] += value
+         trace.metadata.cost = cost
+         trace.metadata.tokens = tokens
+         return trace
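
For orientation, the new BaseTracer is written to be used as a context manager: __enter__ calls start() and __exit__ calls stop(), which renumbers span ids, aggregates cost and token usage into trace.metadata, writes traces/<trace_id>.json, and hands the file to UploadTraces. A minimal usage sketch follows, assuming a project already exists in RagaAI Catalyst, the RAGAAI_CATALYST_BASE_URL environment variable is set, and placeholder project/dataset values (stop() attempts a real network upload):

from ragaai_catalyst.tracers.agentic_tracing.base import BaseTracer

# Placeholder project details; the keys match what BaseTracer.__init__ reads.
user_details = {
    "project_name": "my-project",   # hypothetical
    "dataset_name": "my-dataset",   # hypothetical
    "project_id": "1234",           # hypothetical
}

with BaseTracer(user_details) as tracer:
    # Spans are normally added by the concrete tracers (llm_tracer, agent_tracer,
    # tool_tracer) via tracer.add_component(<Component>); nothing is required here.
    pass
# On exit, stop() post-processes the spans, saves traces/<trace_id>.json and
# calls UploadTraces.upload_traces(), which needs valid Catalyst credentials.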
ragaai_catalyst/tracers/agentic_tracing/data_structure.py
@@ -0,0 +1,239 @@
+ from dataclasses import dataclass
+ from typing import List, Dict, Optional, Any
+ from datetime import datetime
+
+ @dataclass
+ class OSInfo:
+     name: str
+     version: str
+     platform: str
+     kernel_version: str
+
+ @dataclass
+ class EnvironmentInfo:
+     name: str
+     version: str
+     packages: List[str]
+     env_path: str
+     command_to_run: str
+
+ @dataclass
+ class SystemInfo:
+     id: str
+     os: OSInfo
+     environment: EnvironmentInfo
+     source_code: str
+
+ @dataclass
+ class ResourceInfo:
+     name: str
+     cores: int
+     threads: int
+
+ @dataclass
+ class CPUResource:
+     info: ResourceInfo
+     interval: str
+     values: List[float]
+
+ @dataclass
+ class MemoryInfo:
+     total: float
+     free: float
+
+ @dataclass
+ class MemoryResource:
+     info: MemoryInfo
+     interval: str
+     values: List[float]
+
+ @dataclass
+ class DiskInfo:
+     total: float
+     free: float
+
+ @dataclass
+ class DiskResource:
+     info: DiskInfo
+     interval: str
+     read: List[float]
+     write: List[float]
+
+ @dataclass
+ class NetworkInfo:
+     upload_speed: float
+     download_speed: float
+
+ @dataclass
+ class NetworkResource:
+     info: NetworkInfo
+     interval: str
+     uploads: List[float]
+     downloads: List[float]
+
+ @dataclass
+ class Resources:
+     cpu: CPUResource
+     memory: MemoryResource
+     disk: DiskResource
+     network: NetworkResource
+
+ @dataclass
+ class Metadata:
+     cost: Dict[str, Any]
+     tokens: Dict[str, Any]
+     system_info: SystemInfo
+     resources: Resources
+
+ @dataclass
+ class NetworkCall:
+     url: str
+     method: str
+     status_code: int
+     response_time: float
+     bytes_sent: int
+     bytes_received: int
+     protocol: str
+     connection_id: str
+     parent_id: str
+     request: Dict[str, Any]
+     response: Dict[str, Any]
+
+ @dataclass
+ class Interaction:
+     id: str
+     interaction_type: str
+     content: Optional[str]
+     timestamp: str
+
+ @dataclass
+ class Error:
+     code: int
+     type: str
+     message: str
+     details: Dict[str, Any]
+
+ @dataclass
+ class LLMParameters:
+     temperature: float
+     top_p: float
+     max_tokens: int
+
+ @dataclass
+ class TokenUsage:
+     prompt_tokens: int
+     completion_tokens: int
+     total_tokens: int
+
+ @dataclass
+ class Cost:
+     prompt_cost: float
+     completion_cost: float
+     total_cost: float
+
+ @dataclass
+ class LLMInfo:
+     model: str
+     parameters: LLMParameters
+     token_usage: TokenUsage
+     cost: Cost
+
+ @dataclass
+ class AgentInfo:
+     agent_type: str
+     version: str
+     capabilities: List[str]
+
+ @dataclass
+ class ToolInfo:
+     tool_type: str
+     version: str
+     memory_used: int
+
+ @dataclass
+ class LLMComponent:
+     id: str
+     hash_id: str
+     source_hash_id: Optional[str]
+     type: str = "llm"
+     name: str = ""
+     start_time: str = ""
+     end_time: str = ""
+     error: Optional[Error] = None
+     parent_id: Optional[str] = None
+     info: LLMInfo = None
+     data: Dict[str, Any] = None
+     network_calls: List[NetworkCall] = None
+     interactions: List[Interaction] = None
+
+ @dataclass
+ class AgentComponent:
+     id: str
+     hash_id: str
+     source_hash_id: Optional[str]
+     type: str = "agent"
+     name: str = ""
+     start_time: str = ""
+     end_time: str = ""
+     error: Optional[Error] = None
+     parent_id: Optional[str] = None
+     info: AgentInfo = None
+     data: Dict[str, Any] = None
+     network_calls: List[NetworkCall] = None
+     interactions: List[Interaction] = None
+     # children: List['Component'] = None
+
+ @dataclass
+ class ToolComponent:
+     id: str
+     hash_id: str
+     source_hash_id: Optional[str]
+     type: str = "tool"
+     name: str = ""
+     start_time: str = ""
+     end_time: str = ""
+     error: Optional[Error] = None
+     parent_id: Optional[str] = None
+     info: ToolInfo = None
+     data: Dict[str, Any] = None
+     network_calls: List[NetworkCall] = None
+     interactions: List[Interaction] = None
+
+ @dataclass
+ class ComponentInfo:
+     tool_type: Optional[str] = None
+     agent_type: Optional[str] = None
+     version: str = ""
+     capabilities: Optional[List[str]] = None
+     memory_used: Optional[int] = None
+     model: Optional[str] = None
+     parameters: Optional[Dict[str, Any]] = None
+     token_usage: Optional[Dict[str, int]] = None
+     cost: Optional[Dict[str, float]] = None
+
+ @dataclass
+ class Component:
+     id: str
+     hash_id: str
+     source_hash_id: Optional[str]
+     type: str
+     name: str
+     start_time: str
+     end_time: str
+     error: Optional[Error]
+     parent_id: Optional[str]
+     info: ComponentInfo
+     data: Dict[str, Any]
+     network_calls: List[NetworkCall]
+     interactions: List[Interaction]
+     children: Optional[List['Component']] = None
+
+ @dataclass
+ class Trace:
+     id: str
+     project_name: str
+     start_time: str
+     end_time: str
+     metadata: Metadata
+     data: List[Dict[str, Any]]
+     replays: Optional[Dict[str, Any]]
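
A note on how these dataclasses are consumed by BaseTracer above: _extract_cost_tokens reads span.info with dict-style .get('cost') / .get('tokens'), so an LLM span only contributes to trace.metadata when its info field is a plain dict (the LLMInfo annotation is not enforced at runtime). A rough sketch of building an LLM span by hand, with illustrative values throughout; in the package these fields are normally filled in by llm_tracer.py:

from ragaai_catalyst.tracers.agentic_tracing.data_structure import LLMComponent

llm_span = LLMComponent(
    id="2",                      # rewritten to an int by _change_span_ids_to_int
    hash_id="abc123",            # hypothetical hash
    source_hash_id=None,
    name="openai.chat.completions",   # hypothetical span name
    start_time="2024-01-01T00:00:00",
    end_time="2024-01-01T00:00:01",
    info={                       # plain dict so _extract_cost_tokens can .get() it
        "cost": {"input_cost": 0.0001, "output_cost": 0.0002, "total_cost": 0.0003},
        "tokens": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
    },
    data={"input": "prompt text", "output": "model reply"},
    network_calls=[],
    interactions=[],
)
# tracer.add_component(llm_span) would append it to the trace's span list.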