ragaai-catalyst 2.1.4.1b1__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff shows the changes between publicly available package versions as they were released to a supported registry. It is provided for informational purposes only.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +578 -258
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +40 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b1.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b1.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b1.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b1.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b1.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,29 @@
 import json
 import os
-import platform
-import psutil
-import pkg_resources
 from datetime import datetime
 from pathlib import Path
-from typing import List, Any, Dict
+from typing import List, Any, Dict, Optional
 import uuid
 import sys
 import tempfile
 import threading
 import time
-
-from
+
+from ragaai_catalyst.tracers.agentic_tracing.upload.upload_local_metric import calculate_metric
+from ragaai_catalyst import RagaAICatalyst
+from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
     Trace,
     Metadata,
     SystemInfo,
-    OSInfo,
-    EnvironmentInfo,
     Resources,
-    CPUResource,
-    MemoryResource,
-    DiskResource,
-    NetworkResource,
-    ResourceInfo,
-    MemoryInfo,
-    DiskInfo,
-    NetworkInfo,
     Component,
 )
+from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
+from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
+from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
+from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
+from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import submit_upload_task, get_task_status, ensure_uploader_running

-from ..upload.upload_agentic_traces import UploadAgenticTraces
-from ..upload.upload_code import upload_code
-from ..upload.upload_trace_metric import upload_trace_metric
-from ..utils.file_name_tracker import TrackName
-from ..utils.zip_list_of_unique_files import zip_list_of_unique_files
-from ..utils.span_attributes import SpanAttributes
-from ..utils.create_dataset_schema import create_dataset_schema_with_trace
-
-
-# Configure logging to show debug messages (which includes info messages as well)
 import logging

 logger = logging.getLogger(__name__)
@@ -76,12 +60,13 @@ class TracerJSONEncoder(json.JSONEncoder):
 class BaseTracer:
     def __init__(self, user_details):
         self.user_details = user_details
-        self.project_name = self.user_details["project_name"]
-        self.dataset_name = self.user_details["dataset_name"]
-        self.project_id = self.user_details["project_id"]
-        self.trace_name = self.user_details["trace_name"]
+        self.project_name = self.user_details["project_name"]
+        self.dataset_name = self.user_details["dataset_name"]
+        self.project_id = self.user_details["project_id"]
+        self.trace_name = self.user_details["trace_name"]
+        self.base_url = self.user_details.get("base_url", RagaAICatalyst.BASE_URL) # Get base_url from user_details or fallback to default
         self.visited_metrics = []
-        self.trace_metrics = []
+        self.trace_metrics = []

         # Initialize trace data
         self.trace_id = None
@@ -97,117 +82,72 @@ class BaseTracer:
         self.network_usage_list = []
         self.tracking_thread = None
         self.tracking = False
+        self.system_monitor = None
+        self.gt = None

-
-
-
-
-
-
-
-
-
-        # Get Python environment info
-        installed_packages = [
-            f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set
-        ]
-        env_info = EnvironmentInfo(
-            name="Python",
-            version=platform.python_version(),
-            packages=installed_packages,
-            env_path=sys.prefix,
-            command_to_run=f"python {sys.argv[0]}",
-        )
+        # For upload tracking
+        self.upload_task_id = None
+
+        # For backward compatibility
+        self._upload_tasks = []
+        self._is_uploading = False
+        self._upload_completed_callback = None
+
+        ensure_uploader_running()

-
-
-            os=os_info,
-            environment=env_info,
-            source_code="Path to the source code .zip file in format hashid.zip", # TODO: Implement source code archiving
-        )
+    def _get_system_info(self) -> SystemInfo:
+        return self.system_monitor.get_system_info()

     def _get_resources(self) -> Resources:
-
-        cpu_info = ResourceInfo(
-            name=platform.processor(),
-            cores=psutil.cpu_count(logical=False),
-            threads=psutil.cpu_count(logical=True),
-        )
-        cpu = CPUResource(info=cpu_info, interval="5s", values=[psutil.cpu_percent()])
-
-        # Memory info
-        memory = psutil.virtual_memory()
-        mem_info = MemoryInfo(
-            total=memory.total / (1024**3), # Convert to GB
-            free=memory.available / (1024**3),
-        )
-        mem = MemoryResource(info=mem_info, interval="5s", values=[memory.percent])
-
-        # Disk info
-        disk = psutil.disk_usage("/")
-        disk_info = DiskInfo(total=disk.total / (1024**3), free=disk.free / (1024**3))
-        disk_io = psutil.disk_io_counters()
-        disk_resource = DiskResource(
-            info=disk_info,
-            interval="5s",
-            read=[disk_io.read_bytes / (1024**2)], # MB
-            write=[disk_io.write_bytes / (1024**2)],
-        )
-
-        # Network info
-        net_io = psutil.net_io_counters()
-        net_info = NetworkInfo(
-            upload_speed=net_io.bytes_sent / (1024**2), # MB
-            download_speed=net_io.bytes_recv / (1024**2),
-        )
-        net = NetworkResource(
-            info=net_info,
-            interval="5s",
-            uploads=[net_io.bytes_sent / (1024**2)],
-            downloads=[net_io.bytes_recv / (1024**2)],
-        )
-
-        return Resources(cpu=cpu, memory=mem, disk=disk_resource, network=net)
+        return self.system_monitor.get_resources()

     def _track_memory_usage(self):
         self.memory_usage_list = []
         while self.tracking:
-
-            self.memory_usage_list.append(
-
+            usage = self.system_monitor.track_memory_usage()
+            self.memory_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in memory tracking: {str(e)}")

     def _track_cpu_usage(self):
         self.cpu_usage_list = []
         while self.tracking:
-
-            self.cpu_usage_list.append(
-
+            usage = self.system_monitor.track_cpu_usage(self.interval_time)
+            self.cpu_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in CPU tracking: {str(e)}")

     def _track_disk_usage(self):
         self.disk_usage_list = []
         while self.tracking:
-
-            self.disk_usage_list.append(
-
-
-
-
+            usage = self.system_monitor.track_disk_usage()
+            self.disk_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in disk tracking: {str(e)}")

     def _track_network_usage(self):
         self.network_usage_list = []
         while self.tracking:
-
-            self.network_usage_list.append(
-
-
-
-
+            usage = self.system_monitor.track_network_usage()
+            self.network_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in network tracking: {str(e)}")

     def start(self):
         """Initialize a new trace"""
         self.tracking = True
-        self.
-        self.
+        self.trace_id = str(uuid.uuid4())
+        self.file_tracker.trace_main_file()
+        self.system_monitor = SystemMonitor(self.trace_id)
+        threading.Thread(target=self._track_memory_usage).start()
         threading.Thread(target=self._track_cpu_usage).start()
         threading.Thread(target=self._track_disk_usage).start()
         threading.Thread(target=self._track_network_usage).start()
@@ -223,9 +163,6 @@ class BaseTracer:
             resources=self._get_resources(),
         )

-        # Generate a unique trace ID, when trace starts
-        self.trace_id = str(uuid.uuid4())
-
         # Get the start time
         self.start_time = datetime.now().astimezone().isoformat()

@@ -249,124 +186,221 @@ class BaseTracer:
             metrics=[] # Initialize empty metrics list
         )

+    def on_upload_completed(self, callback_fn):
+        """
+        Register a callback function to be called when all uploads are completed.
+        For backward compatibility - simulates the old callback mechanism.
+
+        Args:
+            callback_fn: A function that takes a single argument (the tracer instance)
+        """
+        self._upload_completed_callback = callback_fn
+
+        # Check for status periodically and call callback when complete
+        def check_status_and_callback():
+            if self.upload_task_id:
+                status = self.get_upload_status()
+                if status.get("status") in ["completed", "failed"]:
+                    self._is_uploading = False
+                    # Execute callback
+                    try:
+                        if self._upload_completed_callback:
+                            self._upload_completed_callback(self)
+                    except Exception as e:
+                        logger.error(f"Error in upload completion callback: {e}")
+                    return
+
+            # Schedule next check
+            threading.Timer(5.0, check_status_and_callback).start()
+
+        # Start status checking if we already have a task
+        if self.upload_task_id:
+            threading.Timer(5.0, check_status_and_callback).start()
+
+        return self
+
+    def wait_for_uploads(self, timeout=None):
+        """
+        Wait for all async uploads to complete.
+        This provides backward compatibility with the old API.
+
+        Args:
+            timeout: Maximum time to wait in seconds (None means wait indefinitely)
+
+        Returns:
+            True if all uploads completed successfully, False otherwise
+        """
+        if not self.upload_task_id:
+            return True
+
+        start_time = time.time()
+        while True:
+            # Check if timeout expired
+            if timeout is not None and time.time() - start_time > timeout:
+                logger.warning(f"Upload wait timed out after {timeout} seconds")
+                return False
+
+            # Get current status
+            status = self.get_upload_status()
+            if status.get("status") == "completed":
+                return True
+            elif status.get("status") == "failed":
+                logger.error(f"Upload failed: {status.get('error')}")
+                return False
+            elif status.get("status") == "unknown":
+                logger.warning("Upload task not found, assuming completed")
+                return True
+
+            # Sleep before checking again
+            time.sleep(1.0)
+
     def stop(self):
-        """Stop the trace and save to JSON file"""
+        """Stop the trace and save to JSON file, then submit to background uploader"""
         if hasattr(self, "trace"):
+            # Set end times
             self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
             self.trace.end_time = datetime.now().astimezone().isoformat()

-            #
+            # Stop tracking metrics
             self.tracking = False
-
-
-            self.
-
-            #
-            self.trace.metadata.resources.cpu.values = self.cpu_usage_list
-
-            #track network and disk usage
-            network_upoloads, network_downloads = 0, 0
-            disk_read, disk_write = 0, 0
-            for network_usage, disk_usage in zip(self.network_usage_list, self.disk_usage_list):
-                network_upoloads += network_usage['uploads']
-                network_downloads += network_usage['downloads']
-                disk_read += disk_usage['disk_read']
-                disk_write += disk_usage['disk_write']
-
-            #track disk usage
-            self.trace.metadata.resources.disk.read = [disk_read / len(self.disk_usage_list)]
-            self.trace.metadata.resources.disk.write = [disk_write / len(self.disk_usage_list)]
-
-            #track network usage
-            self.trace.metadata.resources.network.uploads = [network_upoloads / len(self.network_usage_list)]
-            self.trace.metadata.resources.network.downloads = [network_downloads / len(self.network_usage_list)]
-
-            # update interval time
-            self.trace.metadata.resources.cpu.interval = float(self.interval_time)
-            self.trace.metadata.resources.memory.interval = float(self.interval_time)
-            self.trace.metadata.resources.disk.interval = float(self.interval_time)
-            self.trace.metadata.resources.network.interval = float(self.interval_time)
-
-            # Change span ids to int
+
+            # Process and aggregate metrics
+            self._process_resource_metrics()
+
+            # Process trace spans
             self.trace = self._change_span_ids_to_int(self.trace)
             self.trace = self._change_agent_input_output(self.trace)
             self.trace = self._extract_cost_tokens(self.trace)

-            # Create traces directory
+            # Create traces directory and prepare file paths
             self.traces_dir = tempfile.gettempdir()
             filename = self.trace.id + ".json"
             filepath = f"{self.traces_dir}/{filename}"

-            #
+            # Process source files
             list_of_unique_files = self.file_tracker.get_unique_files()
             hash_id, zip_path = zip_list_of_unique_files(
                 list_of_unique_files, output_dir=self.traces_dir
             )
-
-            # replace source code with zip_path
             self.trace.metadata.system_info.source_code = hash_id

-            #
+            # Prepare trace data for saving
             trace_data = self.trace.to_dict()
-
             trace_data["metrics"] = self.trace_metrics
-
-            # Clean up trace_data before saving
             cleaned_trace_data = self._clean_trace(trace_data)
-
-            #
+
+            # Add interactions
             interactions = self.format_interactions()
-
+            cleaned_trace_data["workflow"] = interactions["workflow"]

+            # Save trace data to file
             with open(filepath, "w") as f:
                 json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)

-            logger.info("
+            logger.info("Traces saved successfully.")
             logger.debug(f"Trace saved to {filepath}")
-
-
-
-            project_name = self.project_name
-            project_id = self.project_id
-            dataset_name = self.dataset_name
-            user_detail = self.user_details
-            base_url = RagaAICatalyst.BASE_URL
-
-            ## create dataset schema
-            response = create_dataset_schema_with_trace(
-                dataset_name=dataset_name, project_name=project_name
-            )
-
-            ##Upload trace metrics
-            response = upload_trace_metric(
-                json_file_path=json_file_path,
-                dataset_name=self.dataset_name,
-                project_name=self.project_name,
-            )
-
-            upload_traces = UploadAgenticTraces(
-                json_file_path=json_file_path,
-                project_name=project_name,
-                project_id=project_id,
-                dataset_name=dataset_name,
-                user_detail=user_detail,
-                base_url=base_url,
-            )
-            upload_traces.upload_agentic_traces()
+
+            # Make sure uploader process is available
+            ensure_uploader_running()

-
-
+            logger.debug("Base URL used for uploading: {}".format(self.base_url))
+
+            # Submit to background process for uploading using futures
+            self.upload_task_id = submit_upload_task(
+                filepath=filepath,
                 hash_id=hash_id,
                 zip_path=zip_path,
-                project_name=project_name,
-
+                project_name=self.project_name,
+                project_id=self.project_id,
+                dataset_name=self.dataset_name,
+                user_details=self.user_details,
+                base_url=self.base_url
             )
-
+
+            # For backward compatibility
+            self._is_uploading = True
+
+            # Start checking for completion if a callback is registered
+            if self._upload_completed_callback:
+                # Start a thread to check status and call callback when complete
+                def check_status_and_callback():
+                    status = self.get_upload_status()
+                    if status.get("status") in ["completed", "failed"]:
+                        self._is_uploading = False
+                        # Execute callback
+                        try:
+                            self._upload_completed_callback(self)
+                        except Exception as e:
+                            logger.error(f"Error in upload completion callback: {e}")
+                        return
+
+                    # Check again after a delay
+                    threading.Timer(5.0, check_status_and_callback).start()
+
+                # Start checking
+                threading.Timer(5.0, check_status_and_callback).start()
+
+            logger.info(f"Submitted upload task with ID: {self.upload_task_id}")

-        # Cleanup
+        # Cleanup local resources
         self.components = []
         self.file_tracker.reset()
+
+    def get_upload_status(self):
+        """
+        Get the status of the upload task.
+
+        Returns:
+            dict: Status information
+        """
+        if not self.upload_task_id:
+            return {"status": "not_started", "message": "No upload has been initiated"}
+
+        return get_task_status(self.upload_task_id)

+    def _process_resource_metrics(self):
+        """Process and aggregate all resource metrics"""
+        # Process memory metrics
+        self.trace.metadata.resources.memory.values = self.memory_usage_list
+
+        # Process CPU metrics
+        self.trace.metadata.resources.cpu.values = self.cpu_usage_list
+
+        # Process network and disk metrics
+        network_uploads, network_downloads = 0, 0
+        disk_read, disk_write = 0, 0
+
+        # Handle cases where lists might have different lengths
+        min_len = min(len(self.network_usage_list), len(self.disk_usage_list)) if self.network_usage_list and self.disk_usage_list else 0
+        for i in range(min_len):
+            network_usage = self.network_usage_list[i]
+            disk_usage = self.disk_usage_list[i]
+
+            # Safely get network usage values with defaults of 0
+            network_uploads += network_usage.get('uploads', 0) or 0
+            network_downloads += network_usage.get('downloads', 0) or 0
+
+            # Safely get disk usage values with defaults of 0
+            disk_read += disk_usage.get('disk_read', 0) or 0
+            disk_write += disk_usage.get('disk_write', 0) or 0
+
+        # Set aggregate values
+        disk_list_len = len(self.disk_usage_list)
+        self.trace.metadata.resources.disk.read = [disk_read / disk_list_len if disk_list_len > 0 else 0]
+        self.trace.metadata.resources.disk.write = [disk_write / disk_list_len if disk_list_len > 0 else 0]
+
+        network_list_len = len(self.network_usage_list)
+        self.trace.metadata.resources.network.uploads = [
+            network_uploads / network_list_len if network_list_len > 0 else 0]
+        self.trace.metadata.resources.network.downloads = [
+            network_downloads / network_list_len if network_list_len > 0 else 0]
+
+        # Set interval times
+        self.trace.metadata.resources.cpu.interval = float(self.interval_time)
+        self.trace.metadata.resources.memory.interval = float(self.interval_time)
+        self.trace.metadata.resources.disk.interval = float(self.interval_time)
+        self.trace.metadata.resources.network.interval = float(self.interval_time)
+
     def add_component(self, component: Component):
         """Add a component to the trace"""
         self.components.append(component)
@@ -434,38 +468,44 @@ class BaseTracer:
     def _extract_cost_tokens(self, trace):
         cost = {}
         tokens = {}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        def process_span_info(info):
+            if not isinstance(info, dict):
+                return
+            cost_info = info.get("cost", {})
+            for key, value in cost_info.items():
+                if key not in cost:
+                    cost[key] = 0
+                cost[key] += value
+            token_info = info.get("tokens", {})
+            for key, value in token_info.items():
+                if key not in tokens:
+                    tokens[key] = 0
+                tokens[key] += value
+
+        def process_spans(spans):
+            for span in spans:
+                # Get span type, handling both span objects and dictionaries
+                span_type = span.type if hasattr(span, 'type') else span.get('type')
+                span_info = span.info if hasattr(span, 'info') else span.get('info', {})
+                span_data = span.data if hasattr(span, 'data') else span.get('data', {})
+
+                # Process direct LLM spans
+                if span_type == "llm":
+                    process_span_info(span_info)
+                # Process agent spans recursively
+                elif span_type == "agent":
+                    # Process LLM children in the current agent span
+                    children = span_data.get("children", [])
+                    for child in children:
+                        child_type = child.get("type")
+                        if child_type == "llm":
+                            process_span_info(child.get("info", {}))
+                        # Recursively process nested agent spans
+                        elif child_type == "agent":
+                            process_spans([child])
+
+        process_spans(trace.data[0]["spans"])
         trace.metadata.cost = cost
         trace.metadata.tokens = tokens
         return trace
@@ -513,15 +553,16 @@ class BaseTracer:
                         else existing_span.__dict__
                     )
                     if (
-
-
-
-
-
-
+                        existing_dict.get("hash_id")
+                        == span_dict.get("hash_id")
+                        and str(existing_dict.get("data", {}).get("input"))
+                        == str(span_dict.get("data", {}).get("input"))
+                        and str(existing_dict.get("data", {}).get("output"))
+                        == str(span_dict.get("data", {}).get("output"))
                     ):
                         unique_spans[i] = span
                         break
+
             else:
                 # For non-LLM spans, process their children if they exist
                 if "data" in span_dict and "children" in span_dict["data"]:
@@ -532,8 +573,44 @@ class BaseTracer:
                         span["data"]["children"] = filtered_children
                     else:
                         span.data["children"] = filtered_children
-                unique_spans.append(span)
+                unique_spans.append(span)

+            # Process spans to update model information for LLM spans with same name
+            llm_spans_by_name = {}
+            for i, span in enumerate(unique_spans):
+                span_dict = span if isinstance(span, dict) else span.__dict__
+
+                if span_dict.get('type') == 'llm':
+                    span_name = span_dict.get('name')
+                    if span_name:
+                        if span_name not in llm_spans_by_name:
+                            llm_spans_by_name[span_name] = []
+                        llm_spans_by_name[span_name].append((i, span_dict))
+
+            # Update model information for spans with same name
+            for spans_with_same_name in llm_spans_by_name.values():
+                if len(spans_with_same_name) > 1:
+                    # Check if any span has non-default model
+                    has_custom_model = any(
+                        span[1].get('info', {}).get('model') != 'default'
+                        for span in spans_with_same_name
+                    )
+
+                    # If we have a custom model, update all default models to 'custom'
+                    if has_custom_model:
+                        for idx, span_dict in spans_with_same_name:
+                            if span_dict.get('info', {}).get('model') == 'default':
+                                if isinstance(unique_spans[idx], dict):
+                                    if 'info' not in unique_spans[idx]:
+                                        unique_spans[idx]['info'] = {}
+                                    # unique_spans[idx]['info']['model'] = 'custom'
+                                    unique_spans[idx]['type'] = 'custom'
+                                else:
+                                    if not hasattr(unique_spans[idx], 'info'):
+                                        unique_spans[idx].info = {}
+                                    # unique_spans[idx].info['model'] = 'custom'
+                                    unique_spans[idx].type = 'custom'
+
             return unique_spans

         # Remove any spans without hash ids
@@ -560,7 +637,7 @@ class BaseTracer:
             int: Next interaction ID to use
         """
         child_type = child.get("type")
-
+
         if child_type == "tool":
             # Tool call start
             interactions.append(
@@ -665,7 +742,7 @@ class BaseTracer:
                 {
                     "id": str(interaction_id),
                     "span_id": child.get("id"),
-                    "interaction_type": child_type,
+                    "interaction_type": f"{child_type}_call_start",
                     "name": child.get("name"),
                     "content": child.get("data", {}),
                     "timestamp": child.get("start_time"),
@@ -674,6 +751,19 @@ class BaseTracer:
             )
             interaction_id += 1

+            interactions.append(
+                {
+                    "id": str(interaction_id),
+                    "span_id": child.get("id"),
+                    "interaction_type": f"{child_type}_call_end",
+                    "name": child.get("name"),
+                    "content": child.get("data", {}),
+                    "timestamp": child.get("end_time"),
+                    "error": child.get("error"),
+                }
+            )
+            interaction_id += 1
+
             # Process additional interactions and network calls
             if "interactions" in child:
                 for interaction in child["interactions"]:
@@ -833,7 +923,7 @@ class BaseTracer:
                 {
                     "id": str(interaction_id),
                     "span_id": span.id,
-                    "interaction_type": span.type,
+                    "interaction_type": f"{span.type}_call_start",
                     "name": span.name,
                     "content": span.data,
                     "timestamp": span.start_time,
@@ -842,6 +932,19 @@ class BaseTracer:
             )
             interaction_id += 1

+            interactions.append(
+                {
+                    "id": str(interaction_id),
+                    "span_id": span.id,
+                    "interaction_type": f"{span.type}_call_end",
+                    "name": span.name,
+                    "content": span.data,
+                    "timestamp": span.end_time,
+                    "error": span.error,
+                }
+            )
+            interaction_id += 1
+
             # Process interactions from span.data if they exist
             if span.interactions:
                 for span_interaction in span.interactions:
@@ -890,15 +993,83 @@ class BaseTracer:

         return {"workflow": sorted_interactions}

+    # TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
+
+    def execute_metrics(self,
+                        name: str,
+                        model: str,
+                        provider: str,
+                        prompt: str,
+                        context: str,
+                        response: str
+                        ):
+        if not hasattr(self, 'trace'):
+            logger.warning("Cannot add metrics before trace is initialized. Call start() first.")
+            return
+
+        # Convert individual parameters to metric dict if needed
+        if isinstance(name, str):
+            metrics = [{
+                "name": name
+            }]
+        else:
+            # Handle dict or list input
+            metrics = name if isinstance(name, list) else [name] if isinstance(name, dict) else []
+
+        try:
+            for metric in metrics:
+                if not isinstance(metric, dict):
+                    raise ValueError(f"Expected dict, got {type(metric)}")
+
+                if "name" not in metric :
+                    raise ValueError("Metric must contain 'name'") #score was written not required here
+
+                # Handle duplicate metric names on executing metric
+                metric_name = metric["name"]
+                if metric_name in self.visited_metrics:
+                    count = sum(1 for m in self.visited_metrics if m.startswith(metric_name))
+                    metric_name = f"{metric_name}_{count + 1}"
+                self.visited_metrics.append(metric_name)
+
+                result = calculate_metric(project_id=self.project_id,
+                                          metric_name=metric_name,
+                                          model=model,
+                                          org_domain="raga",
+                                          provider=provider,
+                                          user_id="1", # self.user_details['id'],
+                                          prompt=prompt,
+                                          context=context,
+                                          response=response
+                                          )
+
+                result = result['data']
+                formatted_metric = {
+                    "name": metric_name,
+                    "score": result.get("score"),
+                    "reason": result.get("reason", ""),
+                    "source": "user",
+                    "cost": result.get("cost"),
+                    "latency": result.get("latency"),
+                    "mappings": [],
+                    "config": result.get("metric_config", {})
+                }
+
+                logger.debug(f"Executed metric: {formatted_metric}")
+
+        except ValueError as e:
+            logger.error(f"Validation Error: {e}")
+        except Exception as e:
+            logger.error(f"Error adding metric: {e}")
+
     def add_metrics(
-
-
-
-
-
-
-
-
+        self,
+        name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
+        score: float | int = None,
+        reasoning: str = "",
+        cost: float = None,
+        latency: float = None,
+        metadata: Dict[str, Any] = None,
+        config: Dict[str, Any] = None,
     ):
         """Add metrics at the trace level.

@@ -942,7 +1113,7 @@ class BaseTracer:
             for metric in metrics:
                 if not isinstance(metric, dict):
                     raise ValueError(f"Expected dict, got {type(metric)}")
-
+
                 if "name" not in metric or "score" not in metric:
                     raise ValueError("Metric must contain 'name' and 'score' fields")

@@ -954,7 +1125,7 @@ class BaseTracer:
                 self.visited_metrics.append(metric_name)

                 formatted_metric = {
-                    "name": metric_name,
+                    "name": metric_name,
                     "score": metric["score"],
                     "reason": metric.get("reasoning", ""),
                     "source": "user",
@@ -964,7 +1135,7 @@ class BaseTracer:
                     "mappings": [],
                     "config": metric.get("config", {})
                 }
-
+
                 self.trace_metrics.append(formatted_metric)
                 logger.debug(f"Added trace-level metric: {formatted_metric}")

@@ -972,8 +1143,157 @@ class BaseTracer:
             logger.error(f"Validation Error: {e}")
         except Exception as e:
             logger.error(f"Error adding metric: {e}")
-
+
     def span(self, span_name):
         if span_name not in self.span_attributes_dict:
-            self.span_attributes_dict[span_name] = SpanAttributes(span_name)
-        return self.span_attributes_dict[span_name]
+            self.span_attributes_dict[span_name] = SpanAttributes(span_name, self.project_id)
+        return self.span_attributes_dict[span_name]
+
+    @staticmethod
+    def get_formatted_metric(span_attributes_dict, project_id, name):
+        if name in span_attributes_dict:
+            local_metrics = span_attributes_dict[name].local_metrics or []
+            local_metrics_results = []
+            for metric in local_metrics:
+                try:
+                    logger.info("calculating the metric, please wait....")
+
+                    mapping = metric.get("mapping", {})
+                    result = calculate_metric(project_id=project_id,
+                                              metric_name=metric.get("name"),
+                                              model=metric.get("model"),
+                                              provider=metric.get("provider"),
+                                              **mapping
+                                              )
+
+                    result = result['data']['data'][0]
+                    config = result['metric_config']
+                    metric_config = {
+                        "job_id": config.get("job_id"),
+                        "metric_name": config.get("displayName"),
+                        "model": config.get("model"),
+                        "org_domain": config.get("orgDomain"),
+                        "provider": config.get("provider"),
+                        "reason": config.get("reason"),
+                        "request_id": config.get("request_id"),
+                        "user_id": config.get("user_id"),
+                        "threshold": {
+                            "is_editable": config.get("threshold").get("isEditable"),
+                            "lte": config.get("threshold").get("lte")
+                        }
+                    }
+                    formatted_metric = {
+                        "name": metric.get("displayName"),
+                        "displayName": metric.get("displayName"),
+                        "score": result.get("score"),
+                        "reason": result.get("reason", ""),
+                        "source": "user",
+                        "cost": result.get("cost"),
+                        "latency": result.get("latency"),
+                        "mappings": [],
+                        "config": metric_config
+                    }
+                    local_metrics_results.append(formatted_metric)
+                except ValueError as e:
+                    logger.error(f"Validation Error: {e}")
+                except Exception as e:
+                    logger.error(f"Error executing metric: {e}")
+
+            return local_metrics_results
+
+
+    def upload_directly(self):
+        """Upload trace directly without using the background process"""
+        # Check if we have necessary details
+        if not hasattr(self, 'trace') or not self.trace_id:
+            print("No trace to upload")
+            return False
+
+        # Get the filepath from the last trace
+        trace_dir = tempfile.gettempdir()
+        trace_file = os.path.join(trace_dir, f"{self.trace_id}.json")
+
+        # If filepath wasn't saved from previous stop() call, try to find it
+        if not os.path.exists(trace_file):
+            print(f"Looking for trace file for {self.trace_id}")
+            # Try to find the trace file by pattern
+            for file in os.listdir(trace_dir):
+                if file.endswith(".json") and self.trace_id in file:
+                    trace_file = os.path.join(trace_dir, file)
+                    print(f"Found trace file: {trace_file}")
+                    break
+
+        if not os.path.exists(trace_file):
+            print(f"Trace file not found for ID {self.trace_id}")
+            return False
+
+        print(f"Starting direct upload of {trace_file}")
+
+        try:
+            # 1. Create the dataset schema
+            print("Creating dataset schema...")
+            from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+            response = create_dataset_schema_with_trace(
+                dataset_name=self.dataset_name,
+                project_name=self.project_name
+            )
+            print(f"Schema created: {response}")
+
+            # 2. Upload trace metrics
+            print("Uploading trace metrics...")
+            from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
+            response = upload_trace_metric(
+                json_file_path=trace_file,
+                dataset_name=self.dataset_name,
+                project_name=self.project_name,
+            )
+            print(f"Metrics uploaded: {response}")
+
+            # 3. Get code hash and zip path if available
+            code_hash = None
+            zip_path = None
+            try:
+                with open(trace_file, 'r') as f:
+                    data = json.load(f)
+                    code_hash = data.get("metadata", {}).get("system_info", {}).get("source_code")
+                    if code_hash:
+                        zip_path = os.path.join(trace_dir, f"{code_hash}.zip")
+                        print(f"Found code hash: {code_hash}")
+                        print(f"Zip path: {zip_path}")
+            except Exception as e:
+                print(f"Error getting code hash: {e}")
+
+            # 4. Upload agentic traces
+            print("Uploading agentic traces...")
+            from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
+            from ragaai_catalyst import RagaAICatalyst
+            upload_traces = UploadAgenticTraces(
+                json_file_path=trace_file,
+                project_name=self.project_name,
+                project_id=self.project_id,
+                dataset_name=self.dataset_name,
+                user_detail=self.user_details,
+                base_url=RagaAICatalyst.BASE_URL,
+            )
+            upload_traces.upload_agentic_traces()
+            print("Agentic traces uploaded successfully")
+
+            # 5. Upload code hash if available
+            if code_hash and zip_path and os.path.exists(zip_path):
+                print(f"Uploading code hash: {code_hash}")
+                from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
+                response = upload_code(
+                    hash_id=code_hash,
+                    zip_path=zip_path,
+                    project_name=self.project_name,
+                    dataset_name=self.dataset_name,
+                )
+                print(f"Code uploaded: {response}")
+
+            print("Upload completed successfully - check UI now")
+            return True
+        except Exception as e:
+            print(f"Error during direct upload: {e}")
+            import traceback
+            traceback.print_exc()
+            return False