ragaai-catalyst: 2.1.4.1b0-py3-none-any.whl → 2.1.5-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py (new file)

@@ -0,0 +1,358 @@
+"""
+trace_uploader.py - A dedicated process for handling trace uploads
+"""
+
+import os
+import sys
+import json
+import time
+import signal
+import logging
+import argparse
+import tempfile
+from pathlib import Path
+import multiprocessing
+import queue
+from datetime import datetime
+import atexit
+import glob
+from logging.handlers import RotatingFileHandler
+import concurrent.futures
+from typing import Dict, Any, Optional
+
+# Set up logging
+log_dir = os.path.join(tempfile.gettempdir(), "ragaai_logs")
+os.makedirs(log_dir, exist_ok=True)
+
+# Define maximum file size (e.g., 5 MB) and backup count
+max_file_size = 5 * 1024 * 1024  # 5 MB
+backup_count = 1  # Number of backup files to keep
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        RotatingFileHandler(
+            os.path.join(log_dir, "trace_uploader.log"),
+            maxBytes=max_file_size,
+            backupCount=backup_count
+        )
+    ]
+)
+logger = logging.getLogger("trace_uploader")
+
+try:
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
+    from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+    from ragaai_catalyst import RagaAICatalyst
+    IMPORTS_AVAILABLE = True
+except ImportError:
+    logger.warning("RagaAI Catalyst imports not available - running in test mode")
+    IMPORTS_AVAILABLE = False
+
+# Define task queue directory
+QUEUE_DIR = os.path.join(tempfile.gettempdir(), "ragaai_tasks")
+os.makedirs(QUEUE_DIR, exist_ok=True)
+
+# Status codes
+STATUS_PENDING = "pending"
+STATUS_PROCESSING = "processing"
+STATUS_COMPLETED = "completed"
+STATUS_FAILED = "failed"
+
+# Global executor for handling uploads
+_executor = None
+# Dictionary to track futures and their associated task IDs
+_futures: Dict[str, Any] = {}
+
+def get_executor():
+    """Get or create the thread pool executor"""
+    global _executor
+    if _executor is None:
+        _executor = concurrent.futures.ThreadPoolExecutor(max_workers=8, thread_name_prefix="trace_uploader")
+    return _executor
+
+def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
+                   project_name: str, project_id: str, dataset_name: str,
+                   user_details: Dict[str, Any], base_url: str) -> Dict[str, Any]:
+    """
+    Process a single upload task
+
+    Args:
+        task_id: Unique identifier for the task
+        filepath: Path to the trace file
+        hash_id: Hash ID for the code
+        zip_path: Path to the code zip file
+        project_name: Project name
+        project_id: Project ID
+        dataset_name: Dataset name
+        user_details: User details dictionary
+        base_url: Base URL for API calls
+
+    Returns:
+        Dict containing status and any error information
+    """
+    # Correct base_url
+    base_url = base_url[0] if isinstance(base_url, tuple) else base_url
+
+    logger.info(f"Processing upload task {task_id}")
+    result = {
+        "task_id": task_id,
+        "status": STATUS_PROCESSING,
+        "error": None,
+        "start_time": datetime.now().isoformat()
+    }
+
+    # Save initial status to file
+    save_task_status(result)
+
+    try:
+        # Check if file exists
+        if not os.path.exists(filepath):
+            error_msg = f"Task filepath does not exist: {filepath}"
+            logger.error(error_msg)
+            result["status"] = STATUS_FAILED
+            result["error"] = error_msg
+            save_task_status(result)
+            return result
+
+        if not IMPORTS_AVAILABLE:
+            logger.warning(f"Test mode: Simulating processing of task {task_id}")
+            # time.sleep(2)  # Simulate work
+            result["status"] = STATUS_COMPLETED
+            save_task_status(result)
+            return result
+
+        # Step 1: Create dataset schema
+        logger.info(f"Creating dataset schema for {dataset_name} with base_url: {base_url}")
+        try:
+            response = create_dataset_schema_with_trace(
+                dataset_name=dataset_name,
+                project_name=project_name,
+                base_url=base_url
+            )
+            logger.info(f"Dataset schema created: {response}")
+        except Exception as e:
+            logger.error(f"Error creating dataset schema: {e}")
+            # Continue with other steps
+
+        # Step 2: Upload trace metrics
+        if filepath and os.path.exists(filepath):
+            logger.info(f"Uploading trace metrics for {filepath}")
+            try:
+                response = upload_trace_metric(
+                    json_file_path=filepath,
+                    dataset_name=dataset_name,
+                    project_name=project_name,
+                    base_url=base_url
+                )
+                logger.info(f"Trace metrics uploaded: {response}")
+            except Exception as e:
+                logger.error(f"Error uploading trace metrics: {e}")
+                # Continue with other uploads
+        else:
+            logger.warning(f"Trace file {filepath} not found, skipping metrics upload")
+
+        # Step 3: Upload agentic traces
+        if filepath and os.path.exists(filepath):
+            logger.info(f"Uploading agentic traces for {filepath}")
+            try:
+                upload_traces = UploadAgenticTraces(
+                    json_file_path=filepath,
+                    project_name=project_name,
+                    project_id=project_id,
+                    dataset_name=dataset_name,
+                    user_detail=user_details,
+                    base_url=base_url,
+                )
+                upload_traces.upload_agentic_traces()
+                logger.info("Agentic traces uploaded successfully")
+            except Exception as e:
+                logger.error(f"Error uploading agentic traces: {e}")
+                # Continue with code upload
+        else:
+            logger.warning(f"Trace file {filepath} not found, skipping traces upload")
+
+        # Step 4: Upload code hash
+        if hash_id and zip_path and os.path.exists(zip_path):
+            logger.info(f"Uploading code hash {hash_id}")
+            try:
+                response = upload_code(
+                    hash_id=hash_id,
+                    zip_path=zip_path,
+                    project_name=project_name,
+                    dataset_name=dataset_name,
+                    base_url=base_url
+                )
+                logger.info(f"Code hash uploaded: {response}")
+            except Exception as e:
+                logger.error(f"Error uploading code hash: {e}")
+        else:
+            logger.warning(f"Code zip {zip_path} not found, skipping code upload")
+
+        # Mark task as completed
+        result["status"] = STATUS_COMPLETED
+        result["end_time"] = datetime.now().isoformat()
+        logger.info(f"Task {task_id} completed successfully")
+
+    except Exception as e:
+        logger.error(f"Error processing task {task_id}: {e}")
+        result["status"] = STATUS_FAILED
+        result["error"] = str(e)
+        result["end_time"] = datetime.now().isoformat()
+
+    # Save final status
+    save_task_status(result)
+    return result
+
+def save_task_status(task_status: Dict[str, Any]):
+    """Save task status to a file"""
+    task_id = task_status["task_id"]
+    status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+    with open(status_path, "w") as f:
+        json.dump(task_status, f, indent=2)
+
+def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, dataset_name, user_details, base_url):
+    """
+    Submit a new upload task using futures.
+
+    Args:
+        filepath: Path to the trace file
+        hash_id: Hash ID for the code
+        zip_path: Path to the code zip file
+        project_name: Project name
+        project_id: Project ID
+        dataset_name: Dataset name
+        user_details: User details dictionary
+        base_url: Base URL for API calls
+
+    Returns:
+        str: Task ID
+    """
+    logger.info(f"Submitting new upload task for file: {filepath}")
+    logger.debug(f"Task details - Project: {project_name}, Dataset: {dataset_name}, Hash: {hash_id}, Base_URL: {base_url}")
+
+    # Verify the trace file exists
+    if not os.path.exists(filepath):
+        logger.error(f"Trace file not found: {filepath}")
+        return None
+
+    # Create absolute path to the trace file
+    filepath = os.path.abspath(filepath)
+    logger.debug(f"Using absolute filepath: {filepath}")
+
+    # Generate a unique task ID
+    task_id = f"task_{int(time.time())}_{os.getpid()}_{hash(str(time.time()))}"
+
+    # Submit the task to the executor
+    executor = get_executor()
+    future = executor.submit(
+        process_upload,
+        task_id=task_id,
+        filepath=filepath,
+        hash_id=hash_id,
+        zip_path=zip_path,
+        project_name=project_name,
+        project_id=project_id,
+        dataset_name=dataset_name,
+        user_details=user_details,
+        base_url=base_url
+    )
+
+    # Store the future for later status checks
+    _futures[task_id] = future
+
+    # Create initial status
+    initial_status = {
+        "task_id": task_id,
+        "status": STATUS_PENDING,
+        "error": None,
+        "start_time": datetime.now().isoformat()
+    }
+    save_task_status(initial_status)
+
+    return task_id
+
+def get_task_status(task_id):
+    """
+    Get the status of a task by ID.
+
+    Args:
+        task_id: Task ID to check
+
+    Returns:
+        dict: Task status information
+    """
+    logger.debug(f"Getting status for task {task_id}")
+
+    # Check if we have a future for this task
+    future = _futures.get(task_id)
+
+    # If we have a future, check its status
+    if future:
+        if future.done():
+            try:
+                # Get the result (this will re-raise any exception that occurred)
+                result = future.result(timeout=0)
+                return result
+            except concurrent.futures.TimeoutError:
+                return {"status": STATUS_PROCESSING, "error": None}
+            except Exception as e:
+                logger.error(f"Error retrieving future result for task {task_id}: {e}")
+                return {"status": STATUS_FAILED, "error": str(e)}
+        else:
+            return {"status": STATUS_PROCESSING, "error": None}
+
+    # If we don't have a future, try to read from the status file
+    status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+    if os.path.exists(status_path):
+        try:
+            with open(status_path, "r") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Error reading status file for task {task_id}: {e}")
+            return {"status": "unknown", "error": f"Error reading status: {e}"}
+
+    return {"status": "unknown", "error": "Task not found"}
+
+def shutdown():
+    """Shutdown the executor"""
+    global _executor
+    if _executor:
+        logger.info("Shutting down executor")
+        _executor.shutdown(wait=False)
+        _executor = None
+
+# Register shutdown handler
+atexit.register(shutdown)
+
+# For backward compatibility
+def ensure_uploader_running():
+    """
+    Ensure the uploader is running.
+    This is a no-op in the futures implementation, but kept for API compatibility.
+    """
+    get_executor()  # Just ensure the executor is created
+    return True
+
+# For backward compatibility with the old daemon mode
+def run_daemon():
+    """
+    Run the uploader as a daemon process.
+    This is a no-op in the futures implementation, but kept for API compatibility.
+    """
+    logger.info("Daemon mode not needed in futures implementation")
+    return
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Trace uploader process")
+    parser.add_argument("--daemon", action="store_true", help="Run as daemon process")
+    args = parser.parse_args()
+
+    if args.daemon:
+        logger.info("Daemon mode not needed in futures implementation")
+    else:
+        logger.info("Interactive mode not needed in futures implementation")
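For orientation, here is a minimal sketch of how a caller might drive the new futures-based uploader; it is not taken from the package, and the file paths, project values, user_details dict, and base URL are placeholders, not defaults. The real callers are the tracer modules listed in the file summary above.

import time

from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import (
    get_task_status,
    submit_upload_task,
)

# Hypothetical inputs for illustration only.
task_id = submit_upload_task(
    filepath="/tmp/ragaai_traces/trace_123.json",  # trace JSON written by the tracer
    hash_id="abc123",                              # hash of the zipped source files
    zip_path="/tmp/ragaai_code/abc123.zip",        # zip of the traced source files
    project_name="demo-project",
    project_id="42",
    dataset_name="demo-dataset",
    user_details={},                               # opaque dict forwarded to UploadAgenticTraces
    base_url="https://catalyst.example.com/api",   # hypothetical Catalyst endpoint
)

# submit_upload_task returns None when the trace file does not exist.
if task_id is not None:
    # Poll the in-process future (or the *_status.json file) until the upload settles.
    while get_task_status(task_id)["status"] not in ("completed", "failed", "unknown"):
        time.sleep(1)
    print(get_task_status(task_id))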
ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py

@@ -1,7 +1,13 @@
 import requests
 import json
 import os
+import time
+import logging
 from datetime import datetime
+from urllib.parse import urlparse, urlunparse
+import re
+
+logger = logging.getLogger(__name__)
 
 
 class UploadAgenticTraces:
@@ -33,17 +39,41 @@ class UploadAgenticTraces:
         }
 
         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/presigned-url"
             response = requests.request("GET",
-
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
+
             if response.status_code == 200:
-
-
+                presignedURLs = response.json()["data"]["presignedUrls"][0]
+                presignedurl = self.update_presigned_url(presignedURLs,self.base_url)
+                return presignedurl
+
         except requests.exceptions.RequestException as e:
             print(f"Error while getting presigned url: {e}")
             return None
+
+    def update_presigned_url(self, presigned_url, base_url):
+        """Replaces the domain (and port, if applicable) of the presigned URL
+        with that of the base URL only if the base URL contains 'localhost' or an IP address."""
+        #To Do: If Proxy URL has domain name how do we handle such cases
+
+        presigned_parts = urlparse(presigned_url)
+        base_parts = urlparse(base_url)
+        # Check if base_url contains localhost or an IP address
+        if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+            new_netloc = base_parts.hostname  # Extract domain from base_url
+            if base_parts.port:  # Add port if present in base_url
+                new_netloc += f":{base_parts.port}"
+            updated_parts = presigned_parts._replace(netloc=new_netloc)
+            return urlunparse(updated_parts)
+        return presigned_url
 
     def _put_presigned_url(self, presignedUrl, filename):
         headers = {
@@ -60,11 +90,15 @@ class UploadAgenticTraces:
             print(f"Error while reading file: {e}")
             return None
         try:
+            start_time = time.time()
             response = requests.request("PUT",
                                         presignedUrl,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200 or response.status_code != 201:
                 return response, response.status_code
         except requests.exceptions.RequestException as e:
@@ -83,11 +117,16 @@ class UploadAgenticTraces:
             "datasetSpans": self._get_dataset_spans(), #Extra key for agentic traces
         })
         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/insert/trace"
             response = requests.request("POST",
-
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200:
                 print(f"Error inserting traces: {response.json()['message']}")
                 return None
@@ -116,27 +155,43 @@ class UploadAgenticTraces:
                         "spanType": span["type"],
                     })
                 else:
-                    datasetSpans.
+                    datasetSpans.extend(self._get_agent_dataset_spans(span, datasetSpans))
+            datasetSpans = [dict(t) for t in set(tuple(sorted(d.items())) for d in datasetSpans)]
+
+            return datasetSpans
+        except Exception as e:
+            print(f"Error while reading dataset spans: {e}")
+            return None
+
+    def _get_agent_dataset_spans(self, span, datasetSpans):
+        datasetSpans.append({
             "spanId": span["id"],
             "spanName": span["name"],
             "spanHash": span["hash_id"],
             "spanType": span["type"],
         })
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        children = span["data"]["children"]
+        for child in children:
+            if child["type"] != "agent":
+                existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                if existing_span is None:
+                    datasetSpans.append({
+                        "spanId": child["id"],
+                        "spanName": child["name"],
+                        "spanHash": child["hash_id"],
+                        "spanType": child["type"],
+                    })
+            else:
+                datasetSpans.append({
+                    "spanId": child["id"],
+                    "spanName": child["name"],
+                    "spanHash": child["hash_id"],
+                    "spanType": child["type"],
+                })
+                self._get_agent_dataset_spans(child, datasetSpans)
+        return datasetSpans
+
+
     def upload_agentic_traces(self):
         try:
             presignedUrl = self._get_presigned_url()
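The new update_presigned_url helper only rewrites the presigned URL's host when the configured base URL points at localhost or a raw IPv4 address; the scheme, path, and signed query string are left untouched. A standalone sketch of the same urlparse/re check, with a made-up helper name and example URLs, shows the effect:

from urllib.parse import urlparse, urlunparse
import re

def swap_netloc_if_local(presigned_url: str, base_url: str) -> str:
    # Mirrors the check above: rewrite only for localhost or dotted-quad hosts.
    presigned_parts = urlparse(presigned_url)
    base_parts = urlparse(base_url)
    if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
        new_netloc = base_parts.hostname
        if base_parts.port:
            new_netloc += f":{base_parts.port}"
        return urlunparse(presigned_parts._replace(netloc=new_netloc))
    return presigned_url

# Redirected to the local endpoint (scheme and signed query string preserved):
print(swap_netloc_if_local("https://storage.example.com/bucket/trace.json?sig=abc",
                           "http://localhost:8080/api"))
# https://localhost:8080/bucket/trace.json?sig=abc

# Left unchanged because the base URL has a real domain name (the case the in-code TODO flags):
print(swap_netloc_if_local("https://storage.example.com/bucket/trace.json?sig=abc",
                           "https://catalyst.example.com/api"))
# https://storage.example.com/bucket/trace.json?sig=abc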
ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py

@@ -2,23 +2,26 @@ from aiohttp import payload
 import requests
 import json
 import os
+import time
 import logging
 from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
 logger = logging.getLogger(__name__)
+from urllib.parse import urlparse, urlunparse
+import re
 
-def upload_code(hash_id, zip_path, project_name, dataset_name):
-    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+def upload_code(hash_id, zip_path, project_name, dataset_name, base_url=None):
+    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name, base_url)
 
     if hash_id not in code_hashes_list:
-        presigned_url = _fetch_presigned_url(project_name, dataset_name)
+        presigned_url = _fetch_presigned_url(project_name, dataset_name, base_url)
         _put_zip_presigned_url(project_name, presigned_url, zip_path)
 
-        response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+        response = _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url)
         return response
     else:
         return "Code already exists"
 
-def _fetch_dataset_code_hashes(project_name, dataset_name):
+def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None):
     payload = {}
     headers = {
         "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -26,11 +29,17 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code?datasetName={dataset_name}"
         response = requests.request("GET",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
         if response.status_code == 200:
             return response.json()["data"]["codeHashes"]
@@ -40,7 +49,24 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
         logger.error(f"Failed to list datasets: {e}")
         raise
 
-
+
+def update_presigned_url(presigned_url, base_url):
+    """Replaces the domain (and port, if applicable) of the presigned URL with that of the base URL."""
+    #To Do: If Proxy URL has domain name how do we handle such cases? Engineering Dependency.
+
+    presigned_parts = urlparse(presigned_url)
+    base_parts = urlparse(base_url)
+    # Check if base_url contains localhost or an IP address
+    if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+        new_netloc = base_parts.hostname  # Extract domain from base_url
+        if base_parts.port:  # Add port if present in base_url
+            new_netloc += f":{base_parts.port}"
+        updated_parts = presigned_parts._replace(netloc=new_netloc)
+        return urlunparse(updated_parts)
+    return presigned_url
+
+
+def _fetch_presigned_url(project_name, dataset_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "numFiles": 1,
@@ -54,14 +80,22 @@ def _fetch_presigned_url(project_name, dataset_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v1/llm/presigned-url"
         response = requests.request("GET",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
         if response.status_code == 200:
-
+            presigned_url = response.json()["data"]["presignedUrls"][0]
+            presigned_url = update_presigned_url(presigned_url,url_base)
+            return presigned_url
         else:
             raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
     except requests.exceptions.RequestException as e:
@@ -80,15 +114,19 @@ def _put_zip_presigned_url(project_name, presignedUrl, filename):
     with open(filename, 'rb') as f:
         payload = f.read()
 
+    start_time = time.time()
     response = requests.request("PUT",
                                 presignedUrl,
                                 headers=headers,
                                 data=payload,
                                 timeout=99999)
+    elapsed_ms = (time.time() - start_time) * 1000
+    logger.debug(
+        f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
     if response.status_code != 200 or response.status_code != 201:
         return response, response.status_code
 
-def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+def _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "codeHash": hash_id,
@@ -102,11 +140,17 @@ def _insert_code(dataset_name, hash_id, presigned_url, project_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code"
         response = requests.request("POST",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
         if response.status_code == 200:
             return response.json()["message"]
         else:
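All of the upload_code helpers now accept an optional base_url and fall back to RagaAICatalyst.BASE_URL when it is omitted. A minimal usage sketch of the public entry point, assuming placeholder project, dataset, hash, path, and endpoint values (RAGAAI_CATALYST_TOKEN must be set in the environment, as the headers above show):

from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code

# Illustrative values only; none of these are package defaults.
message = upload_code(
    hash_id="abc123",                        # hash of the zipped source files
    zip_path="/tmp/ragaai_code/abc123.zip",  # zip created by the tracer
    project_name="demo-project",
    dataset_name="demo-dataset",
    base_url="http://localhost:8080/api",    # omit or pass None to use RagaAICatalyst.BASE_URL
)
print(message)  # server message on insert, or "Code already exists" if the hash was uploaded before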