ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
Files changed (64)
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py (new file)
@@ -0,0 +1,358 @@
+ """
+ trace_uploader.py - A dedicated process for handling trace uploads
+ """
+
+ import os
+ import sys
+ import json
+ import time
+ import signal
+ import logging
+ import argparse
+ import tempfile
+ from pathlib import Path
+ import multiprocessing
+ import queue
+ from datetime import datetime
+ import atexit
+ import glob
+ from logging.handlers import RotatingFileHandler
+ import concurrent.futures
+ from typing import Dict, Any, Optional
+
+ # Set up logging
+ log_dir = os.path.join(tempfile.gettempdir(), "ragaai_logs")
+ os.makedirs(log_dir, exist_ok=True)
+
+ # Define maximum file size (e.g., 5 MB) and backup count
+ max_file_size = 5 * 1024 * 1024  # 5 MB
+ backup_count = 1  # Number of backup files to keep
+
+ logging.basicConfig(
+     level=logging.DEBUG,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.StreamHandler(),
+         RotatingFileHandler(
+             os.path.join(log_dir, "trace_uploader.log"),
+             maxBytes=max_file_size,
+             backupCount=backup_count
+         )
+     ]
+ )
+ logger = logging.getLogger("trace_uploader")
+
+ try:
+     from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
+     from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
+     from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
+     from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+     from ragaai_catalyst import RagaAICatalyst
+     IMPORTS_AVAILABLE = True
+ except ImportError:
+     logger.warning("RagaAI Catalyst imports not available - running in test mode")
+     IMPORTS_AVAILABLE = False
+
+ # Define task queue directory
+ QUEUE_DIR = os.path.join(tempfile.gettempdir(), "ragaai_tasks")
+ os.makedirs(QUEUE_DIR, exist_ok=True)
+
+ # Status codes
+ STATUS_PENDING = "pending"
+ STATUS_PROCESSING = "processing"
+ STATUS_COMPLETED = "completed"
+ STATUS_FAILED = "failed"
+
+ # Global executor for handling uploads
+ _executor = None
+ # Dictionary to track futures and their associated task IDs
+ _futures: Dict[str, Any] = {}
+
+ def get_executor():
+     """Get or create the thread pool executor"""
+     global _executor
+     if _executor is None:
+         _executor = concurrent.futures.ThreadPoolExecutor(max_workers=8, thread_name_prefix="trace_uploader")
+     return _executor
+
+ def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
+                    project_name: str, project_id: str, dataset_name: str,
+                    user_details: Dict[str, Any], base_url: str) -> Dict[str, Any]:
+     """
+     Process a single upload task
+
+     Args:
+         task_id: Unique identifier for the task
+         filepath: Path to the trace file
+         hash_id: Hash ID for the code
+         zip_path: Path to the code zip file
+         project_name: Project name
+         project_id: Project ID
+         dataset_name: Dataset name
+         user_details: User details dictionary
+         base_url: Base URL for API calls
+
+     Returns:
+         Dict containing status and any error information
+     """
+     # Correct base_url
+     base_url = base_url[0] if isinstance(base_url, tuple) else base_url
+
+     logger.info(f"Processing upload task {task_id}")
+     result = {
+         "task_id": task_id,
+         "status": STATUS_PROCESSING,
+         "error": None,
+         "start_time": datetime.now().isoformat()
+     }
+
+     # Save initial status to file
+     save_task_status(result)
+
+     try:
+         # Check if file exists
+         if not os.path.exists(filepath):
+             error_msg = f"Task filepath does not exist: {filepath}"
+             logger.error(error_msg)
+             result["status"] = STATUS_FAILED
+             result["error"] = error_msg
+             save_task_status(result)
+             return result
+
+         if not IMPORTS_AVAILABLE:
+             logger.warning(f"Test mode: Simulating processing of task {task_id}")
+             # time.sleep(2)  # Simulate work
+             result["status"] = STATUS_COMPLETED
+             save_task_status(result)
+             return result
+
+         # Step 1: Create dataset schema
+         logger.info(f"Creating dataset schema for {dataset_name} with base_url: {base_url}")
+         try:
+             response = create_dataset_schema_with_trace(
+                 dataset_name=dataset_name,
+                 project_name=project_name,
+                 base_url=base_url
+             )
+             logger.info(f"Dataset schema created: {response}")
+         except Exception as e:
+             logger.error(f"Error creating dataset schema: {e}")
+             # Continue with other steps
+
+         # Step 2: Upload trace metrics
+         if filepath and os.path.exists(filepath):
+             logger.info(f"Uploading trace metrics for {filepath}")
+             try:
+                 response = upload_trace_metric(
+                     json_file_path=filepath,
+                     dataset_name=dataset_name,
+                     project_name=project_name,
+                     base_url=base_url
+                 )
+                 logger.info(f"Trace metrics uploaded: {response}")
+             except Exception as e:
+                 logger.error(f"Error uploading trace metrics: {e}")
+                 # Continue with other uploads
+         else:
+             logger.warning(f"Trace file {filepath} not found, skipping metrics upload")
+
+         # Step 3: Upload agentic traces
+         if filepath and os.path.exists(filepath):
+             logger.info(f"Uploading agentic traces for {filepath}")
+             try:
+                 upload_traces = UploadAgenticTraces(
+                     json_file_path=filepath,
+                     project_name=project_name,
+                     project_id=project_id,
+                     dataset_name=dataset_name,
+                     user_detail=user_details,
+                     base_url=base_url,
+                 )
+                 upload_traces.upload_agentic_traces()
+                 logger.info("Agentic traces uploaded successfully")
+             except Exception as e:
+                 logger.error(f"Error uploading agentic traces: {e}")
+                 # Continue with code upload
+         else:
+             logger.warning(f"Trace file {filepath} not found, skipping traces upload")
+
+         # Step 4: Upload code hash
+         if hash_id and zip_path and os.path.exists(zip_path):
+             logger.info(f"Uploading code hash {hash_id}")
+             try:
+                 response = upload_code(
+                     hash_id=hash_id,
+                     zip_path=zip_path,
+                     project_name=project_name,
+                     dataset_name=dataset_name,
+                     base_url=base_url
+                 )
+                 logger.info(f"Code hash uploaded: {response}")
+             except Exception as e:
+                 logger.error(f"Error uploading code hash: {e}")
+         else:
+             logger.warning(f"Code zip {zip_path} not found, skipping code upload")
+
+         # Mark task as completed
+         result["status"] = STATUS_COMPLETED
+         result["end_time"] = datetime.now().isoformat()
+         logger.info(f"Task {task_id} completed successfully")
+
+     except Exception as e:
+         logger.error(f"Error processing task {task_id}: {e}")
+         result["status"] = STATUS_FAILED
+         result["error"] = str(e)
+         result["end_time"] = datetime.now().isoformat()
+
+     # Save final status
+     save_task_status(result)
+     return result
+
+ def save_task_status(task_status: Dict[str, Any]):
+     """Save task status to a file"""
+     task_id = task_status["task_id"]
+     status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+     with open(status_path, "w") as f:
+         json.dump(task_status, f, indent=2)
+
+ def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, dataset_name, user_details, base_url):
+     """
+     Submit a new upload task using futures.
+
+     Args:
+         filepath: Path to the trace file
+         hash_id: Hash ID for the code
+         zip_path: Path to the code zip file
+         project_name: Project name
+         project_id: Project ID
+         dataset_name: Dataset name
+         user_details: User details dictionary
+         base_url: Base URL for API calls
+
+     Returns:
+         str: Task ID
+     """
+     logger.info(f"Submitting new upload task for file: {filepath}")
+     logger.debug(f"Task details - Project: {project_name}, Dataset: {dataset_name}, Hash: {hash_id}, Base_URL: {base_url}")
+
+     # Verify the trace file exists
+     if not os.path.exists(filepath):
+         logger.error(f"Trace file not found: {filepath}")
+         return None
+
+     # Create absolute path to the trace file
+     filepath = os.path.abspath(filepath)
+     logger.debug(f"Using absolute filepath: {filepath}")
+
+     # Generate a unique task ID
+     task_id = f"task_{int(time.time())}_{os.getpid()}_{hash(str(time.time()))}"
+
+     # Submit the task to the executor
+     executor = get_executor()
+     future = executor.submit(
+         process_upload,
+         task_id=task_id,
+         filepath=filepath,
+         hash_id=hash_id,
+         zip_path=zip_path,
+         project_name=project_name,
+         project_id=project_id,
+         dataset_name=dataset_name,
+         user_details=user_details,
+         base_url=base_url
+     )
+
+     # Store the future for later status checks
+     _futures[task_id] = future
+
+     # Create initial status
+     initial_status = {
+         "task_id": task_id,
+         "status": STATUS_PENDING,
+         "error": None,
+         "start_time": datetime.now().isoformat()
+     }
+     save_task_status(initial_status)
+
+     return task_id
+
+ def get_task_status(task_id):
+     """
+     Get the status of a task by ID.
+
+     Args:
+         task_id: Task ID to check
+
+     Returns:
+         dict: Task status information
+     """
+     logger.debug(f"Getting status for task {task_id}")
+
+     # Check if we have a future for this task
+     future = _futures.get(task_id)
+
+     # If we have a future, check its status
+     if future:
+         if future.done():
+             try:
+                 # Get the result (this will re-raise any exception that occurred)
+                 result = future.result(timeout=0)
+                 return result
+             except concurrent.futures.TimeoutError:
+                 return {"status": STATUS_PROCESSING, "error": None}
+             except Exception as e:
+                 logger.error(f"Error retrieving future result for task {task_id}: {e}")
+                 return {"status": STATUS_FAILED, "error": str(e)}
+         else:
+             return {"status": STATUS_PROCESSING, "error": None}
+
+     # If we don't have a future, try to read from the status file
+     status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+     if os.path.exists(status_path):
+         try:
+             with open(status_path, "r") as f:
+                 return json.load(f)
+         except Exception as e:
+             logger.error(f"Error reading status file for task {task_id}: {e}")
+             return {"status": "unknown", "error": f"Error reading status: {e}"}
+
+     return {"status": "unknown", "error": "Task not found"}
+
+ def shutdown():
+     """Shutdown the executor"""
+     global _executor
+     if _executor:
+         logger.info("Shutting down executor")
+         _executor.shutdown(wait=False)
+         _executor = None
+
+ # Register shutdown handler
+ atexit.register(shutdown)
+
+ # For backward compatibility
+ def ensure_uploader_running():
+     """
+     Ensure the uploader is running.
+     This is a no-op in the futures implementation, but kept for API compatibility.
+     """
+     get_executor()  # Just ensure the executor is created
+     return True
+
+ # For backward compatibility with the old daemon mode
+ def run_daemon():
+     """
+     Run the uploader as a daemon process.
+     This is a no-op in the futures implementation, but kept for API compatibility.
+     """
+     logger.info("Daemon mode not needed in futures implementation")
+     return
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Trace uploader process")
+     parser.add_argument("--daemon", action="store_true", help="Run as daemon process")
+     args = parser.parse_args()
+
+     if args.daemon:
+         logger.info("Daemon mode not needed in futures implementation")
+     else:
+         logger.info("Interactive mode not needed in futures implementation")
ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py
@@ -1,7 +1,13 @@
  import requests
  import json
  import os
+ import time
+ import logging
  from datetime import datetime
+ from urllib.parse import urlparse, urlunparse
+ import re
+
+ logger = logging.getLogger(__name__)
 
 
  class UploadAgenticTraces:
@@ -33,17 +39,41 @@ class UploadAgenticTraces:
          }
 
          try:
+             start_time = time.time()
+             endpoint = f"{self.base_url}/v1/llm/presigned-url"
              response = requests.request("GET",
-                                         f"{self.base_url}/v1/llm/presigned-url",
+                                         endpoint,
                                          headers=headers,
                                          data=payload,
                                          timeout=self.timeout)
+             elapsed_ms = (time.time() - start_time) * 1000
+             logger.debug(
+                 f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
+
              if response.status_code == 200:
-                 presignedUrls = response.json()["data"]["presignedUrls"][0]
-                 return presignedUrls
+                 presignedURLs = response.json()["data"]["presignedUrls"][0]
+                 presignedurl = self.update_presigned_url(presignedURLs,self.base_url)
+                 return presignedurl
+
          except requests.exceptions.RequestException as e:
              print(f"Error while getting presigned url: {e}")
              return None
+
+     def update_presigned_url(self, presigned_url, base_url):
+         """Replaces the domain (and port, if applicable) of the presigned URL
+         with that of the base URL only if the base URL contains 'localhost' or an IP address."""
+         #To Do: If Proxy URL has domain name how do we handle such cases
+
+         presigned_parts = urlparse(presigned_url)
+         base_parts = urlparse(base_url)
+         # Check if base_url contains localhost or an IP address
+         if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+             new_netloc = base_parts.hostname  # Extract domain from base_url
+             if base_parts.port:  # Add port if present in base_url
+                 new_netloc += f":{base_parts.port}"
+             updated_parts = presigned_parts._replace(netloc=new_netloc)
+             return urlunparse(updated_parts)
+         return presigned_url
 
      def _put_presigned_url(self, presignedUrl, filename):
          headers = {
@@ -60,11 +90,15 @@ class UploadAgenticTraces:
              print(f"Error while reading file: {e}")
              return None
          try:
+             start_time = time.time()
              response = requests.request("PUT",
                                          presignedUrl,
                                          headers=headers,
                                          data=payload,
                                          timeout=self.timeout)
+             elapsed_ms = (time.time() - start_time) * 1000
+             logger.debug(
+                 f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
              if response.status_code != 200 or response.status_code != 201:
                  return response, response.status_code
          except requests.exceptions.RequestException as e:
@@ -83,11 +117,16 @@ class UploadAgenticTraces:
              "datasetSpans": self._get_dataset_spans(), #Extra key for agentic traces
          })
          try:
+             start_time = time.time()
+             endpoint = f"{self.base_url}/v1/llm/insert/trace"
              response = requests.request("POST",
-                                         f"{self.base_url}/v1/llm/insert/trace",
+                                         endpoint,
                                          headers=headers,
                                          data=payload,
                                          timeout=self.timeout)
+             elapsed_ms = (time.time() - start_time) * 1000
+             logger.debug(
+                 f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
              if response.status_code != 200:
                  print(f"Error inserting traces: {response.json()['message']}")
                  return None
@@ -116,27 +155,43 @@ class UploadAgenticTraces:
                          "spanType": span["type"],
                      })
                  else:
-                     datasetSpans.append({
+                     datasetSpans.extend(self._get_agent_dataset_spans(span, datasetSpans))
+             datasetSpans = [dict(t) for t in set(tuple(sorted(d.items())) for d in datasetSpans)]
+
+             return datasetSpans
+         except Exception as e:
+             print(f"Error while reading dataset spans: {e}")
+             return None
+
+     def _get_agent_dataset_spans(self, span, datasetSpans):
+         datasetSpans.append({
              "spanId": span["id"],
             "spanName": span["name"],
             "spanHash": span["hash_id"],
             "spanType": span["type"],
         })
-         children = span["data"]["children"]
-         for child in children:
-             existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
-             if existing_span is None:
-                 datasetSpans.append({
-                     "spanId": child["id"],
-                     "spanName": child["name"],
-                     "spanHash": child["hash_id"],
-                     "spanType": child["type"],
-                 })
-         return datasetSpans
-     except Exception as e:
-         print(f"Error while reading dataset spans: {e}")
-         return None
-
+         children = span["data"]["children"]
+         for child in children:
+             if child["type"] != "agent":
+                 existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                 if existing_span is None:
+                     datasetSpans.append({
+                         "spanId": child["id"],
+                         "spanName": child["name"],
+                         "spanHash": child["hash_id"],
+                         "spanType": child["type"],
+                     })
+             else:
+                 datasetSpans.append({
+                     "spanId": child["id"],
+                     "spanName": child["name"],
+                     "spanHash": child["hash_id"],
+                     "spanType": child["type"],
+                 })
+                 self._get_agent_dataset_spans(child, datasetSpans)
+         return datasetSpans
+
+
 
      def upload_agentic_traces(self):
          try:
ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py
@@ -2,23 +2,26 @@ from aiohttp import payload
  import requests
  import json
  import os
+ import time
  import logging
  from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
  logger = logging.getLogger(__name__)
+ from urllib.parse import urlparse, urlunparse
+ import re
 
- def upload_code(hash_id, zip_path, project_name, dataset_name):
-     code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+ def upload_code(hash_id, zip_path, project_name, dataset_name, base_url=None):
+     code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name, base_url)
 
      if hash_id not in code_hashes_list:
-         presigned_url = _fetch_presigned_url(project_name, dataset_name)
+         presigned_url = _fetch_presigned_url(project_name, dataset_name, base_url)
          _put_zip_presigned_url(project_name, presigned_url, zip_path)
 
-         response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+         response = _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url)
          return response
      else:
          return "Code already exists"
 
- def _fetch_dataset_code_hashes(project_name, dataset_name):
+ def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None):
      payload = {}
      headers = {
          "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -26,11 +29,17 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
      }
 
      try:
+         url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+         start_time = time.time()
+         endpoint = f"{url_base}/v2/llm/dataset/code?datasetName={dataset_name}"
          response = requests.request("GET",
-                                     f"{RagaAICatalyst.BASE_URL}/v2/llm/dataset/code?datasetName={dataset_name}",
+                                     endpoint,
                                      headers=headers,
                                      data=payload,
                                      timeout=99999)
+         elapsed_ms = (time.time() - start_time) * 1000
+         logger.debug(
+             f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
          if response.status_code == 200:
              return response.json()["data"]["codeHashes"]
@@ -40,7 +49,24 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
          logger.error(f"Failed to list datasets: {e}")
          raise
 
- def _fetch_presigned_url(project_name, dataset_name):
+
+ def update_presigned_url(presigned_url, base_url):
+     """Replaces the domain (and port, if applicable) of the presigned URL with that of the base URL."""
+     #To Do: If Proxy URL has domain name how do we handle such cases? Engineering Dependency.
+
+     presigned_parts = urlparse(presigned_url)
+     base_parts = urlparse(base_url)
+     # Check if base_url contains localhost or an IP address
+     if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+         new_netloc = base_parts.hostname  # Extract domain from base_url
+         if base_parts.port:  # Add port if present in base_url
+             new_netloc += f":{base_parts.port}"
+         updated_parts = presigned_parts._replace(netloc=new_netloc)
+         return urlunparse(updated_parts)
+     return presigned_url
+
+
+ def _fetch_presigned_url(project_name, dataset_name, base_url=None):
      payload = json.dumps({
          "datasetName": dataset_name,
          "numFiles": 1,
@@ -54,14 +80,22 @@ def _fetch_presigned_url(project_name, dataset_name):
      }
 
      try:
+         url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+         start_time = time.time()
+         endpoint = f"{url_base}/v1/llm/presigned-url"
          response = requests.request("GET",
-                                     f"{RagaAICatalyst.BASE_URL}/v1/llm/presigned-url",
+                                     endpoint,
                                      headers=headers,
                                      data=payload,
                                      timeout=99999)
+         elapsed_ms = (time.time() - start_time) * 1000
+         logger.debug(
+             f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
          if response.status_code == 200:
-             return response.json()["data"]["presignedUrls"][0]
+             presigned_url = response.json()["data"]["presignedUrls"][0]
+             presigned_url = update_presigned_url(presigned_url,url_base)
+             return presigned_url
          else:
              raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
      except requests.exceptions.RequestException as e:
@@ -80,15 +114,19 @@ def _put_zip_presigned_url(project_name, presignedUrl, filename):
      with open(filename, 'rb') as f:
          payload = f.read()
 
+     start_time = time.time()
      response = requests.request("PUT",
                                  presignedUrl,
                                  headers=headers,
                                  data=payload,
                                  timeout=99999)
+     elapsed_ms = (time.time() - start_time) * 1000
+     logger.debug(
+         f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
      if response.status_code != 200 or response.status_code != 201:
          return response, response.status_code
 
- def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+ def _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url=None):
      payload = json.dumps({
          "datasetName": dataset_name,
          "codeHash": hash_id,
@@ -102,11 +140,17 @@ def _insert_code(dataset_name, hash_id, presigned_url, project_name):
      }
 
      try:
+         url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+         start_time = time.time()
+         endpoint = f"{url_base}/v2/llm/dataset/code"
          response = requests.request("POST",
-                                     f"{RagaAICatalyst.BASE_URL}/v2/llm/dataset/code",
+                                     endpoint,
                                      headers=headers,
                                      data=payload,
                                      timeout=99999)
+         elapsed_ms = (time.time() - start_time) * 1000
+         logger.debug(
+             f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
          if response.status_code == 200:
              return response.json()["message"]
          else:
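
The upload_code entry point and its private helpers now accept an optional base_url, falling back to RagaAICatalyst.BASE_URL when it is omitted. A hedged sketch of how a caller might pass the override (the argument values are illustrative, not taken from the package):

    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code

    response = upload_code(
        hash_id="abc123",                      # hypothetical code hash
        zip_path="/tmp/code_abc123.zip",       # hypothetical zip produced by the tracer
        project_name="demo-project",
        dataset_name="demo-dataset",
        base_url="http://localhost:3000/api",  # optional; defaults to RagaAICatalyst.BASE_URL
    )
    # Returns the insert-code API message, or "Code already exists" if the hash
    # is already present in the dataset's code hashes.
    print(response)
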