ragaai-catalyst: 2.1.4.1b0-py3-none-any.whl → 2.1.5-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py (new file)

@@ -0,0 +1,358 @@
+"""
+trace_uploader.py - A dedicated process for handling trace uploads
+"""
+
+import os
+import sys
+import json
+import time
+import signal
+import logging
+import argparse
+import tempfile
+from pathlib import Path
+import multiprocessing
+import queue
+from datetime import datetime
+import atexit
+import glob
+from logging.handlers import RotatingFileHandler
+import concurrent.futures
+from typing import Dict, Any, Optional
+
+# Set up logging
+log_dir = os.path.join(tempfile.gettempdir(), "ragaai_logs")
+os.makedirs(log_dir, exist_ok=True)
+
+# Define maximum file size (e.g., 5 MB) and backup count
+max_file_size = 5 * 1024 * 1024  # 5 MB
+backup_count = 1  # Number of backup files to keep
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        RotatingFileHandler(
+            os.path.join(log_dir, "trace_uploader.log"),
+            maxBytes=max_file_size,
+            backupCount=backup_count
+        )
+    ]
+)
+logger = logging.getLogger("trace_uploader")
+
+try:
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
+    from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
+    from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+    from ragaai_catalyst import RagaAICatalyst
+    IMPORTS_AVAILABLE = True
+except ImportError:
+    logger.warning("RagaAI Catalyst imports not available - running in test mode")
+    IMPORTS_AVAILABLE = False
+
+# Define task queue directory
+QUEUE_DIR = os.path.join(tempfile.gettempdir(), "ragaai_tasks")
+os.makedirs(QUEUE_DIR, exist_ok=True)
+
+# Status codes
+STATUS_PENDING = "pending"
+STATUS_PROCESSING = "processing"
+STATUS_COMPLETED = "completed"
+STATUS_FAILED = "failed"
+
+# Global executor for handling uploads
+_executor = None
+# Dictionary to track futures and their associated task IDs
+_futures: Dict[str, Any] = {}
+
+def get_executor():
+    """Get or create the thread pool executor"""
+    global _executor
+    if _executor is None:
+        _executor = concurrent.futures.ThreadPoolExecutor(max_workers=8, thread_name_prefix="trace_uploader")
+    return _executor
+
+def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
+                   project_name: str, project_id: str, dataset_name: str,
+                   user_details: Dict[str, Any], base_url: str) -> Dict[str, Any]:
+    """
+    Process a single upload task
+
+    Args:
+        task_id: Unique identifier for the task
+        filepath: Path to the trace file
+        hash_id: Hash ID for the code
+        zip_path: Path to the code zip file
+        project_name: Project name
+        project_id: Project ID
+        dataset_name: Dataset name
+        user_details: User details dictionary
+        base_url: Base URL for API calls
+
+    Returns:
+        Dict containing status and any error information
+    """
+    # Correct base_url
+    base_url = base_url[0] if isinstance(base_url, tuple) else base_url
+
+    logger.info(f"Processing upload task {task_id}")
+    result = {
+        "task_id": task_id,
+        "status": STATUS_PROCESSING,
+        "error": None,
+        "start_time": datetime.now().isoformat()
+    }
+
+    # Save initial status to file
+    save_task_status(result)
+
+    try:
+        # Check if file exists
+        if not os.path.exists(filepath):
+            error_msg = f"Task filepath does not exist: {filepath}"
+            logger.error(error_msg)
+            result["status"] = STATUS_FAILED
+            result["error"] = error_msg
+            save_task_status(result)
+            return result
+
+        if not IMPORTS_AVAILABLE:
+            logger.warning(f"Test mode: Simulating processing of task {task_id}")
+            # time.sleep(2)  # Simulate work
+            result["status"] = STATUS_COMPLETED
+            save_task_status(result)
+            return result
+
+        # Step 1: Create dataset schema
+        logger.info(f"Creating dataset schema for {dataset_name} with base_url: {base_url}")
+        try:
+            response = create_dataset_schema_with_trace(
+                dataset_name=dataset_name,
+                project_name=project_name,
+                base_url=base_url
+            )
+            logger.info(f"Dataset schema created: {response}")
+        except Exception as e:
+            logger.error(f"Error creating dataset schema: {e}")
+            # Continue with other steps
+
+        # Step 2: Upload trace metrics
+        if filepath and os.path.exists(filepath):
+            logger.info(f"Uploading trace metrics for {filepath}")
+            try:
+                response = upload_trace_metric(
+                    json_file_path=filepath,
+                    dataset_name=dataset_name,
+                    project_name=project_name,
+                    base_url=base_url
+                )
+                logger.info(f"Trace metrics uploaded: {response}")
+            except Exception as e:
+                logger.error(f"Error uploading trace metrics: {e}")
+                # Continue with other uploads
+        else:
+            logger.warning(f"Trace file {filepath} not found, skipping metrics upload")
+
+        # Step 3: Upload agentic traces
+        if filepath and os.path.exists(filepath):
+            logger.info(f"Uploading agentic traces for {filepath}")
+            try:
+                upload_traces = UploadAgenticTraces(
+                    json_file_path=filepath,
+                    project_name=project_name,
+                    project_id=project_id,
+                    dataset_name=dataset_name,
+                    user_detail=user_details,
+                    base_url=base_url,
+                )
+                upload_traces.upload_agentic_traces()
+                logger.info("Agentic traces uploaded successfully")
+            except Exception as e:
+                logger.error(f"Error uploading agentic traces: {e}")
+                # Continue with code upload
+        else:
+            logger.warning(f"Trace file {filepath} not found, skipping traces upload")
+
+        # Step 4: Upload code hash
+        if hash_id and zip_path and os.path.exists(zip_path):
+            logger.info(f"Uploading code hash {hash_id}")
+            try:
+                response = upload_code(
+                    hash_id=hash_id,
+                    zip_path=zip_path,
+                    project_name=project_name,
+                    dataset_name=dataset_name,
+                    base_url=base_url
+                )
+                logger.info(f"Code hash uploaded: {response}")
+            except Exception as e:
+                logger.error(f"Error uploading code hash: {e}")
+        else:
+            logger.warning(f"Code zip {zip_path} not found, skipping code upload")
+
+        # Mark task as completed
+        result["status"] = STATUS_COMPLETED
+        result["end_time"] = datetime.now().isoformat()
+        logger.info(f"Task {task_id} completed successfully")
+
+    except Exception as e:
+        logger.error(f"Error processing task {task_id}: {e}")
+        result["status"] = STATUS_FAILED
+        result["error"] = str(e)
+        result["end_time"] = datetime.now().isoformat()
+
+    # Save final status
+    save_task_status(result)
+    return result
+
+def save_task_status(task_status: Dict[str, Any]):
+    """Save task status to a file"""
+    task_id = task_status["task_id"]
+    status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+    with open(status_path, "w") as f:
+        json.dump(task_status, f, indent=2)
+
+def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, dataset_name, user_details, base_url):
+    """
+    Submit a new upload task using futures.
+
+    Args:
+        filepath: Path to the trace file
+        hash_id: Hash ID for the code
+        zip_path: Path to the code zip file
+        project_name: Project name
+        project_id: Project ID
+        dataset_name: Dataset name
+        user_details: User details dictionary
+        base_url: Base URL for API calls
+
+    Returns:
+        str: Task ID
+    """
+    logger.info(f"Submitting new upload task for file: {filepath}")
+    logger.debug(f"Task details - Project: {project_name}, Dataset: {dataset_name}, Hash: {hash_id}, Base_URL: {base_url}")
+
+    # Verify the trace file exists
+    if not os.path.exists(filepath):
+        logger.error(f"Trace file not found: {filepath}")
+        return None
+
+    # Create absolute path to the trace file
+    filepath = os.path.abspath(filepath)
+    logger.debug(f"Using absolute filepath: {filepath}")
+
+    # Generate a unique task ID
+    task_id = f"task_{int(time.time())}_{os.getpid()}_{hash(str(time.time()))}"
+
+    # Submit the task to the executor
+    executor = get_executor()
+    future = executor.submit(
+        process_upload,
+        task_id=task_id,
+        filepath=filepath,
+        hash_id=hash_id,
+        zip_path=zip_path,
+        project_name=project_name,
+        project_id=project_id,
+        dataset_name=dataset_name,
+        user_details=user_details,
+        base_url=base_url
+    )
+
+    # Store the future for later status checks
+    _futures[task_id] = future
+
+    # Create initial status
+    initial_status = {
+        "task_id": task_id,
+        "status": STATUS_PENDING,
+        "error": None,
+        "start_time": datetime.now().isoformat()
+    }
+    save_task_status(initial_status)
+
+    return task_id
+
+def get_task_status(task_id):
+    """
+    Get the status of a task by ID.
+
+    Args:
+        task_id: Task ID to check
+
+    Returns:
+        dict: Task status information
+    """
+    logger.debug(f"Getting status for task {task_id}")
+
+    # Check if we have a future for this task
+    future = _futures.get(task_id)
+
+    # If we have a future, check its status
+    if future:
+        if future.done():
+            try:
+                # Get the result (this will re-raise any exception that occurred)
+                result = future.result(timeout=0)
+                return result
+            except concurrent.futures.TimeoutError:
+                return {"status": STATUS_PROCESSING, "error": None}
+            except Exception as e:
+                logger.error(f"Error retrieving future result for task {task_id}: {e}")
+                return {"status": STATUS_FAILED, "error": str(e)}
+        else:
+            return {"status": STATUS_PROCESSING, "error": None}
+
+    # If we don't have a future, try to read from the status file
+    status_path = os.path.join(QUEUE_DIR, f"{task_id}_status.json")
+    if os.path.exists(status_path):
+        try:
+            with open(status_path, "r") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Error reading status file for task {task_id}: {e}")
+            return {"status": "unknown", "error": f"Error reading status: {e}"}
+
+    return {"status": "unknown", "error": "Task not found"}
+
+def shutdown():
+    """Shutdown the executor"""
+    global _executor
+    if _executor:
+        logger.info("Shutting down executor")
+        _executor.shutdown(wait=False)
+        _executor = None
+
+# Register shutdown handler
+atexit.register(shutdown)
+
+# For backward compatibility
+def ensure_uploader_running():
+    """
+    Ensure the uploader is running.
+    This is a no-op in the futures implementation, but kept for API compatibility.
+    """
+    get_executor()  # Just ensure the executor is created
+    return True
+
+# For backward compatibility with the old daemon mode
+def run_daemon():
+    """
+    Run the uploader as a daemon process.
+    This is a no-op in the futures implementation, but kept for API compatibility.
+    """
+    logger.info("Daemon mode not needed in futures implementation")
+    return
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Trace uploader process")
+    parser.add_argument("--daemon", action="store_true", help="Run as daemon process")
+    args = parser.parse_args()
+
+    if args.daemon:
+        logger.info("Daemon mode not needed in futures implementation")
+    else:
+        logger.info("Interactive mode not needed in futures implementation")
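For orientation, here is a minimal sketch of how a caller might drive the new futures-based uploader; it is not taken from the package, and the file paths, project values, user_details dict, and base URL are placeholders, not defaults. The real callers are the tracer modules listed in the file summary above.

import time

from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import (
    get_task_status,
    submit_upload_task,
)

# Hypothetical inputs for illustration only.
task_id = submit_upload_task(
    filepath="/tmp/ragaai_traces/trace_123.json",  # trace JSON written by the tracer
    hash_id="abc123",                              # hash of the zipped source files
    zip_path="/tmp/ragaai_code/abc123.zip",        # zip of the traced source files
    project_name="demo-project",
    project_id="42",
    dataset_name="demo-dataset",
    user_details={},                               # opaque dict forwarded to UploadAgenticTraces
    base_url="https://catalyst.example.com/api",   # hypothetical Catalyst endpoint
)

# submit_upload_task returns None when the trace file does not exist.
if task_id is not None:
    # Poll the in-process future (or the *_status.json file) until the upload settles.
    while get_task_status(task_id)["status"] not in ("completed", "failed", "unknown"):
        time.sleep(1)
    print(get_task_status(task_id))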
ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py

@@ -1,7 +1,13 @@
 import requests
 import json
 import os
+import time
+import logging
 from datetime import datetime
+from urllib.parse import urlparse, urlunparse
+import re
+
+logger = logging.getLogger(__name__)
 
 
 class UploadAgenticTraces:
@@ -33,17 +39,41 @@ class UploadAgenticTraces:
         }
 
         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/presigned-url"
             response = requests.request("GET",
-
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
+
             if response.status_code == 200:
-
-
+                presignedURLs = response.json()["data"]["presignedUrls"][0]
+                presignedurl = self.update_presigned_url(presignedURLs,self.base_url)
+                return presignedurl
+
         except requests.exceptions.RequestException as e:
             print(f"Error while getting presigned url: {e}")
             return None
+
+    def update_presigned_url(self, presigned_url, base_url):
+        """Replaces the domain (and port, if applicable) of the presigned URL
+        with that of the base URL only if the base URL contains 'localhost' or an IP address."""
+        #To Do: If Proxy URL has domain name how do we handle such cases
+
+        presigned_parts = urlparse(presigned_url)
+        base_parts = urlparse(base_url)
+        # Check if base_url contains localhost or an IP address
+        if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+            new_netloc = base_parts.hostname  # Extract domain from base_url
+            if base_parts.port:  # Add port if present in base_url
+                new_netloc += f":{base_parts.port}"
+            updated_parts = presigned_parts._replace(netloc=new_netloc)
+            return urlunparse(updated_parts)
+        return presigned_url
 
     def _put_presigned_url(self, presignedUrl, filename):
         headers = {
@@ -60,11 +90,15 @@ class UploadAgenticTraces:
             print(f"Error while reading file: {e}")
             return None
         try:
+            start_time = time.time()
             response = requests.request("PUT",
                                         presignedUrl,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200 or response.status_code != 201:
                 return response, response.status_code
         except requests.exceptions.RequestException as e:
@@ -83,11 +117,16 @@ class UploadAgenticTraces:
             "datasetSpans": self._get_dataset_spans(), #Extra key for agentic traces
         })
         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/insert/trace"
             response = requests.request("POST",
-
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200:
                 print(f"Error inserting traces: {response.json()['message']}")
                 return None
@@ -116,27 +155,43 @@ class UploadAgenticTraces:
                         "spanType": span["type"],
                     })
                 else:
-                    datasetSpans.
+                    datasetSpans.extend(self._get_agent_dataset_spans(span, datasetSpans))
+            datasetSpans = [dict(t) for t in set(tuple(sorted(d.items())) for d in datasetSpans)]
+
+            return datasetSpans
+        except Exception as e:
+            print(f"Error while reading dataset spans: {e}")
+            return None
+
+    def _get_agent_dataset_spans(self, span, datasetSpans):
+        datasetSpans.append({
             "spanId": span["id"],
             "spanName": span["name"],
             "spanHash": span["hash_id"],
             "spanType": span["type"],
         })
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        children = span["data"]["children"]
+        for child in children:
+            if child["type"] != "agent":
+                existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                if existing_span is None:
+                    datasetSpans.append({
+                        "spanId": child["id"],
+                        "spanName": child["name"],
+                        "spanHash": child["hash_id"],
+                        "spanType": child["type"],
+                    })
+            else:
+                datasetSpans.append({
+                    "spanId": child["id"],
+                    "spanName": child["name"],
+                    "spanHash": child["hash_id"],
+                    "spanType": child["type"],
+                })
+                self._get_agent_dataset_spans(child, datasetSpans)
+        return datasetSpans
+
+
     def upload_agentic_traces(self):
         try:
             presignedUrl = self._get_presigned_url()
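The new update_presigned_url helper only rewrites the presigned URL's host when the configured base URL points at localhost or a raw IPv4 address; the scheme, path, and signed query string are left untouched. A standalone sketch of the same urlparse/re check, with a made-up helper name and example URLs, shows the effect:

from urllib.parse import urlparse, urlunparse
import re

def swap_netloc_if_local(presigned_url: str, base_url: str) -> str:
    # Mirrors the check above: rewrite only for localhost or dotted-quad hosts.
    presigned_parts = urlparse(presigned_url)
    base_parts = urlparse(base_url)
    if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
        new_netloc = base_parts.hostname
        if base_parts.port:
            new_netloc += f":{base_parts.port}"
        return urlunparse(presigned_parts._replace(netloc=new_netloc))
    return presigned_url

# Redirected to the local endpoint (scheme and signed query string preserved):
print(swap_netloc_if_local("https://storage.example.com/bucket/trace.json?sig=abc",
                           "http://localhost:8080/api"))
# https://localhost:8080/bucket/trace.json?sig=abc

# Left unchanged because the base URL has a real domain name (the case the in-code TODO flags):
print(swap_netloc_if_local("https://storage.example.com/bucket/trace.json?sig=abc",
                           "https://catalyst.example.com/api"))
# https://storage.example.com/bucket/trace.json?sig=abc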
ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py

@@ -2,23 +2,26 @@ from aiohttp import payload
 import requests
 import json
 import os
+import time
 import logging
 from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
 logger = logging.getLogger(__name__)
+from urllib.parse import urlparse, urlunparse
+import re
 
-def upload_code(hash_id, zip_path, project_name, dataset_name):
-    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+def upload_code(hash_id, zip_path, project_name, dataset_name, base_url=None):
+    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name, base_url)
 
     if hash_id not in code_hashes_list:
-        presigned_url = _fetch_presigned_url(project_name, dataset_name)
+        presigned_url = _fetch_presigned_url(project_name, dataset_name, base_url)
         _put_zip_presigned_url(project_name, presigned_url, zip_path)
 
-        response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+        response = _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url)
         return response
     else:
         return "Code already exists"
 
-def _fetch_dataset_code_hashes(project_name, dataset_name):
+def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None):
     payload = {}
     headers = {
         "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -26,11 +29,17 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code?datasetName={dataset_name}"
         response = requests.request("GET",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
         if response.status_code == 200:
             return response.json()["data"]["codeHashes"]
@@ -40,7 +49,24 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
         logger.error(f"Failed to list datasets: {e}")
         raise
 
-
+
+def update_presigned_url(presigned_url, base_url):
+    """Replaces the domain (and port, if applicable) of the presigned URL with that of the base URL."""
+    #To Do: If Proxy URL has domain name how do we handle such cases? Engineering Dependency.
+
+    presigned_parts = urlparse(presigned_url)
+    base_parts = urlparse(base_url)
+    # Check if base_url contains localhost or an IP address
+    if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+        new_netloc = base_parts.hostname  # Extract domain from base_url
+        if base_parts.port:  # Add port if present in base_url
+            new_netloc += f":{base_parts.port}"
+        updated_parts = presigned_parts._replace(netloc=new_netloc)
+        return urlunparse(updated_parts)
+    return presigned_url
+
+
+def _fetch_presigned_url(project_name, dataset_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "numFiles": 1,
@@ -54,14 +80,22 @@ def _fetch_presigned_url(project_name, dataset_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v1/llm/presigned-url"
         response = requests.request("GET",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
 
         if response.status_code == 200:
-
+            presigned_url = response.json()["data"]["presignedUrls"][0]
+            presigned_url = update_presigned_url(presigned_url,url_base)
+            return presigned_url
         else:
             raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
     except requests.exceptions.RequestException as e:
@@ -80,15 +114,19 @@ def _put_zip_presigned_url(project_name, presignedUrl, filename):
     with open(filename, 'rb') as f:
         payload = f.read()
 
+    start_time = time.time()
     response = requests.request("PUT",
                                 presignedUrl,
                                 headers=headers,
                                 data=payload,
                                 timeout=99999)
+    elapsed_ms = (time.time() - start_time) * 1000
+    logger.debug(
+        f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
     if response.status_code != 200 or response.status_code != 201:
         return response, response.status_code
 
-def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+def _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "codeHash": hash_id,
@@ -102,11 +140,17 @@ def _insert_code(dataset_name, hash_id, presigned_url, project_name):
     }
 
     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code"
         response = requests.request("POST",
-
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
         if response.status_code == 200:
             return response.json()["message"]
         else:
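All of the upload_code helpers now accept an optional base_url and fall back to RagaAICatalyst.BASE_URL when it is omitted. A minimal usage sketch of the public entry point, assuming placeholder project, dataset, hash, path, and endpoint values (RAGAAI_CATALYST_TOKEN must be set in the environment, as the headers above show):

from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code

# Illustrative values only; none of these are package defaults.
message = upload_code(
    hash_id="abc123",                        # hash of the zipped source files
    zip_path="/tmp/ragaai_code/abc123.zip",  # zip created by the tracer
    project_name="demo-project",
    dataset_name="demo-dataset",
    base_url="http://localhost:8080/api",    # omit or pass None to use RagaAICatalyst.BASE_URL
)
print(message)  # server message on insert, or "Code already exists" if the hash was uploaded before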