PyPI - ragaai-catalyst - Versions diffs - 2.1.6.4b1__py3-none-any.whl → 2.1.7__py3-none-any.whl - Mend

ragaai-catalyst 2.1.6.4b1__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

ragaai_catalyst/dataset.py CHANGED Viewed

@@ -69,7 +69,7 @@ class Dataset:
                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                 "X-Project-Id": str(self.project_id),
             }
-            json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+            json_data = {"size": 99999, "page": "0", "projectId": str(self.project_id), "search": ""}
             try:
                 response = requests.post(
                     f"{Dataset.BASE_URL}/v2/llm/dataset",

ragaai_catalyst/tracers/agentic_tracing/tracers/base.py CHANGED Viewed

@@ -85,6 +85,9 @@ class BaseTracer:
         self.system_monitor = None
         self.gt = None
+        # For post processing of tracing file before uploading
+        self.post_processor = None
         # For upload tracking
         self.upload_task_id = None
@@ -142,6 +145,21 @@ class BaseTracer:
             except Exception as e:
                 logger.warning(f"Sleep interrupted in network tracking: {str(e)}")
+    def register_post_processor(self, post_processor_func):
+        """
+        Register a post-processing function that will be called after trace generation.
+        Args:
+            post_processor_func (callable): A function that takes a trace JSON file path as input
+                and returns a processed trace JSON file path.
+                The function signature should be:
+                def post_processor_func(original_trace_json_path: os.PathLike) -> os.PathLike
+        """
+        if not callable(post_processor_func):
+            raise TypeError("post_processor_func must be a callable")
+        self.post_processor = post_processor_func
+        logger.debug("Post-processor function registered successfully in BaseTracer")
     def start(self):
         """Initialize a new trace"""
         self.tracking = True
@@ -301,12 +319,19 @@ class BaseTracer:
             logger.info("Traces saved successfully.")
             logger.debug(f"Trace saved to {filepath}")
+            # Apply post-processor if registered
+            if self.post_processor is not None:
+                try:
+                    filepath = self.post_processor(filepath)
+                    logger.debug(f"Post-processor applied successfully in BaseTracer, new path: {filepath}")
+                except Exception as e:
+                    logger.error(f"Error in post-processing in BaseTracer: {e}")
             # Make sure uploader process is available
             ensure_uploader_running()
             logger.debug("Base URL used for uploading: {}".format(self.base_url))
             # Submit to background process for uploading using futures
             self.upload_task_id = submit_upload_task(
                 filepath=filepath,

ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py CHANGED Viewed

@@ -156,6 +156,12 @@ class AgenticTracing(
                 self.current_component_id.set(None)
                 self.user_interaction_tracer.component_id.set(None)
+    def register_post_processor(self, post_processor_func):
+        """
+        Pass through the post-processor registration to the BaseTracer
+        """
+        super().register_post_processor(post_processor_func)
     def start(self):
         """Start tracing"""
         self.is_active = True

ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py CHANGED Viewed

@@ -257,180 +257,196 @@ def format_interactions(trace) -> dict:
     interactions = []
     interaction_id = 1
-    if 'data' not in trace or not trace['data'][0]["spans"]:
-        return {"workflow": []}
-    for span in trace['data'][0]["spans"]:
-        # Process agent spans
-        if span['type'] == "agent":
-            # Add agent_start interaction
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "agent_call_start",
-                    "name": span['name'],
-                    "content": None,
-                    "timestamp": span['start_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
-            # Process children of agent recursively
-            if "children" in span['data']:
-                for child in span['data']["children"]:
-                    interaction_id = process_child_interactions(
-                        child, interaction_id, interactions
-                    )
-            # Add agent_end interaction
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "agent_call_end",
-                    "name": span['name'],
-                    "content": span['data'].get("output"),
-                    "timestamp": span['end_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
-        elif span['type'] == "tool":
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "tool_call_start",
-                    "name": span['name'],
-                    "content": {
-                        "prompt": span['data'].get("input"),
-                        "response": span['data'].get("output"),
-                    },
-                    "timestamp": span['start_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
+    try:
+        if 'data' not in trace or not trace['data'][0].get("spans"):
+            return {"workflow": []}
+    except Exception as e:
+        print(f"Error in checking data or spans: {str(e)}")
+    for span in trace['data'][0].get("spans", []):
+        try:
+            # Process agent spans
+            if span.get('type') == "agent":
+                # Add agent_start interaction
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "agent_call_start",
+                        "name": span.get('name'),
+                        "content": None,
+                        "timestamp": span.get('start_time'),
+                        "error": span.get('error'),
+                    }
+                )
+                interaction_id += 1
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "tool_call_end",
-                    "name": span['name'],
-                    "content": {
-                        "prompt": span['data'].get("input"),
-                        "response": span['data'].get("output"),
-                    },
-                    "timestamp": span['end_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
+                # Process children of agent recursively
+                if "children" in span.get('data', {}):
+                    for child in span['data'].get("children", []):
+                        interaction_id = process_child_interactions(
+                            child, interaction_id, interactions
+                        )
+                # Add agent_end interaction
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "agent_call_end",
+                        "name": span.get('name'),
+                        "content": span.get('data', {}).get("output"),
+                        "timestamp": span.get('end_time'),
+                        "error": span.get('error'),
+                    }
+                )
+                interaction_id += 1
-        elif span['type'] == "llm":
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "llm_call_start",
-                    "name": span['name'],
-                    "content": {
-                        "prompt": span['data'].get("input"),
-                    },
-                    "timestamp": span['start_time'],
-                    "error": span['error']
-                }
-            )
-            interaction_id += 1
+            elif span.get('type') == "tool":
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "tool_call_start",
+                        "name": span.get('name'),
+                        "content": {
+                            "prompt": span.get('data', {}).get("input"),
+                            "response": span.get('data', {}).get("output"),
+                        },
+                        "timestamp": span.get('start_time'),
+                        "error": span.get('error'),
+                    }
+                )
+                interaction_id += 1
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": "llm_call_end",
-                    "name": span['name'],
-                    "content": {"response": span['data'].get("output")},
-                    "timestamp": span['end_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "tool_call_end",
+                        "name": span.get('name'),
+                        "content": {
+                            "prompt": span.get('data', {}).get("input"),
+                            "response": span.get('data', {}).get("output"),
+                        },
+                        "timestamp": span.get('end_time'),
+                        "error": span.get('error'),
+                    }
+                )
+                interaction_id += 1
-        else:
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": f"{span['type']}_call_start",
-                    "name": span['name'],
-                    "content": span['data'],
-                    "timestamp": span['start_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
+            elif span.get('type') == "llm":
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "llm_call_start",
+                        "name": span.get('name'),
+                        "content": {
+                            "prompt": span.get('data', {}).get("input"),
+                        },
+                        "timestamp": span.get('start_time'),
+                        "error": span.get('error')
+                    }
+                )
+                interaction_id += 1
-            interactions.append(
-                {
-                    "id": str(interaction_id),
-                    "span_id": span['id'],
-                    "interaction_type": f"{span['type']}_call_end",
-                    "name": span['name'],
-                    "content": span['data'],
-                    "timestamp": span['end_time'],
-                    "error": span['error'],
-                }
-            )
-            interaction_id += 1
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": "llm_call_end",
+                        "name": span.get('name'),
+                        "content": {"response": span.get('data', {}).get("output")},
+                        "timestamp": span.get('end_time'),
+                        "error": span.get('error'),
+                    }
+                )
+                interaction_id += 1
-        # Process interactions from span.data if they exist
-        if 'interactions' in span:
-            for span_interaction in span['interactions']:
-                interaction = {}
-                interaction["id"] = str(interaction_id)
-                interaction["span_id"] = span['id']
-                interaction["interaction_type"] = span_interaction['type']
-                interaction["content"] = span_interaction['content']
-                interaction["timestamp"] = span_interaction['timestamp']
-                interaction["error"] = span['error']
-                interactions.append(interaction)
+            else:
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": f"{span.get('type')}_call_start",
+                        "name": span.get('name'),
+                        "content": span.get('data'),
+                        "timestamp": span.get('start_time'),
+                        "error": span.get('error'),
+                    }
+                )
                 interaction_id += 1
-        if 'network_calls' in span:
-            for span_network_call in span['network_calls']:
-                network_call = {}
-                network_call["id"] = str(interaction_id)
-                network_call["span_id"] = span['id']
-                network_call["interaction_type"] = "network_call"
-                network_call["name"] = None
-                network_call["content"] = {
-                    "request": {
-                        "url": span_network_call.get("url"),
-                        "method": span_network_call.get("method"),
-                        "headers": span_network_call.get("headers"),
-                    },
-                    "response": {
-                        "status_code": span_network_call.get("status_code"),
-                        "headers": span_network_call.get("response_headers"),
-                        "body": span_network_call.get("response_body"),
-                    },
-                }
-                network_call["timestamp"] = span_network_call.get("timestamp")
-                network_call["error"] = span_network_call.get("error")
-                interactions.append(network_call)
+                interactions.append(
+                    {
+                        "id": str(interaction_id),
+                        "span_id": span.get('id'),
+                        "interaction_type": f"{span.get('type')}_call_end",
+                        "name": span.get('name'),
+                        "content": span.get('data'),
+                        "timestamp": span.get('end_time'),
+                        "error": span.get('error'),
+                    }
+                )
                 interaction_id += 1
-    # Sort interactions by timestamp
-    sorted_interactions = sorted(
-        interactions, key=lambda x: x["timestamp"] if x["timestamp"] else ""
-    )
+            # Process interactions from span.data if they exist
+            if 'interactions' in span:
+                for span_interaction in span['interactions']:
+                    interaction = {}
+                    interaction["id"] = str(interaction_id)
+                    interaction["span_id"] = span.get('id')
+                    interaction["interaction_type"] = span_interaction.get('type')
+                    interaction["content"] = span_interaction.get('content')
+                    interaction["timestamp"] = span_interaction.get('timestamp')
+                    interaction["error"] = span.get('error')
+                    interactions.append(interaction)
+                    interaction_id += 1
+            if 'network_calls' in span:
+                for span_network_call in span['network_calls']:
+                    network_call = {}
+                    network_call["id"] = str(interaction_id)
+                    network_call["span_id"] = span.get('id')
+                    network_call["interaction_type"] = "network_call"
+                    network_call["name"] = None
+                    network_call["content"] = {
+                        "request": {
+                            "url": span_network_call.get("url"),
+                            "method": span_network_call.get("method"),
+                            "headers": span_network_call.get("headers"),
+                        },
+                        "response": {
+                            "status_code": span_network_call.get("status_code"),
+                            "headers": span_network_call.get("response_headers"),
+                            "body": span_network_call.get("response_body"),
+                        },
+                    }
+                    network_call["timestamp"] = span_network_call.get("timestamp")
+                    network_call["error"] = span_network_call.get("error")
+                    interactions.append(network_call)
+                    interaction_id += 1
+        except Exception as e:
+            logger.warning(f"Found issue processing span, skipping")
+            continue
+    try:
+        # Sort interactions by timestamp
+        sorted_interactions = sorted(
+            interactions, key=lambda x: x.get("timestamp") if x.get("timestamp") else ""
+        )
+    except Exception as e:
+        print(f"Error in sorting interactions: {str(e)}")
-    # Reassign IDs to maintain sequential order after sorting
-    for idx, interaction in enumerate(sorted_interactions, 1):
-        interaction["id"] = str(idx)
+    try:
+        # Reassign IDs to maintain sequential order after sorting
+        for idx, interaction in enumerate(sorted_interactions, 1):
+            interaction["id"] = str(idx)
+    except Exception as e:
+        print(f"Error in reassigning IDs: {str(e)}")
     return {"workflow": sorted_interactions}

ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py CHANGED Viewed

@@ -14,19 +14,23 @@ class DynamicTraceExporter(SpanExporter):
     certain properties to be updated dynamically during execution.
     """
-    def __init__(self, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120):
+    def __init__(self, tracer_type, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120, post_processor = None, max_upload_workers = 30):
         """
         Initialize the DynamicTraceExporter.
         Args:
+            tracer_type: Type of tracer
             files_to_zip: List of files to zip
             project_name: Project name
             project_id: Project ID
             dataset_name: Dataset name
             user_details: User details
             base_url: Base URL for API
+            post_processor: Post processing function before uploading trace
+            max_upload_workers: Maximum number of upload workers
         """
         self._exporter = RAGATraceExporter(
+            tracer_type=tracer_type,
             files_to_zip=files_to_zip,
             project_name=project_name,
             project_id=project_id,
@@ -34,7 +38,9 @@ class DynamicTraceExporter(SpanExporter):
             user_details=user_details,
             base_url=base_url,
             custom_model_cost=custom_model_cost,
-            timeout=timeout
+            timeout=timeout,
+            post_processor= post_processor,
+            max_upload_workers = max_upload_workers
         )
         # Store the initial values
@@ -45,6 +51,8 @@ class DynamicTraceExporter(SpanExporter):
         self._user_details = user_details
         self._base_url = base_url
         self._custom_model_cost = custom_model_cost
+        self._post_processor = post_processor
+        self._max_upload_workers = max_upload_workers
     def export(self, spans):
@@ -101,6 +109,8 @@ class DynamicTraceExporter(SpanExporter):
         self._exporter.user_details = self._user_details
         self._exporter.base_url = self._base_url
         self._exporter.custom_model_cost = self._custom_model_cost
+        self._exporter.post_processor = self._post_processor
+        self._exporter.max_upload_workers = self._max_upload_workers
     # Getter and setter methods for dynamic properties
@@ -159,3 +169,11 @@ class DynamicTraceExporter(SpanExporter):
     @custom_model_cost.setter
     def custom_model_cost(self, value):
         self._custom_model_cost = value
+    @property
+    def max_upload_workers(self):
+        return self._max_upload_workers
+    @max_upload_workers.setter
+    def max_upload_workers(self, value):
+        self._max_upload_workers = value

ragaai-catalyst 2.1.6.4b1__py3-none-any.whl → 2.1.7__py3-none-any.whl

ragaai-catalyst 2.1.6.4b1__py3-none-any.whl → 2.1.7__py3-none-any.whl