ragaai-catalyst 2.1.3__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +37 -11
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +240 -81
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +632 -114
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +316 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +229 -82
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +214 -59
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +16 -14
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +147 -28
- ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +88 -2
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +9 -51
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +83 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +26 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +28 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +45 -15
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2520 -2152
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +59 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +23 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +284 -15
- ragaai_catalyst/tracers/llamaindex_callback.py +5 -5
- ragaai_catalyst/tracers/tracer.py +83 -10
- ragaai_catalyst/tracers/upload_traces.py +1 -1
- ragaai_catalyst-2.1.4.dist-info/METADATA +431 -0
- {ragaai_catalyst-2.1.3.dist-info → ragaai_catalyst-2.1.4.dist-info}/RECORD +26 -20
- ragaai_catalyst-2.1.3.dist-info/METADATA +0 -43
- {ragaai_catalyst-2.1.3.dist-info → ragaai_catalyst-2.1.4.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.3.dist-info → ragaai_catalyst-2.1.4.dist-info}/top_level.txt +0 -0
--- a/ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py
+++ b/ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py
@@ -1,12 +1,22 @@
+import os
 import uuid
 from datetime import datetime
 import psutil
 import functools
 from typing import Optional, Any, Dict, List
-from ..utils.unique_decorator import generate_unique_hash_simple
+from ..utils.unique_decorator import generate_unique_hash_simple
 import contextvars
 import asyncio
 from ..utils.file_name_tracker import TrackName
+from ..utils.span_attributes import SpanAttributes
+import logging
+
+logger = logging.getLogger(__name__)
+logging_level = (
+    logger.setLevel(logging.DEBUG)
+    if os.getenv("DEBUG")
+    else logger.setLevel(logging.INFO)
+)
 
 
 class ToolTracerMixin:
@@ -19,17 +29,68 @@ class ToolTracerMixin:
         self.component_user_interaction = {}
         self.gt = None
 
+        # add auto_instrument option
+        self.auto_instrument_tool = False
+        self.auto_instrument_user_interaction = False
+        self.auto_instrument_network = False
+
+    # take care of auto_instrument
+    def instrument_tool_calls(self):
+        self.auto_instrument_tool = True
+
+    def instrument_user_interaction_calls(self):
+        self.auto_instrument_user_interaction = True
+
+    def instrument_network_calls(self):
+        self.auto_instrument_network = True
+
+    def trace_tool(
+        self,
+        name: str,
+        tool_type: str = "generic",
+        version: str = "1.0.0",
+        tags: List[str] = [],
+        metadata: Dict[str, Any] = {},
+        metrics: List[Dict[str, Any]] = [],
+        feedback: Optional[Any] = None,
+    ):
+        if name not in self.span_attributes_dict:
+            self.span_attributes_dict[name] = SpanAttributes(name)
+        if tags:
+            self.span(name).add_tags(tags)
+        if metadata:
+            self.span(name).add_metadata(metadata)
+        if metrics:
+            if isinstance(metrics, dict):
+                metrics = [metrics]
+            try:
+                for metric in metrics:
+                    self.span(name).add_metrics(
+                        name=metric["name"],
+                        score=metric["score"],
+                        reasoning=metric.get("reasoning", ""),
+                        cost=metric.get("cost", None),
+                        latency=metric.get("latency", None),
+                        metadata=metric.get("metadata", {}),
+                        config=metric.get("config", {}),
+                    )
+            except ValueError as e:
+                logger.error(f"Validation Error: {e}")
+            except Exception as e:
+                logger.error(f"Error adding metric: {e}")
+
+        if feedback:
+            self.span(name).add_feedback(feedback)
 
-    def trace_tool(self, name: str, tool_type: str = "generic", version: str = "1.0.0"):
         def decorator(func):
             # Add metadata attribute to the function
             metadata = {
                 "name": name,
                 "tool_type": tool_type,
                 "version": version,
-                "is_active":
+                "is_active": self.is_active,
             }
-
+
             # Check if the function is async
             is_async = asyncio.iscoroutinefunction(func)
 
@@ -37,7 +98,7 @@ class ToolTracerMixin:
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
                 async_wrapper.metadata = metadata
-                self.gt = kwargs.get(
+                self.gt = kwargs.get("gt", None) if kwargs else None
                 return await self._trace_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )
@@ -46,7 +107,7 @@ class ToolTracerMixin:
             @functools.wraps(func)
             def sync_wrapper(*args, **kwargs):
                 sync_wrapper.metadata = metadata
-                self.gt = kwargs.get(
+                self.gt = kwargs.get("gt", None) if kwargs else None
                 return self._trace_sync_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )
@@ -57,11 +118,16 @@ class ToolTracerMixin:
 
         return decorator
 
-    def _trace_sync_tool_execution(
+    def _trace_sync_tool_execution(
+        self, func, name, tool_type, version, *args, **kwargs
+    ):
         """Synchronous version of tool tracing"""
         if not self.is_active:
             return func(*args, **kwargs)
 
+        if not self.auto_instrument_tool:
+            return func(*args, **kwargs)
+
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
@@ -91,10 +157,11 @@ class ToolTracerMixin:
                 memory_used=memory_used,
                 start_time=start_time,
                 input_data=self._sanitize_input(args, kwargs),
-                output_data=self._sanitize_output(result)
+                output_data=self._sanitize_output(result),
             )
 
             self.add_component(tool_component)
+
             return result
 
         except Exception as e:
@@ -102,12 +169,12 @@ class ToolTracerMixin:
                 "code": 500,
                 "type": type(e).__name__,
                 "message": str(e),
-                "details": {}
+                "details": {},
             }
-
+
             # End tracking network calls for this component
             self.end_component(component_id)
-
+
             tool_component = self.create_tool_component(
                 component_id=component_id,
                 hash_id=hash_id,
@@ -118,22 +185,29 @@ class ToolTracerMixin:
                 start_time=start_time,
                 input_data=self._sanitize_input(args, kwargs),
                 output_data=None,
-                error=error_component
+                error=error_component,
             )
 
             self.add_component(tool_component)
+
             raise
 
-    async def _trace_tool_execution(
+    async def _trace_tool_execution(
+        self, func, name, tool_type, version, *args, **kwargs
+    ):
         """Asynchronous version of tool tracing"""
         if not self.is_active:
             return await func(*args, **kwargs)
 
+        if not self.auto_instrument_tool:
+            return await func(*args, **kwargs)
+
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
         hash_id = generate_unique_hash_simple(func)
 
+        self.start_component(component_id)
         try:
             # Execute the tool
             result = await func(*args, **kwargs)
@@ -141,6 +215,7 @@ class ToolTracerMixin:
             # Calculate resource usage
             end_memory = psutil.Process().memory_info().rss
             memory_used = max(0, end_memory - start_memory)
+            self.end_component(component_id)
 
             # Create tool component
             tool_component = self.create_tool_component(
@@ -152,9 +227,10 @@ class ToolTracerMixin:
                 start_time=start_time,
                 memory_used=memory_used,
                 input_data=self._sanitize_input(args, kwargs),
-                output_data=self._sanitize_output(result)
+                output_data=self._sanitize_output(result),
             )
             self.add_component(tool_component)
+
             return result
 
         except Exception as e:
@@ -162,9 +238,9 @@ class ToolTracerMixin:
                 "code": 500,
                 "type": type(e).__name__,
                 "message": str(e),
-                "details": {}
+                "details": {},
             }
-
+
             tool_component = self.create_tool_component(
                 component_id=component_id,
                 hash_id=hash_id,
@@ -175,15 +251,42 @@ class ToolTracerMixin:
                 memory_used=0,
                 input_data=self._sanitize_input(args, kwargs),
                 output_data=None,
-                error=error_component
+                error=error_component,
             )
             self.add_component(tool_component)
+
             raise
 
     def create_tool_component(self, **kwargs):
-
-
         """Create a tool component according to the data structure"""
+        network_calls = []
+        if self.auto_instrument_network:
+            network_calls = self.component_network_calls.get(kwargs["component_id"], [])
+        interactions = []
+        if self.auto_instrument_user_interaction:
+            interactions = self.component_user_interaction.get(
+                kwargs["component_id"], []
+            )
+
+        # Get tags, metrics
+        name = kwargs["name"]
+        # tags
+        tags = []
+        if name in self.span_attributes_dict:
+            tags = self.span_attributes_dict[name].tags or []
+
+        # metrics
+        metrics = []
+        if name in self.span_attributes_dict:
+            raw_metrics = self.span_attributes_dict[name].metrics or []
+            for metric in raw_metrics:
+                base_metric_name = metric["name"]
+                counter = sum(1 for x in self.visited_metrics if x.startswith(base_metric_name))
+                metric_name = f'{base_metric_name}_{counter}' if counter > 0 else base_metric_name
+                self.visited_metrics.append(metric_name)
+                metric["name"] = metric_name
+                metrics.append(metric)
+
         start_time = kwargs["start_time"]
         component = {
             "id": kwargs["component_id"],
@@ -198,20 +301,25 @@ class ToolTracerMixin:
             "info": {
                 "tool_type": kwargs["tool_type"],
                 "version": kwargs["version"],
-                "memory_used": kwargs["memory_used"]
+                "memory_used": kwargs["memory_used"],
+                "tags": tags,
             },
             "data": {
                 "input": kwargs["input_data"],
                 "output": kwargs["output_data"],
-                "memory_used": kwargs["memory_used"]
+                "memory_used": kwargs["memory_used"],
             },
-            "
-            "
+            "metrics": metrics,
+            "network_calls": network_calls,
+            "interactions": interactions,
         }
 
-        if self.gt:
+        if self.gt:
            component["data"]["gt"] = self.gt
 
+        # Reset the SpanAttributes context variable
+        self.span_attributes_dict[kwargs["name"]] = SpanAttributes(kwargs["name"])
+
         return component
 
     def start_component(self, component_id):
@@ -223,15 +331,26 @@ class ToolTracerMixin:
     def _sanitize_input(self, args: tuple, kwargs: dict) -> Dict:
         """Sanitize and format input data"""
         return {
-            "args": [
+            "args": [
+                (
+                    str(arg)
+                    if not isinstance(arg, (int, float, bool, str, list, dict))
+                    else arg
+                )
+                for arg in args
+            ],
             "kwargs": {
-                k:
+                k: (
+                    str(v)
+                    if not isinstance(v, (int, float, bool, str, list, dict))
+                    else v
+                )
                 for k, v in kwargs.items()
-            }
+            },
         }
 
     def _sanitize_output(self, output: Any) -> Any:
         """Sanitize and format output data"""
         if isinstance(output, (int, float, bool, str, list, dict)):
             return output
-        return str(output)
+        return str(output)
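
The `tool_tracer.py` changes add opt-in auto-instrumentation flags and widen `trace_tool` to accept `tags`, `metadata`, `metrics`, and `feedback`, which are stored per span via `SpanAttributes` and folded into the component by `create_tool_component`. The snippet below is a standalone sketch, not the package API: it shows the metric dict shape the decorator accepts and replays the `visited_metrics` name de-duplication from the diff; the metric name `accuracy` is made up for illustration.

```python
# Standalone sketch: the metric dict keys mirror trace_tool's add_metrics call,
# and dedupe() mirrors the visited_metrics suffixing in create_tool_component.
example_metric = {
    "name": "accuracy",          # hypothetical metric name
    "score": 0.92,
    "reasoning": "matched reference output",
    "cost": None,
    "latency": None,
    "metadata": {},
    "config": {},
}

visited_metrics = []

def dedupe(base_name: str) -> str:
    # Repeated metric names get a numeric suffix: accuracy, accuracy_1, ...
    counter = sum(1 for x in visited_metrics if x.startswith(base_name))
    final_name = f"{base_name}_{counter}" if counter > 0 else base_name
    visited_metrics.append(final_name)
    return final_name

print([dedupe(example_metric["name"]) for _ in range(3)])
# ['accuracy', 'accuracy_1', 'accuracy_2']
```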
--- a/ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py
+++ b/ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py
@@ -3,6 +3,35 @@ from datetime import datetime
 import contextvars
 import inspect
 import uuid
+from typing import Optional, Any
+
+class TracedFile:
+    def __init__(self, file_obj, file_path: str, tracer):
+        self._file = file_obj
+        self._file_path = file_path
+        self._tracer = tracer
+
+    def write(self, content: str) -> int:
+        self._tracer.trace_file_operation("write", self._file_path, content=content)
+        return self._file.write(content)
+
+    def read(self, size: Optional[int] = None) -> str:
+        content = self._file.read() if size is None else self._file.read(size)
+        self._tracer.trace_file_operation("read", self._file_path, content=content)
+        return content
+
+    def close(self) -> None:
+        return self._file.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return None
+
+    def __getattr__(self, name: str) -> Any:
+        return getattr(self._file, name)
 
 class UserInteractionTracer:
     def __init__(self, *args, **kwargs):
@@ -12,6 +41,7 @@ class UserInteractionTracer:
         self.component_id = contextvars.ContextVar("component_id", default=None)
         self.original_input = builtins.input
         self.original_print = builtins.print
+        self.original_open = builtins.open
         self.interactions = []
 
     def traced_input(self, prompt=""):
@@ -25,9 +55,10 @@ class UserInteractionTracer:
 
         self.interactions.append({
             "id": str(uuid.uuid4()),
+            "component_id": self.component_id.get(),
             "interaction_type": "input",
             "content": content,
-            "timestamp": datetime.now().isoformat()
+            "timestamp": datetime.now().astimezone().isoformat()
         })
         return content
 
@@ -36,8 +67,63 @@ class UserInteractionTracer:
 
         self.interactions.append({
             "id": str(uuid.uuid4()),
+            "component_id": self.component_id.get(),
             "interaction_type": "output",
             "content": content,
-            "timestamp": datetime.now().isoformat()
+            "timestamp": datetime.now().astimezone().isoformat()
         })
         return self.original_print(*args, **kwargs)
+
+    def traced_open(self, file: str, mode: str = 'r', *args, **kwargs):
+        # Skip tracing for system and virtual environment paths
+        system_paths = [
+            'site-packages',
+            'dist-packages',
+            '/proc/',
+            '/sys/',
+            '/var/lib/',
+            '/usr/lib/',
+            '/System/Library'
+        ]
+
+        file_str = str(file)
+        if any(path in file_str for path in system_paths):
+            return self.original_open(file, mode, *args, **kwargs)
+
+        file_obj = self.original_open(file, mode, *args, **kwargs)
+        return TracedFile(file_obj, file, self)
+
+    def trace_file_operation(self, operation: str, file_path: str, **kwargs):
+        interaction_type = f"file_{operation}"
+
+        # Check for existing interaction with same file_path and operation
+        for existing in reversed(self.interactions):
+            if (existing.get("file_path") == file_path and
+                existing.get("interaction_type") == interaction_type):
+                # Merge content if it exists
+                if "content" in kwargs and "content" in existing:
+                    existing["content"] += kwargs["content"]
+                    return
+                break
+
+        # If no matching interaction found or couldn't merge, create new one
+        interaction = {
+            "id": str(uuid.uuid4()),
+            "component_id": self.component_id.get(),
+            "interaction_type": interaction_type,
+            "file_path": file_path,
+            "timestamp": datetime.now().astimezone().isoformat()
+        }
+        interaction.update(kwargs)
+        self.interactions.append(interaction)
+
+    def __enter__(self):
+        builtins.input = self.traced_input
+        builtins.print = self.traced_print
+        builtins.open = self.traced_open
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        builtins.input = self.original_input
+        builtins.print = self.original_print
+        builtins.open = self.original_open
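
`user_interaction_tracer.py` now patches `builtins.open` with `traced_open`, wraps returned handles in `TracedFile`, and records `file_read`/`file_write` interactions, merging consecutive writes to the same path into one entry. The sketch below is not the package class; it only replays that merge rule in plain Python so the resulting interaction list is easy to see (the path `notes.txt` is made up).

```python
# Standalone sketch of the write-merging rule in trace_file_operation:
# back-to-back writes to the same path collapse into one interaction entry.
interactions = []

def record_write(file_path: str, content: str) -> None:
    for existing in reversed(interactions):
        if (existing.get("file_path") == file_path
                and existing.get("interaction_type") == "file_write"):
            if "content" in existing:
                existing["content"] += content   # merge into the previous entry
                return
            break
    interactions.append({
        "interaction_type": "file_write",
        "file_path": file_path,
        "content": content,
    })

record_write("notes.txt", "hello ")
record_write("notes.txt", "world")
print(interactions)   # a single entry with content == "hello world"
```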
--- a/ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py
+++ b/ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py
@@ -18,50 +18,7 @@ class UploadAgenticTraces:
         self.dataset_name = dataset_name
         self.user_detail = user_detail
         self.base_url = base_url
-        self.timeout =
-
-    def _create_dataset_schema_with_trace(self):
-        SCHEMA_MAPPING_NEW = {
-            "trace_id": {"columnType": "traceId"},
-            "trace_uri": {"columnType": "traceUri"},
-            "prompt": {"columnType": "prompt"},
-            "response":{"columnType": "response"},
-            "context": {"columnType": "context"},
-            "llm_model": {"columnType":"pipeline"},
-            "recorded_on": {"columnType": "metadata"},
-            "embed_model": {"columnType":"pipeline"},
-            "log_source": {"columnType": "metadata"},
-            "vector_store":{"columnType":"pipeline"},
-            "feedback": {"columnType":"feedBack"}
-        }
-        def make_request():
-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-                "X-Project-Name": self.project_name,
-            }
-            payload = json.dumps({
-                "datasetName": self.dataset_name,
-                # "schemaMapping": SCHEMA_MAPPING_NEW,
-                "traceFolderUrl": None,
-            })
-            response = requests.request("POST",
-                f"{self.base_url}/v1/llm/dataset/logs",
-                headers=headers,
-                data=payload,
-                timeout=self.timeout
-            )
-
-            return response
-
-        response = make_request()
-
-        if response.status_code == 401:
-            # get_token() # Fetch a new token and set it in the environment
-            response = make_request() # Retry the request
-        if response.status_code != 200:
-            return response.status_code
-        return response.status_code
+        self.timeout = 30
 
 
     def _get_presigned_url(self):
@@ -181,10 +138,11 @@ class UploadAgenticTraces:
         return None
 
     def upload_agentic_traces(self):
-
-
-
-
-
-
-
+        try:
+            presignedUrl = self._get_presigned_url()
+            if presignedUrl is None:
+                return
+            self._put_presigned_url(presignedUrl, self.json_file_path)
+            self.insert_traces(presignedUrl)
+        except Exception as e:
+            print(f"Error while uploading agentic traces: {e}")
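
`upload_agentic_traces.py` drops the inline dataset-schema call (now in `utils/create_dataset_schema.py`), pins `self.timeout` to 30 seconds, and reduces `upload_agentic_traces` to presigned-URL fetch, PUT, and trace registration inside a single try/except. The class below is a stubbed sketch of that control flow only; `Uploader` and its stub methods are illustrative stand-ins, not the package's `UploadAgenticTraces`.

```python
# Stubbed sketch of the 2.1.4 upload flow: presigned URL -> PUT file -> insert trace.
class Uploader:
    def __init__(self, json_file_path: str):
        self.json_file_path = json_file_path
        self.timeout = 30                         # hardcoded in 2.1.4

    def _get_presigned_url(self):                 # stub for the real API call
        return "https://example.com/presigned-url"

    def _put_presigned_url(self, url, path):      # stub for the real PUT
        print(f"PUT {path} -> {url}")

    def insert_traces(self, url):                 # stub for trace registration
        print(f"registering trace at {url}")

    def upload_agentic_traces(self):
        try:
            presigned_url = self._get_presigned_url()
            if presigned_url is None:             # bail out quietly if no URL
                return
            self._put_presigned_url(presigned_url, self.json_file_path)
            self.insert_traces(presigned_url)
        except Exception as e:
            print(f"Error while uploading agentic traces: {e}")

Uploader("trace.json").upload_agentic_traces()
```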
--- /dev/null
+++ b/ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py
@@ -0,0 +1,83 @@
+import requests
+import os
+import json
+from ....ragaai_catalyst import RagaAICatalyst
+from ..utils.get_user_trace_metrics import get_user_trace_metrics
+
+def upload_trace_metric(json_file_path, dataset_name, project_name):
+    try:
+        with open(json_file_path, "r") as f:
+            traces = json.load(f)
+        metrics = get_trace_metrics_from_trace(traces)
+        metrics = _change_metrics_format_for_payload(metrics)
+
+        user_trace_metrics = get_user_trace_metrics(project_name, dataset_name)
+        if user_trace_metrics:
+            user_trace_metrics_list = [metric["displayName"] for metric in user_trace_metrics]
+
+        if user_trace_metrics:
+            for metric in metrics:
+                if metric["displayName"] in user_trace_metrics_list:
+                    metricConfig = next((user_metric["metricConfig"] for user_metric in user_trace_metrics if user_metric["displayName"] == metric["displayName"]), None)
+                    if not metricConfig or metricConfig.get("Metric Source", {}).get("value") != "user":
+                        raise ValueError(f"Metrics {metric['displayName']} already exist in dataset {dataset_name} of project {project_name}.")
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": project_name,
+        }
+        payload = json.dumps({
+            "datasetName": dataset_name,
+            "metrics": metrics
+        })
+        response = requests.request("POST",
+                                    f"{RagaAICatalyst.BASE_URL}/v1/llm/trace/metrics",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=10)
+        if response.status_code != 200:
+            raise ValueError(f"Error inserting agentic trace metrics")
+    except requests.exceptions.RequestException as e:
+        raise ValueError(f"Error submitting traces: {e}")
+        return None
+
+    return response
+
+
+def _get_children_metrics_of_agent(children_traces):
+    metrics = []
+    for span in children_traces:
+        metrics.extend(span.get("metrics", []))
+        if span["type"] != "agent":
+            metric = span.get("metrics", [])
+            if metric:
+                metrics.extend(metric)
+        else:
+            metrics.extend(_get_children_metrics_of_agent(span["data"]["children"]))
+    return metrics
+
+def get_trace_metrics_from_trace(traces):
+    metrics = []
+    for span in traces["data"][0]["spans"]:
+        if span["type"] == "agent":
+            children_metric = _get_children_metrics_of_agent(span["data"]["children"])
+            if children_metric:
+                metrics.extend(children_metric)
+        else:
+            metric = span.get("metrics", [])
+            if metric:
+                metrics.extend(metric)
+    return metrics
+
+def _change_metrics_format_for_payload(metrics):
+    formatted_metrics = []
+    for metric in metrics:
+        if any(m["name"] == metric["name"] for m in formatted_metrics):
+            continue
+        formatted_metrics.append({
+            "name": metric["name"],
+            "displayName": metric["name"],
+            "config": {"source": "user"},
+        })
+    return formatted_metrics
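
The new `upload_trace_metric.py` walks the trace's spans (recursing into agent children), collects their metrics, reshapes them, and POSTs them to `/v1/llm/trace/metrics`, refusing to overwrite non-user metrics that already exist on the dataset. The snippet below only replays the reshaping step from `_change_metrics_format_for_payload`; the metric names are invented.

```python
# Standalone sketch of _change_metrics_format_for_payload: duplicate names are
# skipped and every metric is marked as user-sourced for the POST payload.
span_metrics = [
    {"name": "accuracy", "score": 0.9},        # hypothetical span metrics
    {"name": "accuracy", "score": 0.7},        # duplicate name -> skipped
    {"name": "helpfulness", "score": 0.8},
]

formatted_metrics = []
for metric in span_metrics:
    if any(m["name"] == metric["name"] for m in formatted_metrics):
        continue
    formatted_metrics.append({
        "name": metric["name"],
        "displayName": metric["name"],
        "config": {"source": "user"},
    })

print(formatted_metrics)
# [{'name': 'accuracy', ...}, {'name': 'helpfulness', ...}]
```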
--- /dev/null
+++ b/ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py
@@ -0,0 +1,26 @@
+import os
+import json
+import re
+import requests
+from ragaai_catalyst.tracers.agentic_tracing.tracers.base import RagaAICatalyst
+
+def create_dataset_schema_with_trace(project_name, dataset_name):
+    def make_request():
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": project_name,
+        }
+        payload = json.dumps({
+            "datasetName": dataset_name,
+            "traceFolderUrl": None,
+        })
+        response = requests.request("POST",
+            f"{RagaAICatalyst.BASE_URL}/v1/llm/dataset/logs",
+            headers=headers,
+            data=payload,
+            timeout=10
+        )
+        return response
+    response = make_request()
+    return response
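
`create_dataset_schema.py` extracts the dataset-schema POST that previously lived inside `UploadAgenticTraces`. Below is a hypothetical usage sketch, assuming the package is installed and `RAGAAI_CATALYST_TOKEN` plus project access are configured; the project and dataset names are placeholders, and the helper returns the raw `requests.Response`.

```python
# Hypothetical usage; requires ragaai-catalyst 2.1.4 and valid credentials.
from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import (
    create_dataset_schema_with_trace,
)

response = create_dataset_schema_with_trace(
    project_name="my-project",        # placeholder
    dataset_name="agentic-traces",    # placeholder
)
print(response.status_code)           # 200 on success
```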
--- /dev/null
+++ b/ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py
@@ -0,0 +1,28 @@
+import requests
+import os
+from ....ragaai_catalyst import RagaAICatalyst
+from ....dataset import Dataset
+
+def get_user_trace_metrics(project_name, dataset_name):
+    try:
+        list_datasets = Dataset(project_name=project_name).list_datasets()
+        if not list_datasets:
+            return []
+        elif dataset_name not in list_datasets:
+            return []
+        else:
+            headers = {
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Name": project_name,
+            }
+            response = requests.request("GET",
+                f"{RagaAICatalyst.BASE_URL}/v1/llm/trace/metrics?datasetName={dataset_name}",
+                headers=headers, timeout=10)
+            if response.status_code != 200:
+                print(f"Error fetching traces metrics: {response.json()['message']}")
+                return None
+
+            return response.json()["data"]["columns"]
+    except Exception as e:
+        print(f"Error fetching traces metrics: {e}")
+        return None
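
`get_user_trace_metrics.py` looks up the dataset's existing trace-metric columns so `upload_trace_metric` can detect name clashes before inserting. Below is a hypothetical usage sketch under the same assumptions as above (package installed, token and project configured, placeholder names); per the implementation, it returns the `columns` list from the API, an empty list if the dataset does not exist, or `None` on error.

```python
# Hypothetical usage; mirrors how upload_trace_metric consumes the result.
from ragaai_catalyst.tracers.agentic_tracing.utils.get_user_trace_metrics import (
    get_user_trace_metrics,
)

columns = get_user_trace_metrics(
    project_name="my-project",        # placeholder
    dataset_name="agentic-traces",    # placeholder
)
if columns:
    print([col.get("displayName") for col in columns])
```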