ragaai-catalyst 2.0.5__py3-none-any.whl → 2.0.6b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +2 -1
- ragaai_catalyst/dataset.py +49 -60
- ragaai_catalyst/evaluation.py +47 -29
- ragaai_catalyst/guardrails_manager.py +233 -0
- ragaai_catalyst/internal_api_completion.py +83 -0
- ragaai_catalyst/proxy_call.py +1 -1
- ragaai_catalyst/synthetic_data_generation.py +201 -78
- ragaai_catalyst/tracers/llamaindex_callback.py +361 -0
- ragaai_catalyst/tracers/tracer.py +62 -28
- {ragaai_catalyst-2.0.5.dist-info → ragaai_catalyst-2.0.6b0.dist-info}/METADATA +139 -72
- {ragaai_catalyst-2.0.5.dist-info → ragaai_catalyst-2.0.6b0.dist-info}/RECORD +13 -10
- {ragaai_catalyst-2.0.5.dist-info → ragaai_catalyst-2.0.6b0.dist-info}/WHEEL +1 -1
- {ragaai_catalyst-2.0.5.dist-info → ragaai_catalyst-2.0.6b0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,361 @@
|
|
1
|
+
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
|
2
|
+
from llama_index.core import Settings
|
3
|
+
from typing import List, Dict, Any, Optional
|
4
|
+
from datetime import datetime
|
5
|
+
from enum import Enum
|
6
|
+
import json
|
7
|
+
import uuid
|
8
|
+
import os
|
9
|
+
import requests
|
10
|
+
import tempfile
|
11
|
+
|
12
|
+
from ..ragaai_catalyst import RagaAICatalyst
|
13
|
+
|
14
|
+
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes Enum members by value, ordinary objects
    via their attribute dict, and falls back to ``str`` for everything else."""

    def default(self, obj):
        # Enum members serialize to their underlying value.
        if isinstance(obj, Enum):
            return obj.value
        # Objects with an attribute dict serialize as that dict.
        attrs = getattr(obj, "__dict__", None)
        if attrs is not None:
            return attrs
        # Last resort: the string representation.
        return str(obj)
|
21
|
+
|
22
|
+
|
23
|
+
class LlamaIndexTracer:
    """Capture LlamaIndex query events through a debug callback handler and
    upload each completed query's traces to RagaAI Catalyst.

    Expected ``user_detail`` keys: ``project_name``, ``project_id``,
    ``dataset_name`` and ``trace_user_detail`` (a dict that must itself
    contain a ``metadata`` dict).

    Typical usage::

        tracer = LlamaIndexTracer(user_detail).start()
        ...run LlamaIndex queries (each query auto-uploads on completion)...
        tracer.stop()
    """

    def __init__(self, user_detail):
        self.trace_handler = None
        self.callback_manager = (
            CallbackManager()
        )  # Ensure callback manager is initialized
        self._original_inits = {}  # Store original __init__ methods
        self.project_name = user_detail["project_name"]
        self.project_id = user_detail["project_id"]
        self.dataset_name = user_detail["dataset_name"]
        self.user_detail = user_detail["trace_user_detail"]
        self.base_url = f"{RagaAICatalyst.BASE_URL}"
        self.timeout = 10  # seconds; applied to every HTTP request below
        self.query_count = 0
        self._upload_task = None

    def start(self):
        """Start tracing - call this before your LlamaIndex operations."""
        outer_self = self  # Capture outer self reference for inner class

        class CustomTraceHandler(LlamaDebugHandler):
            """Records every event; flushes a query's events when it ends."""

            def __init__(self):
                super().__init__()
                self.traces: List[Dict[str, Any]] = []
                self.current_query_traces: List[Dict[str, Any]] = []
                self.in_query = False
                self.query_event_id = None

            def on_event_start(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                parent_id: str = "",
                **kwargs: Any
            ) -> None:
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().isoformat(),
                    "payload": payload,
                    "status": "started",
                    "event_id": event_id,
                    "parent_id": parent_id,
                }
                if event_type == "query":
                    # A new query begins: start collecting its events.
                    self.in_query = True
                    self.query_event_id = event_id
                    self.current_query_traces = []

                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)

            def on_event_end(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                **kwargs: Any
            ) -> None:
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().isoformat(),
                    "payload": payload,
                    "status": "completed",
                    "event_id": event_id,
                }
                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)

                # If this is the end of a query event, automatically save the traces
                if event_type == "query" and event_id == self.query_event_id:
                    self.in_query = False
                    outer_self._save_current_query_traces(self.current_query_traces)
                    self.current_query_traces = []

        self.trace_handler = CustomTraceHandler()
        self.callback_manager.add_handler(self.trace_handler)
        Settings.callback_manager = self.callback_manager

        # Monkey-patch LlamaIndex components
        self._monkey_patch()
        return self  # Return self to allow method chaining

    def _save_current_query_traces(self, query_traces):
        """Serialize one query's traces to a temp file and upload them."""
        self.query_count += 1
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_query_{self.query_count}_{timestamp}.json"

        traces = self._add_traces_in_data(query_traces)

        # Write the tracer json files to a temporary directory.
        # BUG FIX: the per-query file name computed above was previously never
        # used, so every query overwrote the same literal temp path.
        temp_dir = tempfile.gettempdir()
        temp_file_path = os.path.join(temp_dir, filename)

        with open(temp_file_path, "w") as f:
            json.dump([traces], f, indent=2, cls=CustomEncoder)

        # Upload the traces
        self._create_dataset_schema_with_trace()
        presignedUrl = self._get_presigned_url()
        if presignedUrl is None:
            # _get_presigned_url returns None on any non-200 response; skip the
            # upload instead of issuing a PUT against a None URL.
            print("Could not obtain a presigned URL; skipping trace upload.")
            return
        self._put_presigned_url(presignedUrl, temp_file_path)
        self._insert_traces(presignedUrl)

    def _monkey_patch(self):
        """Monkey-patch LlamaIndex components to automatically include the callback manager"""
        from llama_index.core import VectorStoreIndex, ServiceContext
        from llama_index.llms.openai import OpenAI

        def make_new_init(original_init, callback_manager):
            def new_init(self, *args, **kwargs):
                # If 'callback_manager' is not provided, inject our tracer's callback manager
                if "callback_manager" not in kwargs:
                    kwargs["callback_manager"] = callback_manager
                original_init(self, *args, **kwargs)

            return new_init

        # Monkey-patch VectorStoreIndex
        self._original_inits["VectorStoreIndex"] = VectorStoreIndex.__init__
        VectorStoreIndex.__init__ = make_new_init(
            VectorStoreIndex.__init__, self.callback_manager
        )

        # Monkey-patch OpenAI LLM
        self._original_inits["OpenAI"] = OpenAI.__init__
        OpenAI.__init__ = make_new_init(OpenAI.__init__, self.callback_manager)

        # Monkey-patch ServiceContext
        self._original_inits["ServiceContext"] = ServiceContext.__init__
        ServiceContext.__init__ = make_new_init(
            ServiceContext.__init__, self.callback_manager
        )

        # To monkey-patch additional classes: import the class, store its
        # original __init__ in self._original_inits, then replace it with
        # make_new_init(SomeOtherClass.__init__, self.callback_manager).

    def stop(self):
        """Stop tracing and restore original methods"""
        self.callback_manager.remove_handler(self.trace_handler)
        self._restore_original_inits()
        # FIX: corrected the "uplaoded" typo in the user-facing message.
        print("Traces uploaded")
        self._upload_task = True

    def _restore_original_inits(self):
        """Restore the original __init__ methods of LlamaIndex components"""
        from llama_index.core import VectorStoreIndex, ServiceContext
        from llama_index.llms.openai import OpenAI

        # Restore only the classes that were actually patched.
        if "VectorStoreIndex" in self._original_inits:
            VectorStoreIndex.__init__ = self._original_inits["VectorStoreIndex"]
        if "OpenAI" in self._original_inits:
            OpenAI.__init__ = self._original_inits["OpenAI"]
        if "ServiceContext" in self._original_inits:
            ServiceContext.__init__ = self._original_inits["ServiceContext"]

    def _generate_trace_id(self):
        """
        Generate a random trace ID using UUID4.
        Returns a '0x'-prefixed hex string with no hyphens.
        """
        return '0x' + str(uuid.uuid4()).replace('-', '')

    def _get_user_passed_detail(self):
        """Return ``self.user_detail`` enriched with a fresh trace id and
        standard metadata. NOTE: mutates ``self.user_detail`` in place."""
        user_detail = self.user_detail
        user_detail["trace_id"] = self._generate_trace_id()
        metadata = user_detail["metadata"]
        metadata["log_source"] = "llamaindex_tracer"
        # NOTE(review): datetime.utcnow() is deprecated in Python 3.12 but is
        # kept because the backend appears to expect a naive UTC timestamp
        # with no offset — confirm before migrating to datetime.now(timezone.utc).
        metadata["recorded_on"] = datetime.utcnow().isoformat().replace('T', ' ')
        user_detail["metadata"] = metadata
        return user_detail

    def _add_traces_in_data(self, traces=None):
        """Attach *traces* (or all handler traces) to the user detail dict.

        Raises RuntimeError if no traces were given and start() was never called.
        """
        user_detail = self._get_user_passed_detail()
        if traces is None:
            if not self.trace_handler:
                raise RuntimeError("No traces available. Did you call start()?")
            traces = self.trace_handler.traces
        user_detail["traces"] = traces
        return user_detail

    def _create_dataset_schema_with_trace(self):
        """Create/refresh the trace dataset schema on the backend.

        Returns the final HTTP status code (retries once on 401).
        """
        SCHEMA_MAPPING_NEW = {
            "trace_id": {"columnType": "traceId"},
            "trace_uri": {"columnType": "traceUri"},
            "prompt": {"columnType": "prompt"},
            "response": {"columnType": "response"},
            "context": {"columnType": "context"},
            "llm_model": {"columnType": "pipeline"},
            "recorded_on": {"columnType": "metadata"},
            "embed_model": {"columnType": "pipeline"},
            "log_source": {"columnType": "metadata"},
            "vector_store": {"columnType": "pipeline"},
            "feedback": {"columnType": "feedBack"}
        }

        def make_request():
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "X-Project-Name": self.project_name,
            }
            payload = json.dumps({
                "datasetName": self.dataset_name,
                "schemaMapping": SCHEMA_MAPPING_NEW,
                "traceFolderUrl": None,
            })
            response = requests.request(
                "POST",
                f"{self.base_url}/v1/llm/dataset/logs",
                headers=headers,
                data=payload,
                timeout=self.timeout,
            )
            return response

        response = make_request()

        if response.status_code == 401:
            # get_token() # Fetch a new token and set it in the environment
            response = make_request()  # Retry the request
            if response.status_code != 200:
                return response.status_code
        return response.status_code

    def _get_presigned_url(self):
        """Fetch a single presigned upload URL for this dataset.

        Returns the URL string on HTTP 200, otherwise None (callers must check).
        """
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "numFiles": 1,
        })
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Name": self.project_name,
        }

        # NOTE(review): GET with a request body is unusual but mirrors what the
        # backend expects here — confirm before changing to query parameters.
        response = requests.request(
            "GET",
            f"{self.base_url}/v1/llm/presigned-url",
            headers=headers,
            data=payload,
            timeout=self.timeout,
        )
        if response.status_code == 200:
            presignedUrls = response.json()["data"]["presignedUrls"][0]
            return presignedUrls

    def _put_presigned_url(self, presignedUrl, filename):
        """Upload the JSON trace file at *filename* to *presignedUrl*.

        Returns (response, status_code) on failure, None on success.
        """
        headers = {
            "Content-Type": "application/json",
        }

        if "blob.core.windows.net" in presignedUrl:  # Azure
            headers["x-ms-blob-type"] = "BlockBlob"
        print("Uploading traces...")
        with open(filename) as f:
            payload = f.read().replace("\n", "").replace("\r", "").encode()

        response = requests.request(
            "PUT",
            presignedUrl,
            headers=headers,
            data=payload,
            timeout=self.timeout,
        )
        # BUG FIX: the original condition used "or" ("!= 200 or != 201"),
        # which is always true, so even successful uploads hit this branch.
        if response.status_code not in (200, 201):
            return response, response.status_code

    def _insert_traces(self, presignedUrl):
        """Register the uploaded trace file with the backend."""
        headers = {
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "Content-Type": "application/json",
            "X-Project-Name": self.project_name,
        }
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "presignedUrl": presignedUrl,
        })
        # NOTE(review): the response is intentionally not checked; a failed
        # insert is currently silent — consider surfacing the status code.
        response = requests.request(
            "POST",
            f"{self.base_url}/v1/llm/insert/trace",
            headers=headers,
            data=payload,
            timeout=self.timeout,
        )

    def _upload_traces(self, save_json_to_pwd=None):
        """Upload all collected traces, optionally saving a local JSON copy.

        NOTE(review): when save_json_to_pwd is falsy the local file is never
        written, yet _put_presigned_url tries to read it — this pre-existing
        path only works with save_json_to_pwd=True (as used from stop()).
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_{timestamp}.json"

        traces = self._add_traces_in_data()

        if save_json_to_pwd:
            with open(filename, "w") as f:
                json.dump([traces], f, indent=2, cls=CustomEncoder)
            # FIX: report the actual file name that was written.
            print(f"tracer is saved to {filename}")

        self._create_dataset_schema_with_trace()
        presignedUrl = self._get_presigned_url()
        if presignedUrl is None:
            # Mirror _save_current_query_traces: do not PUT to a None URL.
            print("Could not obtain a presigned URL; skipping trace upload.")
            return
        self._put_presigned_url(presignedUrl, filename)
        self._insert_traces(presignedUrl)
        # FIX: corrected the "uplaoded" typo in the user-facing message.
        print("Traces uploaded")

    def get_upload_status(self):
        """Check the status of the trace upload."""
        if self._upload_task is None:
            return "No upload task in progress."
        if self._upload_task:
            return "Upload completed"
|
@@ -17,7 +17,7 @@ from .instrumentators import (
|
|
17
17
|
LlamaIndexInstrumentor,
|
18
18
|
)
|
19
19
|
from .utils import get_unique_key
|
20
|
-
|
20
|
+
# from .llamaindex_callback import LlamaIndexTracer
|
21
21
|
from ..ragaai_catalyst import RagaAICatalyst
|
22
22
|
|
23
23
|
logger = logging.getLogger(__name__)
|
@@ -86,13 +86,19 @@ class Tracer:
|
|
86
86
|
logger.error(f"Failed to retrieve projects list: {e}")
|
87
87
|
raise
|
88
88
|
|
89
|
+
if tracer_type == "langchain":
|
90
|
+
self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
|
89
91
|
|
90
|
-
|
92
|
+
self._tracer_provider = self._setup_provider()
|
93
|
+
self._instrumentor = self._setup_instrumentor(tracer_type)
|
94
|
+
self.is_instrumented = False
|
95
|
+
self._upload_task = None
|
96
|
+
elif tracer_type == "llamaindex":
|
97
|
+
self._upload_task = None
|
98
|
+
from .llamaindex_callback import LlamaIndexTracer
|
91
99
|
|
92
|
-
|
93
|
-
|
94
|
-
self.is_instrumented = False
|
95
|
-
self._upload_task = None
|
100
|
+
else:
|
101
|
+
raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
|
96
102
|
|
97
103
|
def _improve_metadata(self, metadata, tracer_type):
|
98
104
|
if metadata is None:
|
@@ -142,34 +148,44 @@ class Tracer:
|
|
142
148
|
|
143
149
|
def start(self):
|
144
150
|
"""Start the tracer."""
|
145
|
-
if
|
146
|
-
self.
|
147
|
-
|
148
|
-
|
149
|
-
|
151
|
+
if self.tracer_type == "langchain":
|
152
|
+
if not self.is_instrumented:
|
153
|
+
self._instrumentor().instrument(tracer_provider=self._tracer_provider)
|
154
|
+
self.is_instrumented = True
|
155
|
+
print(f"Tracer started for project: {self.project_name}")
|
156
|
+
return self
|
157
|
+
elif self.tracer_type == "llamaindex":
|
158
|
+
from .llamaindex_callback import LlamaIndexTracer
|
159
|
+
return LlamaIndexTracer(self._pass_user_data()).start()
|
160
|
+
|
150
161
|
|
151
162
|
def stop(self):
|
152
163
|
"""Stop the tracer and initiate trace upload."""
|
153
|
-
if
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
164
|
+
if self.tracer_type == "langchain":
|
165
|
+
if not self.is_instrumented:
|
166
|
+
logger.warning("Tracer was not started. No traces to upload.")
|
167
|
+
return "No traces to upload"
|
168
|
+
|
169
|
+
print("Stopping tracer and initiating trace upload...")
|
170
|
+
self._cleanup()
|
171
|
+
self._upload_task = self._run_async(self._upload_traces())
|
172
|
+
return "Trace upload initiated. Use get_upload_status() to check the status."
|
173
|
+
elif self.tracer_type == "llamaindex":
|
174
|
+
from .llamaindex_callback import LlamaIndexTracer
|
175
|
+
return LlamaIndexTracer().stop()
|
161
176
|
|
162
177
|
def get_upload_status(self):
|
163
178
|
"""Check the status of the trace upload."""
|
164
|
-
if self.
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
179
|
+
if self.tracer_type == "langchain":
|
180
|
+
if self._upload_task is None:
|
181
|
+
return "No upload task in progress."
|
182
|
+
if self._upload_task.done():
|
183
|
+
try:
|
184
|
+
result = self._upload_task.result()
|
185
|
+
return f"Upload completed: {result}"
|
186
|
+
except Exception as e:
|
187
|
+
return f"Upload failed: {str(e)}"
|
188
|
+
return "Upload in progress..."
|
173
189
|
|
174
190
|
def _run_async(self, coroutine):
|
175
191
|
"""Run an asynchronous coroutine in a separate thread."""
|
@@ -246,3 +262,21 @@ class Tracer:
|
|
246
262
|
# Reset instrumentation flag
|
247
263
|
self.is_instrumented = False
|
248
264
|
# Note: We're not resetting all attributes here to allow for upload status checking
|
265
|
+
def _pass_user_data(self):
|
266
|
+
return {"project_name":self.project_name,
|
267
|
+
"project_id": self.project_id,
|
268
|
+
"dataset_name":self.dataset_name,
|
269
|
+
"trace_user_detail" : {
|
270
|
+
"project_id": self.project_id,
|
271
|
+
"trace_id": "",
|
272
|
+
"session_id": None,
|
273
|
+
"trace_type": self.tracer_type,
|
274
|
+
"traces": [],
|
275
|
+
"metadata": self.metadata,
|
276
|
+
"pipeline": {
|
277
|
+
"llm_model": self.pipeline["llm_model"],
|
278
|
+
"vector_store": self.pipeline["vector_store"],
|
279
|
+
"embed_model": self.pipeline["embed_model"]
|
280
|
+
}
|
281
|
+
}
|
282
|
+
}
|