ragaai-catalyst 2.1.3b0__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registry.
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +37 -11
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +240 -81
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +632 -114
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +316 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +229 -82
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +214 -59
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +16 -14
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +147 -28
- ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +88 -2
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +9 -51
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +83 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +26 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +28 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +45 -15
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2520 -2152
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +59 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +23 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +284 -15
- ragaai_catalyst/tracers/llamaindex_callback.py +5 -5
- ragaai_catalyst/tracers/tracer.py +83 -10
- ragaai_catalyst/tracers/upload_traces.py +1 -1
- ragaai_catalyst-2.1.4.dist-info/METADATA +431 -0
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/RECORD +26 -20
- ragaai_catalyst-2.1.3b0.dist-info/METADATA +0 -43
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py
@@ -0,0 +1,59 @@
+import os
+from typing import List, Dict, Any
+import logging
+
+logger = logging.getLogger(__name__)
+logging_level = (
+    logger.setLevel(logging.DEBUG)
+    if os.getenv("DEBUG")
+    else logger.setLevel(logging.INFO)
+)
+
+
+class SpanAttributes:
+    def __init__(self, name):
+        self.name = name
+        self.tags = []
+        self.metadata = {}
+        self.metrics = []
+        self.feedback = None
+        self.trace_attributes = ["tags", "metadata", "metrics"]
+
+    def add_tags(self, tags: str | List[str]):
+        if isinstance(tags, str):
+            tags = [tags]
+        self.tags.extend(tags)
+        logger.debug(f"Added tags: {tags}")
+
+    def add_metadata(self, metadata):
+        self.metadata.update(metadata)
+        logger.debug(f"Added metadata: {metadata}")
+
+    def add_metrics(
+        self,
+        name: str,
+        score: float | int,
+        reasoning: str = "",
+        cost: float = None,
+        latency: float = None,
+        metadata: Dict[str, Any] = {},
+        config: Dict[str, Any] = {},
+    ):
+        self.metrics.append(
+            {
+                "name": name,
+                "score": score,
+                "reason": reasoning,
+                "source": "user",
+                "cost": cost,
+                "latency": latency,
+                "metadata": metadata,
+                "mappings": [],
+                "config": config,
+            }
+        )
+        logger.debug(f"Added metrics: {self.metrics}")
+
+    def add_feedback(self, feedback: Any):
+        self.feedback = feedback
+        logger.debug(f"Added feedback: {self.feedback}")
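
The new SpanAttributes class is a plain accumulator for user-supplied span annotations. A minimal usage sketch (the import path follows the changed-file list above; the names and values below are illustrative, not taken from the diff):

    from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes

    span = SpanAttributes("checkout_agent")    # placeholder span name
    span.add_tags("experiment-a")              # a bare string is wrapped into a list
    span.add_tags(["regression", "v2.1.4"])    # a list extends the existing tags
    span.add_metadata({"user_id": "u-123"})
    span.add_metrics(name="faithfulness", score=0.87, reasoning="grounded in retrieved context")
    span.add_feedback({"thumbs_up": True})
    print(span.metrics[0]["reason"])           # note: `reasoning` is stored under the "reason" key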
ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py
@@ -1,8 +1,11 @@
 import json
 import os
+import requests
+import logging
 from importlib import resources
 from dataclasses import asdict
 
+logger = logging.getLogger(__name__)
 
 def convert_usage_to_dict(usage):
     # Initialize the token_usage dictionary with default values
@@ -68,6 +71,26 @@ def load_model_costs():
         return json.load(file)
 
 
+def update_model_costs_from_github():
+    """Updates the model_costs.json file with latest costs from GitHub."""
+    try:
+        logger.debug("loading the latest model costs.")
+        response = requests.get(
+            "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+        )
+        if response.status_code == 200:
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            model_costs_path = os.path.join(current_dir, "model_costs.json")
+            with open(model_costs_path, "w") as file:
+                json.dump(response.json(), file, indent=4)
+            logger.debug("Model costs updated successfully.")
+            return True
+        return False
+    except Exception as e:
+        logger.error(f"Failed to update model costs from GitHub: {e}")
+        return False
+
+
 def log_event(event_data, log_file_path):
     event_data = asdict(event_data)
     with open(log_file_path, "a") as f:
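
update_model_costs_from_github() overwrites the bundled model_costs.json with litellm's published pricing file and returns True only on an HTTP 200 response. A usage sketch (the "gpt-4" key is illustrative; actual keys come from litellm's file and may change):

    from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import (
        load_model_costs,
        update_model_costs_from_github,
    )

    if update_model_costs_from_github():      # True only when the download succeeded
        costs = load_model_costs()            # re-read the refreshed model_costs.json
        print(costs.get("gpt-4", {}))         # per-model pricing entry, if present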
ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py
@@ -6,9 +6,22 @@ import ast
 import importlib.util
 import json
 import astor
+import ipynbname
+import sys
+
 from pathlib import Path
+from IPython import get_ipython
+
+
+if 'get_ipython' in locals():
+    ipython_instance = get_ipython()
+    if ipython_instance:
+        ipython_instance.run_line_magic('reset', '-f')
+
 import logging
 logger = logging.getLogger(__name__)
+logging_level = logger.setLevel(logging.DEBUG) if os.getenv("DEBUG") == "1" else logging.INFO
+
 
 # Define the PackageUsageRemover class
 class PackageUsageRemover(ast.NodeTransformer):
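
Both of the new module-level logging toggles read the DEBUG environment variable at import time, so it must be set before the module is first imported. A sketch of the intended switch (the consuming script is hypothetical):

    import os

    os.environ["DEBUG"] = "1"  # set before the first import; "1" satisfies both toggles

    # Importing afterwards routes the module logger to DEBUG; otherwise it stays at INFO.
    from ragaai_catalyst.tracers.agentic_tracing.utils import zip_list_of_unique_files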
@@ -68,12 +81,187 @@ def remove_package_code(source_code: str, package_name: str) -> str:
     except Exception as e:
         raise Exception(f"Error processing source code: {str(e)}")
 
-
+class JupyterNotebookHandler:
+    @staticmethod
+    def is_running_in_colab():
+        """Check if the code is running in Google Colab."""
+        try:
+            import google.colab
+            return True
+        except ImportError:
+            return False
+
+    @staticmethod
+    def is_running_in_notebook():
+        """Check if the code is running in a Jupyter notebook or Colab."""
+        try:
+            shell = get_ipython().__class__.__name__
+            if JupyterNotebookHandler.is_running_in_colab():
+                return True
+            return shell == 'ZMQInteractiveShell'
+        except:
+            return False
+
+    @staticmethod
+    def get_notebook_path():
+        """Get the path of the current executing notebook."""
+        try:
+            # First try using ipynbname
+            try:
+                notebook_path = ipynbname.path()
+                if notebook_path:
+                    # logger.info(f"Found notebook using ipynbname: {notebook_path}")
+                    return str(notebook_path)
+            except:
+                pass
+
+            # Check if running in Colab
+            if JupyterNotebookHandler.is_running_in_colab():
+                try:
+                    from google.colab import drive
+                    if not os.path.exists('/content/drive'):
+                        drive.mount('/content/drive')
+                        # logger.info("Google Drive mounted successfully")
+
+                    # Look for notebooks in /content first
+                    ipynb_files = list(Path('/content').glob('*.ipynb'))
+                    if ipynb_files:
+                        current_nb = max(ipynb_files, key=os.path.getmtime)
+                        # logger.info(f"Found current Colab notebook: {current_nb}")
+                        return str(current_nb)
+
+                    # Then check Drive if mounted
+                    if os.path.exists('/content/drive'):
+                        drive_ipynb_files = list(Path('/content/drive').rglob('*.ipynb'))
+                        if drive_ipynb_files:
+                            current_nb = max(drive_ipynb_files, key=os.path.getmtime)
+                            # logger.info(f"Found Colab notebook in Drive: {current_nb}")
+                            return str(current_nb)
+                except Exception as e:
+                    logger.warning(f"Error in Colab notebook detection: {str(e)}")
+
+            # Try getting notebook path for regular Jupyter
+            try:
+                import IPython
+                ipython = IPython.get_ipython()
+                if ipython is not None:
+                    # Try getting the notebook name from kernel
+                    if hasattr(ipython, 'kernel') and hasattr(ipython.kernel, 'session'):
+                        kernel_file = ipython.kernel.session.config.get('IPKernelApp', {}).get('connection_file', '')
+                        if kernel_file:
+                            kernel_id = Path(kernel_file).stem
+                            current_dir = Path.cwd()
+
+                            # Look for .ipynb files in current and parent directories
+                            for search_dir in [current_dir] + list(current_dir.parents):
+                                notebooks = list(search_dir.glob('*.ipynb'))
+                                recent_notebooks = [
+                                    nb for nb in notebooks
+                                    if '.ipynb_checkpoints' not in str(nb)
+                                ]
+
+                                if recent_notebooks:
+                                    notebook_path = str(max(recent_notebooks, key=os.path.getmtime))
+                                    # logger.info(f"Found Jupyter notebook: {notebook_path}")
+                                    return notebook_path
+
+                    # Try alternative method using notebook metadata
+                    try:
+                        notebook_path = ipython.kernel._parent_ident
+                        if notebook_path:
+                            # logger.info(f"Found notebook using kernel parent ident: {notebook_path}")
+                            return notebook_path
+                    except:
+                        pass
+
+            except Exception as e:
+                # logger.warning(f"Error in Jupyter notebook detection: {str(e)}")
+                return None
+
+        except Exception as e:
+            # logger.warning(f"Error getting notebook path: {str(e)}")
+            return None
+
+
+
+def comment_magic_commands(script_content: str) -> str:
+    """Comment out magic commands, shell commands, and direct execution commands in the script content."""
+    lines = script_content.splitlines()
+    commented_lines = []
+    for line in lines:
+        # Check for magic commands, shell commands, or direct execution commands
+        if re.match(r'^\s*(!|%|pip|apt-get|curl|conda)', line.strip()):
+            commented_lines.append(f"# {line}")  # Comment the line
+        else:
+            commented_lines.append(line)  # Keep the line unchanged
+    return "\n".join(commented_lines)
+
+
+
 class TraceDependencyTracker:
     def __init__(self, output_dir=None):
         self.tracked_files = set()
         self.python_imports = set()
-        self.
+        self.notebook_path = None
+        self.colab_content = None
+
+        # Set output directory with Colab handling
+        if JupyterNotebookHandler.is_running_in_colab():
+            self.output_dir = '/content'
+            if not os.path.exists(self.output_dir):
+                os.makedirs(self.output_dir)
+            logger.info("Using /content as output directory for Colab")
+        else:
+            self.output_dir = output_dir or os.getcwd()
+
+        self.jupyter_handler = JupyterNotebookHandler()
+
+
+    def check_environment_and_save(self):
+        """Check if running in Colab and get current cell content."""
+        try:
+            from IPython import get_ipython
+            ipython = get_ipython()
+            if 'google.colab' in sys.modules:
+                logger.info("Running on Google Colab.")
+
+                # Retrieve the current cell content dynamically in Colab
+                current_cell = ipython.history_manager.get_range()
+                script_content = "\n".join(input_line for _, _, input_line in current_cell if input_line.strip())
+                script_content = comment_magic_commands(script_content)  # Comment out magic commands
+
+                # Store the content in the class attribute instead of saving to file
+                self.colab_content = script_content
+                logger.info("Successfully retrieved Colab cell content")
+
+            else:
+                logger.info("Not running on Google Colab.")
+        except Exception as e:
+            logger.warning(f"Error retrieving the current cell content: {e}")
+
+
+    def track_jupyter_notebook(self):
+        """Track the current notebook and its dependencies."""
+        if self.jupyter_handler.is_running_in_notebook():
+            # Get notebook path using the enhanced handler
+            notebook_path = self.jupyter_handler.get_notebook_path()
+
+            if notebook_path:
+                self.notebook_path = notebook_path
+                self.track_file_access(notebook_path)
+
+                # Track notebook dependencies
+                try:
+                    with open(notebook_path, 'r', encoding='utf-8') as f:
+                        notebook_content = f.read()
+                        notebook_content = comment_magic_commands(notebook_content)
+                        # Find and track imported files
+                        self.find_config_files(notebook_content, notebook_path)
+                except Exception as e:
+                    pass
+        else:
+            pass
+
 
     def track_file_access(self, filepath):
         if os.path.exists(filepath):
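
JupyterNotebookHandler consists only of static methods, so it can be probed without instantiation, and every probe degrades gracefully outside an IPython kernel. A minimal sketch:

    from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import (
        JupyterNotebookHandler,
    )

    print(JupyterNotebookHandler.is_running_in_colab())     # False outside Colab
    print(JupyterNotebookHandler.is_running_in_notebook())  # False outside Jupyter/Colab
    print(JupyterNotebookHandler.get_notebook_path())       # None when no notebook is found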
@@ -122,65 +310,146 @@ class TraceDependencyTracker:
         except (ImportError, AttributeError):
             pass
         except Exception as e:
-
+            pass
 
     def create_zip(self, filepaths):
+        self.track_jupyter_notebook()
+        # logger.info("Tracked Jupyter notebook and its dependencies")
+
+        # Ensure output directory exists
+        os.makedirs(self.output_dir, exist_ok=True)
+        # logger.info(f"Using output directory: {self.output_dir}")
+
+        # Special handling for Colab
+        if self.jupyter_handler.is_running_in_colab():
+            # logger.info("Running in Google Colab environment")
+            # Try to get the Colab notebook path
+            colab_notebook = self.jupyter_handler.get_notebook_path()
+            if colab_notebook:
+                self.tracked_files.add(os.path.abspath(colab_notebook))
+                # logger.info(f"Added Colab notebook to tracked files: {colab_notebook}")
+
+            # Get current cell content
+            self.check_environment_and_save()
+
+        # Process all files (existing code)
         for filepath in filepaths:
             abs_path = os.path.abspath(filepath)
             self.track_file_access(abs_path)
             try:
                 with open(abs_path, 'r', encoding='utf-8') as file:
                     content = file.read()
+                    # Comment out magic commands before processing
+                    content = comment_magic_commands(content)
                 self.find_config_files(content, abs_path)
                 if filepath.endswith('.py'):
                     self.analyze_python_imports(abs_path)
             except Exception as e:
-
+                pass
+
+        notebook_content_str = None
+        if self.notebook_path and os.path.exists(self.notebook_path):
+            try:
+                with open(self.notebook_path, 'r', encoding='utf-8') as f:
+                    notebook_content = json.load(f)
+
+                cell_contents = []
+                for cell in notebook_content.get('cells', []):
+                    if cell['cell_type'] == 'code':
+                        # Comment out magic commands in the cell's source
+                        cell_source = ''.join(cell['source'])
+                        commented_source = comment_magic_commands(cell_source)
+                        cell_contents.append(commented_source)
 
+                notebook_content_str = '\n\n'.join(cell_contents)
+                notebook_abs_path = os.path.abspath(self.notebook_path)
+                if notebook_abs_path in self.tracked_files:
+                    self.tracked_files.remove(notebook_abs_path)
+
+            except Exception as e:
+                pass
+
+        # Calculate hash and create zip
         self.tracked_files.update(self.python_imports)
         hash_contents = []
+
         for filepath in sorted(self.tracked_files):
-            if '
+            if not filepath.endswith('.py'):
+                continue
+            elif '/envs' in filepath or '__init__' in filepath:
                 continue
             try:
                 with open(filepath, 'rb') as file:
                     content = file.read()
-
-                    # Temporarily remove raga_catalyst code for hash calculation
-                    content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
+                    content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
                    hash_contents.append(content)
             except Exception as e:
-
+                logger.warning(f"Could not read {filepath} for hash calculation: {str(e)}")
+                pass
+
+
+        if notebook_content_str:
+            hash_contents.append(notebook_content_str.encode('utf-8'))
+
+        if self.colab_content:
+            hash_contents.append(self.colab_content.encode('utf-8'))
+
 
         combined_content = b''.join(hash_contents)
         hash_id = hashlib.sha256(combined_content).hexdigest()
 
+        # Create zip in the appropriate location
         zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
         common_path = [os.path.abspath(p) for p in self.tracked_files if 'env' not in p]
 
-        if common_path
+        if common_path:
             base_path = os.path.commonpath(common_path)
+        else:
+            base_path = os.getcwd()
+
         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for filepath in sorted(self.tracked_files):
-                if 'env' in filepath:
+                if 'env' in filepath or 'ragaai_catalyst' in filepath:
                     continue
                 try:
                     relative_path = os.path.relpath(filepath, base_path)
                     zipf.write(filepath, relative_path)
-
+                    logger.debug(f"Added python script to zip: {relative_path}")
                 except Exception as e:
-
+                    pass
+
+            if notebook_content_str:
+                py_filename = os.path.splitext(os.path.basename(self.notebook_path))[0] + ".py"
+                zipf.writestr(py_filename, notebook_content_str)
+                logger.debug(f"Added notebook content to zip as: {py_filename}")
+
+            if self.colab_content:
+                colab_filename = "colab_file.py"
+                zipf.writestr(colab_filename, self.colab_content)
+                logger.debug(f"Added Colab cell content to zip as: {colab_filename}")
 
+
+        logger.info(" Zip file created successfully.")
+        logger.debug(f"Zip file created successfully at: {zip_filename}")
         return hash_id, zip_filename
 
-
-
+def zip_list_of_unique_files(filepaths, output_dir=None):
+    """Create a zip file containing all unique files and their dependencies."""
+    if output_dir is None:
+        # Set default output directory based on environment
+        if JupyterNotebookHandler.is_running_in_colab():
+            output_dir = '/content'
+        else:
+            output_dir = os.getcwd()
+
     tracker = TraceDependencyTracker(output_dir)
     return tracker.create_zip(filepaths)
 
+
 # Example usage
 if __name__ == "__main__":
     filepaths = ["script1.py", "script2.py"]
     hash_id, zip_path = zip_list_of_unique_files(filepaths)
     print(f"Created zip file: {zip_path}")
     print(f"Hash ID: {hash_id}")
+
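
Since comment_magic_commands() decides what ends up in the hash and the zip, its behavior is worth pinning down. A quick sketch with a made-up snippet:

    from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import (
        comment_magic_commands,
    )

    src = "%load_ext autoreload\n!pip install requests\nimport requests"
    print(comment_magic_commands(src))
    # Lines starting with %, !, or bare pip/apt-get/curl/conda are commented out:
    #   # %load_ext autoreload
    #   # !pip install requests
    #   import requests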
ragaai_catalyst/tracers/llamaindex_callback.py
@@ -58,7 +58,7 @@ class LlamaIndexTracer:
     ) -> None:
         trace = {
             "event_type": event_type,
-            "timestamp": datetime.now().isoformat(),
+            "timestamp": datetime.now().astimezone().isoformat(),
             "payload": payload,
             "status": "started",
             "event_id": event_id,
@@ -82,7 +82,7 @@ class LlamaIndexTracer:
     ) -> None:
         trace = {
             "event_type": event_type,
-            "timestamp": datetime.now().isoformat(),
+            "timestamp": datetime.now().astimezone().isoformat(),
             "payload": payload,
             "status": "completed",
             "event_id": event_id,
@@ -181,7 +181,7 @@ class LlamaIndexTracer:
         # self._upload_traces(save_json_to_pwd=True)
         self.callback_manager.remove_handler(self.trace_handler)
         self._restore_original_inits()
-        print("Traces
+        print("Traces uploaded")
         self._upload_task = True
 
     def _restore_original_inits(self):
@@ -220,7 +220,7 @@ class LlamaIndexTracer:
         user_detail["trace_id"] = self._generate_trace_id()
         metadata = user_detail["metadata"]
         metadata["log_source"] = "llamaindex_tracer"
-        metadata["recorded_on"] = datetime.
+        metadata["recorded_on"] = datetime.now().isoformat()
         user_detail["metadata"] = metadata
         return user_detail
 
@@ -351,7 +351,7 @@ class LlamaIndexTracer:
         presignedUrl = self._get_presigned_url()
         self._put_presigned_url(presignedUrl, filename)
         self._insert_traces(presignedUrl)
-        print("Traces
+        print("Traces uploaded")
 
     def get_upload_status(self):
         """Check the status of the trace upload."""
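
The timestamp changes above replace naive local times with timezone-aware ones, so traces recorded on different machines carry their UTC offset. Illustrative output (values are examples only):

    from datetime import datetime

    print(datetime.now().isoformat())               # e.g. 2025-01-15T10:30:00.123456 (no offset)
    print(datetime.now().astimezone().isoformat())  # e.g. 2025-01-15T10:30:00.123456+05:30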
ragaai_catalyst/tracers/tracer.py
@@ -21,10 +21,10 @@ from ragaai_catalyst.tracers.utils import get_unique_key
 from ragaai_catalyst import RagaAICatalyst
 from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
 from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
+from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import load_model_costs, update_model_costs_from_github
 
 logger = logging.getLogger(__name__)
 
-
 class Tracer(AgenticTracing):
     NUM_PROJECTS = 100
     TIMEOUT = 10
@@ -32,14 +32,28 @@ class Tracer(AgenticTracing):
         self,
         project_name,
         dataset_name,
+        trace_name=None,
         tracer_type=None,
         pipeline=None,
         metadata=None,
         description=None,
         upload_timeout=30,  # Default timeout of 30 seconds
+        update_llm_cost=True,  # Parameter to control model cost updates
+        auto_instrumentation={  # to control automatic instrumentation of different components
+            'llm': True,
+            'tool': True,
+            'agent': True,
+            'user_interaction': True,
+            'file_io': True,
+            'network': True,
+            'custom': True
+        },
+        interval_time=2,
+        # auto_instrumentation=True/False  # to control automatic instrumentation of everything
+
     ):
         """
-        Initializes a Tracer object.
+        Initializes a Tracer object.
 
         Args:
             project_name (str): The name of the project.
@@ -49,19 +63,48 @@ class Tracer(AgenticTracing):
             metadata (dict, optional): The metadata. Defaults to None.
             description (str, optional): The description. Defaults to None.
             upload_timeout (int, optional): The upload timeout in seconds. Defaults to 30.
-
-        Returns:
-            None
+            update_llm_cost (bool, optional): Whether to update model costs from GitHub. Defaults to True.
         """
-
+
         user_detail = {
             "project_name": project_name,
             "project_id": None,  # Will be set after project validation
             "dataset_name": dataset_name,
+            "interval_time": interval_time,
+            "trace_name": trace_name if trace_name else f"trace_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
             "trace_user_detail": {"metadata": metadata} if metadata else {}
         }
-
-
+
+        # take care of auto_instrumentation
+        if isinstance(auto_instrumentation, bool):
+            if auto_instrumentation:
+                auto_instrumentation = {
+                    "llm": True,
+                    "tool": True,
+                    "agent": True,
+                    "user_interaction": True,
+                    "file_io": True,
+                    "network": True,
+                    "custom": True
+                }
+            else:
+                auto_instrumentation = {
+                    "llm": False,
+                    "tool": False,
+                    "agent": False,
+                    "user_interaction": False,
+                    "file_io": False,
+                    "network": False,
+                    "custom": False
+                }
+        elif isinstance(auto_instrumentation, dict):
+            auto_instrumentation = {k: v for k, v in auto_instrumentation.items() if v}
+            for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
+                if key not in auto_instrumentation:
+                    auto_instrumentation[key] = False
+
+        super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)
+
         self.project_name = project_name
         self.dataset_name = dataset_name
         self.tracer_type = tracer_type
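
The auto_instrumentation handling means a bool fans out to every component, and a partial dict keeps its truthy entries and fills the rest with False. A standalone sketch that mirrors the constructor logic above (for illustration only, not the packaged function):

    def normalize(auto_instrumentation):
        # Mirror of the normalization in Tracer.__init__ above.
        components = ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]
        if isinstance(auto_instrumentation, bool):
            return {k: auto_instrumentation for k in components}
        enabled = {k: v for k, v in auto_instrumentation.items() if v}
        return {k: enabled.get(k, False) for k in components}

    print(normalize(True))                          # all seven components enabled
    print(normalize({"llm": True, "tool": False}))  # llm stays True; everything else False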
@@ -72,10 +115,14 @@ class Tracer(AgenticTracing):
         self.description = description
         self.upload_timeout = upload_timeout
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
-        self.timeout =
+        self.timeout = 30
         self.num_projects = 100
-        self.start_time = datetime.datetime.now(
+        self.start_time = datetime.datetime.now().astimezone().isoformat()
 
+        if update_llm_cost:
+            # First update the model costs file from GitHub
+            update_model_costs_from_github()
+
         try:
             response = requests.get(
                 f"{self.base_url}/v2/llm/projects?size={self.num_projects}",
@@ -118,7 +165,30 @@ class Tracer(AgenticTracing):
         else:
             self._upload_task = None
             # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+
+
+    def set_dataset_name(self, dataset_name):
+        """
+        Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
+
+        Args:
+            dataset_name (str): The new dataset name to set
+        """
+        # Store current parameters
+        current_params = {
+            'project_name': self.project_name,
+            'tracer_type': self.tracer_type,
+            'pipeline': self.pipeline,
+            'metadata': self.metadata,
+            'description': self.description,
+            'upload_timeout': self.upload_timeout
+        }
 
+        # Reinitialize self with new dataset_name and stored parameters
+        self.__init__(
+            dataset_name=dataset_name,
+            **current_params
+        )
 
     def _improve_metadata(self, metadata, tracer_type):
         if metadata is None:
@@ -191,6 +261,9 @@ class Tracer(AgenticTracing):
             print("Stopping tracer and initiating trace upload...")
             self._cleanup()
             self._upload_task = self._run_async(self._upload_traces())
+            self.is_active = False
+            self.dataset_name = None
+
             return "Trace upload initiated. Use get_upload_status() to check the status."
         elif self.tracer_type == "llamaindex":
            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
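
Putting the tracer.py changes together, a hypothetical caller might look like the sketch below. Parameter names come from the diff; the project, dataset, and tracer-type values are placeholders:

    from ragaai_catalyst.tracers.tracer import Tracer

    tracer = Tracer(
        project_name="my-project",                         # placeholder
        dataset_name="run-2025-01",                        # placeholder
        tracer_type="langchain",                           # placeholder
        update_llm_cost=True,                              # refresh pricing from litellm's file
        auto_instrumentation={"llm": True, "tool": True},  # unlisted components resolve to False
    )

    # ... run the instrumented workload ...

    tracer.set_dataset_name("run-2025-02")  # re-runs __init__ with the same stored parameters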
|