ragaai-catalyst 2.1b0-py3-none-any.whl → 2.1b1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +1 -0
- ragaai_catalyst/dataset.py +1 -4
- ragaai_catalyst/evaluation.py +4 -5
- ragaai_catalyst/guard_executor.py +97 -0
- ragaai_catalyst/guardrails_manager.py +41 -15
- ragaai_catalyst/internal_api_completion.py +1 -1
- ragaai_catalyst/prompt_manager.py +7 -2
- ragaai_catalyst/ragaai_catalyst.py +1 -1
- ragaai_catalyst/synthetic_data_generation.py +7 -0
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +422 -0
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +198 -0
- ragaai_catalyst/tracers/agentic_tracing/base.py +376 -0
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +248 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +808 -0
- ragaai_catalyst/tracers/agentic_tracing/network_tracer.py +286 -0
- ragaai_catalyst/tracers/agentic_tracing/sample.py +197 -0
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +247 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +165 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator_test.py +172 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +43 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +18 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/data_classes.py +61 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +32 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +177 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +7823 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/tracer.py +30 -4
- ragaai_catalyst/tracers/upload_traces.py +127 -0
- ragaai_catalyst-2.1b1.dist-info/METADATA +43 -0
- ragaai_catalyst-2.1b1.dist-info/RECORD +56 -0
- {ragaai_catalyst-2.1b0.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1b0.dist-info/METADATA +0 -295
- ragaai_catalyst-2.1b0.dist-info/RECORD +0 -28
- {ragaai_catalyst-2.1b0.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py
@@ -0,0 +1,74 @@
+import json
+import os
+from importlib import resources
+from dataclasses import asdict
+
+
+def convert_usage_to_dict(usage):
+    # Initialize the token_usage dictionary with default values
+    token_usage = {
+        "input": 0,
+        "completion": 0,
+        "reasoning": 0,  # Default reasoning tokens to 0 unless specified
+    }
+
+    if usage:
+        if isinstance(usage, dict):
+            # Access usage data as dictionary keys
+            token_usage["input"] = usage.get("prompt_tokens", 0)
+            token_usage["completion"] = usage.get("completion_tokens", 0)
+            # If reasoning tokens are provided, adjust accordingly
+            token_usage["reasoning"] = usage.get("reasoning_tokens", 0)
+        else:
+            # Handle the case where usage is not a dictionary
+            # This could be an object with attributes, or something else
+            try:
+                token_usage["input"] = getattr(usage, "prompt_tokens", 0)
+                token_usage["completion"] = getattr(usage, "completion_tokens", 0)
+                token_usage["reasoning"] = getattr(usage, "reasoning_tokens", 0)
+            except AttributeError:
+                # If attributes are not found, log or handle the error as needed
+                print(f"Warning: Unexpected usage type: {type(usage)}")
+
+    return token_usage
+
+
+def calculate_cost(
+    token_usage,
+    input_cost_per_token=0.0,
+    output_cost_per_token=0.0,
+    reasoning_cost_per_token=0.0,
+):
+    input_tokens = token_usage.get("prompt_tokens", 0)
+    output_tokens = token_usage.get("completion_tokens", 0)
+    reasoning_tokens = token_usage.get("reasoning_tokens", 0)
+
+    input_cost = input_tokens * input_cost_per_token
+    output_cost = output_tokens * output_cost_per_token
+    reasoning_cost = reasoning_tokens * reasoning_cost_per_token
+
+    total_cost = input_cost + output_cost + reasoning_cost
+
+    return {
+        "input": input_cost,
+        "completion": output_cost,
+        "reasoning": reasoning_cost,
+        "total": total_cost,
+    }
+
+
+def load_model_costs():
+    try:
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        model_costs_path = os.path.join(current_dir, "model_costs.json")
+        with open(model_costs_path, "r") as file:
+            return json.load(file)
+    except FileNotFoundError:
+        with resources.open_text("utils", "model_costs.json") as file:
+            return json.load(file)
+
+
+def log_event(event_data, log_file_path):
+    event_data = asdict(event_data)
+    with open(log_file_path, "a") as f:
+        f.write(json.dumps(event_data) + "\n")
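These helpers are small enough to exercise directly. The snippet below is an illustrative sketch only: the import path is assumed from the file list above, and the per-token prices are arbitrary placeholders, not real model rates. Note that calculate_cost reads the raw provider-style keys (prompt_tokens, completion_tokens, reasoning_tokens), not the normalized dict returned by convert_usage_to_dict.

    from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import (
        convert_usage_to_dict,
        calculate_cost,
    )

    # A provider-style usage payload, e.g. from an OpenAI-compatible response object.
    usage = {"prompt_tokens": 120, "completion_tokens": 45, "reasoning_tokens": 0}

    print(convert_usage_to_dict(usage))
    # {'input': 120, 'completion': 45, 'reasoning': 0}

    # Pass the original usage dict here (not the normalized one), since calculate_cost
    # looks up prompt_tokens / completion_tokens / reasoning_tokens directly.
    # The prices below are made-up values for illustration.
    print(calculate_cost(usage, input_cost_per_token=5e-7, output_cost_per_token=1.5e-6))
    # {'input': 6e-05, 'completion': 6.75e-05, 'reasoning': 0.0, 'total': 0.0001275}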
ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py
@@ -0,0 +1,342 @@
+# import os
+# import hashlib
+# import zipfile
+# import re
+# import ast
+# import importlib.util
+# import json
+# from pathlib import Path
+
+# class TraceDependencyTracker:
+#     def __init__(self, output_dir=None):
+#         self.tracked_files = set()
+#         self.python_imports = set()
+#         self.output_dir = output_dir or os.getcwd()
+
+#     def track_file_access(self, filepath):
+#         """Track a file that's been accessed."""
+#         if os.path.exists(filepath):
+#             self.tracked_files.add(os.path.abspath(filepath))
+
+#     def find_config_files(self, content, base_path):
+#         """Find configuration files referenced in the content."""
+#         patterns = [
+#             r'(?:open|read|load|with\s+open)\s*\([\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+#             r'(?:config|cfg|conf|settings|file|path)(?:_file|_path)?\s*=\s*[\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+#             r'[\'"]([^\'"]*\.txt)[\'"]',
+#             r'[\'"]([^\'"]*\.(?:yaml|yml))[\'"]',
+#             r'from\s+(\S+)\s+import',
+#             r'import\s+(\S+)'
+#         ]
+
+#         for pattern in patterns:
+#             matches = re.finditer(pattern, content)
+#             for match in matches:
+#                 filepath = match.group(1)
+#                 if not os.path.isabs(filepath):
+#                     full_path = os.path.join(os.path.dirname(base_path), filepath)
+#                 else:
+#                     full_path = filepath
+
+#                 if os.path.exists(full_path):
+#                     self.track_file_access(full_path)
+#                     try:
+#                         with open(full_path, 'r', encoding='utf-8') as f:
+#                             self.find_config_files(f.read(), full_path)
+#                     except (UnicodeDecodeError, IOError):
+#                         pass
+
+#     def analyze_python_imports(self, filepath):
+#         """Analyze Python file for imports and track imported files."""
+#         try:
+#             with open(filepath, 'r', encoding='utf-8') as file:
+#                 tree = ast.parse(file.read(), filename=filepath)
+
+#             for node in ast.walk(tree):
+#                 if isinstance(node, (ast.Import, ast.ImportFrom)):
+#                     if isinstance(node, ast.ImportFrom) and node.module:
+#                         module_name = node.module
+#                     else:
+#                         for name in node.names:
+#                             module_name = name.name.split('.')[0]
+
+#                     try:
+#                         spec = importlib.util.find_spec(module_name)
+#                         if spec and spec.origin and not spec.origin.startswith(os.path.dirname(importlib.__file__)):
+#                             self.python_imports.add(spec.origin)
+#                     except (ImportError, AttributeError):
+#                         pass
+#         except Exception as e:
+#             print(f"Warning: Could not analyze imports in {filepath}: {str(e)}")
+
+#     def create_zip(self, filepaths):
+#         """
+#         Process files and create a single zip with all dependencies.
+
+#         Args:
+#             filepaths (list): List of file paths to process.
+
+#         Returns:
+#             tuple: A tuple containing the hash ID (str) and the path to the saved .zip file (str).
+#         """
+#         # Process all files and their dependencies
+#         for filepath in filepaths:
+#             abs_path = os.path.abspath(filepath)
+#             self.track_file_access(abs_path)
+
+#             try:
+#                 with open(abs_path, 'r', encoding='utf-8') as file:
+#                     content = file.read()
+
+#                 self.find_config_files(content, abs_path)
+
+#                 if filepath.endswith('.py'):
+#                     self.analyze_python_imports(abs_path)
+#             except Exception as e:
+#                 print(f"Warning: Could not process {filepath}: {str(e)}")
+
+#         # Add Python imports to tracked files
+#         self.tracked_files.update(self.python_imports)
+
+#         # Generate hash from all files
+#         hash_contents = []
+#         for filepath in sorted(self.tracked_files):
+#             # Skip any file paths that contain 'env'
+#             if 'env' in filepath:
+#                 continue  # Skip env folder
+#             try:
+#                 with open(filepath, 'rb') as file:
+#                     content = file.read()
+#                     hash_contents.append(content)
+#             except Exception as e:
+#                 print(f"Warning: Could not read {filepath} for hash calculation: {str(e)}")
+
+#         combined_content = b''.join(hash_contents)
+#         hash_id = hashlib.sha256(combined_content).hexdigest()
+
+#         # Create zip file
+#         zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
+
+#         # Determine base path excluding 'env' folders
+#         base_path = os.path.commonpath([os.path.abspath(p) for p in self.tracked_files if 'env' not in p])
+
+#         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+#             for filepath in sorted(self.tracked_files):
+#                 # Skip any file paths that contain 'env'
+#                 if 'env' in filepath:
+#                     continue  # Skip env folder
+#                 try:
+#                     relative_path = os.path.relpath(filepath, base_path)
+#                     zipf.write(filepath, relative_path)
+#                     print(f"Added to zip: {relative_path}")
+#                 except Exception as e:
+#                     print(f"Warning: Could not add {filepath} to zip: {str(e)}")
+
+#         return hash_id, zip_filename
+
+# def zip_list_of_unique_files(filepaths):
+#     """
+#     Enhanced version of the original function that tracks all dependencies.
+
+#     Args:
+#         filepaths (list): List of file paths to process.
+
+#     Returns:
+#         tuple: A tuple containing the hash ID (str) and the path to the saved .zip file (str).
+#     """
+#     tracker = TraceDependencyTracker()
+#     return tracker.create_zip(filepaths)
+
+# if __name__ == "__main__":
+#     filepaths = ["script1.py", "script2.py"]
+#     hash_id, zip_path = zip_list_of_unique_files(filepaths)
+#     print(f"Created zip file: {zip_path}")
+#     print(f"Hash ID: {hash_id}")
+
+
+
+
+import os
+import hashlib
+import zipfile
+import re
+import ast
+import importlib.util
+import json
+import astor
+from pathlib import Path
+
+# Define the PackageUsageRemover class
+class PackageUsageRemover(ast.NodeTransformer):
+    def __init__(self, package_name):
+        self.package_name = package_name
+        self.imported_names = set()
+
+    def visit_Import(self, node):
+        filtered_names = []
+        for name in node.names:
+            if not name.name.startswith(self.package_name):
+                filtered_names.append(name)
+            else:
+                self.imported_names.add(name.asname or name.name)
+
+        if not filtered_names:
+            return None
+        node.names = filtered_names
+        return node
+
+    def visit_ImportFrom(self, node):
+        if node.module and node.module.startswith(self.package_name):
+            self.imported_names.update(n.asname or n.name for n in node.names)
+            return None
+        return node
+
+    def visit_Assign(self, node):
+        if self._uses_package(node.value):
+            return None
+        return node
+
+    def visit_Call(self, node):
+        if isinstance(node.func, ast.Name) and node.func.id in self.imported_names:
+            return None
+        if isinstance(node.func, ast.Attribute):
+            if isinstance(node.func.value, ast.Name) and node.func.value.id in self.imported_names:
+                return None
+        return node
+
+    def _uses_package(self, node):
+        if isinstance(node, ast.Name) and node.id in self.imported_names:
+            return True
+        if isinstance(node, ast.Call):
+            return self._uses_package(node.func)
+        if isinstance(node, ast.Attribute):
+            return self._uses_package(node.value)
+        return False
+
+# Define the function to remove package code from a source code string
+def remove_package_code(source_code: str, package_name: str) -> str:
+    try:
+        tree = ast.parse(source_code)
+        transformer = PackageUsageRemover(package_name)
+        modified_tree = transformer.visit(tree)
+        modified_code = astor.to_source(modified_tree)
+        return modified_code
+    except Exception as e:
+        raise Exception(f"Error processing source code: {str(e)}")
+
+# TraceDependencyTracker class
+class TraceDependencyTracker:
+    def __init__(self, output_dir=None):
+        self.tracked_files = set()
+        self.python_imports = set()
+        self.output_dir = output_dir or os.getcwd()
+
+    def track_file_access(self, filepath):
+        if os.path.exists(filepath):
+            self.tracked_files.add(os.path.abspath(filepath))
+
+    def find_config_files(self, content, base_path):
+        patterns = [
+            r'(?:open|read|load|with\s+open)\s*\([\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+            r'(?:config|cfg|conf|settings|file|path)(?:_file|_path)?\s*=\s*[\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+            r'[\'"]([^\'"]*\.txt)[\'"]',
+            r'[\'"]([^\'"]*\.(?:yaml|yml))[\'"]',
+            r'from\s+(\S+)\s+import',
+            r'import\s+(\S+)'
+        ]
+        for pattern in patterns:
+            matches = re.finditer(pattern, content)
+            for match in matches:
+                filepath = match.group(1)
+                if not os.path.isabs(filepath):
+                    full_path = os.path.join(os.path.dirname(base_path), filepath)
+                else:
+                    full_path = filepath
+                if os.path.exists(full_path):
+                    self.track_file_access(full_path)
+                    try:
+                        with open(full_path, 'r', encoding='utf-8') as f:
+                            self.find_config_files(f.read(), full_path)
+                    except (UnicodeDecodeError, IOError):
+                        pass
+
+    def analyze_python_imports(self, filepath):
+        try:
+            with open(filepath, 'r', encoding='utf-8') as file:
+                tree = ast.parse(file.read(), filename=filepath)
+            for node in ast.walk(tree):
+                if isinstance(node, (ast.Import, ast.ImportFrom)):
+                    if isinstance(node, ast.ImportFrom) and node.module:
+                        module_name = node.module
+                    else:
+                        for name in node.names:
+                            module_name = name.name.split('.')[0]
+                    try:
+                        spec = importlib.util.find_spec(module_name)
+                        if spec and spec.origin and not spec.origin.startswith(os.path.dirname(importlib.__file__)):
+                            self.python_imports.add(spec.origin)
+                    except (ImportError, AttributeError):
+                        pass
+        except Exception as e:
+            print(f"Warning: Could not analyze imports in {filepath}: {str(e)}")
+
+    def create_zip(self, filepaths):
+        for filepath in filepaths:
+            abs_path = os.path.abspath(filepath)
+            self.track_file_access(abs_path)
+            try:
+                with open(abs_path, 'r', encoding='utf-8') as file:
+                    content = file.read()
+                self.find_config_files(content, abs_path)
+                if filepath.endswith('.py'):
+                    self.analyze_python_imports(abs_path)
+            except Exception as e:
+                print(f"Warning: Could not process {filepath}: {str(e)}")
+
+        self.tracked_files.update(self.python_imports)
+        hash_contents = []
+        for filepath in sorted(self.tracked_files):
+            if 'env' in filepath:
+                continue
+            try:
+                with open(filepath, 'rb') as file:
+                    content = file.read()
+                    if filepath.endswith('.py'):
+                        # Temporarily remove raga_catalyst code for hash calculation
+                        content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
+                    hash_contents.append(content)
+            except Exception as e:
+                print(f"Warning: Could not read {filepath} for hash calculation: {str(e)}")
+
+        combined_content = b''.join(hash_contents)
+        hash_id = hashlib.sha256(combined_content).hexdigest()
+
+        zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
+        common_path = [os.path.abspath(p) for p in self.tracked_files if 'env' not in p]
+
+        if common_path!=[]:
+            base_path = os.path.commonpath(common_path)
+        with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            for filepath in sorted(self.tracked_files):
+                if 'env' in filepath:
+                    continue
+                try:
+                    relative_path = os.path.relpath(filepath, base_path)
+                    zipf.write(filepath, relative_path)
+                    print(f"Added to zip: {relative_path}")
+                except Exception as e:
+                    print(f"Warning: Could not add {filepath} to zip: {str(e)}")
+
+        return hash_id, zip_filename
+
+# Main function for creating a zip of unique files
+def zip_list_of_unique_files(filepaths):
+    tracker = TraceDependencyTracker()
+    return tracker.create_zip(filepaths)
+
+# Example usage
+if __name__ == "__main__":
+    filepaths = ["script1.py", "script2.py"]
+    hash_id, zip_path = zip_list_of_unique_files(filepaths)
+    print(f"Created zip file: {zip_path}")
+    print(f"Hash ID: {hash_id}")
ragaai_catalyst/tracers/exporters/raga_exporter.py
@@ -7,7 +7,6 @@ from tqdm import tqdm
 import requests
 from ...ragaai_catalyst import RagaAICatalyst
 import shutil
-import pdb
 
 logger = logging.getLogger(__name__)
 
@@ -196,7 +195,6 @@ class RagaExporter:
         return status_code
 
     async def get_presigned_url(self, session, num_files):
-        # pdb.set_trace()
         """
         Asynchronously retrieves a presigned URL from the RagaExporter API.
 
@@ -213,7 +211,6 @@ class RagaExporter:
         """
 
         async def make_request():
-            # pdb.set_trace()
 
            json_data = {
                "datasetName": self.dataset_name,
@@ -296,8 +293,7 @@ class RagaExporter:
         return response.status
 
     async def upload_file(self, session, url, file_path):
-
-        # print('url', url)
+
         """
         Asynchronously uploads a file using the given session, url, and file path.
         Supports both regular and Azure blob storage URLs.
@@ -345,8 +341,6 @@ class RagaExporter:
         return response.status
 
     async def check_and_upload_files(self, session, file_paths):
-        # print(file_paths)
-        # pdb.set_trace()
         """
         Checks if there are files to upload, gets presigned URLs, uploads files, and streams them if successful.
 
ragaai_catalyst/tracers/tracer.py
@@ -19,11 +19,14 @@ from .instrumentators import (
 from .utils import get_unique_key
 # from .llamaindex_callback import LlamaIndexTracer
 from ..ragaai_catalyst import RagaAICatalyst
+from .agentic_tracing.agentic_tracing import AgenticTracing
+from .agentic_tracing.file_name_tracker import TrackName
+from .agentic_tracing.llm_tracer import LLMTracerMixin
 
 logger = logging.getLogger(__name__)
 
 
-class Tracer:
+class Tracer(AgenticTracing):
     NUM_PROJECTS = 100
     TIMEOUT = 10
     def __init__(
@@ -41,6 +44,7 @@ class Tracer:
 
         Args:
             project_name (str): The name of the project.
+            dataset_name (str): The name of the dataset.
             tracer_type (str, optional): The type of tracer. Defaults to None.
             pipeline (dict, optional): The pipeline configuration. Defaults to None.
             metadata (dict, optional): The metadata. Defaults to None.
@@ -50,16 +54,28 @@ class Tracer:
         Returns:
             None
         """
+        # Set auto_instrument_llm to True to enable automatic LLM tracing
+        user_detail = {
+            "project_name": project_name,
+            "project_id": None,  # Will be set after project validation
+            "dataset_name": dataset_name,
+            "trace_user_detail": {"metadata": metadata} if metadata else {}
+        }
+        super().__init__(user_detail=user_detail, auto_instrument_llm=True)
+        self.is_active = True
         self.project_name = project_name
         self.dataset_name = dataset_name
         self.tracer_type = tracer_type
         self.metadata = self._improve_metadata(metadata, tracer_type)
+        # self.metadata["total_cost"] = 0.0
+        # self.metadata["total_tokens"] = 0
         self.pipeline = pipeline
         self.description = description
         self.upload_timeout = upload_timeout
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
         self.timeout = 10
         self.num_projects = 100
+        self.start_time = datetime.datetime.now(datetime.timezone.utc)
 
         try:
             response = requests.get(
@@ -81,6 +97,9 @@ class Tracer:
             self.project_id = [
                 project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
             ][0]
+            # super().__init__(user_detail=self._pass_user_data())
+            # self.file_tracker = TrackName()
+            self._pass_user_data()
 
         except requests.exceptions.RequestException as e:
             logger.error(f"Failed to retrieve projects list: {e}")
@@ -98,7 +117,9 @@ class Tracer:
             from .llamaindex_callback import LlamaIndexTracer
 
         else:
-
+            self._upload_task = None
+            # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+
 
     def _improve_metadata(self, metadata, tracer_type):
         if metadata is None:
@@ -157,7 +178,9 @@ class Tracer:
         elif self.tracer_type == "llamaindex":
             from .llamaindex_callback import LlamaIndexTracer
             return LlamaIndexTracer(self._pass_user_data()).start()
-
+        else:
+            super().start()
+            return self
 
     def stop(self):
         """Stop the tracer and initiate trace upload."""
@@ -172,7 +195,9 @@ class Tracer:
             return "Trace upload initiated. Use get_upload_status() to check the status."
         elif self.tracer_type == "llamaindex":
             from .llamaindex_callback import LlamaIndexTracer
-            return LlamaIndexTracer().stop()
+            return LlamaIndexTracer(self._pass_user_data()).stop()
+        else:
+            super().stop()
 
     def get_upload_status(self):
         """Check the status of the trace upload."""
@@ -262,6 +287,7 @@ class Tracer:
         # Reset instrumentation flag
         self.is_instrumented = False
         # Note: We're not resetting all attributes here to allow for upload status checking
+
     def _pass_user_data(self):
         return {"project_name":self.project_name,
                 "project_id": self.project_id,
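Taken together, these changes make Tracer subclass AgenticTracing and route any tracer_type other than "langchain" or "llamaindex" to the new agentic tracing stack instead of doing nothing. A rough sketch of the intended call pattern follows; the names and credentials are placeholders, and the RagaAICatalyst authentication step is assumed from the package README rather than shown in this diff:

    from ragaai_catalyst import RagaAICatalyst, Tracer

    # Placeholder credentials; authentication itself is unchanged in this release.
    RagaAICatalyst(access_key="<ACCESS_KEY>", secret_key="<SECRET_KEY>")

    tracer = Tracer(
        project_name="my_project",
        dataset_name="my_dataset",   # now documented and forwarded to AgenticTracing via user_detail
        tracer_type=None,            # anything other than "langchain"/"llamaindex" takes the agentic path
    )

    tracer.start()   # for the default tracer_type this delegates to AgenticTracing.start()
    # ... traced application code ...
    tracer.stop()    # likewise delegates to AgenticTracing.stop()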
ragaai_catalyst/tracers/upload_traces.py
@@ -0,0 +1,127 @@
+import requests
+import json
+import os
+from datetime import datetime
+
+
+class UploadTraces:
+    def __init__(self,
+                 json_file_path,
+                 project_name,
+                 project_id,
+                 dataset_name,
+                 user_detail,
+                 base_url):
+        self.json_file_path = json_file_path
+        self.project_name = project_name
+        self.project_id = project_id
+        self.dataset_name = dataset_name
+        self.user_detail = user_detail
+        self.base_url = base_url
+        self.timeout = 10
+
+    def _create_dataset_schema_with_trace(self):
+        SCHEMA_MAPPING_NEW = {
+            "trace_id": {"columnType": "traceId"},
+            "trace_uri": {"columnType": "traceUri"},
+            "prompt": {"columnType": "prompt"},
+            "response":{"columnType": "response"},
+            "context": {"columnType": "context"},
+            "llm_model": {"columnType":"pipeline"},
+            "recorded_on": {"columnType": "metadata"},
+            "embed_model": {"columnType":"pipeline"},
+            "log_source": {"columnType": "metadata"},
+            "vector_store":{"columnType":"pipeline"},
+            "feedback": {"columnType":"feedBack"}
+        }
+        def make_request():
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Name": self.project_name,
+            }
+            payload = json.dumps({
+                "datasetName": self.dataset_name,
+                "schemaMapping": SCHEMA_MAPPING_NEW,
+                "traceFolderUrl": None,
+            })
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/dataset/logs",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout
+                                        )
+
+            return response
+
+        response = make_request()
+
+        if response.status_code == 401:
+            # get_token()  # Fetch a new token and set it in the environment
+            response = make_request()  # Retry the request
+        if response.status_code != 200:
+            return response.status_code
+        return response.status_code
+
+    def _get_presigned_url(self):
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "numFiles": 1,
+        })
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": self.project_name,
+        }
+
+        response = requests.request("GET",
+                                    f"{self.base_url}/v1/llm/presigned-url",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+        if response.status_code == 200:
+            presignedUrls = response.json()["data"]["presignedUrls"][0]
+            return presignedUrls
+
+    def _put_presigned_url(self, presignedUrl, filename):
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        if "blob.core.windows.net" in presignedUrl:  # Azure
+            headers["x-ms-blob-type"] = "BlockBlob"
+        print(f"Uploading traces...")
+        with open(filename) as f:
+            payload = f.read().replace("\n", "").replace("\r", "").encode()
+
+
+        response = requests.request("PUT",
+                                    presignedUrl,
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+        if response.status_code != 200 or response.status_code != 201:
+            return response, response.status_code
+
+    def _insert_traces(self, presignedUrl):
+        headers = {
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "Content-Type": "application/json",
+            "X-Project-Name": self.project_name,
+        }
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "presignedUrl": presignedUrl,
+        })
+        response = requests.request("POST",
+                                    f"{self.base_url}/v1/llm/insert/trace",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+
+    def upload_traces(self):
+        self._create_dataset_schema_with_trace()
+        presignedUrl = self._get_presigned_url()
+        self._put_presigned_url(presignedUrl, self.json_file_path)
+        self._insert_traces(presignedUrl)
+        print("Traces uplaoded")