ragaai-catalyst 1.0.8.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- ragaai_catalyst/__init__.py +2 -1
- ragaai_catalyst/dataset.py +186 -126
- ragaai_catalyst/evaluation.py +369 -0
- ragaai_catalyst/experiment.py +1 -1
- ragaai_catalyst/prompt_manager.py +112 -54
- ragaai_catalyst/ragaai_catalyst.py +45 -20
- ragaai_catalyst/tracers/exporters/file_span_exporter.py +3 -2
- ragaai_catalyst/tracers/exporters/raga_exporter.py +50 -27
- ragaai_catalyst/tracers/tracer.py +33 -26
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/METADATA +3 -4
- ragaai_catalyst-2.0.1.dist-info/RECORD +23 -0
- ragaai_catalyst-1.0.8.2.dist-info/RECORD +0 -22
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/__init__.py
CHANGED
```diff
@@ -4,5 +4,6 @@ from .tracers import Tracer
 from .utils import response_checker
 from .dataset import Dataset
 from .prompt_manager import PromptManager
+from .evaluation import Evaluation
 
-__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager"]
+__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation"]
```
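For readers skimming the diff: the only user-visible change here is that `Evaluation` is now exported at the top level, backed by the new `ragaai_catalyst/evaluation.py` (+369 lines). A minimal sketch; the `Evaluation` constructor arguments are not visible in this diff, so the commented call is hypothetical:

```python
# New top-level export in 2.0.1.
from ragaai_catalyst import Evaluation

# Hypothetical construction -- the Evaluation signature is not shown in this diff:
# evaluation = Evaluation(project_name="my-project", dataset_name="my-dataset")
```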
ragaai_catalyst/dataset.py
CHANGED
```diff
@@ -16,11 +16,38 @@ class Dataset:
 
     def __init__(self, project_name):
         self.project_name = project_name
+        self.num_projects = 100
         Dataset.BASE_URL = (
             os.getenv("RAGAAI_CATALYST_BASE_URL")
             if os.getenv("RAGAAI_CATALYST_BASE_URL")
-            else "https://
+            else "https://catalyst.raga.ai/api"
         )
+        headers = {
+            "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
+        }
+        try:
+            response = requests.get(
+                f"{Dataset.BASE_URL}/v2/llm/projects?size={self.num_projects}",
+                headers=headers,
+                timeout=self.TIMEOUT,
+            )
+            response.raise_for_status()
+            logger.debug("Projects list retrieved successfully")
+
+            project_list = [
+                project["name"] for project in response.json()["data"]["content"]
+            ]
+
+            if project_name not in project_list:
+                raise ValueError("Project not found. Please enter a valid project name")
+
+            self.project_id = [
+                project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
+            ][0]
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Failed to retrieve projects list: {e}")
+            raise
 
     def list_datasets(self):
         """
```
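The practical effect of the new `__init__` is that constructing a `Dataset` now makes a network call: it fetches up to `num_projects` (100) projects from `GET /v2/llm/projects`, validates `project_name`, and caches `project_id` for the `X-Project-Id` headers used by the other methods. A minimal sketch of the new construction path, assuming a valid token in the environment:

```python
import os
from ragaai_catalyst import Dataset

# __init__ reads both variables; BASE_URL falls back to
# "https://catalyst.raga.ai/api" when RAGAAI_CATALYST_BASE_URL is unset.
os.environ["RAGAAI_CATALYST_TOKEN"] = "<your-token>"

# Raises ValueError("Project not found...") if the name is not among the
# first 100 projects returned by GET /v2/llm/projects.
dataset = Dataset(project_name="my-project")
print(dataset.project_id)  # resolved once here, reused as X-Project-Id later
```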
```diff
@@ -35,34 +62,44 @@ class Dataset:
 
         def make_request():
             headers = {
-
-                "
-                "X-Project-
-            }
-            params = {
-                "projectName": self.project_name,
-            }
-            response = requests.get(
-                f"{Dataset.BASE_URL}/v1/llm/sub-datasets",
-                headers=headers,
-                params=params,
-                timeout=Dataset.TIMEOUT,
-            )
-            return response
-
-        response = make_request()
-        response_checker(response, "Dataset.list_datasets")
-        if response.status_code == 401:
-            get_token()  # Fetch a new token and set it in the environment
-            response = make_request()  # Retry the request
-        if response.status_code != 200:
-            return {
-                "status_code": response.status_code,
-                "message": response.json(),
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
             }
-
-
-
+            json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to list datasets: {e}")
+                raise
+
+        try:
+            response = make_request()
+            response_checker(response, "Dataset.list_datasets")
+            if response.status_code == 401:
+                get_token()  # Fetch a new token and set it in the environment
+                response = make_request()  # Retry the request
+            if response.status_code != 200:
+                return {
+                    "status_code": response.status_code,
+                    "message": response.json(),
+                }
+            datasets = response.json()["data"]["content"]
+            dataset_list = [dataset["name"] for dataset in datasets]
+            return dataset_list
+        except Exception as e:
+            logger.error(f"Error in list_datasets: {e}")
+            raise
+
+    def get_schema_mapping(self):
+        return ["traceid", "prompt", "context", "response", "expected_response", "expected_context", "timestamp", "metadata", "pipeline", "cost", "feedBack", "latency", "sanitized_response", "system_prompt", "traceUri"]
 
     def create_from_trace(self, dataset_name, filter_list):
         """
```
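`list_datasets` now returns a plain list of dataset names, and the new `get_schema_mapping` exposes the column types accepted as values in `create_from_csv`'s `schema_mapping`. A short sketch, continuing from the `dataset` object above:

```python
# POST /v2/llm/dataset with {"size": 12, "page": "0", ...}: the hard-coded
# page size appears to cap the listing at the first 12 datasets.
names = dataset.list_datasets()
print(names)  # e.g. ["my-first-dataset", ...]

# Static list of valid schema elements ("prompt", "context", "response", ...)
print(dataset.get_schema_mapping())
```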
```diff
@@ -91,85 +128,88 @@ class Dataset:
             "subDatasetName": dataset_name,
             "filterList": filter_list,
         }
-
-
-
-
-
-
-
-
-
-
-
-
-            response = request_trace_creation()  # Retry the request
-            if response.status_code != 200:
-                return response.json()["message"]
-            message = response.json()["message"]
-            return f"{message} {dataset_name}"
-
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v1/llm/sub-dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to create dataset from trace: {e}")
+                raise
 
+        try:
+            response = request_trace_creation()
+            response_checker(response, "Dataset.create_dataset")
+            if response.status_code == 401:
+                get_token()  # Fetch a new token and set it in the environment
+                response = request_trace_creation()  # Retry the request
+            if response.status_code != 200:
+                return response.json()["message"]
+            message = response.json()["message"]
+            return f"{message} {dataset_name}"
+        except Exception as e:
+            logger.error(f"Error in create_from_trace: {e}")
+            raise
 
-    ###################### CSV Upload APIs ###################
+    ###################### CSV Upload APIs ###################
 
     def get_csv_schema(self):
         headers = {
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
             "X-Project-Name": self.project_name,
         }
-
+        try:
+            response = requests.get(
                 f"{Dataset.BASE_URL}/v1/llm/schema-elements",
                 headers=headers,
                 timeout=Dataset.TIMEOUT,
             )
-
-
-
-
-
-
-
-
+            response.raise_for_status()
+            response_data = response.json()
+            if not response_data['success']:
+                raise ValueError('Unable to fetch Schema Elements for the CSV')
+            return response_data
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Failed to get CSV schema: {e}")
+            raise
 
     def create_from_csv(self, csv_path, dataset_name, schema_mapping):
-
-
-
-        keys = list(df.columns)
-        values = self.get_csv_schema()['data']['schemaElements']
-        print(type(values), values)
-        for k in schema_mapping.keys():
-            if k not in keys:
-                raise ValueError(f'--{k}-- column is not present in csv column but present in schema_mapping. Plase provide the right schema_mapping.')
-        for k in schema_mapping.values():
-            if k not in values:
-                raise ValueError(f'--{k}-- is not present in the schema_elements but present in schema_mapping. Plase provide the right schema_mapping.')
-
+        list_dataset = self.list_datasets()
+        if dataset_name in list_dataset:
+            raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")
 
         #### get presigned URL
         def get_presignedUrl():
             headers = {
                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-                "X-Project-
+                "X-Project-Id": str(self.project_id),
             }
-
-
-
-
-
-
-
-
-
-
-
-            print('-- PresignedUrl fetched Succussfuly --')
-            print('filename: ', filename)
-        else:
-            raise ValueError('Unable to fetch presignedUrl')
-
+            try:
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get presigned URL: {e}")
+                raise
 
+        try:
+            presignedUrl = get_presignedUrl()
+            if presignedUrl['success']:
+                url = presignedUrl['data']['presignedUrl']
+                filename = presignedUrl['data']['fileName']
+            else:
+                raise ValueError('Unable to fetch presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in get_presignedUrl: {e}")
+            raise
 
         #### put csv to presigned URL
         def put_csv_to_presignedUrl(url):
```
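`create_from_trace` keeps its retry-on-401 shape but now wraps both the request and the surrounding flow in try/except blocks that log and re-raise. The expected structure of `filter_list` is not visible in this diff, so the filter below is purely hypothetical:

```python
# Hypothetical filter_list -- its expected shape is not shown in this diff.
result = dataset.create_from_trace(
    dataset_name="traces-subset",
    filter_list=[{"name": "prompt", "value": "weather"}],  # assumed shape
)
print(result)  # "<server message> traces-subset" on success
```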
```diff
@@ -177,51 +217,71 @@ class Dataset:
                 'Content-Type': 'text/csv',
                 'x-ms-blob-type': 'BlockBlob',
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
-                raise ValueError('Unable to put csv to the presignedUrl')
-            else:
-                print('-- csv put to presignedUrl Succussfuly --')
-
+            try:
+                with open(csv_path, 'rb') as file:
+                    response = requests.put(
+                        url,
+                        headers=headers,
+                        data=file,
+                        timeout=Dataset.TIMEOUT,
+                    )
+                    response.raise_for_status()
+                    return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to put CSV to presigned URL: {e}")
+                raise
 
+        try:
+            put_csv_response = put_csv_to_presignedUrl(url)
+            if put_csv_response.status_code != 200:
+                raise ValueError('Unable to put csv to the presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in put_csv_to_presignedUrl: {e}")
+            raise
 
         ## Upload csv to elastic
         def upload_csv_to_elastic(data):
             header = {
+                'Content-Type': 'application/json',
                 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-
+                "X-Project-Id": str(self.project_id)
             }
-
-
-
-
-
-
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
+                    headers=header,
+                    json=data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                if response.status_code==400:
+                    raise ValueError(response.json()["message"])
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to upload CSV to elastic: {e}")
+                raise
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        def generate_schema(mapping):
+            result = {}
+            for column, schema_element in mapping.items():
+                result[column] = {"columnType": schema_element}
+            return result
+
+        try:
+            schema_mapping = generate_schema(schema_mapping)
+            data = {
+                "projectId": str(self.project_id),
+                "datasetName": dataset_name,
+                "fileName": filename,
+                "schemaMapping": schema_mapping,
+                "opType": "insert",
+                "description": ""
+            }
+            upload_csv_response = upload_csv_to_elastic(data)
+            if not upload_csv_response['success']:
+                raise ValueError('Unable to upload csv')
+            else:
+                print(upload_csv_response['message'])
+        except Exception as e:
+            logger.error(f"Error in create_from_csv: {e}")
+            raise
```