PyPI - ragaai-catalyst - Versions diffs - 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl - Mend

ragaai-catalyst 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

ragaai_catalyst/__init__.py CHANGED Viewed

@@ -5,5 +5,7 @@ from .utils import response_checker
 from .dataset import Dataset
 from .prompt_manager import PromptManager
 from .evaluation import Evaluation
+from .synthetic_data_generation import SyntheticDataGeneration
-__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation"]
+__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation","SyntheticDataGeneration"]

ragaai_catalyst/evaluation.py CHANGED Viewed

@@ -80,7 +80,8 @@ class Evaluation:
         try:
             response = requests.get(
                 f'{self.base_url}/v1/llm/llm-metrics',
-                headers=headers)
+                headers=headers,
+                timeout=self.timeout)
             response.raise_for_status()
             metric_names = [metric["name"] for metric in response.json()["data"]["metrics"]]
             return metric_names
@@ -111,7 +112,8 @@ class Evaluation:
             response = requests.post(
                 f'{self.base_url}/v1/llm/docs',
                 headers=headers,
-                json=data)
+                json=data,
+                timeout=self.timeout)
             response.raise_for_status()
             if response.status_code == 200:
                 return response.json()["data"]["columns"]
@@ -128,27 +130,52 @@ class Evaluation:
         return {}
     def _get_variablename_from_dataset_schema(self, schemaName, metric_name):
+        # pdb.set_trace()
+        # print(schemaName)
         dataset_schema = self._get_dataset_schema()
         variableName = None
         for column in dataset_schema:
-            columnName = column["columnName"].split('_')[0]
+            columnName = column["columnType"]
             displayName = column["displayName"]
-            if columnName==schemaName.lower():
+            # print(columnName, displayName)
+            if "".join(columnName.split("_")).lower() == schemaName.lower():
                 variableName = displayName
+                break
+        return variableName
+        # print(variableName)
+        # if variableName:
+        #     return variableName
+        # else:
+        #     raise ValueError(f"'{schemaName}' column is required for {metric_name} metric evaluation, but not found in dataset")
+    def _get_variablename_from_user_schema_mapping(self, schemaName, metric_name, schema_mapping):
+        # pdb.set_trace()
+        user_dataset_schema = self._get_dataset_schema()
+        user_dataset_columns = [item["displayName"] for item in user_dataset_schema]
+        variableName = None
+        for key, val in schema_mapping.items():
+            if "".join(val.split("_")).lower()==schemaName:
+                if key in user_dataset_columns:
+                    variableName=key
+                else:
+                    raise ValueError(f"Column '{key}' is not present in {self.dataset_name}")
         if variableName:
             return variableName
         else:
-            raise ValueError(f"'{schemaName.lower()}' column is required for {metric_name} metric evaluation, but not found in dataset")
+            raise ValueError(f"Map '{schemaName}' column in schema_mapping for {metric_name} metric evaluation")
-    def _get_mapping(self, metric_name, metrics_schema):
+    def _get_mapping(self, metric_name, metrics_schema, schema_mapping):
         mapping = []
         for schema in metrics_schema:
             if schema["name"]==metric_name:
                 requiredFields = schema["config"]["requiredFields"]
                 for field in requiredFields:
                     schemaName = field["name"]
-                    variableName = self._get_variablename_from_dataset_schema(schemaName, metric_name)
+                    # variableName = self._get_variablename_from_dataset_schema(schemaName, metric_name)
+                    variableName = self._get_variablename_from_user_schema_mapping(schemaName.lower(), metric_name, schema_mapping)
                     mapping.append({"schemaName": schemaName, "variableName": variableName})
         return mapping
@@ -160,7 +187,7 @@ class Evaluation:
                         "model": "null",
                         "params": {
                             "model": {
-                                "value": "gpt-4o-mini"
+                                "value": ""
                             }
                         },
                         "mappings": "mappings"
@@ -178,7 +205,8 @@ class Evaluation:
         try:
             response = requests.get(
                 f'{self.base_url}/v1/llm/llm-metrics',
-                headers=headers)
+                headers=headers,
+                timeout=self.timeout)
             response.raise_for_status()
             metrics_schema = [metric for metric in response.json()["data"]["metrics"]]
             return metrics_schema
@@ -208,14 +236,21 @@ class Evaluation:
                 #checking if provider is one of the allowed providers
                 if key.lower()=="provider" and value.lower() not in sub_providers:
                     raise ValueError("Enter a valid provider name. The following Provider names are supported: OpenAI, Azure, Gemini, Groq")
-                base_json["metricSpec"]["config"]["params"][key] = {"value": value}
+                if key.lower()=="threshold":
+                    if len(value)>1:
+                        raise ValueError("'threshold' can only take one argument gte/lte/eq")
+                    else:
+                        for key_thres, value_thres in value.items():
+                            base_json["metricSpec"]["config"]["params"][key] = {f"{key_thres}":value_thres}
+                else:
+                    base_json["metricSpec"]["config"]["params"][key] = {"value": value}
             # if metric["config"]["model"]:
             #     base_json["metricSpec"]["config"]["params"]["model"]["value"] = metric["config"]["model"]
             base_json["metricSpec"]["displayName"] = metric["column_name"]
-            mappings = self._get_mapping(metric["name"], metrics_schema_response)
+            mappings = self._get_mapping(metric["name"], metrics_schema_response, metric["schema_mapping"])
             base_json["metricSpec"]["config"]["mappings"] = mappings
             metricParams.append(base_json)
         metric_schema_mapping["metricParams"] = metricParams
@@ -228,12 +263,15 @@ class Evaluation:
         }
         try:
             response = requests.get(
-                f'{self.base_url}/v1/llm/filter?datasetId={str(self.dataset_id)}',
-                headers=headers
-                )
+                f"{self.base_url}/v2/llm/dataset/{str(self.dataset_id)}?initialCols=0",
+                headers=headers,
+                timeout=self.timeout,
+            )
             response.raise_for_status()
-            executed_metric_response = response.json()["data"]["filter"]
-            executed_metric_list = [item["displayName"] for item in executed_metric_response]
+            dataset_columns = response.json()["data"]["datasetColumnsResponses"]
+            dataset_columns = [item["displayName"] for item in dataset_columns]
+            executed_metric_list = [data for data in dataset_columns if not data.startswith('_')]
             return executed_metric_list
         except requests.exceptions.HTTPError as http_err:
             logger.error(f"HTTP error occurred: {http_err}")
@@ -248,6 +286,13 @@ class Evaluation:
             return []
     def add_metrics(self, metrics):
+        #Handle required key if missing
+        required_keys = {"name", "config", "column_name", "schema_mapping"}
+        for metric in metrics:
+            missing_keys = required_keys - metric.keys()
+            if missing_keys:
+                raise ValueError(f"{missing_keys} required for each metric evaluation.")
         executed_metric_list = self._get_executed_metrics_list()
         metrics_name = self.list_metrics()
         user_metric_names = [metric["name"] for metric in metrics]
@@ -265,12 +310,12 @@ class Evaluation:
             'X-Project-Id': str(self.project_id),
         }
         metric_schema_mapping = self._update_base_json(metrics)
-        print(metric_schema_mapping)
         try:
             response = requests.post(
                 f'{self.base_url}/playground/metric-evaluation',
                 headers=headers,
-                json=metric_schema_mapping
+                json=metric_schema_mapping,
+                timeout=self.timeout
                 )
             if response.status_code == 400:
                 raise ValueError(response.json()["message"])
@@ -296,14 +341,20 @@ class Evaluation:
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
             'X-Project-Id': str(self.project_id),
         }
-        data = {"jobId": self.jobId}
         try:
-            response = requests.post(
+            response = requests.get(
                 f'{self.base_url}/job/status',
                 headers=headers,
-                json=data)
+                timeout=self.timeout)
             response.raise_for_status()
-            print(response.json()["data"]["status"])
+            if response.json()["success"]:
+                status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId][0]
+            if status_json == "Failed":
+                return print("Job failed. No results to fetch.")
+            elif status_json == "In Progress":
+                return print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+            elif status_json == "Completed":
+                print(f"Job completed. Fetching results.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
         except requests.exceptions.HTTPError as http_err:
             logger.error(f"HTTP error occurred: {http_err}")
         except requests.exceptions.ConnectionError as conn_err:
@@ -336,7 +387,8 @@ class Evaluation:
                 response = requests.post(
                     f'{self.base_url}/v1/llm/docs',
                     headers=headers,
-                    json=data)
+                    json=data,
+                    timeout=self.timeout)
                 response.raise_for_status()
                 return response.json()
             except requests.exceptions.HTTPError as http_err:
@@ -355,7 +407,7 @@ class Evaluation:
             try:
                 response = get_presignedUrl()
                 preSignedURL = response["data"]["preSignedURL"]
-                response = requests.get(preSignedURL)
+                response = requests.get(preSignedURL, timeout=self.timeout)
                 response.raise_for_status()
                 return response.text
             except requests.exceptions.HTTPError as http_err:

ragaai_catalyst/proxy_call.py ADDED Viewed

@@ -0,0 +1,134 @@
+import requests
+import json
+import subprocess
+import logging
+import traceback
+logger = logging.getLogger(__name__)
+def api_completion(model,messages, api_base='http://127.0.0.1:8000',
+                    api_key='',model_config=dict()):
+    whoami = get_username()
+    all_response = list()
+    job_id = model_config.get('job_id',-1)
+    converted_message = convert_input(messages,model,model_config)
+    payload = json.dumps(converted_message)
+    response = payload
+    headers = {
+        'Content-Type': 'application/json',
+        'Wd-PCA-Feature-Key':f'your_feature_key, $(whoami)'
+    }
+    try:
+        response = requests.request("POST", api_base, headers=headers, data=payload, verify=False)
+        if model_config.get('log_level','')=='debug':
+            logger.info(f'Model response Job ID {job_id} {response.text}')
+        if response.status_code!=200:
+            logger.error(f'Error in model response Job ID {job_id}:',str(response.text))
+            raise ValueError(str(response.text))
+    except Exception as e:
+        logger.error(f'Error in calling api Job ID {job_id}:',str(e))
+        raise ValueError(str(e))
+    try:
+        response = response.json()
+        if 'error' in response:
+            logger.error(f'Invalid response from API Job ID {job_id}:'+str(response))
+            raise ValueError(str(response.get('error')))
+        all_response.append(convert_output(response,job_id))
+    except ValueError as e1:
+        logger.error(f'Invalid json response from API Job ID {job_id}:'+response)
+        raise ValueError(str(e1))
+    except Exception as e1:
+        if model_config.get('log_level','')=='debug':
+            logger.info(f"Error trace Job ID: {job_id} {traceback.print_exc()}")
+        logger.error(f"Exception in parsing model response Job ID:{job_id} {str(e1)}")
+        logger.error(f"Model response Job ID: {job_id} {response.text}")
+        all_response.append(None)
+    return all_response
+def get_username():
+    result = subprocess.run(['whoami'], capture_output=True, text=True)
+    result = result.stdout
+    return result
+def convert_output(response,job_id):
+    try:
+        if response.get('prediction',{}).get('type','')=='generic-text-generation-v1':
+            return response['prediction']['output']
+        elif response.get('prediction',{}).get('type','')=='gcp-multimodal-v1':
+            full_response = ''
+            for chunk in response['prediction']['output']['chunks']:
+                candidate = chunk['candidates'][0]
+                if candidate['finishReason'] and candidate['finishReason'] not in ['STOP']:
+                    raise ValueError(candidate['finishReason'])
+                part = candidate['content']['parts'][0]
+                full_response += part['text']
+            return full_response
+        else:
+            raise ValueError('Invalid prediction type passed in config')
+    except ValueError as e1:
+        raise ValueError(str(e1))
+    except Exception as e:
+        logger.warning(f'Exception in formatting model response Job ID {job_id}:'+str(e))
+        return None
+def convert_input(prompt,model,model_config):
+    doc_input = {
+        "target": {
+            "provider": "echo",
+            "model": "echo"
+        },
+        "task": {
+            "type": "gcp-multimodal-v1",
+            "prediction_type": "gcp-multimodal-v1",
+            "input": {
+            "contents": [
+                {
+                "role": "user",
+                "parts": [
+                    {
+                    "text": "Give me a recipe for banana bread."
+                    }
+                ]
+                }
+            ],
+            "safetySettings":
+                [
+                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
+                ],
+            "generationConfig": {
+                "temperature": 0,
+                "maxOutputTokens": 8000,
+                "topK": 40,
+                "topP": 0.95,
+                "stopSequences": [],
+                "candidateCount": 1
+            }
+            }
+        }
+    }
+    if 'provider' not in model_config:
+        doc_input['target']['provider'] = 'gcp'
+    else:
+        doc_input['target']['provider'] = model_config['provider']
+    doc_input['task']['type'] = model_config.get('task_type','gcp-multimodal-v1')
+    doc_input['task']['prediction_type'] = model_config.get('prediction_type','generic-text-generation-v1')
+    if 'safetySettings' in model_config:
+        doc_input['task']['input']['safetySettings'] = model_config.get('safetySettings')
+    if 'generationConfig' in model_config:
+        doc_input['task']['input']['generationConfig'] = model_config.get('generationConfig')
+    doc_input['target']['model'] = model
+    if model_config.get('log_level','')=='debug':
+        logger.info(f"Using model configs Job ID {model_config.get('job_id',-1)}{doc_input}")
+    doc_input['task']['input']['contents'][0]['parts'] = [{"text":prompt[0]['content']}]
+    return doc_input
+if __name__=='__main__':
+    message_list = ["Hi How are you","I am good","How are you"]
+    response = batch_completion('gemini/gemini-1.5-flash',message_list,0,1,100,api_base='http://127.0.0.1:5000')
+    print(response)

ragaai_catalyst/synthetic_data_generation.py ADDED Viewed

@@ -0,0 +1,323 @@
+import os
+from groq import Groq
+import google.generativeai as genai
+import openai
+import PyPDF2
+import csv
+import markdown
+import pandas as pd
+import json
+from ragaai_catalyst import proxy_call
+import ast
+# dotenv.load_dotenv()
+class SyntheticDataGeneration:
+    """
+    A class for generating synthetic data using various AI models and processing different document types.
+    """
+    def __init__(self):
+        """
+        Initialize the SyntheticDataGeneration class with API clients for Groq, Gemini, and OpenAI.
+        """
+    def generate_qna(self, text, question_type="simple", n=5,model_config=dict(),api_key=None):
+        """
+        Generate questions based on the given text using the specified model and provider.
+        Args:
+            text (str): The input text to generate questions from.
+            question_type (str): The type of questions to generate ('simple', 'mcq', or 'complex').
+            model (str): The specific model to use for generation.
+            provider (str): The AI provider to use ('groq', 'gemini', or 'openai').
+            n (int): The number of question/answer pairs to generate.
+        Returns:
+            pandas.DataFrame: A DataFrame containing the generated questions and answers.
+        Raises:
+            ValueError: If an invalid provider is specified.
+        """
+        provider = model_config.get("provider")
+        model = model_config.get("model")
+        api_base = model_config.get("api_base")
+        system_message = self._get_system_message(question_type, n)
+        if provider == "groq":
+            if api_key is None and os.getenv("GROQ_API_KEY") is None:
+                raise ValueError("API key must be provided for Groq.")
+            self.groq_client = Groq(api_key=api_key or os.getenv("GROQ_API_KEY"))
+            return self._generate_groq(text, system_message, model)
+        elif provider == "gemini":
+            genai.configure(api_key=api_key or os.getenv("GEMINI_API_KEY"))
+            if api_base is None:
+                if api_key is None and os.getenv("GEMINI_API_KEY") is None:
+                    raise ValueError("API key must be provided for Gemini.")
+                genai.configure(api_key=api_key or os.getenv("GEMINI_API_KEY"))
+                return self._generate_gemini(text, system_message, model)
+            else:
+                messages=[
+                {'role': 'user', 'content': system_message+text}
+            ]
+                a= proxy_call.api_completion(messages=messages ,model=model ,api_base=api_base)
+                b= ast.literal_eval(a[0])
+                return pd.DataFrame(b)
+        elif provider == "openai":
+            if api_key is None and os.getenv("OPENAI_API_KEY") is None:
+                raise ValueError("API key must be provided for OpenAI.")
+            openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
+            return self._generate_openai(text, system_message, model,api_key=api_key)
+        else:
+            raise ValueError("Invalid provider. Choose 'groq', 'gemini', or 'openai'.")
+    def _get_system_message(self, question_type, n):
+        """
+        Get the appropriate system message for the specified question type.
+        Args:
+            question_type (str): The type of questions to generate ('simple', 'mcq', or 'complex').
+            n (int): The number of question/answer pairs to generate.
+        Returns:
+            str: The system message for the AI model.
+        Raises:
+            ValueError: If an invalid question type is specified.
+        """
+        if question_type == 'simple':
+            return f'''Generate a set of {n} very simple questions answerable in a single phrase.
+                Also return the answers for the generated questions.
+                Return the response in a list of object format.
+                Each object in list should have Question and corresponding answer.
+                Do not return any extra strings. Return Generated text strictly in below format.
+                [{{"Question":"question,"Answer":"answer"}}]
+            '''
+        elif question_type == 'mcq':
+            return f'''Generate a set of {n} questions with 4 probable answers from the given text.
+                The options should not be longer than a phrase. There should be only 1 correct answer.
+                There should not be any ambiguity between correct and incorrect options.
+                Return the response in a list of object format.
+                Each object in list should have Question and a list of options.
+                Do not return any extra strings. Return Generated text strictly in below format.
+                [{{"Question":"question","Options":[option1,option2,option3,option4]}}]
+            '''
+        elif question_type == 'complex':
+            return f'''Can you generate a set of {n} complex questions answerable in long form from the below texts.
+                Make sure the questions are important and provide new information to the user.
+                Return the response in a list of object format. Enclose any quotes in single quote.
+                Do not use double quotes within questions or answers.
+                Each object in list should have Question and corresponding answer.
+                Do not return any extra strings. Return generated text strictly in below format.
+                [{{"Question":"question","Answer":"answers"}}]
+            '''
+        else:
+            raise ValueError("Invalid question type")
+    def _generate_groq(self, text, system_message, model):
+        """
+        Generate questions using the Groq API.
+        Args:
+            text (str): The input text to generate questions from.
+            system_message (str): The system message for the AI model.
+            model (str): The specific Groq model to use.
+        Returns:
+            pandas.DataFrame: A DataFrame containing the generated questions and answers.
+        """
+        response = self.groq_client.chat.completions.create(
+            model=model,
+            messages=[
+                {'role': 'system', 'content': system_message},
+                {'role': 'user', 'content': text}
+            ]
+        )
+        return self._parse_response(response, provider="groq")
+    def _generate_gemini(self, text, system_message, model):
+        """
+        Generate questions using the Gemini API.
+        Args:
+            text (str): The input text to generate questions from.
+            system_message (str): The system message for the AI model.
+            model (str): The specific Gemini model to use.
+        Returns:
+            pandas.DataFrame: A DataFrame containing the generated questions and answers.
+        """
+        model = genai.GenerativeModel(model)
+        response = model.generate_content([system_message, text])
+        return self._parse_response(response, provider="gemini")
+    def _generate_openai(self, text, system_message, model,api_key=None):
+        """
+        Generate questions using the OpenAI API.
+        Args:+
+            text (str): The input text to generate questions from.
+            system_message (str): The system message for the AI model.
+            model (str): The specific OpenAI model to use.
+        Returns:
+            pandas.DataFrame: A DataFrame containing the generated questions and answers.
+        """
+        client = openai.OpenAI(api_key=api_key)
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": text}
+            ]
+        )
+        return self._parse_response(response, provider="openai")
+    def _parse_response(self, response, provider):
+        """
+        Parse the response from the AI model and return it as a DataFrame.
+        Args:
+            response (str): The response from the AI model.
+            provider (str): The AI provider used ('groq', 'gemini', or 'openai').
+        Returns:
+            pandas.DataFrame: The parsed response as a DataFrame.
+        """
+        if provider == "openai":
+            data = response.choices[0].message.content
+        elif provider == "gemini":
+            data = response.candidates[0].content.parts[0].text
+        elif provider == "groq":
+            data = response.choices[0].message.content.replace('\n', '')
+            list_start_index = data.find('[')  # Find the index of the first '['
+            substring_data = data[list_start_index:] if list_start_index != -1 else data  # Slice from the list start
+            data = substring_data
+        else:
+            raise ValueError("Invalid provider. Choose 'groq', 'gemini', or 'openai'.")
+        try:
+            json_data = json.loads(data)
+            return pd.DataFrame(json_data)
+        except json.JSONDecodeError:
+            # If JSON parsing fails, return a DataFrame with a single column
+            return pd.DataFrame({'content': [data]})
+    def process_document(self, input_data):
+        """
+        Process the input document and extract its content.
+        Args:
+            input_data (str): Either a file path or a string of text.
+        Returns:
+            str: The extracted text content from the document.
+        Raises:
+            ValueError: If the input is neither a valid file path nor a string of text.
+        """
+        if isinstance(input_data, str):
+            if os.path.isfile(input_data):
+                # If input_data is a file path
+                _, file_extension = os.path.splitext(input_data)
+                if file_extension.lower() == '.pdf':
+                    return self._read_pdf(input_data)
+                elif file_extension.lower() == '.txt':
+                    return self._read_text(input_data)
+                elif file_extension.lower() == '.md':
+                    return self._read_markdown(input_data)
+                elif file_extension.lower() == '.csv':
+                    return self._read_csv(input_data)
+                else:
+                    raise ValueError(f"Unsupported file type: {file_extension}")
+            else:
+                # If input_data is a string of text
+                return input_data
+        else:
+            raise ValueError("Input must be either a file path or a string of text")
+    def _read_pdf(self, file_path):
+        """
+        Read and extract text from a PDF file.
+        Args:
+            file_path (str): The path to the PDF file.
+        Returns:
+            str: The extracted text content from the PDF.
+        """
+        text = ""
+        with open(file_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+        return text
+    def _read_text(self, file_path):
+        """
+        Read the contents of a text file.
+        Args:
+            file_path (str): The path to the text file.
+        Returns:
+            str: The contents of the text file.
+        """
+        with open(file_path, 'r', encoding='utf-8') as file:
+            return file.read()
+    def _read_markdown(self, file_path):
+        """
+        Read and convert a Markdown file to HTML.
+        Args:
+            file_path (str): The path to the Markdown file.
+        Returns:
+            str: The HTML content converted from the Markdown file.
+        """
+        with open(file_path, 'r', encoding='utf-8') as file:
+            md_content = file.read()
+            html_content = markdown.markdown(md_content)
+            return html_content
+    def _read_csv(self, file_path):
+        """
+        Read and extract text from a CSV file.
+        Args:
+            file_path (str): The path to the CSV file.
+        Returns:
+            str: The extracted text content from the CSV, with each row joined and separated by newlines.
+        """
+        text = ""
+        with open(file_path, 'r', encoding='utf-8') as file:
+            csv_reader = csv.reader(file)
+            for row in csv_reader:
+                text += " ".join(row) + "\n"
+        return text
+    def get_supported_qna(self):
+        """
+        Get a list of supported question types.
+        Returns:
+            list: A list of supported question types.
+        """
+        return ['simple', 'mcq', 'complex']
+    def get_supported_providers(self):
+        """
+        Get a list of supported AI providers.
+        Returns:
+            list: A list of supported AI providers.
+        """
+        return ['gemini', 'openai']
+# Usage:
+# from synthetic_data_generation import SyntheticDataGeneration
+# synthetic_data_generation = SyntheticDataGeneration()
+# text = synthetic_data_generation.process_document(input_data=text_file)
+# result = synthetic_data_generation.generate_question(text)
+# supported_question_types = synthetic_data_generation.get_supported_question_types()
+# supported_providers = synthetic_data_generation.get_supported_providers()

{ragaai_catalyst-2.0.3.dist-info → ragaai_catalyst-2.0.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragaai_catalyst
-Version: 2.0.3
+Version: 2.0.5
 Summary: RAGA AI CATALYST
 Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: >=3.9
@@ -20,6 +20,10 @@ Requires-Dist: langchain-core>=0.2.11
 Requires-Dist: langchain>=0.2.11
 Requires-Dist: openai>=1.35.10
 Requires-Dist: pandas>=2.1.1
+Requires-Dist: groq>=0.11.0
+Requires-Dist: PyPDF2>=3.0.1
+Requires-Dist: google-generativeai>=0.8.2
+Requires-Dist: Markdown>=3.7
 Requires-Dist: tenacity==8.3.0
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"

{ragaai_catalyst-2.0.3.dist-info → ragaai_catalyst-2.0.5.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,12 @@
-ragaai_catalyst/__init__.py,sha256=kxR70lWGd_b5q6wn7wg8h_Oth5EohSqzMPjWMe1Za50,334
+ragaai_catalyst/__init__.py,sha256=T0-X4yfIAe26-tWx6kLwNkKIjaFoQL2aNLIRp5wBG5w,424
 ragaai_catalyst/_version.py,sha256=JKt9KaVNOMVeGs8ojO6LvIZr7ZkMzNN-gCcvryy4x8E,460
 ragaai_catalyst/dataset.py,sha256=XjI06Exs6-64pQPQlky4mtcUllNMCgKP-bnM_t9EWkY,10920
-ragaai_catalyst/evaluation.py,sha256=4cgCKPhFtnPdYsBcN8jTxTHcysycz-cQbau_MCiIUYg,16474
+ragaai_catalyst/evaluation.py,sha256=PR7rMkvZ4km26B24sSc60GPNS0JkrUMIYo5CPEqX2Qw,19315
 ragaai_catalyst/experiment.py,sha256=8KvqgJg5JVnt9ghhGDJvdb4mN7ETBX_E5gNxBT0Nsn8,19010
 ragaai_catalyst/prompt_manager.py,sha256=ZMIHrmsnPMq20YfeNxWXLtrxnJyMcxpeJ8Uya7S5dUA,16411
+ragaai_catalyst/proxy_call.py,sha256=nlMdJCSW73sfN0fMbCbtIk6W992Nac5FJvcfNd6UDJk,5497
 ragaai_catalyst/ragaai_catalyst.py,sha256=5Q1VCE7P33DtjaOtVGRUgBL8dpDL9kjisWGIkOyX4nE,17426
+ragaai_catalyst/synthetic_data_generation.py,sha256=STpZF-a1mYT3GR4CGdDvhBdctf2ciSLyvDANqJxnQp8,12989
 ragaai_catalyst/utils.py,sha256=TlhEFwLyRU690HvANbyoRycR3nQ67lxVUQoUOfTPYQ0,3772
 ragaai_catalyst/tracers/__init__.py,sha256=NppmJhD3sQ5R1q6teaZLS7rULj08Gb6JT8XiPRIe_B0,49
 ragaai_catalyst/tracers/tracer.py,sha256=eaGJdLEIjadHpbWBXBl5AhMa2vL97SVjik4U1L8gros,9591
@@ -17,7 +19,7 @@ ragaai_catalyst/tracers/instrumentators/llamaindex.py,sha256=SMrRlR4xM7k9HK43hak
 ragaai_catalyst/tracers/instrumentators/openai.py,sha256=14R4KW9wQCR1xysLfsP_nxS7cqXrTPoD8En4MBAaZUU,379
 ragaai_catalyst/tracers/utils/__init__.py,sha256=KeMaZtYaTojilpLv65qH08QmpYclfpacDA0U3wg6Ybw,64
 ragaai_catalyst/tracers/utils/utils.py,sha256=ViygfJ7vZ7U0CTSA1lbxVloHp4NSlmfDzBRNCJuMhis,2374
-ragaai_catalyst-2.0.3.dist-info/METADATA,sha256=gWehjEvbhpzWjNBDrYAD5RshBEOSL__-Dz-cejkOyvs,6497
-ragaai_catalyst-2.0.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-ragaai_catalyst-2.0.3.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
-ragaai_catalyst-2.0.3.dist-info/RECORD,,
+ragaai_catalyst-2.0.5.dist-info/METADATA,sha256=tWppjo0sERHjjugIOAWdwD1p05HO6T6N_E1KYd9G9hY,6625
+ragaai_catalyst-2.0.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ragaai_catalyst-2.0.5.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
+ragaai_catalyst-2.0.5.dist-info/RECORD,,

{ragaai_catalyst-2.0.3.dist-info → ragaai_catalyst-2.0.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{ragaai_catalyst-2.0.3.dist-info → ragaai_catalyst-2.0.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

ragaai-catalyst 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl

ragaai-catalyst 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl