PyPI - aiverify-moonshot - Versions diffs - 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl - Mend

aiverify-moonshot 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

moonshot/src/api/api_dataset.py CHANGED Viewed

@@ -1,3 +1,6 @@
+import json
+import os
 from pydantic import validate_call
 from moonshot.src.datasets.dataset import Dataset
@@ -81,10 +84,10 @@ def api_download_dataset(
 def api_convert_dataset(
-    name: str, description: str, reference: str, license: str, csv_file_path: str
+    name: str, description: str, reference: str, license: str, file_path: str
 ) -> str:
     """
-    Converts a CSV file to a dataset and creates a new dataset with the provided details.
+    Converts a CSV or JSON file to a dataset and creates a new dataset with the provided details.
     This function takes the name, description, reference, and license for a new dataset as input, along with the file
     path to a CSV file. It then creates a new DatasetArguments object with these details and an empty id. The id is left
@@ -96,18 +99,55 @@ def api_convert_dataset(
         description (str): A brief description of the new dataset.
         reference (str): A reference link for the new dataset.
         license (str): The license of the new dataset.
-        csv_file_path (str): The file path to the CSV file.
+        file_path (str): The file path to the CSV or JSONfile.
     Returns:
         str: The ID of the newly created dataset.
     """
-    examples = Dataset.convert_data(csv_file_path)
-    ds_args = DatasetArguments(
-        id="",
-        name=name,
-        description=description,
-        reference=reference,
-        license=license,
-        examples=examples,
-    )
+    ds_args = None
+    # Check if file is in a supported format
+    if not (file_path.endswith(".json") or file_path.endswith(".csv")):
+        raise ValueError("Unsupported file format. Please provide a JSON or CSV file.")
+    # Check that file is not empty
+    if os.path.getsize(file_path) == 0:
+        raise ValueError("The uploaded file is empty.")
+    # if file is already in json format
+    if file_path.endswith(".json"):
+        json_data = json.load(open(file_path))
+        try:
+            if "examples" in json_data and json_data["examples"]:
+                ds_args = DatasetArguments(
+                    id="",
+                    name=json_data.get("name", name),
+                    description=json_data.get("description", description),
+                    reference=json_data.get("reference", reference),
+                    license=json_data.get("license", license),
+                    examples=iter(json_data["examples"]),
+                )
+            else:
+                raise KeyError(
+                    "examples is either empty or this key is not in the JSON file. "
+                    "Please ensure that this field is present."
+                )
+        except Exception as e:
+            raise e
+    # if file is in csv format, convert data
+    else:
+        try:
+            examples = Dataset.convert_data(file_path)
+            ds_args = DatasetArguments(
+                id="",
+                name=name,
+                description=description,
+                reference=reference,
+                license=license,
+                examples=examples,
+            )
+        except Exception as e:
+            raise e
     return Dataset.create(ds_args)

moonshot/src/cookbooks/cookbook_arguments.py CHANGED Viewed

@@ -8,7 +8,7 @@ class CookbookArguments(BaseModel):
     description: str  # description (str): A brief description of the Cookbook.
-    tags: list[str]  #  tags (list): The list of tags in the Cookbook.
+    tags: list[str]  # tags (list): The list of tags in the Cookbook.
     categories: list[str]  # categories (list): The list of categories in the Cookbook.

moonshot/src/datasets/dataset.py CHANGED Viewed

@@ -60,7 +60,6 @@ class Dataset:
             }
             examples = ds_args.examples
             # Write as JSON output
             file_path = Storage.create_object_with_iterator(
                 EnvVariables.DATASETS.name,
@@ -91,9 +90,26 @@ class Dataset:
         Returns:
             Iterator[dict]: An iterator of dictionaries representing the CSV data.
         """
+        # validate headers
+        df_header = pd.read_csv(csv_file_path, nrows=1)
+        headers = df_header.columns.tolist()
+        required_headers = ["input", "target"]
+        if not all(header in headers for header in required_headers):
+            raise KeyError(
+                f"Required headers not found in the dataset. Required headers are {required_headers}."
+            )
         df = pd.read_csv(csv_file_path, chunksize=1)
-        for chunk in df:
-            yield chunk.to_dict("records")[0]
+        # validate dataset
+        first_chunk = next(df, None)
+        if first_chunk is None or first_chunk.empty:
+            raise ValueError("The uploaded file does not contain any data.")
+        # Reset df after performing next(df)
+        df = pd.read_csv(csv_file_path, chunksize=1)
+        result = [chunk.to_dict("records")[0] for chunk in df]
+        return iter(result)
     @staticmethod
     @validate_call

moonshot/src/redteaming/attack/attack_module.py CHANGED Viewed

@@ -649,7 +649,7 @@ class RedTeamingPromptArguments(BaseModel):
         This method collects all the attributes of the RedTeamingPromptArguments instance and forms a tuple
         with the attribute values in this specific order: conn_id, cs_id, pt_id, am_id, me_id, original_prompt,
-        connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
+        connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
         connector_prompt.duration, start_time.
         Returns:
@@ -664,7 +664,9 @@ class RedTeamingPromptArguments(BaseModel):
             self.original_prompt,
             self.connector_prompt.prompt,
             self.system_prompt,
-            self.connector_prompt.predicted_results.response if self.connector_prompt.predicted_results else "",
+            self.connector_prompt.predicted_results.response
+            if self.connector_prompt.predicted_results
+            else "",
             str(self.connector_prompt.duration),
             self.start_time,
         )
@@ -689,7 +691,11 @@ class RedTeamingPromptArguments(BaseModel):
             "original_prompt": self.original_prompt,
             "prepared_prompt": self.connector_prompt.prompt,
             "system_prompt": self.system_prompt,
-            "response": self.connector_prompt.predicted_results.response if self.connector_prompt.predicted_results else "",
+            "response": (
+                self.connector_prompt.predicted_results.response
+                if self.connector_prompt.predicted_results
+                else ""
+            ),
             "duration": str(self.connector_prompt.duration),
             "start_time": self.start_time,
         }

aiverify-moonshot 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

aiverify-moonshot 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl