aiverify-moonshot 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {aiverify_moonshot-0.6.1.dist-info → aiverify_moonshot-0.6.3.dist-info}/METADATA +3 -2
  2. {aiverify_moonshot-0.6.1.dist-info → aiverify_moonshot-0.6.3.dist-info}/RECORD +31 -31
  3. aiverify_moonshot-0.6.3.dist-info/licenses/NOTICES.md +1187 -0
  4. moonshot/__main__.py +9 -0
  5. moonshot/integrations/cli/__main__.py +1 -3
  6. moonshot/integrations/cli/redteam/session.py +8 -8
  7. moonshot/integrations/web_api/app.py +1 -1
  8. moonshot/integrations/web_api/routes/benchmark_result.py +1 -0
  9. moonshot/integrations/web_api/routes/bookmark.py +5 -2
  10. moonshot/integrations/web_api/routes/context_strategy.py +3 -1
  11. moonshot/integrations/web_api/routes/dataset.py +32 -4
  12. moonshot/integrations/web_api/routes/prompt_template.py +1 -0
  13. moonshot/integrations/web_api/schemas/cookbook_create_dto.py +4 -2
  14. moonshot/integrations/web_api/schemas/dataset_create_dto.py +3 -3
  15. moonshot/integrations/web_api/schemas/prompt_response_model.py +0 -1
  16. moonshot/integrations/web_api/schemas/recipe_create_dto.py +2 -1
  17. moonshot/integrations/web_api/services/context_strategy_service.py +1 -4
  18. moonshot/integrations/web_api/services/cookbook_service.py +0 -2
  19. moonshot/integrations/web_api/services/dataset_service.py +4 -3
  20. moonshot/integrations/web_api/services/session_service.py +5 -5
  21. moonshot/integrations/web_api/services/utils/exceptions_handler.py +47 -10
  22. moonshot/integrations/web_api/services/utils/results_formatter.py +25 -16
  23. moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +3 -3
  24. moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +3 -3
  25. moonshot/src/api/api_dataset.py +52 -12
  26. moonshot/src/cookbooks/cookbook_arguments.py +1 -1
  27. moonshot/src/datasets/dataset.py +19 -3
  28. moonshot/src/redteaming/attack/attack_module.py +9 -3
  29. aiverify_moonshot-0.6.1.dist-info/licenses/NOTICES.md +0 -2506
  30. {aiverify_moonshot-0.6.1.dist-info → aiverify_moonshot-0.6.3.dist-info}/WHEEL +0 -0
  31. {aiverify_moonshot-0.6.1.dist-info → aiverify_moonshot-0.6.3.dist-info}/licenses/AUTHORS.md +0 -0
  32. {aiverify_moonshot-0.6.1.dist-info → aiverify_moonshot-0.6.3.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,3 +1,6 @@
1
+ import json
2
+ import os
3
+
1
4
  from pydantic import validate_call
2
5
 
3
6
  from moonshot.src.datasets.dataset import Dataset
@@ -81,10 +84,10 @@ def api_download_dataset(
81
84
 
82
85
 
83
86
  def api_convert_dataset(
84
- name: str, description: str, reference: str, license: str, csv_file_path: str
87
+ name: str, description: str, reference: str, license: str, file_path: str
85
88
  ) -> str:
86
89
  """
87
- Converts a CSV file to a dataset and creates a new dataset with the provided details.
90
+ Converts a CSV or JSON file to a dataset and creates a new dataset with the provided details.
88
91
 
89
92
  This function takes the name, description, reference, and license for a new dataset as input, along with the file
90
93
  path to a CSV file. It then creates a new DatasetArguments object with these details and an empty id. The id is left
@@ -96,18 +99,55 @@ def api_convert_dataset(
96
99
  description (str): A brief description of the new dataset.
97
100
  reference (str): A reference link for the new dataset.
98
101
  license (str): The license of the new dataset.
99
- csv_file_path (str): The file path to the CSV file.
102
+ file_path (str): The file path to the CSV or JSON file.
100
103
 
101
104
  Returns:
102
105
  str: The ID of the newly created dataset.
103
106
  """
104
- examples = Dataset.convert_data(csv_file_path)
105
- ds_args = DatasetArguments(
106
- id="",
107
- name=name,
108
- description=description,
109
- reference=reference,
110
- license=license,
111
- examples=examples,
112
- )
107
+ ds_args = None
108
+
109
+ # Check if file is in a supported format
110
+ if not (file_path.endswith(".json") or file_path.endswith(".csv")):
111
+ raise ValueError("Unsupported file format. Please provide a JSON or CSV file.")
112
+
113
+ # Check that file is not empty
114
+ if os.path.getsize(file_path) == 0:
115
+ raise ValueError("The uploaded file is empty.")
116
+
117
+ # if file is already in json format
118
+ if file_path.endswith(".json"):
119
+ json_data = json.load(open(file_path))
120
+
121
+ try:
122
+ if "examples" in json_data and json_data["examples"]:
123
+ ds_args = DatasetArguments(
124
+ id="",
125
+ name=json_data.get("name", name),
126
+ description=json_data.get("description", description),
127
+ reference=json_data.get("reference", reference),
128
+ license=json_data.get("license", license),
129
+ examples=iter(json_data["examples"]),
130
+ )
131
+ else:
132
+ raise KeyError(
133
+ "examples is either empty or this key is not in the JSON file. "
134
+ "Please ensure that this field is present."
135
+ )
136
+ except Exception as e:
137
+ raise e
138
+
139
+ # if file is in csv format, convert data
140
+ else:
141
+ try:
142
+ examples = Dataset.convert_data(file_path)
143
+ ds_args = DatasetArguments(
144
+ id="",
145
+ name=name,
146
+ description=description,
147
+ reference=reference,
148
+ license=license,
149
+ examples=examples,
150
+ )
151
+ except Exception as e:
152
+ raise e
113
153
  return Dataset.create(ds_args)
@@ -8,7 +8,7 @@ class CookbookArguments(BaseModel):
8
8
 
9
9
  description: str # description (str): A brief description of the Cookbook.
10
10
 
11
- tags: list[str] # tags (list): The list of tags in the Cookbook.
11
+ tags: list[str] # tags (list): The list of tags in the Cookbook.
12
12
 
13
13
  categories: list[str] # categories (list): The list of categories in the Cookbook.
14
14
 
@@ -60,7 +60,6 @@ class Dataset:
60
60
  }
61
61
 
62
62
  examples = ds_args.examples
63
-
64
63
  # Write as JSON output
65
64
  file_path = Storage.create_object_with_iterator(
66
65
  EnvVariables.DATASETS.name,
@@ -91,9 +90,26 @@ class Dataset:
91
90
  Returns:
92
91
  Iterator[dict]: An iterator of dictionaries representing the CSV data.
93
92
  """
93
+ # validate headers
94
+ df_header = pd.read_csv(csv_file_path, nrows=1)
95
+ headers = df_header.columns.tolist()
96
+ required_headers = ["input", "target"]
97
+ if not all(header in headers for header in required_headers):
98
+ raise KeyError(
99
+ f"Required headers not found in the dataset. Required headers are {required_headers}."
100
+ )
101
+
94
102
  df = pd.read_csv(csv_file_path, chunksize=1)
95
- for chunk in df:
96
- yield chunk.to_dict("records")[0]
103
+ # validate dataset
104
+ first_chunk = next(df, None)
105
+ if first_chunk is None or first_chunk.empty:
106
+ raise ValueError("The uploaded file does not contain any data.")
107
+
108
+ # Reset df after performing next(df)
109
+ df = pd.read_csv(csv_file_path, chunksize=1)
110
+
111
+ result = [chunk.to_dict("records")[0] for chunk in df]
112
+ return iter(result)
97
113
 
98
114
  @staticmethod
99
115
  @validate_call
@@ -649,7 +649,7 @@ class RedTeamingPromptArguments(BaseModel):
649
649
 
650
650
  This method collects all the attributes of the RedTeamingPromptArguments instance and forms a tuple
651
651
  with the attribute values in this specific order: conn_id, cs_id, pt_id, am_id, me_id, original_prompt,
652
- connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
652
+ connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
653
653
  connector_prompt.duration, start_time.
654
654
 
655
655
  Returns:
@@ -664,7 +664,9 @@ class RedTeamingPromptArguments(BaseModel):
664
664
  self.original_prompt,
665
665
  self.connector_prompt.prompt,
666
666
  self.system_prompt,
667
- self.connector_prompt.predicted_results.response if self.connector_prompt.predicted_results else "",
667
+ self.connector_prompt.predicted_results.response
668
+ if self.connector_prompt.predicted_results
669
+ else "",
668
670
  str(self.connector_prompt.duration),
669
671
  self.start_time,
670
672
  )
@@ -689,7 +691,11 @@ class RedTeamingPromptArguments(BaseModel):
689
691
  "original_prompt": self.original_prompt,
690
692
  "prepared_prompt": self.connector_prompt.prompt,
691
693
  "system_prompt": self.system_prompt,
692
- "response": self.connector_prompt.predicted_results.response if self.connector_prompt.predicted_results else "",
694
+ "response": (
695
+ self.connector_prompt.predicted_results.response
696
+ if self.connector_prompt.predicted_results
697
+ else ""
698
+ ),
693
699
  "duration": str(self.connector_prompt.duration),
694
700
  "start_time": self.start_time,
695
701
  }