aiverify-moonshot 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/METADATA +2 -2
  2. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/RECORD +70 -56
  3. moonshot/__main__.py +77 -35
  4. moonshot/api.py +16 -0
  5. moonshot/integrations/cli/benchmark/benchmark.py +29 -13
  6. moonshot/integrations/cli/benchmark/cookbook.py +62 -24
  7. moonshot/integrations/cli/benchmark/datasets.py +79 -40
  8. moonshot/integrations/cli/benchmark/metrics.py +62 -23
  9. moonshot/integrations/cli/benchmark/recipe.py +89 -69
  10. moonshot/integrations/cli/benchmark/result.py +85 -47
  11. moonshot/integrations/cli/benchmark/run.py +99 -59
  12. moonshot/integrations/cli/common/common.py +20 -6
  13. moonshot/integrations/cli/common/connectors.py +154 -74
  14. moonshot/integrations/cli/common/dataset.py +66 -0
  15. moonshot/integrations/cli/common/prompt_template.py +57 -19
  16. moonshot/integrations/cli/redteam/attack_module.py +90 -24
  17. moonshot/integrations/cli/redteam/context_strategy.py +83 -23
  18. moonshot/integrations/cli/redteam/prompt_template.py +1 -1
  19. moonshot/integrations/cli/redteam/redteam.py +52 -6
  20. moonshot/integrations/cli/redteam/session.py +565 -44
  21. moonshot/integrations/cli/utils/process_data.py +52 -0
  22. moonshot/integrations/web_api/__main__.py +2 -0
  23. moonshot/integrations/web_api/app.py +6 -6
  24. moonshot/integrations/web_api/container.py +12 -2
  25. moonshot/integrations/web_api/routes/bookmark.py +173 -0
  26. moonshot/integrations/web_api/routes/dataset.py +46 -1
  27. moonshot/integrations/web_api/schemas/bookmark_create_dto.py +13 -0
  28. moonshot/integrations/web_api/schemas/dataset_create_dto.py +18 -0
  29. moonshot/integrations/web_api/schemas/recipe_create_dto.py +0 -2
  30. moonshot/integrations/web_api/services/bookmark_service.py +94 -0
  31. moonshot/integrations/web_api/services/dataset_service.py +25 -0
  32. moonshot/integrations/web_api/services/recipe_service.py +0 -1
  33. moonshot/integrations/web_api/services/utils/file_manager.py +52 -0
  34. moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +0 -1
  35. moonshot/integrations/web_api/temp/.gitkeep +0 -0
  36. moonshot/src/api/api_bookmark.py +95 -0
  37. moonshot/src/api/api_connector_endpoint.py +1 -1
  38. moonshot/src/api/api_context_strategy.py +2 -2
  39. moonshot/src/api/api_dataset.py +35 -0
  40. moonshot/src/api/api_recipe.py +0 -3
  41. moonshot/src/api/api_session.py +1 -1
  42. moonshot/src/bookmark/bookmark.py +257 -0
  43. moonshot/src/bookmark/bookmark_arguments.py +38 -0
  44. moonshot/src/configs/env_variables.py +12 -2
  45. moonshot/src/connectors/connector.py +15 -7
  46. moonshot/src/connectors_endpoints/connector_endpoint.py +65 -49
  47. moonshot/src/cookbooks/cookbook.py +57 -37
  48. moonshot/src/datasets/dataset.py +125 -5
  49. moonshot/src/metrics/metric.py +8 -4
  50. moonshot/src/metrics/metric_interface.py +8 -2
  51. moonshot/src/prompt_templates/prompt_template.py +5 -1
  52. moonshot/src/recipes/recipe.py +38 -40
  53. moonshot/src/recipes/recipe_arguments.py +0 -4
  54. moonshot/src/redteaming/attack/attack_module.py +18 -8
  55. moonshot/src/redteaming/attack/context_strategy.py +6 -2
  56. moonshot/src/redteaming/session/session.py +15 -11
  57. moonshot/src/results/result.py +7 -3
  58. moonshot/src/runners/runner.py +65 -42
  59. moonshot/src/runs/run.py +15 -11
  60. moonshot/src/runs/run_progress.py +7 -3
  61. moonshot/src/storage/db_interface.py +14 -0
  62. moonshot/src/storage/storage.py +33 -2
  63. moonshot/src/utils/find_feature.py +45 -0
  64. moonshot/src/utils/log.py +72 -0
  65. moonshot/src/utils/pagination.py +25 -0
  66. moonshot/src/utils/timeit.py +8 -1
  67. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/WHEEL +0 -0
  68. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/AUTHORS.md +0 -0
  69. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/LICENSE.md +0 -0
  70. {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/NOTICES.md +0 -0
@@ -8,33 +8,39 @@ from moonshot.src.connectors_endpoints.connector_endpoint_arguments import (
8
8
  ConnectorEndpointArguments,
9
9
  )
10
10
  from moonshot.src.storage.storage import Storage
11
+ from moonshot.src.utils.log import configure_logger
12
+
13
+ # Create a logger for this module
14
+ logger = configure_logger(__name__)
11
15
 
12
16
 
13
17
  class ConnectorEndpoint:
14
18
  @staticmethod
15
19
  def create(ep_args: ConnectorEndpointArguments) -> str:
16
20
  """
17
- Creates a new connector endpoint.
21
+ Creates a new connector endpoint and stores its details as a JSON object.
22
+
23
+ This method accepts a ConnectorEndpointArguments object, generates a unique slugified ID from the endpoint's
24
+ name, and stores the endpoint's details in a JSON file within a specified directory.
18
25
 
19
- This method takes a ConnectorEndpointArguments object as input, generates a unique slugified ID based on the
20
- endpoint's name, and then creates a new endpoint with the provided details. The endpoint information is stored
21
- as a JSON object in the directory specified by `EnvVariables.CONNECTORS_ENDPOINTS`. If the operation is
22
- successful, the unique ID of the new endpoint is returned. If any error arises during the process, an exception
23
- is raised and the error message is logged.
26
+ The directory path is determined by the `EnvVariables.CONNECTORS_ENDPOINTS` environment variable.
27
+ Upon successful creation, the method returns the unique ID of the endpoint.
28
+ If an error occurs during the creation process, the method raises an exception and logs the error message.
24
29
 
25
30
  Args:
26
- ep_args (ConnectorEndpointArguments): An object containing the details of the endpoint to be created.
31
+ ep_args (ConnectorEndpointArguments): The details of the endpoint to be created,
32
+ encapsulated in a ConnectorEndpointArguments object.
27
33
 
28
34
  Returns:
29
- str: The unique ID of the newly created endpoint.
35
+ str: The unique ID of the newly created endpoint, derived from slugifying the endpoint's name.
30
36
 
31
37
  Raises:
32
- Exception: If there's an error during the endpoint creation process.
38
+ Exception: If an error occurs during the creation process, including issues with storing the endpoint's
39
+ details.
33
40
  """
34
41
  try:
35
42
  ep_id = slugify(ep_args.name, lowercase=True)
36
43
  ep_info = {
37
- "id": ep_id,
38
44
  "name": ep_args.name,
39
45
  "connector_type": ep_args.connector_type,
40
46
  "uri": ep_args.uri,
@@ -51,59 +57,63 @@ class ConnectorEndpoint:
51
57
  return ep_id
52
58
 
53
59
  except Exception as e:
54
- print(f"Failed to create endpoint: {str(e)}")
60
+ logger.error(f"Failed to create endpoint: {str(e)}")
55
61
  raise e
56
62
 
57
63
  @staticmethod
58
64
  @validate_call
59
65
  def read(ep_id: str) -> ConnectorEndpointArguments:
60
66
  """
61
- Fetches the details of a given endpoint.
67
+ Retrieves the details of a specified endpoint by its ID.
62
68
 
63
- This method takes an endpoint ID as input, finds the corresponding JSON file in the directory
64
- specified by `EnvironmentVars.CONNECTORS_ENDPOINTS`, and returns a ConnectorEndpointArguments object
65
- that contains the endpoint's details. If any error arises during the process, an exception is raised and the
66
- error message is logged.
69
+ This method searches for the endpoint's corresponding JSON file within the directory defined by the
70
+ `EnvVariables.CONNECTORS_ENDPOINTS` environment variable. It then constructs and returns a
71
+ ConnectorEndpointArguments object populated with the endpoint's details. If the endpoint ID is not found or
72
+ any other error occurs, an exception is raised with an appropriate error message.
67
73
 
68
74
  Args:
69
- ep_id (str): The unique ID of the endpoint to be fetched.
75
+ ep_id (str): The unique identifier of the endpoint whose details are to be retrieved.
70
76
 
71
77
  Returns:
72
- ConnectorEndpointArguments: An object encapsulating the details of the fetched endpoint.
78
+ ConnectorEndpointArguments: An instance filled with the endpoint's details.
73
79
 
74
80
  Raises:
75
- Exception: If there's an error during the file reading process or any other operation within the method.
81
+ RuntimeError: If the endpoint ID is empty or the specified endpoint does not exist.
82
+ Exception: For any issues encountered during the file reading or data parsing process.
76
83
  """
77
84
  try:
78
- if ep_id:
79
- return ConnectorEndpointArguments(
80
- **ConnectorEndpoint._read_endpoint(ep_id)
81
- )
82
- else:
83
- raise RuntimeError("Connector Endpoint ID is empty")
85
+ if not ep_id:
86
+ raise RuntimeError("Connector Endpoint ID is empty.")
87
+
88
+ endpoint_details = ConnectorEndpoint._read_endpoint(ep_id)
89
+ if not endpoint_details:
90
+ raise RuntimeError(f"Endpoint with ID '{ep_id}' does not exist.")
91
+
92
+ return ConnectorEndpointArguments(**endpoint_details)
84
93
 
85
94
  except Exception as e:
86
- print(f"Failed to read endpoint: {str(e)}")
95
+ logger.error(f"Failed to read endpoint: {str(e)}")
87
96
  raise e
88
97
 
89
98
  @staticmethod
90
99
  def _read_endpoint(ep_id: str) -> dict:
91
100
  """
92
- Reads the endpoint information from a JSON file and adds the creation datetime.
101
+ Retrieves the endpoint's information from a JSON file, including its creation datetime.
93
102
 
94
- This method accepts an endpoint ID as an argument, locates the corresponding JSON file in the directory
95
- defined by `EnvironmentVars.CONNECTORS_ENDPOINTS`, and returns a dictionary that encapsulates the endpoint's
96
- details along with its creation datetime. If any error occurs during the process, it is handled by the calling
97
- method.
103
+ This internal method is designed to fetch the details of a specific endpoint by its ID. It searches for the
104
+ corresponding JSON file within the directory specified by `EnvVariables.CONNECTORS_ENDPOINTS`. The method
105
+ returns a dictionary containing the endpoint's information, enriched with the creation datetime. Errors
106
+ encountered during this process are managed by the method that invokes this one.
98
107
 
99
108
  Args:
100
- ep_id (str): The unique identifier of the endpoint to be retrieved.
109
+ ep_id (str): The unique identifier of the endpoint whose information is being retrieved.
101
110
 
102
111
  Returns:
103
- dict: A dictionary containing the details of the retrieved endpoint along with its creation datetime.
112
+ dict: A dictionary with the endpoint's information, including its creation datetime.
104
113
  """
105
- connector_endpoint_info = Storage.read_object(
106
- EnvVariables.CONNECTORS_ENDPOINTS.name, ep_id, "json"
114
+ connector_endpoint_info = {"id": ep_id}
115
+ connector_endpoint_info.update(
116
+ Storage.read_object(EnvVariables.CONNECTORS_ENDPOINTS.name, ep_id, "json")
107
117
  )
108
118
  creation_datetime = Storage.get_creation_datetime(
109
119
  EnvVariables.CONNECTORS_ENDPOINTS.name, ep_id, "json"
@@ -116,35 +126,41 @@ class ConnectorEndpoint:
116
126
  @staticmethod
117
127
  def update(ep_args: ConnectorEndpointArguments) -> bool:
118
128
  """
119
- Updates the endpoint information based on the provided arguments.
129
+ Updates the endpoint information in the storage based on the provided ConnectorEndpointArguments object.
130
+
131
+ This method serializes the provided ConnectorEndpointArguments object into a dictionary, excluding the 'id' and
132
+ 'created_date' keys. It then persists the updated information to the corresponding JSON file within the
133
+ directory defined by `EnvVariables.CONNECTORS_ENDPOINTS`.
120
134
 
121
- This method takes a ConnectorEndpointArguments object, converts it to a dictionary, and removes the
122
- 'created_date' key if it exists. It then writes the updated information to the corresponding JSON file
123
- in the directory specified by `EnvVariables.CONNECTORS_ENDPOINTS`.
135
+ This operation ensures that the endpoint's mutable attributes are updated according to the provided arguments.
124
136
 
125
137
  Args:
126
- ep_args (ConnectorEndpointArguments): An object containing the updated details of the endpoint.
138
+ ep_args (ConnectorEndpointArguments): The object encapsulating the updated attributes of the endpoint.
127
139
 
128
140
  Returns:
129
- bool: True if the update operation was successful.
141
+ bool: Indicates whether the update operation was successful. Returns True if the update was successfully
142
+ persisted to the storage; otherwise, an exception is raised.
130
143
 
131
144
  Raises:
132
- Exception: If there's an error during the update process.
145
+ Exception: Signifies a failure in the update process, potentially due to issues with data serialization or
146
+ storage access.
133
147
  """
134
148
  try:
135
- # Convert the endpoint arguments to a dictionary
136
- # Remove created_date if it exists
149
+ # Serialize the ConnectorEndpointArguments object to a dictionary and remove derived properties
137
150
  ep_info = ep_args.to_dict()
138
- ep_info.pop("created_date", None)
151
+ ep_info.pop("id", None) # The 'id' is derived and should not be written
152
+ ep_info.pop(
153
+ "created_date", None
154
+ ) # The 'created_date' is derived and should not be written
139
155
 
140
- # Write the updated endpoint information to the file
156
+ # Write the updated endpoint information to the storage
141
157
  Storage.create_object(
142
158
  EnvVariables.CONNECTORS_ENDPOINTS.name, ep_args.id, ep_info, "json"
143
159
  )
144
160
  return True
145
161
 
146
162
  except Exception as e:
147
- print(f"Failed to update endpoint: {str(e)}")
163
+ logger.error(f"Failed to update endpoint: {str(e)}")
148
164
  raise e
149
165
 
150
166
  @staticmethod
@@ -171,7 +187,7 @@ class ConnectorEndpoint:
171
187
  return True
172
188
 
173
189
  except Exception as e:
174
- print(f"Failed to delete endpoint: {str(e)}")
190
+ logger.error(f"Failed to delete endpoint: {str(e)}")
175
191
  raise e
176
192
 
177
193
  @staticmethod
@@ -207,5 +223,5 @@ class ConnectorEndpoint:
207
223
  return retn_eps_ids, retn_eps
208
224
 
209
225
  except Exception as e:
210
- print(f"Failed to get available endpoints: {str(e)}")
226
+ logger.error(f"Failed to get available endpoints: {str(e)}")
211
227
  raise e
@@ -8,6 +8,10 @@ from slugify import slugify
8
8
  from moonshot.src.configs.env_variables import EnvVariables
9
9
  from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
10
10
  from moonshot.src.storage.storage import Storage
11
+ from moonshot.src.utils.log import configure_logger
12
+
13
+ # Create a logger for this module
14
+ logger = configure_logger(__name__)
11
15
 
12
16
 
13
17
  class Cookbook:
@@ -31,8 +35,7 @@ class Cookbook:
31
35
  Returns:
32
36
  Cookbook: An instance of the Cookbook class populated with the loaded cookbook information.
33
37
  """
34
- cb_info = Storage.read_object(EnvVariables.COOKBOOKS.name, cb_id, "json")
35
- return cls(CookbookArguments(**cb_info))
38
+ return cls(Cookbook.read(cb_id))
36
39
 
37
40
  @staticmethod
38
41
  def create(cb_args: CookbookArguments) -> str:
@@ -58,6 +61,11 @@ class Cookbook:
58
61
  """
59
62
  try:
60
63
  cb_id = slugify(cb_args.name, lowercase=True)
64
+ cb_info = {
65
+ "name": cb_args.name,
66
+ "description": cb_args.description,
67
+ "recipes": cb_args.recipes,
68
+ }
61
69
 
62
70
  # check if the cookbook exists
63
71
  if Storage.is_object_exists(EnvVariables.COOKBOOKS.name, cb_id, "json"):
@@ -70,55 +78,70 @@ class Cookbook:
70
78
  ):
71
79
  raise RuntimeError(f"{recipe} recipe does not exist.")
72
80
 
73
- cb_info = {
74
- "id": cb_id,
75
- "name": cb_args.name,
76
- "description": cb_args.description,
77
- "recipes": cb_args.recipes,
78
- }
79
-
80
81
  # Write as json output
81
82
  Storage.create_object(EnvVariables.COOKBOOKS.name, cb_id, cb_info, "json")
82
83
  return cb_id
83
84
 
84
85
  except Exception as e:
85
- print(f"Failed to create cookbook: {str(e)}")
86
+ logger.error(f"Failed to create cookbook: {str(e)}")
86
87
  raise e
87
88
 
88
89
  @staticmethod
89
90
  @validate_call
90
91
  def read(cb_id: str) -> CookbookArguments:
91
92
  """
92
- Retrieves the details of a specified cookbook.
93
+ Fetches and returns the details of a specified cookbook by its ID.
94
+
95
+ This method takes a cookbook ID, searches for its corresponding JSON file in the directory set by
96
+ `EnvironmentVars.COOKBOOKS`, and constructs a CookbookArguments object with the cookbook's details.
93
97
 
94
- This method accepts a cookbook ID as an argument, locates the corresponding JSON file in the directory
95
- defined by `EnvironmentVars.COOKBOOKS`, and returns a CookbookArguments object that encapsulates the cookbook's
96
- details. If any error occurs during the process, an exception is raised and the error message is logged.
98
+ If the process encounters any issues, such as the file not existing or being inaccessible, it logs the error
99
+ and raises an exception.
97
100
 
98
101
  Args:
99
- cb_id (str): The unique identifier of the cookbook to be retrieved.
102
+ cb_id (str): The unique identifier of the cookbook to fetch.
100
103
 
101
104
  Returns:
102
- CookbookArguments: An object encapsulating the details of the retrieved cookbook.
105
+ CookbookArguments: An instance filled with the cookbook's details.
103
106
 
104
107
  Raises:
105
- Exception: If there's an error during the file reading process or any other operation within the method.
108
+ RuntimeError: If the cookbook ID is empty or the specified cookbook does not exist.
109
+ Exception: For any issues encountered during the file reading or data parsing process.
106
110
  """
107
111
  try:
108
112
  if not cb_id:
109
- raise RuntimeError("Cookbook ID is empty")
113
+ raise RuntimeError("Cookbook ID is empty.")
110
114
 
111
- obj_results = Storage.read_object(
112
- EnvVariables.COOKBOOKS.name, cb_id, "json"
113
- )
114
- if obj_results:
115
- return CookbookArguments(**obj_results)
116
- else:
117
- raise RuntimeError(f"Unable to get results for {cb_id}.")
115
+ cookbook_details = Cookbook._read_cookbook(cb_id)
116
+ if not cookbook_details:
117
+ raise RuntimeError(f"Cookbook with ID '{cb_id}' does not exist.")
118
+
119
+ return CookbookArguments(**cookbook_details)
118
120
 
119
121
  except Exception as e:
120
- print(f"Failed to read cookbook: {str(e)}")
121
- raise e
122
+ logger.error(f"Failed to read cookbook: {str(e)}")
123
+ raise
124
+
125
+ @staticmethod
126
+ def _read_cookbook(cb_id: str) -> dict:
127
+ """
128
+ Retrieves the cookbook's information from a JSON file.
129
+
130
+ This internal method is designed to fetch the details of a specific cookbook by its ID. It searches for the
131
+ corresponding JSON file within the directory specified by `EnvVariables.COOKBOOKS`. The method returns a
132
+ dictionary containing the cookbook's information.
133
+
134
+ Args:
135
+ cb_id (str): The unique identifier of the cookbook whose information is being retrieved.
136
+
137
+ Returns:
138
+ dict: A dictionary with the cookbook's information.
139
+ """
140
+ cookbook_info = {"id": cb_id}
141
+ cookbook_info.update(
142
+ Storage.read_object(EnvVariables.COOKBOOKS.name, cb_id, "json")
143
+ )
144
+ return cookbook_info
122
145
 
123
146
  @staticmethod
124
147
  def update(cb_args: CookbookArguments) -> bool:
@@ -145,17 +168,18 @@ class Cookbook:
145
168
  ):
146
169
  raise RuntimeError(f"{recipe} recipe does not exist.")
147
170
 
148
- # Convert the cookbook arguments to a dictionary
171
+ # Serialize the CookbookArguments object to a dictionary and remove derived properties
149
172
  cb_info = cb_args.to_dict()
173
+ cb_info.pop("id", None) # The 'id' is derived and should not be written
150
174
 
151
- # Write the updated cookbook information to the file
175
+ # Write the updated cookbook information to the storage
152
176
  Storage.create_object(
153
177
  EnvVariables.COOKBOOKS.name, cb_args.id, cb_info, "json"
154
178
  )
155
179
  return True
156
180
 
157
181
  except Exception as e:
158
- print(f"Failed to update cookbook: {str(e)}")
182
+ logger.error(f"Failed to update cookbook: {str(e)}")
159
183
  raise e
160
184
 
161
185
  @staticmethod
@@ -181,7 +205,7 @@ class Cookbook:
181
205
  return True
182
206
 
183
207
  except Exception as e:
184
- print(f"Failed to delete cookbook: {str(e)}")
208
+ logger.error(f"Failed to delete cookbook: {str(e)}")
185
209
  raise e
186
210
 
187
211
  @staticmethod
@@ -210,16 +234,12 @@ class Cookbook:
210
234
  if "__" in cb:
211
235
  continue
212
236
 
213
- cb_info = CookbookArguments(
214
- **Storage.read_object(
215
- EnvVariables.COOKBOOKS.name, Path(cb).stem, "json"
216
- )
217
- )
237
+ cb_info = CookbookArguments(**Cookbook._read_cookbook(Path(cb).stem))
218
238
  retn_cbs.append(cb_info)
219
239
  retn_cbs_ids.append(cb_info.id)
220
240
 
221
241
  return retn_cbs_ids, retn_cbs
222
242
 
223
243
  except Exception as e:
224
- print(f"Failed to get available cookbooks: {str(e)}")
244
+ logger.error(f"Failed to get available cookbooks: {str(e)}")
225
245
  raise e
@@ -2,17 +2,137 @@ from __future__ import annotations
2
2
 
3
3
  from pathlib import Path
4
4
 
5
+ import pandas as pd
6
+ from datasets import load_dataset
5
7
  from pydantic import validate_call
8
+ from slugify import slugify
6
9
 
7
10
  from moonshot.src.configs.env_variables import EnvVariables
8
11
  from moonshot.src.datasets.dataset_arguments import DatasetArguments
9
12
  from moonshot.src.storage.storage import Storage
13
+ from moonshot.src.utils.log import configure_logger
14
+
15
+ # Create a logger for this module
16
+ logger = configure_logger(__name__)
10
17
 
11
18
 
12
19
  class Dataset:
13
20
  cache_name = "cache"
14
21
  cache_extension = "json"
15
22
 
23
+ @staticmethod
24
+ @validate_call
25
+ def create(ds_args: DatasetArguments, method: str, **kwargs) -> str:
26
+ """
27
+ Creates a new dataset based on the provided arguments and method.
28
+
29
+ This method generates a unique dataset ID using the dataset name,
30
+ checks if a dataset with the same ID already exists, and then
31
+ creates the dataset using the specified method (either 'csv' or
32
+ 'hf'). The dataset information is then stored as a JSON object.
33
+
34
+ Args:
35
+ ds_args (DatasetArguments): The arguments containing dataset
36
+ details such as name, description, reference, and license.
37
+ method (str): The method to create the dataset. It can be either
38
+ 'csv' or 'hf'.
39
+ **kwargs: Additional keyword arguments required for the specified
40
+ method.
41
+ - For 'csv' method: 'csv_file_path' (str): The file path to
42
+ the CSV file.
43
+ - For 'hf' method: 'dataset_name' (str): The name of the
44
+ Hugging Face dataset.
45
+ 'dataset_config' (str): The configuration of the Hugging
46
+ Face dataset.
47
+ 'split' (str): The split of the dataset to load.
48
+ 'input_col' (list[str]): The list of input columns.
49
+ 'target_col' (str): The target column.
50
+
51
+ Returns:
52
+ str: The unique ID of the created dataset.
53
+
54
+ Raises:
55
+ RuntimeError: If a dataset with the same ID already exists.
56
+ Exception: If any other error occurs during the dataset creation
57
+ process.
58
+ """
59
+ try:
60
+ ds_id = slugify(ds_args.name, lowercase=True)
61
+
62
+ # Check if the dataset exists
63
+ if Storage.is_object_exists(EnvVariables.DATASETS.name, ds_id, "json"):
64
+ raise RuntimeError(f"Dataset with ID '{ds_id}' already exists.")
65
+
66
+ examples = [{}]
67
+ if method == "csv":
68
+ examples = Dataset._convert_csv(kwargs["csv_file_path"])
69
+ elif method == "hf":
70
+ examples = Dataset._download_hf(kwargs)
71
+
72
+ ds_info = {
73
+ "id": ds_id,
74
+ "name": ds_args.name,
75
+ "description": ds_args.description,
76
+ "reference": ds_args.reference,
77
+ "license": ds_args.license,
78
+ "examples": examples,
79
+ }
80
+
81
+ # Write as JSON output
82
+ file_path = Storage.create_object(
83
+ EnvVariables.DATASETS.name, ds_id, ds_info, "json"
84
+ )
85
+ return file_path
86
+
87
+ except Exception as e:
88
+ logger.error(f"Failed to create dataset: {str(e)}")
89
+ raise e
90
+
91
+ @staticmethod
92
+ def _convert_csv(csv_file: str) -> list[dict]:
93
+ """
94
+ Converts a CSV file to a list of dictionaries.
95
+
96
+ This method reads a CSV file and converts its contents into a list of dictionaries,
97
+ where each dictionary represents a row in the CSV file.
98
+
99
+ Args:
100
+ csv_file (str): The file path to the CSV file.
101
+
102
+ Returns:
103
+ list[dict]: A list of dictionaries representing the CSV data.
104
+ """
105
+ df = pd.read_csv(csv_file)
106
+ data = df.to_dict("records")
107
+ return data
108
+
109
+ @staticmethod
110
+ def _download_hf(hf_args) -> list[dict]:
111
+ """
112
+ Downloads a dataset from Hugging Face and converts it to a list of dictionaries.
113
+
114
+ This method loads a dataset from Hugging Face based on the provided arguments and converts
115
+ its contents into a list of dictionaries, where each dictionary contains 'input' and 'target' keys.
116
+
117
+ Args:
118
+ hf_args (dict): A dictionary containing the following keys:
119
+ - 'dataset_name' (str): The name of the Hugging Face dataset.
120
+ - 'dataset_config' (str): The configuration of the Hugging Face dataset.
121
+ - 'split' (str): The split of the dataset to load.
122
+ - 'input_col' (list[str]): The list of input columns.
123
+ - 'target_col' (str): The target column.
124
+
125
+ Returns:
126
+ list[dict]: A list of dictionaries representing the dataset.
127
+ """
128
+ dataset = load_dataset(hf_args["dataset_name"], hf_args["dataset_config"])
129
+ data = []
130
+ for example in dataset[hf_args["split"]]:
131
+ input_data = " ".join([str(example[col]) for col in hf_args["input_col"]])
132
+ target_data = str(example[hf_args["target_col"]])
133
+ data.append({"input": input_data, "target": target_data})
134
+ return data
135
+
16
136
  @staticmethod
17
137
  @validate_call
18
138
  def read(ds_id: str) -> DatasetArguments:
@@ -40,7 +160,7 @@ class Dataset:
40
160
  raise RuntimeError("Dataset ID is empty")
41
161
 
42
162
  except Exception as e:
43
- print(f"Failed to read dataset: {str(e)}")
163
+ logger.error(f"Failed to read dataset: {str(e)}")
44
164
  raise e
45
165
 
46
166
  @staticmethod
@@ -111,7 +231,7 @@ class Dataset:
111
231
  return True
112
232
 
113
233
  except Exception as e:
114
- print(f"Failed to delete dataset: {str(e)}")
234
+ logger.error(f"Failed to delete dataset: {str(e)}")
115
235
  raise e
116
236
 
117
237
  @staticmethod
@@ -137,7 +257,7 @@ class Dataset:
137
257
  )
138
258
  return cache_info if cache_info else {}
139
259
  except Exception as e:
140
- print(f"Failed to retrieve cache information: {str(e)}")
260
+ logger.error(f"Failed to retrieve cache information: {str(e)}")
141
261
  return {}
142
262
 
143
263
  @staticmethod
@@ -156,7 +276,7 @@ class Dataset:
156
276
  obj_extension=Dataset.cache_extension,
157
277
  )
158
278
  except Exception as e:
159
- print(f"Failed to write cache information: {str(e)}")
279
+ logger.error(f"Failed to write cache information: {str(e)}")
160
280
  raise e
161
281
 
162
282
  @staticmethod
@@ -215,7 +335,7 @@ class Dataset:
215
335
  return retn_datasets_ids, retn_datasets
216
336
 
217
337
  except Exception as e:
218
- print(f"Failed to get available datasets: {str(e)}")
338
+ logger.error(f"Failed to get available datasets: {str(e)}")
219
339
  raise e
220
340
 
221
341
  @staticmethod
@@ -8,6 +8,10 @@ from moonshot.src.configs.env_variables import EnvVariables
8
8
  from moonshot.src.metrics.metric_interface import MetricInterface
9
9
  from moonshot.src.storage.storage import Storage
10
10
  from moonshot.src.utils.import_modules import get_instance
11
+ from moonshot.src.utils.log import configure_logger
12
+
13
+ # Create a logger for this module
14
+ logger = configure_logger(__name__)
11
15
 
12
16
 
13
17
  class Metric:
@@ -64,7 +68,7 @@ class Metric:
64
68
  return True
65
69
 
66
70
  except Exception as e:
67
- print(f"Failed to delete metric: {str(e)}")
71
+ logger.error(f"Failed to delete metric: {str(e)}")
68
72
  raise e
69
73
 
70
74
  @staticmethod
@@ -90,7 +94,7 @@ class Metric:
90
94
  )
91
95
  return cache_info if cache_info else {}
92
96
  except Exception:
93
- print(
97
+ logger.error(
94
98
  f"No previous cache information because {Metric.cache_name} is not found."
95
99
  )
96
100
  return {}
@@ -111,7 +115,7 @@ class Metric:
111
115
  obj_extension=Metric.cache_extension,
112
116
  )
113
117
  except Exception as e:
114
- print(f"Failed to write cache information: {str(e)}")
118
+ logger.error(f"Failed to write cache information: {str(e)}")
115
119
  raise e
116
120
 
117
121
  @staticmethod
@@ -154,7 +158,7 @@ class Metric:
154
158
  return retn_mets_ids, retn_mets
155
159
 
156
160
  except Exception as e:
157
- print(f"Failed to get available metrics: {str(e)}")
161
+ logger.error(f"Failed to get available metrics: {str(e)}")
158
162
  raise e
159
163
 
160
164
  @staticmethod
@@ -3,8 +3,12 @@ from typing import Any
3
3
 
4
4
  from moonshot.src.configs.env_variables import EnvVariables
5
5
  from moonshot.src.storage.storage import Storage
6
+ from moonshot.src.utils.log import configure_logger
6
7
  from moonshot.src.utils.timeit import timeit
7
8
 
9
+ # Create a logger for this module
10
+ logger = configure_logger(__name__)
11
+
8
12
 
9
13
  class MetricInterface:
10
14
  config_name = "metrics_config"
@@ -75,8 +79,10 @@ class MetricInterface:
75
79
  return obj_results.get(met_id, {})
76
80
 
77
81
  except Exception as e:
78
- print(f"[MetricInterface] Failed to read metrics configuration: {str(e)}")
79
- print("Attempting to create empty metrics configuration...")
82
+ logger.warning(
83
+ f"[MetricInterface] Failed to read metrics configuration: {str(e)}"
84
+ )
85
+ logger.info("Attempting to create empty metrics configuration...")
80
86
  try:
81
87
  Storage.create_object(
82
88
  obj_type=EnvVariables.METRICS.name,
@@ -4,6 +4,10 @@ from jinja2 import Template
4
4
 
5
5
  from moonshot.src.configs.env_variables import EnvVariables
6
6
  from moonshot.src.storage.storage import Storage
7
+ from moonshot.src.utils.log import configure_logger
8
+
9
+ # Create a logger for this module
10
+ logger = configure_logger(__name__)
7
11
 
8
12
 
9
13
  class PromptTemplate:
@@ -79,7 +83,7 @@ class PromptTemplate:
79
83
  return True
80
84
 
81
85
  except Exception as e:
82
- print(f"Failed to delete prompt template: {str(e)}")
86
+ logger.error(f"Failed to delete prompt template: {str(e)}")
83
87
  raise e
84
88
 
85
89
  @staticmethod