aiverify-moonshot 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/METADATA +2 -2
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/RECORD +70 -56
- moonshot/__main__.py +77 -35
- moonshot/api.py +16 -0
- moonshot/integrations/cli/benchmark/benchmark.py +29 -13
- moonshot/integrations/cli/benchmark/cookbook.py +62 -24
- moonshot/integrations/cli/benchmark/datasets.py +79 -40
- moonshot/integrations/cli/benchmark/metrics.py +62 -23
- moonshot/integrations/cli/benchmark/recipe.py +89 -69
- moonshot/integrations/cli/benchmark/result.py +85 -47
- moonshot/integrations/cli/benchmark/run.py +99 -59
- moonshot/integrations/cli/common/common.py +20 -6
- moonshot/integrations/cli/common/connectors.py +154 -74
- moonshot/integrations/cli/common/dataset.py +66 -0
- moonshot/integrations/cli/common/prompt_template.py +57 -19
- moonshot/integrations/cli/redteam/attack_module.py +90 -24
- moonshot/integrations/cli/redteam/context_strategy.py +83 -23
- moonshot/integrations/cli/redteam/prompt_template.py +1 -1
- moonshot/integrations/cli/redteam/redteam.py +52 -6
- moonshot/integrations/cli/redteam/session.py +565 -44
- moonshot/integrations/cli/utils/process_data.py +52 -0
- moonshot/integrations/web_api/__main__.py +2 -0
- moonshot/integrations/web_api/app.py +6 -6
- moonshot/integrations/web_api/container.py +12 -2
- moonshot/integrations/web_api/routes/bookmark.py +173 -0
- moonshot/integrations/web_api/routes/dataset.py +46 -1
- moonshot/integrations/web_api/schemas/bookmark_create_dto.py +13 -0
- moonshot/integrations/web_api/schemas/dataset_create_dto.py +18 -0
- moonshot/integrations/web_api/schemas/recipe_create_dto.py +0 -2
- moonshot/integrations/web_api/services/bookmark_service.py +94 -0
- moonshot/integrations/web_api/services/dataset_service.py +25 -0
- moonshot/integrations/web_api/services/recipe_service.py +0 -1
- moonshot/integrations/web_api/services/utils/file_manager.py +52 -0
- moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +0 -1
- moonshot/integrations/web_api/temp/.gitkeep +0 -0
- moonshot/src/api/api_bookmark.py +95 -0
- moonshot/src/api/api_connector_endpoint.py +1 -1
- moonshot/src/api/api_context_strategy.py +2 -2
- moonshot/src/api/api_dataset.py +35 -0
- moonshot/src/api/api_recipe.py +0 -3
- moonshot/src/api/api_session.py +1 -1
- moonshot/src/bookmark/bookmark.py +257 -0
- moonshot/src/bookmark/bookmark_arguments.py +38 -0
- moonshot/src/configs/env_variables.py +12 -2
- moonshot/src/connectors/connector.py +15 -7
- moonshot/src/connectors_endpoints/connector_endpoint.py +65 -49
- moonshot/src/cookbooks/cookbook.py +57 -37
- moonshot/src/datasets/dataset.py +125 -5
- moonshot/src/metrics/metric.py +8 -4
- moonshot/src/metrics/metric_interface.py +8 -2
- moonshot/src/prompt_templates/prompt_template.py +5 -1
- moonshot/src/recipes/recipe.py +38 -40
- moonshot/src/recipes/recipe_arguments.py +0 -4
- moonshot/src/redteaming/attack/attack_module.py +18 -8
- moonshot/src/redteaming/attack/context_strategy.py +6 -2
- moonshot/src/redteaming/session/session.py +15 -11
- moonshot/src/results/result.py +7 -3
- moonshot/src/runners/runner.py +65 -42
- moonshot/src/runs/run.py +15 -11
- moonshot/src/runs/run_progress.py +7 -3
- moonshot/src/storage/db_interface.py +14 -0
- moonshot/src/storage/storage.py +33 -2
- moonshot/src/utils/find_feature.py +45 -0
- moonshot/src/utils/log.py +72 -0
- moonshot/src/utils/pagination.py +25 -0
- moonshot/src/utils/timeit.py +8 -1
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/WHEEL +0 -0
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/AUTHORS.md +0 -0
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/LICENSE.md +0 -0
- {aiverify_moonshot-0.4.1.dist-info → aiverify_moonshot-0.4.3.dist-info}/licenses/NOTICES.md +0 -0
moonshot/src/connectors_endpoints/connector_endpoint.py CHANGED

@@ -8,33 +8,39 @@ from moonshot.src.connectors_endpoints.connector_endpoint_arguments import (
     ConnectorEndpointArguments,
 )
 from moonshot.src.storage.storage import Storage
+from moonshot.src.utils.log import configure_logger
+
+# Create a logger for this module
+logger = configure_logger(__name__)
 
 
 class ConnectorEndpoint:
     @staticmethod
     def create(ep_args: ConnectorEndpointArguments) -> str:
         """
-        Creates a new connector endpoint.
+        Creates a new connector endpoint and stores its details as a JSON object.
+
+        This method accepts a ConnectorEndpointArguments object, generates a unique slugified ID from the endpoint's
+        name, and stores the endpoint's details in a JSON file within a specified directory.
 
-
-
-
-        successful, the unique ID of the new endpoint is returned. If any error arises during the process, an exception
-        is raised and the error message is logged.
+        The directory path is determined by the `EnvVariables.CONNECTORS_ENDPOINTS` environment variable.
+        Upon successful creation, the method returns the unique ID of the endpoint.
+        If an error occurs during the creation process, the method raises an exception and logs the error message.
 
         Args:
-            ep_args (ConnectorEndpointArguments):
+            ep_args (ConnectorEndpointArguments): The details of the endpoint to be created,
+            encapsulated in a ConnectorEndpointArguments object.
 
         Returns:
-            str: The unique ID of the newly created endpoint.
+            str: The unique ID of the newly created endpoint, derived from slugifying the endpoint's name.
 
         Raises:
-            Exception: If
+            Exception: If an error occurs during the creation process, including issues with storing the endpoint's
+            details.
         """
         try:
            ep_id = slugify(ep_args.name, lowercase=True)
            ep_info = {
-                "id": ep_id,
                "name": ep_args.name,
                "connector_type": ep_args.connector_type,
                "uri": ep_args.uri,

@@ -51,59 +57,63 @@ class ConnectorEndpoint:
            return ep_id
 
        except Exception as e:
-
+            logger.error(f"Failed to create endpoint: {str(e)}")
            raise e
 
    @staticmethod
    @validate_call
    def read(ep_id: str) -> ConnectorEndpointArguments:
        """
-
+        Retrieves the details of a specified endpoint by its ID.
 
-        This method
-
-
-        error
+        This method searches for the endpoint's corresponding JSON file within the directory defined by the
+        `EnvVariables.CONNECTORS_ENDPOINTS` environment variable. It then constructs and returns a
+        ConnectorEndpointArguments object populated with the endpoint's details. If the endpoint ID is not found or
+        any other error occurs, an exception is raised with an appropriate error message.
 
        Args:
-            ep_id (str): The unique
+            ep_id (str): The unique identifier of the endpoint whose details are to be retrieved.
 
        Returns:
-            ConnectorEndpointArguments: An
+            ConnectorEndpointArguments: An instance filled with the endpoint's details.
 
        Raises:
-
+            RuntimeError: If the endpoint ID is empty or the specified endpoint does not exist.
+            Exception: For any issues encountered during the file reading or data parsing process.
        """
        try:
-            if ep_id:
-
-
-
-
-                raise RuntimeError("
+            if not ep_id:
+                raise RuntimeError("Connector Endpoint ID is empty.")
+
+            endpoint_details = ConnectorEndpoint._read_endpoint(ep_id)
+            if not endpoint_details:
+                raise RuntimeError(f"Endpoint with ID '{ep_id}' does not exist.")
+
+            return ConnectorEndpointArguments(**endpoint_details)
 
        except Exception as e:
-
+            logger.error(f"Failed to read endpoint: {str(e)}")
            raise e
 
    @staticmethod
    def _read_endpoint(ep_id: str) -> dict:
        """
-
+        Retrieves the endpoint's information from a JSON file, including its creation datetime.
 
-        This method
-
-
-        method.
+        This internal method is designed to fetch the details of a specific endpoint by its ID. It searches for the
+        corresponding JSON file within the directory specified by `EnvVariables.CONNECTORS_ENDPOINTS`. The method
+        returns a dictionary containing the endpoint's information, enriched with the creation datetime. Errors
+        encountered during this process are managed by the method that invokes this one.
 
        Args:
-            ep_id (str): The unique identifier of the endpoint
+            ep_id (str): The unique identifier of the endpoint whose information is being retrieved.
 
        Returns:
-            dict: A dictionary
+            dict: A dictionary with the endpoint's information, including its creation datetime.
        """
-        connector_endpoint_info =
-
+        connector_endpoint_info = {"id": ep_id}
+        connector_endpoint_info.update(
+            Storage.read_object(EnvVariables.CONNECTORS_ENDPOINTS.name, ep_id, "json")
        )
        creation_datetime = Storage.get_creation_datetime(
            EnvVariables.CONNECTORS_ENDPOINTS.name, ep_id, "json"

@@ -116,35 +126,41 @@ class ConnectorEndpoint:
    @staticmethod
    def update(ep_args: ConnectorEndpointArguments) -> bool:
        """
-        Updates the endpoint information based on the provided
+        Updates the endpoint information in the storage based on the provided ConnectorEndpointArguments object.
+
+        This method serializes the provided ConnectorEndpointArguments object into a dictionary, excluding the 'id' and
+        'created_date' keys. It then persists the updated information to the corresponding JSON file within the
+        directory defined by `EnvVariables.CONNECTORS_ENDPOINTS`.
 
-        This
-        'created_date' key if it exists. It then writes the updated information to the corresponding JSON file
-        in the directory specified by `EnvVariables.CONNECTORS_ENDPOINTS`.
+        This operation ensures that the endpoint's mutable attributes are updated according to the provided arguments.
 
        Args:
-            ep_args (ConnectorEndpointArguments):
+            ep_args (ConnectorEndpointArguments): The object encapsulating the updated attributes of the endpoint.
 
        Returns:
-            bool:
+            bool: Indicates whether the update operation was successful. Returns True if the update was successfully
+            persisted to the storage; otherwise, an exception is raised.
 
        Raises:
-            Exception:
+            Exception: Signifies a failure in the update process, potentially due to issues with data serialization or
+            storage access.
        """
        try:
-            #
-            # Remove created_date if it exists
+            # Serialize the ConnectorEndpointArguments object to a dictionary and remove derived properties
            ep_info = ep_args.to_dict()
-            ep_info.pop("
+            ep_info.pop("id", None)  # The 'id' is derived and should not be written
+            ep_info.pop(
+                "created_date", None
+            )  # The 'created_date' is derived and should not be written
 
-            # Write the updated endpoint information to the
+            # Write the updated endpoint information to the storage
            Storage.create_object(
                EnvVariables.CONNECTORS_ENDPOINTS.name, ep_args.id, ep_info, "json"
            )
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to update endpoint: {str(e)}")
            raise e
 
    @staticmethod

@@ -171,7 +187,7 @@ class ConnectorEndpoint:
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to delete endpoint: {str(e)}")
            raise e
 
    @staticmethod

@@ -207,5 +223,5 @@ class ConnectorEndpoint:
            return retn_eps_ids, retn_eps
 
        except Exception as e:
-
+            logger.error(f"Failed to get available endpoints: {str(e)}")
            raise e
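For orientation, the reworked `read` path above now validates the ID before touching storage and reports failures through the module-level logger introduced in this release. A minimal usage sketch, assuming an installed moonshot package; the endpoint ID is a hypothetical placeholder, not one shipped with the wheel:

    from moonshot.src.connectors_endpoints.connector_endpoint import ConnectorEndpoint

    # Reading an existing endpoint returns a ConnectorEndpointArguments instance.
    # An empty or unknown ID now raises RuntimeError, and the failure is also
    # recorded via the module logger created with configure_logger(__name__).
    try:
        ep_args = ConnectorEndpoint.read("my-openai-endpoint")  # hypothetical ID
        print(ep_args.name, ep_args.connector_type)
    except RuntimeError as err:
        print(f"Endpoint lookup failed: {err}")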
moonshot/src/cookbooks/cookbook.py CHANGED

@@ -8,6 +8,10 @@ from slugify import slugify
 from moonshot.src.configs.env_variables import EnvVariables
 from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
 from moonshot.src.storage.storage import Storage
+from moonshot.src.utils.log import configure_logger
+
+# Create a logger for this module
+logger = configure_logger(__name__)
 
 
 class Cookbook:

@@ -31,8 +35,7 @@ class Cookbook:
        Returns:
            Cookbook: An instance of the Cookbook class populated with the loaded cookbook information.
        """
-
-        return cls(CookbookArguments(**cb_info))
+        return cls(Cookbook.read(cb_id))
 
    @staticmethod
    def create(cb_args: CookbookArguments) -> str:

@@ -58,6 +61,11 @@ class Cookbook:
        """
        try:
            cb_id = slugify(cb_args.name, lowercase=True)
+            cb_info = {
+                "name": cb_args.name,
+                "description": cb_args.description,
+                "recipes": cb_args.recipes,
+            }
 
            # check if the cookbook exists
            if Storage.is_object_exists(EnvVariables.COOKBOOKS.name, cb_id, "json"):

@@ -70,55 +78,70 @@ class Cookbook:
            ):
                raise RuntimeError(f"{recipe} recipe does not exist.")
 
-            cb_info = {
-                "id": cb_id,
-                "name": cb_args.name,
-                "description": cb_args.description,
-                "recipes": cb_args.recipes,
-            }
-
            # Write as json output
            Storage.create_object(EnvVariables.COOKBOOKS.name, cb_id, cb_info, "json")
            return cb_id
 
        except Exception as e:
-
+            logger.error(f"Failed to create cookbook: {str(e)}")
            raise e
 
    @staticmethod
    @validate_call
    def read(cb_id: str) -> CookbookArguments:
        """
-
+        Fetches and returns the details of a specified cookbook by its ID.
+
+        This method takes a cookbook ID, searches for its corresponding JSON file in the directory set by
+        `EnvironmentVars.COOKBOOKS`, and constructs a CookbookArguments object with the cookbook's details.
 
-
-
-        details. If any error occurs during the process, an exception is raised and the error message is logged.
+        If the process encounters any issues, such as the file not existing or being inaccessible, it logs the error
+        and raises an exception.
 
        Args:
-            cb_id (str): The unique identifier of the cookbook to
+            cb_id (str): The unique identifier of the cookbook to fetch.
 
        Returns:
-            CookbookArguments: An
+            CookbookArguments: An instance filled with the cookbook's details.
 
        Raises:
-
+            RuntimeError: If the cookbook ID is empty or the specified cookbook does not exist.
+            Exception: For any issues encountered during the file reading or data parsing process.
        """
        try:
            if not cb_id:
-                raise RuntimeError("Cookbook ID is empty")
+                raise RuntimeError("Cookbook ID is empty.")
 
-
-
-
-
-
-            else:
-                raise RuntimeError(f"Unable to get results for {cb_id}.")
+            cookbook_details = Cookbook._read_cookbook(cb_id)
+            if not cookbook_details:
+                raise RuntimeError(f"Cookbook with ID '{cb_id}' does not exist.")
+
+            return CookbookArguments(**cookbook_details)
 
        except Exception as e:
-
-            raise
+            logger.error(f"Failed to read cookbook: {str(e)}")
+            raise
+
+    @staticmethod
+    def _read_cookbook(cb_id: str) -> dict:
+        """
+        Retrieves the cookbook's information from a JSON file.
+
+        This internal method is designed to fetch the details of a specific cookbook by its ID. It searches for the
+        corresponding JSON file within the directory specified by `EnvVariables.COOKBOOKS`. The method returns a
+        dictionary containing the cookbook's information.
+
+        Args:
+            cb_id (str): The unique identifier of the cookbook whose information is being retrieved.
+
+        Returns:
+            dict: A dictionary with the cookbook's information.
+        """
+        cookbook_info = {"id": cb_id}
+        cookbook_info.update(
+            Storage.read_object(EnvVariables.COOKBOOKS.name, cb_id, "json")
+        )
+        return cookbook_info
 
    @staticmethod
    def update(cb_args: CookbookArguments) -> bool:

@@ -145,17 +168,18 @@ class Cookbook:
            ):
                raise RuntimeError(f"{recipe} recipe does not exist.")
 
-            #
+            # Serialize the CookbookArguments object to a dictionary and remove derived properties
            cb_info = cb_args.to_dict()
+            cb_info.pop("id", None)  # The 'id' is derived and should not be written
 
-            # Write the updated cookbook information to the
+            # Write the updated cookbook information to the storage
            Storage.create_object(
                EnvVariables.COOKBOOKS.name, cb_args.id, cb_info, "json"
            )
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to update cookbook: {str(e)}")
            raise e
 
    @staticmethod

@@ -181,7 +205,7 @@ class Cookbook:
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to delete cookbook: {str(e)}")
            raise e
 
    @staticmethod

@@ -210,16 +234,12 @@ class Cookbook:
                if "__" in cb:
                    continue
 
-                cb_info = CookbookArguments(
-                    **Storage.read_object(
-                        EnvVariables.COOKBOOKS.name, Path(cb).stem, "json"
-                    )
-                )
+                cb_info = CookbookArguments(**Cookbook._read_cookbook(Path(cb).stem))
                retn_cbs.append(cb_info)
                retn_cbs_ids.append(cb_info.id)
 
            return retn_cbs_ids, retn_cbs
 
        except Exception as e:
-
+            logger.error(f"Failed to get available cookbooks: {str(e)}")
            raise e
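The cookbook changes above follow the same pattern: `read` validates the ID and delegates to the new `_read_cookbook` helper, which injects the derived `id` field, while `update` strips `id` before rewriting the JSON file. A rough sketch of how the two fit together, assuming the cookbook already exists; the cookbook ID and description are placeholders:

    from moonshot.src.cookbooks.cookbook import Cookbook

    # Cookbook.read() raises RuntimeError for an empty or unknown ID and returns a
    # CookbookArguments object whose 'id' field is filled in by _read_cookbook().
    cb_args = Cookbook.read("my-cookbook")  # hypothetical cookbook ID

    # Mutable fields can be edited and written back; 'id' is popped before the
    # JSON is persisted because it is derived from the cookbook name.
    cb_args.description = "Updated description"
    Cookbook.update(cb_args)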
moonshot/src/datasets/dataset.py CHANGED

@@ -2,17 +2,137 @@ from __future__ import annotations
 
 from pathlib import Path
 
+import pandas as pd
+from datasets import load_dataset
 from pydantic import validate_call
+from slugify import slugify
 
 from moonshot.src.configs.env_variables import EnvVariables
 from moonshot.src.datasets.dataset_arguments import DatasetArguments
 from moonshot.src.storage.storage import Storage
+from moonshot.src.utils.log import configure_logger
+
+# Create a logger for this module
+logger = configure_logger(__name__)
 
 
 class Dataset:
    cache_name = "cache"
    cache_extension = "json"
 
+    @staticmethod
+    @validate_call
+    def create(ds_args: DatasetArguments, method: str, **kwargs) -> str:
+        """
+        Creates a new dataset based on the provided arguments and method.
+
+        This method generates a unique dataset ID using the dataset name,
+        checks if a dataset with the same ID already exists, and then
+        creates the dataset using the specified method (either 'csv' or
+        'hf'). The dataset information is then stored as a JSON object.
+
+        Args:
+            ds_args (DatasetArguments): The arguments containing dataset
+                details such as name, description, reference, and license.
+            method (str): The method to create the dataset. It can be either
+                'csv' or 'hf'.
+            **kwargs: Additional keyword arguments required for the specified
+                method.
+                - For 'csv' method: 'csv_file_path' (str): The file path to
+                  the CSV file.
+                - For 'hf' method: 'dataset_name' (str): The name of the
+                  Hugging Face dataset.
+                  'dataset_config' (str): The configuration of the Hugging
+                  Face dataset.
+                  'split' (str): The split of the dataset to load.
+                  'input_col' (list[str]): The list of input columns.
+                  'target_col' (str): The target column.
+
+        Returns:
+            str: The unique ID of the created dataset.
+
+        Raises:
+            RuntimeError: If a dataset with the same ID already exists.
+            Exception: If any other error occurs during the dataset creation
+                process.
+        """
+        try:
+            ds_id = slugify(ds_args.name, lowercase=True)
+
+            # Check if the dataset exists
+            if Storage.is_object_exists(EnvVariables.DATASETS.name, ds_id, "json"):
+                raise RuntimeError(f"Dataset with ID '{ds_id}' already exists.")
+
+            examples = [{}]
+            if method == "csv":
+                examples = Dataset._convert_csv(kwargs["csv_file_path"])
+            elif method == "hf":
+                examples = Dataset._download_hf(kwargs)
+
+            ds_info = {
+                "id": ds_id,
+                "name": ds_args.name,
+                "description": ds_args.description,
+                "reference": ds_args.reference,
+                "license": ds_args.license,
+                "examples": examples,
+            }
+
+            # Write as JSON output
+            file_path = Storage.create_object(
+                EnvVariables.DATASETS.name, ds_id, ds_info, "json"
+            )
+            return file_path
+
+        except Exception as e:
+            logger.error(f"Failed to create dataset: {str(e)}")
+            raise e
+
+    @staticmethod
+    def _convert_csv(csv_file: str) -> list[dict]:
+        """
+        Converts a CSV file to a list of dictionaries.
+
+        This method reads a CSV file and converts its contents into a list of dictionaries,
+        where each dictionary represents a row in the CSV file.
+
+        Args:
+            csv_file (str): The file path to the CSV file.
+
+        Returns:
+            list[dict]: A list of dictionaries representing the CSV data.
+        """
+        df = pd.read_csv(csv_file)
+        data = df.to_dict("records")
+        return data
+
+    @staticmethod
+    def _download_hf(hf_args) -> list[dict]:
+        """
+        Downloads a dataset from Hugging Face and converts it to a list of dictionaries.
+
+        This method loads a dataset from Hugging Face based on the provided arguments and converts
+        its contents into a list of dictionaries, where each dictionary contains 'input' and 'target' keys.
+
+        Args:
+            hf_args (dict): A dictionary containing the following keys:
+                - 'dataset_name' (str): The name of the Hugging Face dataset.
+                - 'dataset_config' (str): The configuration of the Hugging Face dataset.
+                - 'split' (str): The split of the dataset to load.
+                - 'input_col' (list[str]): The list of input columns.
+                - 'target_col' (str): The target column.
+
+        Returns:
+            list[dict]: A list of dictionaries representing the dataset.
+        """
+        dataset = load_dataset(hf_args["dataset_name"], hf_args["dataset_config"])
+        data = []
+        for example in dataset[hf_args["split"]]:
+            input_data = " ".join([str(example[col]) for col in hf_args["input_col"]])
+            target_data = str(example[hf_args["target_col"]])
+            data.append({"input": input_data, "target": target_data})
+        return data
+
    @staticmethod
    @validate_call
    def read(ds_id: str) -> DatasetArguments:
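The new `Dataset.create` entry point above builds a dataset either from a local CSV file or from a Hugging Face dataset and persists it as JSON. A hedged usage sketch follows; the `DatasetArguments` field list is assumed from the keys that `create()` writes, and the file path, dataset names, and column names are illustrative placeholders only:

    from moonshot.src.datasets.dataset import Dataset
    from moonshot.src.datasets.dataset_arguments import DatasetArguments

    # Assumed minimal field set for DatasetArguments; the real model may define
    # additional or differently typed fields.
    ds_args = DatasetArguments(
        id="",                  # derived by create() via slugify, so left empty here
        name="my-csv-dataset",  # placeholder name
        description="Example dataset built from a local CSV file",
        reference="https://example.com/source",  # placeholder
        license="Apache-2.0",                     # placeholder
        examples=None,          # populated by create() from the CSV or HF rows
    )

    # Either from a local CSV file (each row becomes one example dictionary)...
    Dataset.create(ds_args, "csv", csv_file_path="data/my_dataset.csv")

    # ...or from a Hugging Face dataset: the listed input columns are joined into
    # 'input' and the target column becomes 'target'. Note that a dataset with the
    # same slugified name must not already exist, so a second run would need a
    # different DatasetArguments.name.
    Dataset.create(
        ds_args,
        "hf",
        dataset_name="squad",         # placeholder Hugging Face dataset
        dataset_config="plain_text",
        split="validation",
        input_col=["context", "question"],
        target_col="title",
    )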
@@ -40,7 +160,7 @@ class Dataset:
                raise RuntimeError("Dataset ID is empty")
 
        except Exception as e:
-
+            logger.error(f"Failed to read dataset: {str(e)}")
            raise e
 
    @staticmethod

@@ -111,7 +231,7 @@ class Dataset:
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to delete dataset: {str(e)}")
            raise e
 
    @staticmethod

@@ -137,7 +257,7 @@ class Dataset:
            )
            return cache_info if cache_info else {}
        except Exception as e:
-
+            logger.error(f"Failed to retrieve cache information: {str(e)}")
            return {}
 
    @staticmethod

@@ -156,7 +276,7 @@ class Dataset:
                obj_extension=Dataset.cache_extension,
            )
        except Exception as e:
-
+            logger.error(f"Failed to write cache information: {str(e)}")
            raise e
 
    @staticmethod

@@ -215,7 +335,7 @@ class Dataset:
            return retn_datasets_ids, retn_datasets
 
        except Exception as e:
-
+            logger.error(f"Failed to get available datasets: {str(e)}")
            raise e
 
    @staticmethod
moonshot/src/metrics/metric.py CHANGED

@@ -8,6 +8,10 @@ from moonshot.src.configs.env_variables import EnvVariables
 from moonshot.src.metrics.metric_interface import MetricInterface
 from moonshot.src.storage.storage import Storage
 from moonshot.src.utils.import_modules import get_instance
+from moonshot.src.utils.log import configure_logger
+
+# Create a logger for this module
+logger = configure_logger(__name__)
 
 
 class Metric:

@@ -64,7 +68,7 @@ class Metric:
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to delete metric: {str(e)}")
            raise e
 
    @staticmethod

@@ -90,7 +94,7 @@ class Metric:
            )
            return cache_info if cache_info else {}
        except Exception:
-
+            logger.error(
                f"No previous cache information because {Metric.cache_name} is not found."
            )
            return {}

@@ -111,7 +115,7 @@ class Metric:
                obj_extension=Metric.cache_extension,
            )
        except Exception as e:
-
+            logger.error(f"Failed to write cache information: {str(e)}")
            raise e
 
    @staticmethod

@@ -154,7 +158,7 @@ class Metric:
            return retn_mets_ids, retn_mets
 
        except Exception as e:
-
+            logger.error(f"Failed to get available metrics: {str(e)}")
            raise e
 
    @staticmethod
moonshot/src/metrics/metric_interface.py CHANGED

@@ -3,8 +3,12 @@ from typing import Any
 
 from moonshot.src.configs.env_variables import EnvVariables
 from moonshot.src.storage.storage import Storage
+from moonshot.src.utils.log import configure_logger
 from moonshot.src.utils.timeit import timeit
 
+# Create a logger for this module
+logger = configure_logger(__name__)
+
 
 class MetricInterface:
    config_name = "metrics_config"

@@ -75,8 +79,10 @@ class MetricInterface:
            return obj_results.get(met_id, {})
 
        except Exception as e:
-
-
+            logger.warning(
+                f"[MetricInterface] Failed to read metrics configuration: {str(e)}"
+            )
+            logger.info("Attempting to create empty metrics configuration...")
            try:
                Storage.create_object(
                    obj_type=EnvVariables.METRICS.name,
moonshot/src/prompt_templates/prompt_template.py CHANGED

@@ -4,6 +4,10 @@ from jinja2 import Template
 
 from moonshot.src.configs.env_variables import EnvVariables
 from moonshot.src.storage.storage import Storage
+from moonshot.src.utils.log import configure_logger
+
+# Create a logger for this module
+logger = configure_logger(__name__)
 
 
 class PromptTemplate:

@@ -79,7 +83,7 @@ class PromptTemplate:
            return True
 
        except Exception as e:
-
+            logger.error(f"Failed to delete prompt template: {str(e)}")
            raise e
 
    @staticmethod
|