aiverify-moonshot 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
- aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
- aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
- moonshot/__init__.py +0 -0
- moonshot/__main__.py +198 -0
- moonshot/api.py +155 -0
- moonshot/integrations/__init__.py +0 -0
- moonshot/integrations/cli/__init__.py +0 -0
- moonshot/integrations/cli/__main__.py +25 -0
- moonshot/integrations/cli/active_session_cfg.py +1 -0
- moonshot/integrations/cli/benchmark/__init__.py +0 -0
- moonshot/integrations/cli/benchmark/benchmark.py +186 -0
- moonshot/integrations/cli/benchmark/cookbook.py +545 -0
- moonshot/integrations/cli/benchmark/datasets.py +164 -0
- moonshot/integrations/cli/benchmark/metrics.py +141 -0
- moonshot/integrations/cli/benchmark/recipe.py +598 -0
- moonshot/integrations/cli/benchmark/result.py +216 -0
- moonshot/integrations/cli/benchmark/run.py +140 -0
- moonshot/integrations/cli/benchmark/runner.py +174 -0
- moonshot/integrations/cli/cli.py +64 -0
- moonshot/integrations/cli/common/__init__.py +0 -0
- moonshot/integrations/cli/common/common.py +72 -0
- moonshot/integrations/cli/common/connectors.py +325 -0
- moonshot/integrations/cli/common/display_helper.py +42 -0
- moonshot/integrations/cli/common/prompt_template.py +94 -0
- moonshot/integrations/cli/initialisation/__init__.py +0 -0
- moonshot/integrations/cli/initialisation/initialisation.py +14 -0
- moonshot/integrations/cli/redteam/__init__.py +0 -0
- moonshot/integrations/cli/redteam/attack_module.py +70 -0
- moonshot/integrations/cli/redteam/context_strategy.py +147 -0
- moonshot/integrations/cli/redteam/prompt_template.py +67 -0
- moonshot/integrations/cli/redteam/redteam.py +90 -0
- moonshot/integrations/cli/redteam/session.py +467 -0
- moonshot/integrations/web_api/.env.dev +7 -0
- moonshot/integrations/web_api/__init__.py +0 -0
- moonshot/integrations/web_api/__main__.py +56 -0
- moonshot/integrations/web_api/app.py +125 -0
- moonshot/integrations/web_api/container.py +146 -0
- moonshot/integrations/web_api/log/.gitkeep +0 -0
- moonshot/integrations/web_api/logging_conf.py +114 -0
- moonshot/integrations/web_api/routes/__init__.py +0 -0
- moonshot/integrations/web_api/routes/attack_modules.py +66 -0
- moonshot/integrations/web_api/routes/benchmark.py +116 -0
- moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
- moonshot/integrations/web_api/routes/context_strategy.py +129 -0
- moonshot/integrations/web_api/routes/cookbook.py +225 -0
- moonshot/integrations/web_api/routes/dataset.py +120 -0
- moonshot/integrations/web_api/routes/endpoint.py +282 -0
- moonshot/integrations/web_api/routes/metric.py +78 -0
- moonshot/integrations/web_api/routes/prompt_template.py +128 -0
- moonshot/integrations/web_api/routes/recipe.py +219 -0
- moonshot/integrations/web_api/routes/redteam.py +609 -0
- moonshot/integrations/web_api/routes/runner.py +239 -0
- moonshot/integrations/web_api/schemas/__init__.py +0 -0
- moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
- moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
- moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
- moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
- moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
- moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
- moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
- moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
- moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
- moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
- moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
- moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
- moonshot/integrations/web_api/services/__init__.py +0 -0
- moonshot/integrations/web_api/services/attack_module_service.py +34 -0
- moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
- moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
- moonshot/integrations/web_api/services/base_service.py +8 -0
- moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
- moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
- moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
- moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
- moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
- moonshot/integrations/web_api/services/cookbook_service.py +194 -0
- moonshot/integrations/web_api/services/dataset_service.py +20 -0
- moonshot/integrations/web_api/services/endpoint_service.py +65 -0
- moonshot/integrations/web_api/services/metric_service.py +14 -0
- moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
- moonshot/integrations/web_api/services/recipe_service.py +155 -0
- moonshot/integrations/web_api/services/runner_service.py +147 -0
- moonshot/integrations/web_api/services/session_service.py +350 -0
- moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
- moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
- moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
- moonshot/integrations/web_api/types/types.py +99 -0
- moonshot/src/__init__.py +0 -0
- moonshot/src/api/__init__.py +0 -0
- moonshot/src/api/api_connector.py +58 -0
- moonshot/src/api/api_connector_endpoint.py +162 -0
- moonshot/src/api/api_context_strategy.py +57 -0
- moonshot/src/api/api_cookbook.py +160 -0
- moonshot/src/api/api_dataset.py +46 -0
- moonshot/src/api/api_environment_variables.py +17 -0
- moonshot/src/api/api_metrics.py +51 -0
- moonshot/src/api/api_prompt_template.py +43 -0
- moonshot/src/api/api_recipe.py +182 -0
- moonshot/src/api/api_red_teaming.py +59 -0
- moonshot/src/api/api_result.py +84 -0
- moonshot/src/api/api_run.py +74 -0
- moonshot/src/api/api_runner.py +132 -0
- moonshot/src/api/api_session.py +290 -0
- moonshot/src/configs/__init__.py +0 -0
- moonshot/src/configs/env_variables.py +187 -0
- moonshot/src/connectors/__init__.py +0 -0
- moonshot/src/connectors/connector.py +327 -0
- moonshot/src/connectors/connector_prompt_arguments.py +17 -0
- moonshot/src/connectors_endpoints/__init__.py +0 -0
- moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
- moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
- moonshot/src/cookbooks/__init__.py +0 -0
- moonshot/src/cookbooks/cookbook.py +225 -0
- moonshot/src/cookbooks/cookbook_arguments.py +34 -0
- moonshot/src/datasets/__init__.py +0 -0
- moonshot/src/datasets/dataset.py +255 -0
- moonshot/src/datasets/dataset_arguments.py +50 -0
- moonshot/src/metrics/__init__.py +0 -0
- moonshot/src/metrics/metric.py +192 -0
- moonshot/src/metrics/metric_interface.py +95 -0
- moonshot/src/prompt_templates/__init__.py +0 -0
- moonshot/src/prompt_templates/prompt_template.py +103 -0
- moonshot/src/recipes/__init__.py +0 -0
- moonshot/src/recipes/recipe.py +340 -0
- moonshot/src/recipes/recipe_arguments.py +111 -0
- moonshot/src/redteaming/__init__.py +0 -0
- moonshot/src/redteaming/attack/__init__.py +0 -0
- moonshot/src/redteaming/attack/attack_module.py +618 -0
- moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
- moonshot/src/redteaming/attack/context_strategy.py +131 -0
- moonshot/src/redteaming/context_strategy/__init__.py +0 -0
- moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
- moonshot/src/redteaming/session/__init__.py +0 -0
- moonshot/src/redteaming/session/chat.py +209 -0
- moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
- moonshot/src/redteaming/session/red_teaming_type.py +6 -0
- moonshot/src/redteaming/session/session.py +775 -0
- moonshot/src/results/__init__.py +0 -0
- moonshot/src/results/result.py +119 -0
- moonshot/src/results/result_arguments.py +44 -0
- moonshot/src/runners/__init__.py +0 -0
- moonshot/src/runners/runner.py +476 -0
- moonshot/src/runners/runner_arguments.py +46 -0
- moonshot/src/runners/runner_type.py +6 -0
- moonshot/src/runs/__init__.py +0 -0
- moonshot/src/runs/run.py +344 -0
- moonshot/src/runs/run_arguments.py +162 -0
- moonshot/src/runs/run_progress.py +145 -0
- moonshot/src/runs/run_status.py +10 -0
- moonshot/src/storage/__init__.py +0 -0
- moonshot/src/storage/db_interface.py +128 -0
- moonshot/src/storage/io_interface.py +31 -0
- moonshot/src/storage/storage.py +525 -0
- moonshot/src/utils/__init__.py +0 -0
- moonshot/src/utils/import_modules.py +96 -0
- moonshot/src/utils/timeit.py +25 -0
|
File without changes
|
moonshot/src/runs/run.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from moonshot.src.configs.env_variables import EnvVariables
|
|
8
|
+
from moonshot.src.results.result_arguments import ResultArguments
|
|
9
|
+
from moonshot.src.runners.runner_type import RunnerType
|
|
10
|
+
from moonshot.src.runs.run_arguments import RunArguments
|
|
11
|
+
from moonshot.src.runs.run_progress import RunProgress
|
|
12
|
+
from moonshot.src.runs.run_status import RunStatus
|
|
13
|
+
from moonshot.src.storage.db_interface import DBInterface
|
|
14
|
+
from moonshot.src.storage.storage import Storage
|
|
15
|
+
from moonshot.src.utils.import_modules import get_instance
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Run:
    """
    A single execution of a runner, persisted as one row of ``run_table``.

    A Run owns three things: the ``RunArguments`` describing the execution, a
    ``RunProgress`` that is handed the very same arguments object, and an
    ``asyncio.Event`` that is set when cancellation is requested.
    """

    sql_create_run_table = """
        CREATE TABLE IF NOT EXISTS run_table (
        run_id INTEGER PRIMARY KEY AUTOINCREMENT,
        runner_id text NOT NULL,
        runner_type text NOT NULL,
        runner_args text NOT NULL,
        endpoints text NOT NULL,
        results_file text NOT NULL,
        start_time INTEGER NOT NULL,
        end_time INTEGER NOT NULL,
        duration INTEGER NOT NULL,
        error_messages text NOT NULL,
        raw_results text NOT NULL,
        results text NOT NULL,
        status text NOT NULL
        );
    """
    sql_create_run_record = """
        INSERT INTO run_table (
        runner_id,runner_type,runner_args,endpoints,results_file,start_time,end_time,duration,error_messages,raw_results,results,status)
        VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
    """
    sql_read_run_record = """
        SELECT * from run_table WHERE run_id=?
    """
    sql_read_latest_run_record = """
        SELECT * FROM run_table WHERE run_id=(SELECT MAX(run_id) FROM run_table)
    """
    sql_read_all_run_records = """
        SELECT * FROM run_table
    """

    def __init__(
        self,
        runner_id: str,
        runner_type: RunnerType,
        runner_args: dict,
        database_instance: Any | None,
        endpoints: list[str],
        results_file: str,
        progress_callback_func: Callable | None = None,
    ) -> None:
        """
        Prepare a new, not-yet-started run.

        Args:
            runner_id (str): The id of the runner this run belongs to.
            runner_type (RunnerType): The type of the runner.
            runner_args (dict): Arguments for the runner; ``_load_module`` looks up
                module names in this dictionary.
            database_instance (Any | None): The database used to persist the run.
                When truthy, ``run_table`` is created if it does not exist yet.
            endpoints (list[str]): The endpoints the run targets.
            results_file (str): The results file associated with the run.
            progress_callback_func (Callable | None): Optional callback forwarded
                to ``RunProgress``.
        """
        # Times, duration, errors and results start out empty; status starts as PENDING.
        self.run_arguments = RunArguments(
            runner_id=runner_id,
            runner_type=runner_type,
            runner_args=runner_args,
            database_instance=database_instance,
            endpoints=endpoints,
            results_file=results_file,
            start_time=0.0,
            end_time=0.0,
            duration=0,
            error_messages=[],
            raw_results={},
            results={},
            status=RunStatus.PENDING,
        )
        # RunProgress receives the same run_arguments object (a reference, not a copy).
        self.run_progress = RunProgress(
            self.run_arguments,
            progress_callback_func,
        )
        # Set by cancel(); passed to the runner module in run().
        self.cancel_event = asyncio.Event()
        # Make sure the run table exists before any record is written.
        if database_instance:
            Storage.create_database_table(database_instance, Run.sql_create_run_table)

    @staticmethod
    def load(database_instance: DBInterface | None, run_id: int | None) -> RunArguments:
        """
        Load one run record from the database.

        Args:
            database_instance (DBInterface | None): The database to read from.
            run_id (int | None): The id of the run to read, or None to read the run
                with the highest run_id.

        Returns:
            RunArguments: The run rebuilt from the stored record via
            ``RunArguments.from_tuple``.

        Raises:
            RuntimeError: If no database instance is given, or no record is returned.
        """
        if not database_instance:
            raise RuntimeError("[Run] Database instance not provided.")

        # Only the query and its parameters differ between the two lookups.
        if run_id is not None:
            query, params = Run.sql_read_run_record, (run_id,)
        else:
            query, params = Run.sql_read_latest_run_record, ()

        run_arguments_info = Storage.read_database_record(
            database_instance,
            params,
            query,
        )
        if not run_arguments_info:
            raise RuntimeError(
                f"[Run] Failed to get database record for run_id {run_id}: {database_instance}"
            )
        return RunArguments.from_tuple(run_arguments_info)

    @staticmethod
    def get_all_runs(database_instance: DBInterface) -> list[RunArguments]:
        """
        Retrieve every run record from the database.

        Args:
            database_instance (DBInterface): The database to read from.

        Returns:
            list[RunArguments]: One RunArguments per stored record; an empty list
            when the query returns nothing.

        Raises:
            RuntimeError: If no database instance is given.
        """
        if not database_instance:
            raise RuntimeError("[Run] Database instance not provided.")

        all_run_arguments_info = Storage.read_database_records(
            database_instance,
            Run.sql_read_all_run_records,
        )
        if not all_run_arguments_info:
            return []
        return [RunArguments.from_tuple(info) for info in all_run_arguments_info]

    def cancel(self) -> None:
        """
        Request cancellation of the run.

        Sets ``cancel_event``. The event is handed to the runner module in ``run``;
        this method does not interrupt anything by itself.
        """
        print("[Run] Cancelling run...")
        self.cancel_event.set()

    async def run(self) -> ResultArguments | None:
        """
        Execute the run: create the record, load the modules, run them in order.

        Failures inside a stage are not raised to the caller. Each stage catches
        its own exceptions, reports them through ``run_progress.notify_error`` and
        the run continues with the next stage.

        Returns:
            ResultArguments | None: Whatever the result processing module's
            ``generate`` returned, or None when that module could not be loaded or
            failed.
        """
        print("[Run] Part 0: Initialising run...")
        self._initialise_run()

        print("[Run] Part 1: Loading asyncio running loop...")
        loop = asyncio.get_running_loop()

        print("[Run] Part 2: Loading modules...")
        runner_module_instance, result_module_instance = self._load_processing_modules()

        print("[Run] Part 3: Running runner processing module...")
        runner_results = await self._run_runner_module(loop, runner_module_instance)

        print("[Run] Part 4: Running result processing module...")
        updated_runner_results = self._run_result_module(
            result_module_instance, runner_results
        )

        print("[Run] Part 5: Wrap up run...")
        return updated_runner_results

    def _initialise_run(self) -> None:
        """Part 0: stamp the times, insert the run record and mark the run as running."""
        start_time = time.perf_counter()
        try:
            self.run_arguments.start_time = time.time()
            self.run_arguments.end_time = time.time()

            database_instance = self.run_arguments.database_instance
            if not database_instance:
                raise RuntimeError(
                    "[Run] Failed to create record: db_instance is not initialised."
                )

            inserted_record = Storage.create_database_record(
                database_instance,
                self.run_arguments.to_create_tuple(),
                Run.sql_create_run_record,
            )
            if not inserted_record:
                raise RuntimeError(
                    "[Run] Failed to create record: record not inserted."
                )
            # The first element of the inserted record is used as the run id.
            self.run_arguments.run_id = inserted_record[0]

            self.run_progress.notify_progress(status=RunStatus.RUNNING)

        except Exception as e:
            error_message = (
                f"[Run] Failed to initialise run in Part 0 due to error: {str(e)}"
            )
            self.run_progress.notify_error(error_message)

        finally:
            print(
                f"[Run] Initialise run took {(time.perf_counter() - start_time):.4f}s"
            )

    def _load_processing_modules(self) -> tuple:
        """Part 2: load the runner and result processing modules (None for any not loaded)."""
        start_time = time.perf_counter()
        runner_module_instance = None
        result_module_instance = None
        try:
            # If the runner module fails to load, the result module is not attempted.
            runner_module_instance = self._load_module(
                "runner_processing_module", EnvVariables.RUNNERS_MODULES.name
            )
            result_module_instance = self._load_module(
                "result_processing_module", EnvVariables.RESULTS_MODULES.name
            )
        except Exception as e:
            self.run_progress.notify_error(f"[Run] Module loading error: {e}")
        finally:
            print(
                f"[Run] Module loading took {(time.perf_counter() - start_time):.4f}s"
            )
        return runner_module_instance, result_module_instance

    async def _run_runner_module(
        self, loop: asyncio.AbstractEventLoop, runner_module_instance: Any
    ) -> Any:
        """Part 3: await the runner module's generate(); returns None on failure."""
        start_time = time.perf_counter()
        runner_results = None
        try:
            if not runner_module_instance:
                raise RuntimeError("Failed to initialise runner module instance.")

            runner_results = await runner_module_instance.generate(  # type: ignore ; ducktyping
                loop,
                self.run_arguments.runner_args,
                self.run_arguments.database_instance,
                self.run_arguments.endpoints,
                self.run_progress,
                self.cancel_event,
            )

        except Exception as e:
            error_message = f"[Run] Failed to run runner processing module in Part 3 due to error: {str(e)}"
            self.run_progress.notify_error(error_message)

        finally:
            print(
                f"[Run] Running runner processing module took {(time.perf_counter() - start_time):.4f}s"
            )
        return runner_results

    def _run_result_module(
        self, result_module_instance: Any, runner_results: Any
    ) -> ResultArguments | None:
        """Part 4: feed the runner results to the result module and publish its results."""
        start_time = time.perf_counter()
        updated_runner_results = None
        try:
            if not result_module_instance:
                raise RuntimeError("Failed to initialise result module instance.")

            # Called even when runner_results is None (Part 3 failed).
            updated_runner_results = result_module_instance.generate(  # type: ignore ; ducktyping
                runner_results
            )
            if updated_runner_results:
                self.run_progress.notify_progress(
                    results=updated_runner_results.results
                )

        except Exception as e:
            error_message = f"[Run] Failed to run result processing module in Part 4 due to error: {str(e)}"
            self.run_progress.notify_error(error_message)

        finally:
            print(
                f"[Run] Running result processing module took {(time.perf_counter() - start_time):.4f}s"
            )
        return updated_runner_results

    def _load_module(self, arg_key: str, env_var: str):
        """
        Load and instantiate the module named in the runner arguments.

        Args:
            arg_key (str): The key in ``runner_args`` that holds the module name.
            env_var (str): The environment variable name passed to
                ``Storage.get_filepath`` to locate the module's ``py`` file.

        Returns:
            The object produced by calling what ``get_instance`` returned.

        Raises:
            RuntimeError: If the module name is missing or ``get_instance`` returns
                nothing.
        """
        module_name = self.run_arguments.runner_args.get(arg_key)
        if not module_name:
            raise RuntimeError(f"[Run] Module name for '{arg_key}' not provided.")

        module_instance = get_instance(
            module_name,
            Storage.get_filepath(env_var, module_name, "py"),
        )
        if not module_instance:
            raise RuntimeError(
                f"[Run] Unable to get instance for module '{module_name}'."
            )
        # get_instance hands back a callable; calling it creates the module object.
        return module_instance()
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from moonshot.src.runners.runner_type import RunnerType
|
|
8
|
+
from moonshot.src.runs.run_status import RunStatus
|
|
9
|
+
from moonshot.src.storage.db_interface import DBInterface
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RunArguments(BaseModel):
    """
    The state of one run, plus conversions to and from its database row.

    The dict and list fields are written to the database as their ``str()``
    representation and read back with ``ast.literal_eval``, so they must only
    contain values that round-trip as Python literals.
    """

    class Config:
        # DBInterface is not a pydantic type, so arbitrary types must be allowed.
        arbitrary_types_allowed = True

    run_id: int = 0  # The id of the run

    runner_id: str  # The id of the runner

    runner_type: RunnerType  # Run type for the Run.

    runner_args: dict  # Dictionary containing arguments for the runner.

    database_instance: DBInterface | None  # Database instance for the Run.

    endpoints: list[str]  # List of endpoints for the Run.

    results_file: str  # The results file associated with the Run.

    start_time: float  # The start time of the Run.

    end_time: float  # The end time of the Run.

    duration: int  # The duration of the Run.

    error_messages: list[str]  # The error messages associated with the Run.

    raw_results: dict  # Results of the Run by runners-module.

    results: dict  # Generated Results of the Run by results-module.

    status: RunStatus  # Status of the Run.

    def to_dict(self) -> dict:
        """
        Convert the run arguments into a dictionary.

        The dictionary holds run_id, runner_id, runner_type, runner_args, endpoints,
        results_file, start_time, end_time, duration, error_messages, raw_results,
        results and status. The values are the attributes themselves, not copies or
        string forms. ``database_instance`` is not included.

        Returns:
            dict: The run's attributes keyed by attribute name.
        """
        return {
            "run_id": self.run_id,
            "runner_id": self.runner_id,
            "runner_type": self.runner_type,
            "runner_args": self.runner_args,
            "endpoints": self.endpoints,
            "results_file": self.results_file,
            "start_time": self.start_time,
            "end_time": self.end_time,
            "duration": self.duration,
            "error_messages": self.error_messages,
            "raw_results": self.raw_results,
            "results": self.results,
            "status": self.status,
        }

    def to_create_tuple(self) -> tuple:
        """
        Build the value tuple for inserting a new run record.

        The elements are, in order: runner_id, runner_type name in lowercase,
        str(runner_args), str(endpoints), results_file, start_time, end_time,
        duration, str(error_messages), str(raw_results), str(results) and the
        status name in lowercase. ``run_id`` is not part of the tuple.

        Returns:
            tuple: The twelve values described above.
        """
        return (
            self.runner_id,
            self.runner_type.name.lower(),
            str(self.runner_args),
            str(self.endpoints),
            self.results_file,
            self.start_time,
            self.end_time,
            self.duration,
            str(self.error_messages),
            str(self.raw_results),
            str(self.results),
            self.status.name.lower(),
        )

    def to_tuple(self) -> tuple:
        """
        Serialize the run arguments to a tuple that ends with the run id.

        The first twelve elements are exactly those of ``to_create_tuple``; the
        thirteenth is ``run_id``.

        Returns:
            tuple: The serialized attributes followed by run_id.
        """
        # Same leading values as an insert, with the row key appended last.
        return self.to_create_tuple() + (self.run_id,)

    @classmethod
    def from_tuple(cls, run_record: tuple) -> RunArguments:
        """
        Rebuild a RunArguments object from a stored run record.

        The record is read by position: run_id, runner_id, runner_type,
        runner_args, endpoints, results_file, start_time, end_time, duration,
        error_messages, raw_results, results, status. Note that this order starts
        with run_id, unlike ``to_tuple`` which ends with it.

        runner_args, endpoints, error_messages, raw_results and results are parsed
        with ``ast.literal_eval``. ``database_instance`` is always set to None.

        Args:
            run_record (tuple): The thirteen-element record described above.

        Returns:
            RunArguments: A new instance populated from the record.
        """
        return cls(
            run_id=run_record[0],
            runner_id=run_record[1],
            runner_type=RunnerType(run_record[2]),
            runner_args=ast.literal_eval(run_record[3]),
            database_instance=None,
            endpoints=ast.literal_eval(run_record[4]),
            results_file=run_record[5],
            start_time=float(run_record[6]),
            end_time=float(run_record[7]),
            duration=run_record[8],
            error_messages=ast.literal_eval(run_record[9]),
            raw_results=ast.literal_eval(run_record[10]),
            results=ast.literal_eval(run_record[11]),
            status=RunStatus(run_record[12]),
        )
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
from moonshot.src.runs.run_arguments import RunArguments
|
|
5
|
+
from moonshot.src.runs.run_status import RunStatus
|
|
6
|
+
from moonshot.src.storage.storage import Storage
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RunProgress:
    """
    Tracks the progress of a run and publishes it to the database and to an optional callback.

    The instance keeps the cookbook/recipe position and the derived progress percentage, while the
    run-level fields (status, duration, error messages, ...) live on the wrapped run_arguments object.
    """

    # Parameter order must match the tuple produced by run_arguments.to_tuple(): run_id is bound last.
    sql_update_run_record = """
        UPDATE run_table SET runner_id=?,runner_type=?,runner_args=?,endpoints=?,results_file=?,start_time=?,end_time=?,
        duration=?,error_messages=?,raw_results=?,results=?,status=?
        WHERE run_id=?
    """

    def __init__(
        self, run_arguments: RunArguments, run_progress_callback_func: Callable | None
    ):
        """
        Initialise the progress tracker.

        Args:
            run_arguments (RunArguments): The run whose state is updated and persisted.
            run_progress_callback_func (Callable | None): Called with the dictionary from get_dict() after
                every progress update; skipped when None.
        """
        # Information on the run and callback for progress updating
        self.run_arguments = run_arguments
        self.run_progress_callback_func = run_progress_callback_func

        # Information to be sent back through callback.
        # -1 means "not set yet"; the totals are only used for the percentage once they are > 0.
        self.cookbook_index: int = -1
        self.cookbook_name: str = ""
        self.cookbook_total: int = -1
        self.recipe_index: int = -1
        self.recipe_name: str = ""
        self.recipe_total: int = -1
        self.progress: int = 0

    def notify_error(self, error_message: str) -> None:
        """
        Records an error that occurred during the run.

        This method prints the error message, appends it to the run arguments' error messages list,
        and then calls notify_progress with the status set to RunStatus.RUNNING_WITH_ERRORS.

        Args:
            error_message (str): The error message to be printed and recorded.
        """
        # Update error message
        print(error_message)
        self.run_arguments.error_messages.append(error_message)

        # Update progress status
        self.notify_progress(status=RunStatus.RUNNING_WITH_ERRORS)

    def notify_progress(self, **kwargs) -> None:
        """
        Updates the run progress information and publishes it.

        The steps are performed in this order:
        1. If the run has a start time greater than 0, the end time is set to the current time and the
           duration is recomputed as whole seconds.
        2. Each keyword argument is assigned to this object and/or to run_arguments, wherever an attribute
           of that name already exists. Keys matching neither are ignored.
        3. The progress percentage is recomputed from the cookbook/recipe indices and totals.
        4. If run_arguments has a database instance, the run record is updated through Storage; otherwise
           a message is printed.
        5. If a callback was provided, it is called with the dictionary returned by get_dict().

        Args:
            **kwargs: Attribute names and their new values.

        Returns:
            None
        """
        # Update run arguments values
        if self.run_arguments.start_time > 0.0:
            self.run_arguments.end_time = time.time()
            self.run_arguments.duration = int(
                self.run_arguments.end_time - self.run_arguments.start_time
            )

        # Update run arguments values with provided key-value pairs.
        # Applied after the timing refresh, so explicitly passed values take precedence.
        for key, value in kwargs.items():
            # Update self values
            if hasattr(self, key):
                setattr(self, key, value)

            # Update self.run_arguments
            if hasattr(self.run_arguments, key):
                setattr(self.run_arguments, key, value)

        # Calculate percentage
        if self.cookbook_total > 0:
            per_cookbook_percentage = 100 / self.cookbook_total
            if self.recipe_total > 0 and self.cookbook_index != self.cookbook_total:
                # Calculate percentage with cookbook and recipe defined.
                # Each part is truncated separately before being summed.
                per_recipe_percentage = per_cookbook_percentage / self.recipe_total
                self.progress = int(
                    self.cookbook_index * per_cookbook_percentage
                ) + int(self.recipe_index * per_recipe_percentage)
            else:
                # Calculate percentage with cookbook defined and no recipes defined
                # Or cookbook index and total is same.
                self.progress = int(self.cookbook_index * per_cookbook_percentage)
        elif self.recipe_total > 0:
            # There is no cookbook, calculate for recipes defined
            self.progress = int(self.recipe_index * (100 / self.recipe_total))
        else:
            # Initialization: set 0
            self.progress = 0

        # Update database record
        if self.run_arguments.database_instance:
            Storage.update_database_record(
                self.run_arguments.database_instance,
                self.run_arguments.to_tuple(),
                RunProgress.sql_update_run_record,
            )
        else:
            print(
                "[RunProgress] Failed to update run progress: db_instance is not initialised."
            )

        # If a callback function is provided, call it with the updated progress dictionary
        if self.run_progress_callback_func:
            self.run_progress_callback_func(self.get_dict())

    def get_dict(self) -> dict:
        """
        Constructs and returns a dictionary with the current state of the run.

        The dictionary has the keys current_runner_id, current_runner_type, current_duration,
        current_status, current_cookbook_index, current_cookbook_name, current_cookbook_total,
        current_recipe_index, current_recipe_name, current_recipe_total, current_progress and
        current_error_messages.

        The runner type and status are reported through their ``.value``. Error messages are
        de-duplicated and listed in the order in which each message first occurred.

        Returns:
            dict: A dictionary representing the current run state.
        """
        # dict.fromkeys removes duplicates while keeping first-occurrence order; a set would make
        # the reported order arbitrary and different from one interpreter run to the next.
        unique_error_messages = list(dict.fromkeys(self.run_arguments.error_messages))

        return {
            "current_runner_id": self.run_arguments.runner_id,
            "current_runner_type": self.run_arguments.runner_type.value,
            "current_duration": self.run_arguments.duration,
            "current_status": self.run_arguments.status.value,
            "current_cookbook_index": self.cookbook_index,
            "current_cookbook_name": self.cookbook_name,
            "current_cookbook_total": self.cookbook_total,
            "current_recipe_index": self.recipe_index,
            "current_recipe_name": self.recipe_name,
            "current_recipe_total": self.recipe_total,
            "current_progress": self.progress,
            "current_error_messages": unique_error_messages,
        }
|
|
File without changes
|