ert 16.0.9__py3-none-any.whl → 19.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _ert/events.py +19 -2
- _ert/forward_model_runner/client.py +6 -2
- _ert/forward_model_runner/fm_dispatch.py +9 -6
- _ert/forward_model_runner/reporting/event.py +1 -0
- _ert/forward_model_runner/runner.py +1 -2
- _ert/utils.py +12 -0
- ert/__main__.py +58 -38
- ert/analysis/_enif_update.py +8 -4
- ert/analysis/_es_update.py +19 -6
- ert/analysis/_update_commons.py +16 -6
- ert/base_model_context.py +1 -1
- ert/cli/main.py +17 -12
- ert/cli/monitor.py +7 -0
- ert/config/__init__.py +17 -6
- ert/config/_create_observation_dataframes.py +118 -21
- ert/config/_get_num_cpu.py +1 -1
- ert/config/_observations.py +91 -2
- ert/config/_read_summary.py +74 -328
- ert/config/design_matrix.py +62 -23
- ert/config/distribution.py +1 -1
- ert/config/ensemble_config.py +9 -17
- ert/config/ert_config.py +155 -58
- ert/config/everest_control.py +234 -0
- ert/config/{everest_constraints_config.py → everest_response.py} +27 -15
- ert/config/field.py +99 -90
- ert/config/forward_model_step.py +122 -17
- ert/config/gen_data_config.py +5 -10
- ert/config/gen_kw_config.py +11 -41
- ert/config/known_response_types.py +14 -0
- ert/config/parameter_config.py +1 -33
- ert/config/parsing/_option_dict.py +10 -2
- ert/config/parsing/config_errors.py +1 -1
- ert/config/parsing/config_keywords.py +2 -1
- ert/config/parsing/config_schema.py +23 -11
- ert/config/parsing/config_schema_deprecations.py +3 -3
- ert/config/parsing/config_schema_item.py +26 -11
- ert/config/parsing/context_values.py +3 -3
- ert/config/parsing/file_context_token.py +1 -1
- ert/config/parsing/observations_parser.py +6 -2
- ert/config/parsing/queue_system.py +9 -0
- ert/config/parsing/schema_item_type.py +1 -0
- ert/config/queue_config.py +42 -50
- ert/config/response_config.py +0 -8
- ert/config/rft_config.py +275 -0
- ert/config/summary_config.py +3 -8
- ert/config/surface_config.py +73 -26
- ert/config/workflow_fixtures.py +2 -1
- ert/config/workflow_job.py +135 -54
- ert/dark_storage/client/__init__.py +2 -2
- ert/dark_storage/client/_session.py +4 -4
- ert/dark_storage/client/client.py +2 -2
- ert/dark_storage/common.py +12 -3
- ert/dark_storage/compute/misfits.py +11 -7
- ert/dark_storage/endpoints/compute/misfits.py +6 -4
- ert/dark_storage/endpoints/ensembles.py +4 -0
- ert/dark_storage/endpoints/experiment_server.py +30 -24
- ert/dark_storage/endpoints/experiments.py +2 -2
- ert/dark_storage/endpoints/observations.py +8 -6
- ert/dark_storage/endpoints/parameters.py +4 -12
- ert/dark_storage/endpoints/responses.py +24 -5
- ert/dark_storage/json_schema/ensemble.py +3 -0
- ert/dark_storage/json_schema/experiment.py +1 -1
- ert/data/_measured_data.py +6 -5
- ert/ensemble_evaluator/__init__.py +8 -1
- ert/ensemble_evaluator/config.py +2 -1
- ert/ensemble_evaluator/evaluator.py +81 -29
- ert/ensemble_evaluator/event.py +6 -0
- ert/ensemble_evaluator/snapshot.py +3 -1
- ert/ensemble_evaluator/state.py +1 -0
- ert/field_utils/__init__.py +8 -0
- ert/field_utils/field_utils.py +228 -15
- ert/field_utils/grdecl_io.py +1 -1
- ert/field_utils/roff_io.py +1 -1
- ert/gui/__init__.py +5 -2
- ert/gui/ertnotifier.py +1 -1
- ert/gui/ertwidgets/__init__.py +23 -16
- ert/gui/ertwidgets/analysismoduleedit.py +2 -2
- ert/gui/ertwidgets/checklist.py +1 -1
- ert/gui/ertwidgets/closabledialog.py +2 -0
- ert/gui/ertwidgets/copyablelabel.py +2 -0
- ert/gui/ertwidgets/create_experiment_dialog.py +3 -1
- ert/gui/ertwidgets/ensembleselector.py +2 -2
- ert/gui/ertwidgets/listeditbox.py +2 -0
- ert/gui/ertwidgets/models/__init__.py +2 -0
- ert/gui/ertwidgets/models/activerealizationsmodel.py +5 -1
- ert/gui/ertwidgets/models/path_model.py +1 -1
- ert/gui/ertwidgets/models/targetensemblemodel.py +5 -1
- ert/gui/ertwidgets/models/text_model.py +4 -1
- ert/gui/ertwidgets/pathchooser.py +0 -3
- ert/gui/ertwidgets/searchbox.py +17 -4
- ert/gui/ertwidgets/stringbox.py +2 -0
- ert/gui/{suggestor → ertwidgets/suggestor}/_suggestor_message.py +13 -4
- ert/gui/{suggestor → ertwidgets/suggestor}/suggestor.py +63 -30
- ert/gui/main.py +41 -13
- ert/gui/main_window.py +3 -7
- ert/gui/model/fm_step_list.py +3 -0
- ert/gui/model/real_list.py +1 -0
- ert/gui/model/snapshot.py +1 -0
- ert/gui/simulation/combobox_with_description.py +3 -0
- ert/gui/simulation/ensemble_experiment_panel.py +8 -2
- ert/gui/simulation/ensemble_information_filter_panel.py +7 -2
- ert/gui/simulation/ensemble_smoother_panel.py +8 -2
- ert/gui/simulation/evaluate_ensemble_panel.py +17 -7
- ert/gui/simulation/experiment_panel.py +18 -6
- ert/gui/simulation/manual_update_panel.py +35 -10
- ert/gui/simulation/multiple_data_assimilation_panel.py +13 -9
- ert/gui/simulation/run_dialog.py +47 -20
- ert/gui/simulation/single_test_run_panel.py +6 -3
- ert/gui/simulation/view/progress_widget.py +2 -0
- ert/gui/simulation/view/realization.py +5 -1
- ert/gui/simulation/view/update.py +2 -0
- ert/gui/summarypanel.py +20 -1
- ert/gui/tools/event_viewer/panel.py +3 -4
- ert/gui/tools/event_viewer/tool.py +2 -0
- ert/gui/tools/load_results/load_results_panel.py +1 -1
- ert/gui/tools/load_results/load_results_tool.py +2 -0
- ert/gui/tools/manage_experiments/export_dialog.py +136 -0
- ert/gui/tools/manage_experiments/manage_experiments_panel.py +2 -0
- ert/gui/tools/manage_experiments/storage_info_widget.py +121 -16
- ert/gui/tools/manage_experiments/storage_widget.py +4 -3
- ert/gui/tools/plot/customize/color_chooser.py +5 -2
- ert/gui/tools/plot/customize/customize_plot_dialog.py +2 -0
- ert/gui/tools/plot/customize/default_customization_view.py +4 -0
- ert/gui/tools/plot/customize/limits_customization_view.py +3 -0
- ert/gui/tools/plot/customize/statistics_customization_view.py +3 -0
- ert/gui/tools/plot/customize/style_chooser.py +2 -0
- ert/gui/tools/plot/customize/style_customization_view.py +3 -0
- ert/gui/tools/plot/data_type_keys_widget.py +2 -0
- ert/gui/tools/plot/data_type_proxy_model.py +3 -0
- ert/gui/tools/plot/plot_api.py +50 -28
- ert/gui/tools/plot/plot_ensemble_selection_widget.py +17 -10
- ert/gui/tools/plot/plot_widget.py +15 -2
- ert/gui/tools/plot/plot_window.py +41 -19
- ert/gui/tools/plot/plottery/plot_config.py +2 -0
- ert/gui/tools/plot/plottery/plot_context.py +14 -0
- ert/gui/tools/plot/plottery/plots/__init__.py +2 -0
- ert/gui/tools/plot/plottery/plots/cesp.py +3 -1
- ert/gui/tools/plot/plottery/plots/distribution.py +6 -1
- ert/gui/tools/plot/plottery/plots/ensemble.py +13 -5
- ert/gui/tools/plot/plottery/plots/gaussian_kde.py +12 -2
- ert/gui/tools/plot/plottery/plots/histogram.py +3 -1
- ert/gui/tools/plot/plottery/plots/misfits.py +436 -0
- ert/gui/tools/plot/plottery/plots/observations.py +18 -4
- ert/gui/tools/plot/plottery/plots/statistics.py +62 -20
- ert/gui/tools/plot/plottery/plots/std_dev.py +3 -1
- ert/gui/tools/plot/widgets/clearable_line_edit.py +9 -0
- ert/gui/tools/plot/widgets/filter_popup.py +2 -0
- ert/gui/tools/plot/widgets/filterable_kw_list_model.py +3 -0
- ert/gui/tools/plugins/plugin.py +1 -1
- ert/gui/tools/plugins/plugins_tool.py +2 -0
- ert/gui/tools/plugins/process_job_dialog.py +3 -0
- ert/gui/tools/workflows/workflow_dialog.py +2 -0
- ert/gui/tools/workflows/workflows_tool.py +2 -0
- ert/libres_facade.py +5 -7
- ert/logging/__init__.py +4 -1
- ert/mode_definitions.py +2 -0
- ert/plugins/__init__.py +4 -6
- ert/plugins/hook_implementations/workflows/csv_export.py +2 -3
- ert/plugins/hook_implementations/workflows/gen_data_rft_export.py +10 -2
- ert/plugins/hook_specifications/__init__.py +0 -10
- ert/plugins/hook_specifications/jobs.py +0 -9
- ert/plugins/plugin_manager.py +53 -124
- ert/resources/forward_models/run_reservoirsimulator.py +8 -4
- ert/resources/forward_models/template_render.py +10 -10
- ert/resources/shell_scripts/delete_directory.py +2 -2
- ert/run_models/__init__.py +24 -6
- ert/run_models/_create_run_path.py +133 -38
- ert/run_models/ensemble_experiment.py +10 -4
- ert/run_models/ensemble_information_filter.py +8 -1
- ert/run_models/ensemble_smoother.py +9 -3
- ert/run_models/evaluate_ensemble.py +8 -6
- ert/run_models/event.py +7 -3
- ert/run_models/everest_run_model.py +337 -113
- ert/run_models/initial_ensemble_run_model.py +25 -24
- ert/run_models/manual_update.py +6 -3
- ert/run_models/manual_update_enif.py +37 -0
- ert/run_models/model_factory.py +78 -18
- ert/run_models/multiple_data_assimilation.py +22 -11
- ert/run_models/run_model.py +72 -73
- ert/run_models/single_test_run.py +7 -4
- ert/run_models/update_run_model.py +4 -2
- ert/runpaths.py +5 -6
- ert/sample_prior.py +9 -4
- ert/scheduler/__init__.py +10 -5
- ert/scheduler/driver.py +40 -0
- ert/scheduler/event.py +3 -1
- ert/scheduler/job.py +23 -13
- ert/scheduler/lsf_driver.py +15 -5
- ert/scheduler/openpbs_driver.py +10 -4
- ert/scheduler/scheduler.py +5 -0
- ert/scheduler/slurm_driver.py +20 -5
- ert/services/__init__.py +2 -2
- ert/services/_base_service.py +37 -20
- ert/services/_storage_main.py +20 -18
- ert/services/ert_server.py +317 -0
- ert/shared/_doc_utils/__init__.py +4 -2
- ert/shared/_doc_utils/ert_jobs.py +1 -4
- ert/shared/net_utils.py +43 -18
- ert/shared/storage/connection.py +3 -3
- ert/shared/version.py +3 -3
- ert/storage/__init__.py +14 -1
- ert/storage/local_ensemble.py +44 -13
- ert/storage/local_experiment.py +54 -34
- ert/storage/local_storage.py +90 -58
- ert/storage/migration/to10.py +3 -2
- ert/storage/migration/to11.py +9 -10
- ert/storage/migration/to12.py +19 -20
- ert/storage/migration/to13.py +28 -27
- ert/storage/migration/to14.py +3 -3
- ert/storage/migration/to15.py +25 -0
- ert/storage/migration/to16.py +38 -0
- ert/storage/migration/to17.py +42 -0
- ert/storage/migration/to18.py +11 -0
- ert/storage/migration/to19.py +34 -0
- ert/storage/migration/to20.py +23 -0
- ert/storage/migration/to21.py +25 -0
- ert/storage/migration/to6.py +3 -2
- ert/storage/migration/to7.py +12 -13
- ert/storage/migration/to8.py +9 -11
- ert/storage/migration/to9.py +5 -4
- ert/storage/realization_storage_state.py +7 -7
- ert/substitutions.py +12 -28
- ert/validation/active_range.py +7 -7
- ert/validation/ensemble_realizations_argument.py +4 -2
- ert/validation/rangestring.py +16 -16
- ert/workflow_runner.py +6 -3
- {ert-16.0.9.dist-info → ert-19.0.0rc2.dist-info}/METADATA +21 -15
- ert-19.0.0rc2.dist-info/RECORD +524 -0
- {ert-16.0.9.dist-info → ert-19.0.0rc2.dist-info}/WHEEL +1 -1
- everest/api/everest_data_api.py +14 -1
- everest/assets/everest_logo.svg +406 -0
- everest/bin/config_branch_script.py +30 -14
- everest/bin/everconfigdump_script.py +2 -10
- everest/bin/everest_script.py +53 -33
- everest/bin/everlint_script.py +3 -5
- everest/bin/kill_script.py +7 -5
- everest/bin/main.py +11 -24
- everest/bin/monitor_script.py +64 -35
- everest/bin/utils.py +58 -43
- everest/bin/visualization_script.py +23 -13
- everest/config/__init__.py +4 -1
- everest/config/control_config.py +81 -6
- everest/config/control_variable_config.py +4 -3
- everest/config/everest_config.py +102 -79
- everest/config/forward_model_config.py +5 -3
- everest/config/install_data_config.py +7 -5
- everest/config/install_job_config.py +45 -3
- everest/config/install_template_config.py +3 -3
- everest/config/optimization_config.py +19 -6
- everest/config/output_constraint_config.py +8 -2
- everest/config/server_config.py +6 -55
- everest/config/simulator_config.py +62 -17
- everest/config/utils.py +25 -105
- everest/config/validation_utils.py +34 -15
- everest/config_file_loader.py +30 -21
- everest/detached/__init__.py +0 -6
- everest/detached/client.py +7 -52
- everest/detached/everserver.py +19 -45
- everest/everest_storage.py +24 -40
- everest/gui/everest_client.py +2 -3
- everest/gui/main_window.py +2 -2
- everest/optimizer/everest2ropt.py +68 -42
- everest/optimizer/opt_model_transforms.py +15 -20
- everest/optimizer/utils.py +0 -29
- everest/plugins/hook_specs.py +0 -24
- everest/strings.py +1 -6
- everest/util/__init__.py +3 -1
- ert/config/everest_objective_config.py +0 -95
- ert/config/ext_param_config.py +0 -107
- ert/gui/tools/export/__init__.py +0 -3
- ert/gui/tools/export/export_panel.py +0 -83
- ert/gui/tools/export/export_tool.py +0 -67
- ert/gui/tools/export/exporter.py +0 -36
- ert/plugins/hook_specifications/ecl_config.py +0 -29
- ert/services/storage_service.py +0 -127
- ert/summary_key_type.py +0 -234
- ert-16.0.9.dist-info/RECORD +0 -521
- everest/bin/everexport_script.py +0 -53
- everest/config/sampler_config.py +0 -103
- everest/simulator/__init__.py +0 -88
- everest/simulator/everest_to_ert.py +0 -252
- /ert/gui/{suggestor → ertwidgets/suggestor}/__init__.py +0 -0
- /ert/gui/{suggestor → ertwidgets/suggestor}/_colors.py +0 -0
- {ert-16.0.9.dist-info → ert-19.0.0rc2.dist-info}/entry_points.txt +0 -0
- {ert-16.0.9.dist-info → ert-19.0.0rc2.dist-info}/licenses/COPYING +0 -0
- {ert-16.0.9.dist-info → ert-19.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ import traceback
|
|
|
9
9
|
import uuid
|
|
10
10
|
from base64 import b64decode
|
|
11
11
|
from queue import SimpleQueue
|
|
12
|
+
from typing import Annotated
|
|
12
13
|
|
|
13
14
|
from fastapi import (
|
|
14
15
|
APIRouter,
|
|
@@ -25,11 +26,12 @@ from starlette.requests import Request
|
|
|
25
26
|
from starlette.responses import PlainTextResponse, Response
|
|
26
27
|
from starlette.websockets import WebSocket
|
|
27
28
|
|
|
29
|
+
from ert.base_model_context import use_runtime_plugins
|
|
28
30
|
from ert.config import QueueSystem
|
|
29
31
|
from ert.ensemble_evaluator import EndEvent, EvaluatorServerConfig
|
|
30
32
|
from ert.ensemble_evaluator.event import FullSnapshotEvent, SnapshotUpdateEvent
|
|
31
33
|
from ert.ensemble_evaluator.snapshot import EnsembleSnapshot
|
|
32
|
-
from ert.plugins import
|
|
34
|
+
from ert.plugins import get_site_plugins
|
|
33
35
|
from ert.run_models import StatusEvents
|
|
34
36
|
from ert.run_models.everest_run_model import EverestExitCode, EverestRunModel
|
|
35
37
|
from everest.config import EverestConfig
|
|
@@ -38,7 +40,7 @@ from everest.detached.everserver import (
|
|
|
38
40
|
ExperimentStatus,
|
|
39
41
|
)
|
|
40
42
|
from everest.strings import (
|
|
41
|
-
|
|
43
|
+
EXPERIMENT_SERVER,
|
|
42
44
|
OPT_FAILURE_ALL_REALIZATIONS,
|
|
43
45
|
OPT_FAILURE_REALIZATIONS,
|
|
44
46
|
EverEndpoints,
|
|
@@ -115,7 +117,7 @@ def _get_optimization_status(
|
|
|
115
117
|
status_ = ExperimentState.failed
|
|
116
118
|
messages = _failed_realizations_messages(events, exit_code)
|
|
117
119
|
for msg in messages:
|
|
118
|
-
logging.getLogger(
|
|
120
|
+
logging.getLogger(EXPERIMENT_SERVER).error(msg)
|
|
119
121
|
return status_, "\n".join(messages)
|
|
120
122
|
case _:
|
|
121
123
|
return ExperimentState.completed, "Optimization completed."
|
|
@@ -143,7 +145,7 @@ def _check_user(credentials: HTTPBasicCredentials) -> None:
|
|
|
143
145
|
|
|
144
146
|
|
|
145
147
|
def _log(request: Request) -> None:
|
|
146
|
-
logging.getLogger(
|
|
148
|
+
logging.getLogger(EXPERIMENT_SERVER).debug(
|
|
147
149
|
f"{request.scope['path']} entered from "
|
|
148
150
|
f"{request.client.host if request.client else 'unknown host'} "
|
|
149
151
|
f"with HTTP {request.method}"
|
|
@@ -152,7 +154,7 @@ def _log(request: Request) -> None:
|
|
|
152
154
|
|
|
153
155
|
@router.get("/")
|
|
154
156
|
def get_status(
|
|
155
|
-
request: Request, credentials: HTTPBasicCredentials
|
|
157
|
+
request: Request, credentials: Annotated[HTTPBasicCredentials, Depends(security)]
|
|
156
158
|
) -> PlainTextResponse:
|
|
157
159
|
_log(request)
|
|
158
160
|
_check_user(credentials)
|
|
@@ -161,7 +163,7 @@ def get_status(
|
|
|
161
163
|
|
|
162
164
|
@router.get("/status")
|
|
163
165
|
def experiment_status(
|
|
164
|
-
request: Request, credentials: HTTPBasicCredentials
|
|
166
|
+
request: Request, credentials: Annotated[HTTPBasicCredentials, Depends(security)]
|
|
165
167
|
) -> ExperimentStatus:
|
|
166
168
|
_log(request)
|
|
167
169
|
_check_user(credentials)
|
|
@@ -170,7 +172,7 @@ def experiment_status(
|
|
|
170
172
|
|
|
171
173
|
@router.post("/" + EverEndpoints.stop)
|
|
172
174
|
def stop(
|
|
173
|
-
request: Request, credentials: HTTPBasicCredentials
|
|
175
|
+
request: Request, credentials: Annotated[HTTPBasicCredentials, Depends(security)]
|
|
174
176
|
) -> Response:
|
|
175
177
|
_log(request)
|
|
176
178
|
_check_user(credentials)
|
|
@@ -184,7 +186,7 @@ def stop(
|
|
|
184
186
|
async def start_experiment(
|
|
185
187
|
request: Request,
|
|
186
188
|
background_tasks: BackgroundTasks,
|
|
187
|
-
credentials: HTTPBasicCredentials
|
|
189
|
+
credentials: Annotated[HTTPBasicCredentials, Depends(security)],
|
|
188
190
|
) -> Response:
|
|
189
191
|
_log(request)
|
|
190
192
|
_check_user(credentials)
|
|
@@ -194,9 +196,6 @@ async def start_experiment(
|
|
|
194
196
|
runner = ExperimentRunner(config)
|
|
195
197
|
try:
|
|
196
198
|
background_tasks.add_task(runner.run)
|
|
197
|
-
shared_data.status = ExperimentStatus(
|
|
198
|
-
status=ExperimentState.running, message="Experiment started"
|
|
199
|
-
)
|
|
200
199
|
# Assume only one unique running experiment per everserver instance
|
|
201
200
|
# Ideally, we should return the experiment ID in the response here
|
|
202
201
|
shared_data.config_path = config.config_path
|
|
@@ -213,14 +212,14 @@ async def start_experiment(
|
|
|
213
212
|
status=ExperimentState.failed,
|
|
214
213
|
message=f"Could not start experiment: {e!s}",
|
|
215
214
|
)
|
|
216
|
-
logging.getLogger(
|
|
215
|
+
logging.getLogger(EXPERIMENT_SERVER).exception(e)
|
|
217
216
|
return Response(f"Could not start experiment: {e!s}", status_code=501)
|
|
218
217
|
return Response("Everest experiment is running")
|
|
219
218
|
|
|
220
219
|
|
|
221
220
|
@router.get("/" + EverEndpoints.config_path)
|
|
222
221
|
async def config_path(
|
|
223
|
-
request: Request, credentials: HTTPBasicCredentials
|
|
222
|
+
request: Request, credentials: Annotated[HTTPBasicCredentials, Depends(security)]
|
|
224
223
|
) -> JSONResponse:
|
|
225
224
|
_log(request)
|
|
226
225
|
_check_user(credentials)
|
|
@@ -239,7 +238,7 @@ async def config_path(
|
|
|
239
238
|
|
|
240
239
|
@router.get("/" + EverEndpoints.start_time)
|
|
241
240
|
async def start_time(
|
|
242
|
-
request: Request, credentials: HTTPBasicCredentials
|
|
241
|
+
request: Request, credentials: Annotated[HTTPBasicCredentials, Depends(security)]
|
|
243
242
|
) -> Response:
|
|
244
243
|
_log(request)
|
|
245
244
|
_check_user(credentials)
|
|
@@ -261,9 +260,9 @@ async def websocket_endpoint(websocket: WebSocket) -> None:
|
|
|
261
260
|
if isinstance(event, EndEvent):
|
|
262
261
|
break
|
|
263
262
|
except Exception as e:
|
|
264
|
-
logging.getLogger(
|
|
263
|
+
logging.getLogger(EXPERIMENT_SERVER).exception(str(e))
|
|
265
264
|
finally:
|
|
266
|
-
logging.getLogger(
|
|
265
|
+
logging.getLogger(EXPERIMENT_SERVER).info(
|
|
267
266
|
f"Subscriber {subscriber_id} done. Closing websocket"
|
|
268
267
|
)
|
|
269
268
|
# Give some time for subscribers to get events
|
|
@@ -300,14 +299,16 @@ class ExperimentRunner:
|
|
|
300
299
|
|
|
301
300
|
async def run(self) -> None:
|
|
302
301
|
status_queue: SimpleQueue[StatusEvents] = SimpleQueue()
|
|
302
|
+
run_model: EverestRunModel | None = None
|
|
303
303
|
try:
|
|
304
|
-
|
|
304
|
+
site_plugins = get_site_plugins()
|
|
305
|
+
with use_runtime_plugins(site_plugins):
|
|
305
306
|
run_model = EverestRunModel.create(
|
|
306
307
|
everest_config=self._everest_config,
|
|
307
308
|
experiment_name=f"EnOpt@{datetime.datetime.now().isoformat(timespec='seconds')}",
|
|
308
309
|
target_ensemble="batch",
|
|
309
310
|
status_queue=status_queue,
|
|
310
|
-
runtime_plugins=
|
|
311
|
+
runtime_plugins=site_plugins,
|
|
311
312
|
)
|
|
312
313
|
shared_data.status = ExperimentStatus(
|
|
313
314
|
message="Experiment started", status=ExperimentState.running
|
|
@@ -316,9 +317,11 @@ class ExperimentRunner:
|
|
|
316
317
|
simulation_future = loop.run_in_executor(
|
|
317
318
|
None,
|
|
318
319
|
lambda: run_model.start_simulations_thread(
|
|
319
|
-
EvaluatorServerConfig(
|
|
320
|
-
|
|
321
|
-
|
|
320
|
+
EvaluatorServerConfig(
|
|
321
|
+
use_ipc_protocol=run_model.queue_config.queue_system
|
|
322
|
+
== QueueSystem.LOCAL,
|
|
323
|
+
prioritize_private_ip_address=site_plugins.prioritize_private_ip_address,
|
|
324
|
+
)
|
|
322
325
|
),
|
|
323
326
|
)
|
|
324
327
|
while True:
|
|
@@ -351,15 +354,18 @@ class ExperimentRunner:
|
|
|
351
354
|
status=exp_status,
|
|
352
355
|
)
|
|
353
356
|
except UserCancelled as e:
|
|
354
|
-
logging.getLogger(
|
|
357
|
+
logging.getLogger(EXPERIMENT_SERVER).info(f"User cancelled: {e}")
|
|
355
358
|
except Exception as e:
|
|
356
|
-
logging.getLogger(
|
|
359
|
+
logging.getLogger(EXPERIMENT_SERVER).exception(e)
|
|
357
360
|
shared_data.status = ExperimentStatus(
|
|
358
361
|
message=f"Exception: {e}\n{traceback.format_exc()}",
|
|
359
362
|
status=ExperimentState.failed,
|
|
360
363
|
)
|
|
361
364
|
finally:
|
|
362
|
-
|
|
365
|
+
if run_model and run_model._experiment:
|
|
366
|
+
run_model._experiment.status = shared_data.status
|
|
367
|
+
|
|
368
|
+
logging.getLogger(EXPERIMENT_SERVER).info(
|
|
363
369
|
f"ExperimentRunner done. Items left in queue: {status_queue.qsize()}"
|
|
364
370
|
)
|
|
365
371
|
|
|
@@ -29,7 +29,7 @@ def get_experiments(
|
|
|
29
29
|
priors=create_priors(experiment),
|
|
30
30
|
userdata={},
|
|
31
31
|
parameters={
|
|
32
|
-
group:
|
|
32
|
+
group: config.model_dump()
|
|
33
33
|
for group, config in experiment.parameter_configuration.items()
|
|
34
34
|
if not isinstance(config, SurfaceConfig)
|
|
35
35
|
},
|
|
@@ -65,7 +65,7 @@ def get_experiment_by_id(
|
|
|
65
65
|
priors=create_priors(experiment),
|
|
66
66
|
userdata={},
|
|
67
67
|
parameters={
|
|
68
|
-
group:
|
|
68
|
+
group: config.model_dump()
|
|
69
69
|
for group, config in experiment.parameter_configuration.items()
|
|
70
70
|
},
|
|
71
71
|
responses={
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import operator
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Annotated, Any
|
|
5
5
|
from urllib.parse import unquote
|
|
6
6
|
from uuid import UUID, uuid4
|
|
7
7
|
|
|
@@ -57,7 +57,9 @@ async def get_observations_for_response(
|
|
|
57
57
|
storage: Storage = DEFAULT_STORAGE,
|
|
58
58
|
ensemble_id: UUID,
|
|
59
59
|
response_key: str,
|
|
60
|
-
filter_on:
|
|
60
|
+
filter_on: Annotated[
|
|
61
|
+
str | None, Query(description="JSON string with filters")
|
|
62
|
+
] = None,
|
|
61
63
|
) -> list[js.ObservationOut]:
|
|
62
64
|
response_key = unquote(response_key)
|
|
63
65
|
try:
|
|
@@ -136,13 +138,13 @@ def _get_observations(
|
|
|
136
138
|
df = df.with_columns(pl.Series(name="x_axis", values=df.map_rows(x_axis_fn)))
|
|
137
139
|
df = df.sort("x_axis")
|
|
138
140
|
|
|
139
|
-
for obs_key,
|
|
141
|
+
for obs_key, obs_df in df.group_by("name"):
|
|
140
142
|
observations.append(
|
|
141
143
|
{
|
|
142
144
|
"name": obs_key[0],
|
|
143
|
-
"values":
|
|
144
|
-
"errors":
|
|
145
|
-
"x_axis":
|
|
145
|
+
"values": obs_df["values"].to_list(),
|
|
146
|
+
"errors": obs_df["errors"].to_list(),
|
|
147
|
+
"x_axis": obs_df["x_axis"].to_list(),
|
|
146
148
|
}
|
|
147
149
|
)
|
|
148
150
|
|
|
@@ -113,20 +113,12 @@ def get_parameter_std_dev(
|
|
|
113
113
|
return Response(content=buffer.getvalue(), media_type="application/octet-stream")
|
|
114
114
|
|
|
115
115
|
|
|
116
|
-
def _extract_parameter_group_and_key(key: str) -> tuple[str, str] | tuple[None, None]:
|
|
117
|
-
key = key.removeprefix("LOG10_")
|
|
118
|
-
if ":" not in key:
|
|
119
|
-
# Assume all incoming keys are in format group:key for now
|
|
120
|
-
return None, None
|
|
121
|
-
|
|
122
|
-
param_group, param_key = key.split(":", maxsplit=1)
|
|
123
|
-
return param_group, param_key
|
|
124
|
-
|
|
125
|
-
|
|
126
116
|
def data_for_parameter(ensemble: Ensemble, key: str) -> pd.DataFrame:
|
|
127
|
-
group, _ = _extract_parameter_group_and_key(key)
|
|
128
117
|
try:
|
|
129
|
-
df = ensemble.
|
|
118
|
+
df = ensemble.load_scalar_keys([key], transformed=True)
|
|
119
|
+
if df.is_empty():
|
|
120
|
+
logger.warning(f"No data found for parameter '{key}'")
|
|
121
|
+
return pd.DataFrame()
|
|
130
122
|
except KeyError as e:
|
|
131
123
|
logger.error(e)
|
|
132
124
|
return pd.DataFrame()
|
|
@@ -45,7 +45,9 @@ async def get_response(
|
|
|
45
45
|
storage: Storage = DEFAULT_STORAGE,
|
|
46
46
|
ensemble_id: UUID,
|
|
47
47
|
response_key: str,
|
|
48
|
-
filter_on:
|
|
48
|
+
filter_on: Annotated[
|
|
49
|
+
str | None, Query(description="JSON string with filters")
|
|
50
|
+
] = None,
|
|
49
51
|
accept: Annotated[str | None, Header()] = None,
|
|
50
52
|
) -> Response:
|
|
51
53
|
try:
|
|
@@ -96,6 +98,7 @@ async def get_response(
|
|
|
96
98
|
response_to_pandas_x_axis_fns: dict[str, Callable[[tuple[Any, ...]], Any]] = {
|
|
97
99
|
"summary": lambda t: pd.Timestamp(t[2]).isoformat(),
|
|
98
100
|
"gen_data": lambda t: str(t[3]),
|
|
101
|
+
"rft": lambda t: str(t[4]),
|
|
99
102
|
}
|
|
100
103
|
|
|
101
104
|
|
|
@@ -116,7 +119,7 @@ def _extract_response_type_and_key(
|
|
|
116
119
|
|
|
117
120
|
def data_for_response(
|
|
118
121
|
ensemble: Ensemble, key: str, filter_on: dict[str, Any] | None = None
|
|
119
|
-
) -> pd.DataFrame:
|
|
122
|
+
) -> pd.DataFrame | pd.Series:
|
|
120
123
|
response_key, response_type = _extract_response_type_and_key(
|
|
121
124
|
key, ensemble.experiment.response_key_to_response_type
|
|
122
125
|
)
|
|
@@ -147,10 +150,25 @@ def data_for_response(
|
|
|
147
150
|
# This performs the same aggregation by mean of duplicate values
|
|
148
151
|
# as in ert/analysis/_es_update.py
|
|
149
152
|
df = df.groupby(["Date", "Realization"]).mean()
|
|
150
|
-
data = df.
|
|
151
|
-
|
|
153
|
+
data = df.reset_index().pivot_table(
|
|
154
|
+
index="Realization", columns="Date", values=df.columns[0]
|
|
155
|
+
)
|
|
152
156
|
return data.astype(float)
|
|
153
157
|
|
|
158
|
+
if response_type == "rft":
|
|
159
|
+
return (
|
|
160
|
+
ensemble.load_responses(
|
|
161
|
+
response_key,
|
|
162
|
+
tuple(realizations_with_responses),
|
|
163
|
+
)
|
|
164
|
+
.rename({"realization": "Realization"})
|
|
165
|
+
.select(["Realization", "depth", "values"])
|
|
166
|
+
.unique()
|
|
167
|
+
.to_pandas()
|
|
168
|
+
.pivot_table(index="Realization", columns="depth", values="values")
|
|
169
|
+
.reset_index(drop=True)
|
|
170
|
+
)
|
|
171
|
+
|
|
154
172
|
if response_type == "gen_data":
|
|
155
173
|
data = ensemble.load_responses(response_key, tuple(realizations_with_responses))
|
|
156
174
|
|
|
@@ -159,7 +177,7 @@ def data_for_response(
|
|
|
159
177
|
assert "report_step" in filter_on
|
|
160
178
|
report_step = int(filter_on["report_step"])
|
|
161
179
|
vals = data.filter(pl.col("report_step").eq(report_step))
|
|
162
|
-
pivoted = vals.drop("response_key", "report_step").pivot(
|
|
180
|
+
pivoted = vals.drop("response_key", "report_step").pivot( # noqa: PD010
|
|
163
181
|
on="index", values="values"
|
|
164
182
|
)
|
|
165
183
|
data = pivoted.to_pandas().set_index("realization")
|
|
@@ -169,3 +187,4 @@ def data_for_response(
|
|
|
169
187
|
|
|
170
188
|
except (ValueError, KeyError, ColumnNotFoundError):
|
|
171
189
|
return pd.DataFrame()
|
|
190
|
+
return pd.DataFrame()
|
|
@@ -4,6 +4,8 @@ from uuid import UUID
|
|
|
4
4
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
+
from ert.storage.realization_storage_state import RealizationStorageState
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
class _Ensemble(BaseModel):
|
|
9
11
|
size: int
|
|
@@ -19,3 +21,4 @@ class EnsembleOut(_Ensemble):
|
|
|
19
21
|
id: UUID
|
|
20
22
|
experiment_id: UUID | None = None
|
|
21
23
|
userdata: Mapping[str, Any]
|
|
24
|
+
realization_storage_states: Mapping[RealizationStorageState, int] | None = None
|
|
@@ -24,6 +24,6 @@ class ExperimentOut(_Experiment):
|
|
|
24
24
|
ensemble_ids: list[UUID]
|
|
25
25
|
priors: Mapping[str, dict[str, Any]]
|
|
26
26
|
userdata: Mapping[str, Any]
|
|
27
|
-
parameters: Mapping[str,
|
|
27
|
+
parameters: Mapping[str, dict[str, Any]]
|
|
28
28
|
responses: Mapping[str, list[dict[str, Any]]]
|
|
29
29
|
observations: Mapping[str, dict[str, list[str]]]
|
ert/data/_measured_data.py
CHANGED
|
@@ -141,13 +141,14 @@ class MeasuredData:
|
|
|
141
141
|
|
|
142
142
|
# Pandas differentiates vs int and str keys.
|
|
143
143
|
# Legacy-wise we use int keys for realizations
|
|
144
|
-
pddf
|
|
145
|
-
|
|
146
|
-
|
|
144
|
+
pddf = (
|
|
145
|
+
pddf.rename(
|
|
146
|
+
columns={str(k): int(k) for k in active_realizations},
|
|
147
|
+
)
|
|
148
|
+
.set_index(["observation_key", "key_index"])
|
|
149
|
+
.transpose()
|
|
147
150
|
)
|
|
148
151
|
|
|
149
|
-
pddf = pddf.set_index(["observation_key", "key_index"]).transpose()
|
|
150
|
-
|
|
151
152
|
return pddf
|
|
152
153
|
|
|
153
154
|
|
|
@@ -2,7 +2,13 @@ from ._ensemble import LegacyEnsemble as Ensemble
|
|
|
2
2
|
from ._ensemble import Realization
|
|
3
3
|
from .config import EvaluatorServerConfig
|
|
4
4
|
from .evaluator import EnsembleEvaluator
|
|
5
|
-
from .event import
|
|
5
|
+
from .event import (
|
|
6
|
+
EndEvent,
|
|
7
|
+
FullSnapshotEvent,
|
|
8
|
+
SnapshotUpdateEvent,
|
|
9
|
+
StartEvent,
|
|
10
|
+
WarningEvent,
|
|
11
|
+
)
|
|
6
12
|
from .snapshot import EnsembleSnapshot, FMStepSnapshot, RealizationSnapshot
|
|
7
13
|
|
|
8
14
|
__all__ = [
|
|
@@ -16,5 +22,6 @@ __all__ = [
|
|
|
16
22
|
"Realization",
|
|
17
23
|
"RealizationSnapshot",
|
|
18
24
|
"SnapshotUpdateEvent",
|
|
25
|
+
"StartEvent",
|
|
19
26
|
"WarningEvent",
|
|
20
27
|
]
|
ert/ensemble_evaluator/config.py
CHANGED
|
@@ -27,6 +27,7 @@ class EvaluatorServerConfig:
|
|
|
27
27
|
use_token: bool = True,
|
|
28
28
|
host: str | None = None,
|
|
29
29
|
use_ipc_protocol: bool = True,
|
|
30
|
+
prioritize_private_ip_address: bool = False,
|
|
30
31
|
) -> None:
|
|
31
32
|
self.host: str | None = host
|
|
32
33
|
self.router_port: int | None = None
|
|
@@ -50,7 +51,7 @@ class EvaluatorServerConfig:
|
|
|
50
51
|
if use_ipc_protocol:
|
|
51
52
|
self.uri = f"ipc:///tmp/socket-{uuid.uuid4().hex[:8]}"
|
|
52
53
|
elif self.host is None:
|
|
53
|
-
self.host = get_ip_address()
|
|
54
|
+
self.host = get_ip_address(prioritize_private_ip_address)
|
|
54
55
|
|
|
55
56
|
if use_token:
|
|
56
57
|
self.server_public_key, self.server_secret_key = zmq.curve_keypair()
|
|
@@ -6,6 +6,8 @@ import threading
|
|
|
6
6
|
import traceback
|
|
7
7
|
from collections import defaultdict
|
|
8
8
|
from collections.abc import Awaitable, Callable, Iterable, Sequence
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from math import ceil
|
|
9
11
|
from typing import Any, cast, get_args
|
|
10
12
|
|
|
11
13
|
import zmq.asyncio
|
|
@@ -15,6 +17,7 @@ from _ert.events import (
|
|
|
15
17
|
EESnapshot,
|
|
16
18
|
EESnapshotUpdate,
|
|
17
19
|
EnsembleCancelled,
|
|
20
|
+
EnsembleEvaluationWarning,
|
|
18
21
|
EnsembleFailed,
|
|
19
22
|
EnsembleStarted,
|
|
20
23
|
EnsembleSucceeded,
|
|
@@ -49,6 +52,13 @@ from .state import (
|
|
|
49
52
|
ENSEMBLE_STATE_STOPPED,
|
|
50
53
|
)
|
|
51
54
|
|
|
55
|
+
|
|
56
|
+
@dataclass(order=True)
|
|
57
|
+
class ParallelismViolation:
|
|
58
|
+
amount: float = 0
|
|
59
|
+
message: str = ""
|
|
60
|
+
|
|
61
|
+
|
|
52
62
|
logger = logging.getLogger(__name__)
|
|
53
63
|
|
|
54
64
|
EVENT_HANDLER = Callable[[list[SnapshotInputEvent]], Awaitable[None]]
|
|
@@ -68,6 +78,13 @@ class EventSentinel:
|
|
|
68
78
|
|
|
69
79
|
class EnsembleEvaluator:
|
|
70
80
|
BATCHING_INTERVAL = 0.5
|
|
81
|
+
DEFAULT_SLEEP_PERIOD = 0.1
|
|
82
|
+
|
|
83
|
+
# These constants are the thresholds used to determine whether the user
|
|
84
|
+
# has misconfigured NUM_CPU in their config.
|
|
85
|
+
ALLOWED_CPU_OVERSPENDING = 1.05
|
|
86
|
+
MINIMUM_WALLTIME_SECONDS = 30 # Information is only polled every 5 sec
|
|
87
|
+
CPU_OVERSPENDING_WARNING_THRESHOLD = 1.50
|
|
71
88
|
|
|
72
89
|
def __init__(
|
|
73
90
|
self,
|
|
@@ -123,6 +140,7 @@ class EnsembleEvaluator:
|
|
|
123
140
|
submit_sleep=self.ensemble._queue_config.submit_sleep,
|
|
124
141
|
ens_id=self.ensemble.id_,
|
|
125
142
|
)
|
|
143
|
+
self.max_parallelism_violation = ParallelismViolation()
|
|
126
144
|
|
|
127
145
|
async def _publisher(self) -> None:
|
|
128
146
|
heartbeat_interval = 0.1
|
|
@@ -145,6 +163,11 @@ class EnsembleEvaluator:
|
|
|
145
163
|
self._evaluation_result.set_result(True)
|
|
146
164
|
return
|
|
147
165
|
|
|
166
|
+
elif isinstance(event, EnsembleEvaluationWarning):
|
|
167
|
+
if self._event_handler:
|
|
168
|
+
self._event_handler(event)
|
|
169
|
+
self._events_to_send.task_done()
|
|
170
|
+
|
|
148
171
|
elif type(event) in {
|
|
149
172
|
EESnapshot,
|
|
150
173
|
EESnapshotUpdate,
|
|
@@ -191,7 +214,7 @@ class EnsembleEvaluator:
|
|
|
191
214
|
await self._signal_cancel()
|
|
192
215
|
logger.debug("Run model cancelled - during evaluation - cancel sent")
|
|
193
216
|
self._end_event.clear()
|
|
194
|
-
await asyncio.sleep(
|
|
217
|
+
await asyncio.sleep(self.DEFAULT_SLEEP_PERIOD)
|
|
195
218
|
|
|
196
219
|
async def _send_terminate_message_to_dispatchers(self) -> None:
|
|
197
220
|
event = TERMINATE_MSG
|
|
@@ -244,6 +267,7 @@ class EnsembleEvaluator:
|
|
|
244
267
|
event_handler[event_type] = func
|
|
245
268
|
|
|
246
269
|
set_event_handler(set(get_args(FMEvent | RealizationEvent)), self._fm_handler)
|
|
270
|
+
set_event_handler({EnsembleEvaluationWarning}, self._warning_event_handler)
|
|
247
271
|
set_event_handler({EnsembleStarted}, self._started_handler)
|
|
248
272
|
set_event_handler({EnsembleSucceeded}, self._stopped_handler)
|
|
249
273
|
set_event_handler({EnsembleCancelled}, self._cancelled_handler)
|
|
@@ -264,7 +288,7 @@ class EnsembleEvaluator:
|
|
|
264
288
|
batch.append((function, event))
|
|
265
289
|
self._events.task_done()
|
|
266
290
|
except asyncio.QueueEmpty:
|
|
267
|
-
await asyncio.sleep(
|
|
291
|
+
await asyncio.sleep(self.DEFAULT_SLEEP_PERIOD)
|
|
268
292
|
continue
|
|
269
293
|
self._complete_batch.set()
|
|
270
294
|
await self._batch_processing_queue.put(batch)
|
|
@@ -274,6 +298,12 @@ class EnsembleEvaluator:
|
|
|
274
298
|
async def _fm_handler(self, events: Sequence[FMEvent | RealizationEvent]) -> None:
|
|
275
299
|
await self._append_message(self.ensemble.update_snapshot(events))
|
|
276
300
|
|
|
301
|
+
async def _warning_event_handler(
|
|
302
|
+
self, events: Sequence[EnsembleEvaluationWarning]
|
|
303
|
+
) -> None:
|
|
304
|
+
for event in events:
|
|
305
|
+
await self._events_to_send.put(event)
|
|
306
|
+
|
|
277
307
|
async def _started_handler(self, events: Sequence[EnsembleStarted]) -> None:
|
|
278
308
|
if self.ensemble.status != ENSEMBLE_STATE_FAILED:
|
|
279
309
|
await self._append_message(self.ensemble.update_snapshot(events))
|
|
@@ -288,11 +318,9 @@ class EnsembleEvaluator:
|
|
|
288
318
|
memory_usage = fm_step.get(ids.MAX_MEMORY_USAGE) or "-1"
|
|
289
319
|
max_memory_usage = max(int(memory_usage), max_memory_usage)
|
|
290
320
|
|
|
291
|
-
|
|
321
|
+
self.detect_overspent_cpu(
|
|
292
322
|
self.ensemble.reals[int(real_id)].num_cpu, real_id, fm_step
|
|
293
323
|
)
|
|
294
|
-
if self.ensemble.queue_system != QueueSystem.LOCAL and cpu_message:
|
|
295
|
-
logger.warning(cpu_message)
|
|
296
324
|
|
|
297
325
|
logger.info(
|
|
298
326
|
"Ensemble ran with maximum memory usage for a "
|
|
@@ -368,6 +396,7 @@ class EnsembleEvaluator:
|
|
|
368
396
|
logger.warning(
|
|
369
397
|
"Evaluator receiver closed, no new messages are received"
|
|
370
398
|
)
|
|
399
|
+
return # The socket is closed, and we won't re-establish it.
|
|
371
400
|
else:
|
|
372
401
|
logger.error(f"Unexpected error when listening to messages: {e}")
|
|
373
402
|
except asyncio.CancelledError:
|
|
@@ -416,7 +445,7 @@ class EnsembleEvaluator:
|
|
|
416
445
|
while True:
|
|
417
446
|
if self._evaluation_result.done():
|
|
418
447
|
break
|
|
419
|
-
await asyncio.sleep(
|
|
448
|
+
await asyncio.sleep(self.DEFAULT_SLEEP_PERIOD)
|
|
420
449
|
logger.debug("Async server exiting.")
|
|
421
450
|
finally:
|
|
422
451
|
try:
|
|
@@ -647,27 +676,50 @@ class EnsembleEvaluator:
|
|
|
647
676
|
else:
|
|
648
677
|
await self._events.put(EnsembleCancelled(ensemble=self.ensemble.id_))
|
|
649
678
|
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
end_time = fm_step.get(ids.END_TIME)
|
|
659
|
-
if start_time is None or end_time is None:
|
|
660
|
-
return ""
|
|
661
|
-
duration = (end_time - start_time).total_seconds()
|
|
662
|
-
if duration <= minimum_wallclock_time_seconds:
|
|
663
|
-
return ""
|
|
664
|
-
cpu_seconds = fm_step.get(ids.CPU_SECONDS) or 0.0
|
|
665
|
-
parallelization_obtained = cpu_seconds / duration
|
|
666
|
-
if parallelization_obtained > num_cpu * allowed_overspending:
|
|
667
|
-
return (
|
|
668
|
-
f"Misconfigured NUM_CPU, forward model step '{fm_step.get(ids.NAME)}' for "
|
|
669
|
-
f"realization {real_id} spent {cpu_seconds} cpu seconds "
|
|
670
|
-
f"with wall clock duration {duration:.1f} seconds, "
|
|
671
|
-
f"a factor of {parallelization_obtained:.2f}, while NUM_CPU was {num_cpu}."
|
|
679
|
+
def detect_overspent_cpu(
|
|
680
|
+
self, num_cpu: int, real_id: str, fm_step: FMStepSnapshot
|
|
681
|
+
) -> None:
|
|
682
|
+
"""Logs a warning about misconfiguration of NUM_CPU if so is detected and
|
|
683
|
+
keeps the largest violation in max_parallelism_violation. Returns None."""
|
|
684
|
+
allowed_overspending = self.ALLOWED_CPU_OVERSPENDING * num_cpu
|
|
685
|
+
overspending_warning_threshold = (
|
|
686
|
+
self.CPU_OVERSPENDING_WARNING_THRESHOLD * num_cpu
|
|
672
687
|
)
|
|
673
|
-
|
|
688
|
+
|
|
689
|
+
start_time = fm_step.get(ids.START_TIME)
|
|
690
|
+
|
|
691
|
+
end_time = fm_step.get(ids.END_TIME)
|
|
692
|
+
if start_time is None or end_time is None:
|
|
693
|
+
return
|
|
694
|
+
|
|
695
|
+
duration = (end_time - start_time).total_seconds()
|
|
696
|
+
if duration <= self.MINIMUM_WALLTIME_SECONDS:
|
|
697
|
+
return
|
|
698
|
+
|
|
699
|
+
cpu_seconds = fm_step.get(ids.CPU_SECONDS) or 0.0
|
|
700
|
+
parallelization_obtained = cpu_seconds / duration
|
|
701
|
+
if (
|
|
702
|
+
parallelization_obtained > allowed_overspending
|
|
703
|
+
and self.ensemble.queue_system != QueueSystem.LOCAL
|
|
704
|
+
):
|
|
705
|
+
logger.warning(
|
|
706
|
+
f"Misconfigured NUM_CPU, forward model step '{fm_step.get(ids.NAME)}' "
|
|
707
|
+
f"for realization {real_id} spent {cpu_seconds} cpu seconds "
|
|
708
|
+
f"with wall clock duration {duration:.1f} seconds, a factor of "
|
|
709
|
+
f"{parallelization_obtained:.2f}, while NUM_CPU was {num_cpu}."
|
|
710
|
+
)
|
|
711
|
+
if parallelization_obtained > overspending_warning_threshold:
|
|
712
|
+
warning_msg = (
|
|
713
|
+
"Overusage of CPUs detected!\n"
|
|
714
|
+
f"Your experiment has used up to {ceil(parallelization_obtained)} "
|
|
715
|
+
f"CPUs in step '{fm_step.get(ids.NAME)}', "
|
|
716
|
+
f"while the Ert config has only requested {num_cpu}.\n"
|
|
717
|
+
f"This means your experiment is consuming more CPU-resources than "
|
|
718
|
+
f"requested and will slow down other users experiments.\n"
|
|
719
|
+
f"We kindly ask you to set "
|
|
720
|
+
f"NUM_CPU={ceil(parallelization_obtained)} in your Ert config."
|
|
721
|
+
)
|
|
722
|
+
self.max_parallelism_violation = max(
|
|
723
|
+
self.max_parallelism_violation,
|
|
724
|
+
ParallelismViolation(parallelization_obtained, warning_msg),
|
|
725
|
+
)
|
ert/ensemble_evaluator/event.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from collections.abc import Mapping
|
|
2
|
+
from datetime import datetime
|
|
2
3
|
from typing import Any, Literal
|
|
3
4
|
|
|
4
5
|
from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
|
|
@@ -42,6 +43,11 @@ class SnapshotUpdateEvent(_UpdateEvent):
|
|
|
42
43
|
event_type: Literal["SnapshotUpdateEvent"] = "SnapshotUpdateEvent"
|
|
43
44
|
|
|
44
45
|
|
|
46
|
+
class StartEvent(BaseModel):
|
|
47
|
+
event_type: Literal["StartEvent"] = "StartEvent"
|
|
48
|
+
timestamp: datetime
|
|
49
|
+
|
|
50
|
+
|
|
45
51
|
class EndEvent(BaseModel):
|
|
46
52
|
model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")
|
|
47
53
|
event_type: Literal["EndEvent"] = "EndEvent"
|
|
@@ -12,6 +12,7 @@ from _ert.events import (
|
|
|
12
12
|
EESnapshot,
|
|
13
13
|
EESnapshotUpdate,
|
|
14
14
|
EnsembleCancelled,
|
|
15
|
+
EnsembleEvaluationWarning,
|
|
15
16
|
EnsembleEvent,
|
|
16
17
|
EnsembleFailed,
|
|
17
18
|
EnsembleStarted,
|
|
@@ -424,7 +425,8 @@ class EnsembleSnapshot:
|
|
|
424
425
|
|
|
425
426
|
elif e_type in get_args(EnsembleEvent):
|
|
426
427
|
event = cast(EnsembleEvent, event)
|
|
427
|
-
|
|
428
|
+
if not isinstance(event, EnsembleEvaluationWarning):
|
|
429
|
+
self._ensemble_state = _ENSEMBLE_TYPE_EVENT_TO_STATUS[type(event)]
|
|
428
430
|
elif type(event) is EESnapshotUpdate:
|
|
429
431
|
self.merge_snapshot(EnsembleSnapshot.from_nested_dict(event.snapshot))
|
|
430
432
|
elif type(event) is EESnapshot:
|
ert/ensemble_evaluator/state.py
CHANGED
|
@@ -7,6 +7,7 @@ COLOR_RUNNING: Final = (255, 255, 153)
|
|
|
7
7
|
COLOR_UNKNOWN: Final = (128, 128, 128)
|
|
8
8
|
COLOR_WAITING: Final = (164, 200, 255)
|
|
9
9
|
COLOR_CANCELLED: Final = (235, 242, 246)
|
|
10
|
+
COLOR_WARNING: Final = (255, 103, 0)
|
|
10
11
|
|
|
11
12
|
ENSEMBLE_STATE_CANCELLED: Final = "Cancelled"
|
|
12
13
|
ENSEMBLE_STATE_FAILED: Final = "Failed"
|