ert 17.1.9__py3-none-any.whl → 18.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. _ert/events.py +19 -2
  2. ert/__main__.py +8 -7
  3. ert/analysis/_update_commons.py +12 -3
  4. ert/cli/main.py +6 -3
  5. ert/cli/monitor.py +7 -0
  6. ert/config/__init__.py +13 -3
  7. ert/config/_create_observation_dataframes.py +60 -12
  8. ert/config/_observations.py +14 -1
  9. ert/config/_read_summary.py +8 -6
  10. ert/config/ensemble_config.py +6 -14
  11. ert/config/ert_config.py +19 -13
  12. ert/config/{everest_objective_config.py → everest_response.py} +23 -12
  13. ert/config/ext_param_config.py +133 -1
  14. ert/config/field.py +12 -8
  15. ert/config/forward_model_step.py +108 -6
  16. ert/config/gen_data_config.py +2 -6
  17. ert/config/gen_kw_config.py +0 -9
  18. ert/config/known_response_types.py +14 -0
  19. ert/config/parameter_config.py +0 -17
  20. ert/config/parsing/config_keywords.py +1 -0
  21. ert/config/parsing/config_schema.py +12 -0
  22. ert/config/parsing/config_schema_deprecations.py +11 -0
  23. ert/config/parsing/config_schema_item.py +1 -1
  24. ert/config/queue_config.py +4 -4
  25. ert/config/response_config.py +0 -7
  26. ert/config/rft_config.py +230 -0
  27. ert/config/summary_config.py +2 -6
  28. ert/config/violations.py +0 -0
  29. ert/config/workflow_fixtures.py +2 -1
  30. ert/dark_storage/client/__init__.py +2 -2
  31. ert/dark_storage/client/_session.py +4 -4
  32. ert/dark_storage/client/client.py +2 -2
  33. ert/dark_storage/compute/misfits.py +7 -6
  34. ert/dark_storage/endpoints/compute/misfits.py +2 -2
  35. ert/dark_storage/endpoints/observations.py +4 -4
  36. ert/dark_storage/endpoints/responses.py +15 -1
  37. ert/ensemble_evaluator/__init__.py +8 -1
  38. ert/ensemble_evaluator/evaluator.py +81 -29
  39. ert/ensemble_evaluator/event.py +6 -0
  40. ert/ensemble_evaluator/snapshot.py +3 -1
  41. ert/ensemble_evaluator/state.py +1 -0
  42. ert/field_utils/__init__.py +8 -0
  43. ert/field_utils/field_utils.py +211 -1
  44. ert/gui/ertwidgets/__init__.py +23 -16
  45. ert/gui/ertwidgets/analysismoduleedit.py +2 -2
  46. ert/gui/ertwidgets/checklist.py +1 -1
  47. ert/gui/ertwidgets/create_experiment_dialog.py +3 -1
  48. ert/gui/ertwidgets/ensembleselector.py +2 -2
  49. ert/gui/ertwidgets/models/__init__.py +2 -0
  50. ert/gui/ertwidgets/models/activerealizationsmodel.py +2 -1
  51. ert/gui/ertwidgets/models/path_model.py +1 -1
  52. ert/gui/ertwidgets/models/targetensemblemodel.py +2 -1
  53. ert/gui/ertwidgets/models/text_model.py +1 -1
  54. ert/gui/ertwidgets/searchbox.py +13 -4
  55. ert/gui/{suggestor → ertwidgets/suggestor}/_suggestor_message.py +13 -4
  56. ert/gui/main.py +11 -6
  57. ert/gui/main_window.py +1 -2
  58. ert/gui/simulation/ensemble_experiment_panel.py +1 -1
  59. ert/gui/simulation/ensemble_information_filter_panel.py +1 -1
  60. ert/gui/simulation/ensemble_smoother_panel.py +1 -1
  61. ert/gui/simulation/evaluate_ensemble_panel.py +1 -1
  62. ert/gui/simulation/experiment_panel.py +1 -1
  63. ert/gui/simulation/manual_update_panel.py +31 -8
  64. ert/gui/simulation/multiple_data_assimilation_panel.py +12 -8
  65. ert/gui/simulation/run_dialog.py +25 -4
  66. ert/gui/simulation/single_test_run_panel.py +2 -2
  67. ert/gui/summarypanel.py +1 -1
  68. ert/gui/tools/load_results/load_results_panel.py +1 -1
  69. ert/gui/tools/manage_experiments/storage_info_widget.py +7 -7
  70. ert/gui/tools/manage_experiments/storage_widget.py +1 -2
  71. ert/gui/tools/plot/plot_api.py +13 -10
  72. ert/gui/tools/plot/plot_window.py +12 -0
  73. ert/gui/tools/plot/plottery/plot_config.py +2 -0
  74. ert/gui/tools/plot/plottery/plot_context.py +14 -0
  75. ert/gui/tools/plot/plottery/plots/ensemble.py +9 -2
  76. ert/gui/tools/plot/plottery/plots/statistics.py +59 -19
  77. ert/mode_definitions.py +2 -0
  78. ert/plugins/__init__.py +0 -1
  79. ert/plugins/hook_implementations/workflows/gen_data_rft_export.py +10 -2
  80. ert/plugins/hook_specifications/__init__.py +0 -2
  81. ert/plugins/hook_specifications/jobs.py +0 -9
  82. ert/plugins/plugin_manager.py +2 -33
  83. ert/resources/shell_scripts/delete_directory.py +2 -2
  84. ert/run_models/__init__.py +18 -5
  85. ert/run_models/_create_run_path.py +33 -21
  86. ert/run_models/ensemble_experiment.py +10 -4
  87. ert/run_models/ensemble_information_filter.py +8 -1
  88. ert/run_models/ensemble_smoother.py +9 -3
  89. ert/run_models/evaluate_ensemble.py +8 -6
  90. ert/run_models/event.py +7 -3
  91. ert/run_models/everest_run_model.py +155 -44
  92. ert/run_models/initial_ensemble_run_model.py +23 -22
  93. ert/run_models/manual_update.py +4 -2
  94. ert/run_models/manual_update_enif.py +37 -0
  95. ert/run_models/model_factory.py +81 -22
  96. ert/run_models/multiple_data_assimilation.py +21 -10
  97. ert/run_models/run_model.py +54 -34
  98. ert/run_models/single_test_run.py +7 -4
  99. ert/run_models/update_run_model.py +4 -2
  100. ert/runpaths.py +5 -6
  101. ert/sample_prior.py +9 -4
  102. ert/scheduler/driver.py +37 -0
  103. ert/scheduler/event.py +3 -1
  104. ert/scheduler/job.py +23 -13
  105. ert/scheduler/lsf_driver.py +6 -2
  106. ert/scheduler/openpbs_driver.py +7 -1
  107. ert/scheduler/scheduler.py +5 -0
  108. ert/scheduler/slurm_driver.py +6 -2
  109. ert/services/__init__.py +2 -2
  110. ert/services/_base_service.py +31 -15
  111. ert/services/ert_server.py +317 -0
  112. ert/shared/_doc_utils/ert_jobs.py +1 -4
  113. ert/shared/storage/connection.py +3 -3
  114. ert/shared/version.py +3 -3
  115. ert/storage/local_ensemble.py +25 -5
  116. ert/storage/local_experiment.py +6 -14
  117. ert/storage/local_storage.py +35 -30
  118. ert/storage/migration/to18.py +12 -0
  119. ert/storage/migration/to8.py +4 -4
  120. ert/substitutions.py +12 -28
  121. ert/validation/active_range.py +7 -7
  122. ert/validation/rangestring.py +16 -16
  123. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/METADATA +8 -7
  124. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/RECORD +160 -159
  125. everest/api/everest_data_api.py +1 -14
  126. everest/bin/config_branch_script.py +3 -6
  127. everest/bin/everconfigdump_script.py +1 -9
  128. everest/bin/everest_script.py +21 -11
  129. everest/bin/kill_script.py +2 -2
  130. everest/bin/monitor_script.py +2 -2
  131. everest/bin/utils.py +6 -3
  132. everest/config/__init__.py +4 -1
  133. everest/config/control_config.py +61 -2
  134. everest/config/control_variable_config.py +2 -1
  135. everest/config/everest_config.py +38 -16
  136. everest/config/forward_model_config.py +5 -3
  137. everest/config/install_data_config.py +7 -5
  138. everest/config/install_job_config.py +7 -3
  139. everest/config/install_template_config.py +3 -3
  140. everest/config/optimization_config.py +19 -6
  141. everest/config/output_constraint_config.py +8 -2
  142. everest/config/server_config.py +6 -49
  143. everest/config/utils.py +25 -105
  144. everest/config/validation_utils.py +10 -10
  145. everest/config_file_loader.py +13 -2
  146. everest/detached/everserver.py +7 -8
  147. everest/everest_storage.py +6 -10
  148. everest/gui/everest_client.py +0 -1
  149. everest/gui/main_window.py +2 -2
  150. everest/optimizer/everest2ropt.py +59 -32
  151. everest/optimizer/opt_model_transforms.py +12 -13
  152. everest/optimizer/utils.py +0 -29
  153. everest/strings.py +0 -5
  154. ert/config/everest_constraints_config.py +0 -95
  155. ert/services/storage_service.py +0 -127
  156. everest/config/sampler_config.py +0 -103
  157. everest/simulator/__init__.py +0 -88
  158. everest/simulator/everest_to_ert.py +0 -51
  159. /ert/gui/{suggestor → ertwidgets/suggestor}/__init__.py +0 -0
  160. /ert/gui/{suggestor → ertwidgets/suggestor}/_colors.py +0 -0
  161. /ert/gui/{suggestor → ertwidgets/suggestor}/suggestor.py +0 -0
  162. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/WHEEL +0 -0
  163. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/entry_points.txt +0 -0
  164. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/licenses/COPYING +0 -0
  165. {ert-17.1.9.dist-info → ert-18.0.0.dist-info}/top_level.txt +0 -0
ert/sample_prior.py CHANGED
@@ -33,6 +33,7 @@ def sample_prior(
33
33
  parameter_configs = ensemble.experiment.parameter_configuration
34
34
  if parameters is None:
35
35
  parameters = list(parameter_configs.keys())
36
+ complete_dataset: pl.DataFrame | None = None
36
37
  for parameter in parameters:
37
38
  config_node = parameter_configs[parameter]
38
39
  if config_node.forward_init:
@@ -75,13 +76,17 @@ def sample_prior(
75
76
  if datasets:
76
77
  dataset = pl.concat(datasets, how="vertical")
77
78
 
78
- if dataset is not None:
79
- ensemble.save_parameters(
80
- dataset=dataset,
81
- )
79
+ if complete_dataset is None:
80
+ complete_dataset = dataset
81
+ elif dataset is not None:
82
+ complete_dataset = complete_dataset.join(dataset, on="realization")
82
83
  else:
83
84
  for realization_nr in active_realizations:
84
85
  ds = config_node.read_from_runpath(Path(), realization_nr, 0)
85
86
  ensemble.save_parameters(ds, parameter, realization_nr)
86
87
 
88
+ if complete_dataset is not None:
89
+ ensemble.save_parameters(
90
+ dataset=complete_dataset,
91
+ )
87
92
  ensemble.refresh_ensemble_state()
ert/scheduler/driver.py CHANGED
@@ -3,10 +3,13 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import logging
5
5
  import shlex
6
+ import time
6
7
  from abc import ABC, abstractmethod
7
8
  from collections.abc import Iterable
8
9
  from pathlib import Path
9
10
 
11
+ from _ert.events import EnsembleEvaluationWarning
12
+
10
13
  from .event import DriverEvent
11
14
 
12
15
  SIGNAL_OFFSET = 128
@@ -34,12 +37,19 @@ class FailedSubmit(RuntimeError):
34
37
  class Driver(ABC):
35
38
  """Adapter for the HPC cluster."""
36
39
 
40
+ POLLING_TIMEOUT_PERIOD = 600
41
+
37
42
  def __init__(self, activate_script: str = "") -> None:
38
43
  self._event_queue: asyncio.Queue[DriverEvent] | None = None
39
44
  self._job_error_message_by_iens: dict[int, str] = {}
40
45
  self.activate_script = activate_script
41
46
  self._poll_period = _POLL_PERIOD
42
47
 
48
+ self._polling_timeout_period = Driver.POLLING_TIMEOUT_PERIOD
49
+ self._last_successful_poll = time.time()
50
+ self._last_polling_error_message: str | None = None
51
+ self._has_warned_evaluator_of_polling_error = False
52
+
43
53
  @property
44
54
  def event_queue(self) -> asyncio.Queue[DriverEvent]:
45
55
  if self._event_queue is None:
@@ -178,3 +188,30 @@ class Driver(ABC):
178
188
  )
179
189
  logger.error(error_message)
180
190
  return False, error_message
191
+
192
+ async def _warn_evaluator_if_polling_has_failed_for_some_time(self) -> None:
193
+ if (
194
+ (self._last_successful_poll < time.time() - self._polling_timeout_period)
195
+ and self._last_polling_error_message
196
+ and not self._has_warned_evaluator_of_polling_error
197
+ ):
198
+ await self._warn_evaluator_about_polling_difficulties()
199
+ self._has_warned_evaluator_of_polling_error = True
200
+
201
+ async def _warn_evaluator_about_polling_difficulties(self) -> None:
202
+ last_polling_error_message = self._last_polling_error_message
203
+ logger = logging.getLogger(__name__)
204
+ logger.warning(
205
+ "Driver has not successfully polled statuses for "
206
+ f"{self._polling_timeout_period}s. The previous error "
207
+ f"was due to '{last_polling_error_message}'"
208
+ )
209
+ formatted_msg = (
210
+ "ert has not been able to update the job status for some time. This might "
211
+ "be resolved by itself, and it does not mean that the run has crashed.\n"
212
+ "Please check the runpath if it seems to still be running.\n"
213
+ f"The last error message was '{last_polling_error_message}'"
214
+ )
215
+ await self.event_queue.put(
216
+ EnsembleEvaluationWarning(warning_message=formatted_msg)
217
+ )
ert/scheduler/event.py CHANGED
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
4
 
5
+ from _ert.events import EnsembleEvaluationWarning
6
+
5
7
 
6
8
  @dataclass
7
9
  class StartedEvent:
@@ -16,4 +18,4 @@ class FinishedEvent:
16
18
  exec_hosts: str = "-"
17
19
 
18
20
 
19
- DriverEvent = StartedEvent | FinishedEvent
21
+ DriverEvent = StartedEvent | FinishedEvent | EnsembleEvaluationWarning
ert/scheduler/job.py CHANGED
@@ -12,6 +12,7 @@ from enum import StrEnum
12
12
  from pathlib import Path
13
13
  from typing import TYPE_CHECKING, assert_never
14
14
 
15
+ import anyio
15
16
  from lxml import etree
16
17
  from opentelemetry.trace import Status, StatusCode
17
18
 
@@ -31,7 +32,7 @@ from ert.storage import (
31
32
  RealizationStorageState,
32
33
  load_realization_parameters_and_responses,
33
34
  )
34
- from ert.trace import trace, tracer
35
+ from ert.trace import trace
35
36
  from ert.warnings import PostSimulationWarning
36
37
 
37
38
  from .driver import Driver, FailedSubmit
@@ -238,7 +239,6 @@ class Job:
238
239
  f"{method_name} spent {elapsed_time} seconds waiting for files"
239
240
  )
240
241
 
241
- @tracer.start_as_current_span(f"{__name__}.run")
242
242
  async def run(
243
243
  self,
244
244
  sem: asyncio.BoundedSemaphore,
@@ -335,8 +335,12 @@ class Job:
335
335
 
336
336
  valid_checksums = [info for info in checksum.values() if "error" not in info]
337
337
 
338
- # Wait for files in checksum
339
- while not all(Path(info["path"]).exists() for info in valid_checksums):
338
+ async def all_paths_exist(paths: list[Path]) -> bool:
339
+ return all(
340
+ await asyncio.gather(*[anyio.Path(path).exists() for path in paths])
341
+ )
342
+
343
+ while not await all_paths_exist([info["path"] for info in valid_checksums]):
340
344
  if timeout <= 0:
341
345
  break
342
346
  timeout -= DISK_SYNCHRONIZATION_POLLING_INTERVAL
@@ -344,17 +348,19 @@ class Job:
344
348
  await asyncio.sleep(DISK_SYNCHRONIZATION_POLLING_INTERVAL)
345
349
  async with checksum_lock:
346
350
  for info in valid_checksums:
347
- file_path = Path(info["path"])
351
+ file_path = anyio.Path(info["path"])
348
352
  expected_md5sum = info.get("md5sum")
349
- if file_path.exists() and expected_md5sum:
350
- actual_md5sum = hashlib.md5(file_path.read_bytes()).hexdigest()
353
+ file_path_exists = await file_path.exists()
354
+ if file_path_exists and expected_md5sum:
355
+ file_bytes = await file_path.read_bytes()
356
+ actual_md5sum = hashlib.md5(file_bytes).hexdigest()
351
357
  if expected_md5sum == actual_md5sum:
352
358
  logger.debug(f"File {file_path} checksum successful.")
353
359
  else:
354
360
  logger.warning(
355
361
  f"File {file_path} checksum verification failed."
356
362
  )
357
- elif file_path.exists() and expected_md5sum is None:
363
+ elif file_path_exists and expected_md5sum is None:
358
364
  logger.warning(f"Checksum not received for file {file_path}")
359
365
  else:
360
366
  logger.error(f"Disk synchronization failed for {file_path}")
@@ -506,11 +512,12 @@ async def log_warnings_from_forward_model(
506
512
  or "- ERROR - " in line
507
513
  )
508
514
 
509
- async def log_warnings_from_file( # noqa
515
+ async def log_warnings_from_file(
510
516
  file: Path, iens: int, step: ForwardModelStep, step_idx: int, filetype: str
511
517
  ) -> None:
512
518
  captured: list[str] = []
513
- for line in file.read_text(encoding="utf-8").splitlines():
519
+ file_text = await anyio.Path(file).read_text(encoding="utf-8")
520
+ for line in file_text.splitlines():
514
521
  if line_contains_warning(line):
515
522
  captured.append(line[:max_length])
516
523
 
@@ -527,9 +534,12 @@ async def log_warnings_from_forward_model(
527
534
  return 0
528
535
  remaining_timeout = _timeout
529
536
  for _ in range(_timeout):
530
- if not (
531
- file_path.exists() and file_path.stat().st_mtime >= job_submission_time
532
- ):
537
+ file_path_exists = await anyio.Path(file_path).exists()
538
+ if file_path_exists:
539
+ st_mtime = (await anyio.Path(file_path).stat()).st_mtime
540
+ else:
541
+ st_mtime = 0
542
+ if not (file_path_exists and st_mtime >= job_submission_time):
533
543
  remaining_timeout -= 1
534
544
  await asyncio.sleep(1)
535
545
  else:
ert/scheduler/lsf_driver.py CHANGED
@@ -444,6 +444,7 @@ class LsfDriver(Driver):
444
444
 
445
445
  async def poll(self) -> None:
446
446
  while True:
447
+ await self._warn_evaluator_if_polling_has_failed_for_some_time()
447
448
  if not self._jobs.keys():
448
449
  await asyncio.sleep(self._poll_period)
449
450
  continue
@@ -461,6 +462,7 @@ class LsfDriver(Driver):
461
462
  )
462
463
  except OSError as e:
463
464
  logger.error(str(e))
465
+ self._last_polling_error_message = str(e)
464
466
  await asyncio.sleep(self._poll_period)
465
467
  continue
466
468
 
@@ -468,10 +470,11 @@ class LsfDriver(Driver):
468
470
  if process.returncode:
469
471
  # bjobs may give nonzero return code even when it is providing
470
472
  # at least some correct information
473
+ error_msg = stderr.decode()
471
474
  logger.warning(
472
- f"bjobs gave returncode {process.returncode} "
473
- f"and error {stderr.decode()}"
475
+ f"bjobs gave returncode {process.returncode} and error {error_msg}"
474
476
  )
477
+ self._last_polling_error_message = error_msg
475
478
  bjobs_states = _parse_jobs_dict(parse_bjobs(stdout.decode(errors="ignore")))
476
479
  self.update_and_log_exec_hosts(
477
480
  parse_bjobs_exec_hosts(stdout.decode(errors="ignore"))
@@ -503,6 +506,7 @@ class LsfDriver(Driver):
503
506
  "bhist did not give status for job_ids "
504
507
  f"{missing_in_bhist_and_bjobs}, giving up for now."
505
508
  )
509
+ self._last_successful_poll = time.time()
506
510
  await asyncio.sleep(self._poll_period)
507
511
 
508
512
  async def _process_job_update(self, job_id: str, new_state: AnyJob) -> None:
ert/scheduler/openpbs_driver.py CHANGED
@@ -5,6 +5,7 @@ import json
5
5
  import logging
6
6
  import shlex
7
7
  import shutil
8
+ import time
8
9
  from collections.abc import Iterable, Mapping, MutableMapping
9
10
  from dataclasses import dataclass
10
11
  from pathlib import Path
@@ -264,6 +265,7 @@ class OpenPBSDriver(Driver):
264
265
 
265
266
  async def poll(self) -> None:
266
267
  while True:
268
+ await self._warn_evaluator_if_polling_has_failed_for_some_time()
267
269
  if not self._jobs:
268
270
  await asyncio.sleep(self._poll_period)
269
271
  continue
@@ -280,6 +282,7 @@ class OpenPBSDriver(Driver):
280
282
  )
281
283
  except OSError as e:
282
284
  logger.error(str(e))
285
+ self._last_polling_error_message = str(e)
283
286
  await asyncio.sleep(self._poll_period)
284
287
  continue
285
288
  stdout, stderr = await process.communicate()
@@ -289,10 +292,12 @@ class OpenPBSDriver(Driver):
289
292
  await asyncio.sleep(self._poll_period)
290
293
  continue
291
294
  if process.returncode == QSTAT_UNKNOWN_JOB_ID:
295
+ error_msg = stderr.decode(errors="ignore")
292
296
  logger.debug(
293
297
  f"qstat gave returncode {QSTAT_UNKNOWN_JOB_ID} "
294
- f"with message {stderr.decode(errors='ignore')}"
298
+ f"with message {error_msg}"
295
299
  )
300
+ self._last_polling_error_message = error_msg
296
301
  parsed_jobs = _parse_jobs_dict(
297
302
  parse_qstat(stdout.decode(errors="ignore"))
298
303
  )
@@ -330,6 +335,7 @@ class OpenPBSDriver(Driver):
330
335
  for job_id, job in parsed_jobs_dict.items():
331
336
  await self._process_job_update(job_id, job)
332
337
 
338
+ self._last_successful_poll = time.time()
333
339
  await asyncio.sleep(self._poll_period)
334
340
 
335
341
  async def _process_job_update(self, job_id: str, new_state: AnyJob) -> None:
ert/scheduler/scheduler.py CHANGED
@@ -15,6 +15,7 @@ import orjson
15
15
  from pydantic.dataclasses import dataclass
16
16
 
17
17
  from _ert.events import (
18
+ EnsembleEvaluationWarning,
18
19
  ForwardModelStepChecksum,
19
20
  RealizationEvent,
20
21
  RealizationFailed,
@@ -343,6 +344,10 @@ class Scheduler:
343
344
  async def _process_event_queue(self) -> None:
344
345
  while True:
345
346
  event = await self.driver.event_queue.get()
347
+ if isinstance(event, EnsembleEvaluationWarning):
348
+ if self._ensemble_evaluator_queue:
349
+ await self._ensemble_evaluator_queue.put(event)
350
+ continue
346
351
  job = self._jobs[event.iens]
347
352
 
348
353
  # Any event implies the job has at least started
ert/scheduler/slurm_driver.py CHANGED
@@ -262,6 +262,7 @@ class SlurmDriver(Driver):
262
262
 
263
263
  async def poll(self) -> None:
264
264
  while True:
265
+ await self._warn_evaluator_if_polling_has_failed_for_some_time()
265
266
  if not self._jobs.keys():
266
267
  await asyncio.sleep(self._poll_period)
267
268
  continue
@@ -277,14 +278,16 @@ class SlurmDriver(Driver):
277
278
  )
278
279
  except OSError as e:
279
280
  logger.error(str(e))
281
+ self._last_polling_error_message = str(e)
280
282
  await asyncio.sleep(self._poll_period)
281
283
  continue
282
284
  stdout, stderr = await process.communicate()
283
285
  if process.returncode:
286
+ error_msg = stderr.decode()
284
287
  logger.warning(
285
- f"squeue gave returncode {process.returncode} "
286
- f"and error {stderr.decode()}"
288
+ f"squeue gave returncode {process.returncode} and error {error_msg}"
287
289
  )
290
+ self._last_polling_error_message = error_msg
288
291
  squeue_states = dict(_parse_squeue_output(stdout.decode(errors="ignore")))
289
292
 
290
293
  job_ids_found_in_squeue_output = set(squeue_states.keys())
@@ -317,6 +320,7 @@ class SlurmDriver(Driver):
317
320
  "scontrol did not give status for job_ids "
318
321
  f"{missing_in_squeue_and_scontrol}, giving up for now."
319
322
  )
323
+ self._last_successful_poll = time.time()
320
324
  await asyncio.sleep(self._poll_period)
321
325
 
322
326
  async def _process_job_update(self, job_id: str, new_info: JobInfo) -> None:
ert/services/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .storage_service import StorageService
1
+ from .ert_server import ErtServer
2
2
  from .webviz_ert_service import WebvizErt
3
3
 
4
- __all__ = ["StorageService", "WebvizErt"]
4
+ __all__ = ["ErtServer", "WebvizErt"]
ert/services/_base_service.py CHANGED
@@ -1,3 +1,8 @@
1
+ """
2
+ This file contains a more generic version of "ert services", and
3
+ is scheduled for removal when WebvizErt is removed.
4
+ """
5
+
1
6
  from __future__ import annotations
2
7
 
3
8
  import contextlib
@@ -15,13 +20,21 @@ from subprocess import Popen, TimeoutExpired
15
20
  from tempfile import NamedTemporaryFile
16
21
  from time import sleep
17
22
  from types import FrameType
18
- from typing import TYPE_CHECKING, Any, Generic, Self, TypeVar
23
+ from typing import TYPE_CHECKING, Any, Generic, Self, TypedDict, TypeVar
19
24
 
20
25
  if TYPE_CHECKING:
21
26
  from inspect import Traceback
22
27
 
23
28
  T = TypeVar("T", bound="BaseService")
24
- ConnInfo = Mapping[str, Any] | Exception | None
29
+
30
+
31
+ class ErtServerConnectionInfo(TypedDict):
32
+ urls: list[str]
33
+ authtoken: str
34
+ host: str
35
+ port: str
36
+ cert: str
37
+ auth: str
25
38
 
26
39
 
27
40
  SERVICE_CONF_PATHS: set[str] = set()
@@ -88,7 +101,9 @@ class _Proc(threading.Thread):
88
101
  service_name: str,
89
102
  exec_args: Sequence[str],
90
103
  timeout: int,
91
- set_conn_info: Callable[[ConnInfo], None],
104
+ on_connection_info_received: Callable[
105
+ [ErtServerConnectionInfo | Exception | None], None
106
+ ],
92
107
  project: Path,
93
108
  ) -> None:
94
109
  super().__init__()
@@ -98,7 +113,7 @@ class _Proc(threading.Thread):
98
113
  self._service_name = service_name
99
114
  self._exec_args = exec_args
100
115
  self._timeout = timeout
101
- self._set_conn_info = set_conn_info
116
+ self._propagate_connection_info_from_childproc = on_connection_info_received
102
117
  self._service_config_path = project / f"{self._service_name}_server.json"
103
118
 
104
119
  fd_read, fd_write = os.pipe()
@@ -119,13 +134,13 @@ class _Proc(threading.Thread):
119
134
  os.close(fd_write)
120
135
 
121
136
  def run(self) -> None:
122
- comm = self._read_conn_info(self._childproc)
137
+ comm = self._read_connection_info_from_process(self._childproc)
123
138
 
124
139
  if comm is None:
125
- self._set_conn_info(TimeoutError())
140
+ self._propagate_connection_info_from_childproc(TimeoutError())
126
141
  return # _read_conn_info() has already cleaned up in this case
127
142
 
128
- conn_info: ConnInfo = None
143
+ conn_info: ErtServerConnectionInfo | Exception | None = None
129
144
  try:
130
145
  conn_info = json.loads(comm)
131
146
  except json.JSONDecodeError:
@@ -134,7 +149,7 @@ class _Proc(threading.Thread):
134
149
  conn_info = exc
135
150
 
136
151
  try:
137
- self._set_conn_info(conn_info)
152
+ self._propagate_connection_info_from_childproc(conn_info)
138
153
 
139
154
  while True:
140
155
  if self._childproc.poll() is not None:
@@ -148,15 +163,16 @@ class _Proc(threading.Thread):
148
163
  self.logger.exception(e)
149
164
 
150
165
  finally:
151
- self._ensure_delete_conn_info()
166
+ self._ensure_connection_info_file_is_deleted()
152
167
 
153
168
  def shutdown(self) -> int:
154
169
  """Shutdown the server."""
155
170
  self._shutdown.set()
156
171
  self.join()
172
+
157
173
  return self._childproc.returncode
158
174
 
159
- def _read_conn_info(self, proc: Popen[bytes]) -> str | None:
175
+ def _read_connection_info_from_process(self, proc: Popen[bytes]) -> str | None:
160
176
  comm_buf = io.StringIO()
161
177
  first_iter = True
162
178
  while first_iter or proc.poll() is None:
@@ -166,7 +182,7 @@ class _Proc(threading.Thread):
166
182
  # Timeout reached, exit with a failure
167
183
  if ready == ([], [], []):
168
184
  self._do_shutdown()
169
- self._ensure_delete_conn_info()
185
+ self._ensure_connection_info_file_is_deleted()
170
186
  return None
171
187
 
172
188
  x = self._comm_pipe.read(PIPE_BUF)
@@ -190,7 +206,7 @@ class _Proc(threading.Thread):
190
206
  f"waiting for child-process exceeded timeout {self._timeout}s"
191
207
  )
192
208
 
193
- def _ensure_delete_conn_info(self) -> None:
209
+ def _ensure_connection_info_file_is_deleted(self) -> None:
194
210
  """
195
211
  Ensure that the JSON connection information file is deleted
196
212
  """
@@ -241,14 +257,14 @@ class BaseService:
241
257
  self,
242
258
  exec_args: Sequence[str] = (),
243
259
  timeout: int = 120,
244
- conn_info: ConnInfo = None,
260
+ conn_info: ErtServerConnectionInfo | Exception | None = None,
245
261
  project: str | None = None,
246
262
  ) -> None:
247
263
  self._exec_args = exec_args
248
264
  self._timeout = timeout
249
265
 
250
266
  self._proc: _Proc | None = None
251
- self._conn_info: ConnInfo = conn_info
267
+ self._conn_info: ErtServerConnectionInfo | Exception | None = conn_info
252
268
  self._conn_info_event = threading.Event()
253
269
  self._project = Path(project) if project is not None else Path.cwd()
254
270
 
@@ -313,7 +329,7 @@ class BaseService:
313
329
  if self._proc is not None:
314
330
  self._proc.join()
315
331
 
316
- def set_conn_info(self, info: ConnInfo) -> None:
332
+ def set_conn_info(self, info: ErtServerConnectionInfo | Exception | None) -> None:
317
333
  if self._conn_info is not None:
318
334
  raise ValueError("Connection information already set")
319
335
  if info is None: