climate-ref 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,266 +9,8 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
9
9
  This is useful for local testing and debugging.
10
10
  """
11
11
 
12
- import importlib
13
- import pathlib
14
- import shutil
15
- from typing import TYPE_CHECKING
12
+ from .local import LocalExecutor
13
+ from .result_handling import handle_execution_result
14
+ from .synchronous import SynchronousExecutor
16
15
 
17
- from loguru import logger
18
- from sqlalchemy import insert
19
-
20
- from climate_ref.database import Database
21
- from climate_ref.models.execution import Execution, ExecutionOutput, ResultOutputType
22
- from climate_ref.models.metric_value import MetricValue
23
- from climate_ref_core.diagnostics import ExecutionResult, ensure_relative_path
24
- from climate_ref_core.exceptions import InvalidExecutorException, ResultValidationError
25
- from climate_ref_core.executor import EXECUTION_LOG_FILENAME, Executor
26
- from climate_ref_core.pycmec.controlled_vocabulary import CV
27
- from climate_ref_core.pycmec.metric import CMECMetric
28
- from climate_ref_core.pycmec.output import CMECOutput, OutputDict
29
-
30
- if TYPE_CHECKING:
31
- from climate_ref.config import Config
32
-
33
-
34
- def import_executor_cls(fqn: str) -> type[Executor]:
35
- """
36
- Import an executor using a fully qualified module path
37
-
38
- Parameters
39
- ----------
40
- fqn
41
- Full package and attribute name of the executor to import
42
-
43
- For example: `climate_ref_example.executor` will use the `executor` attribute from the
44
- `climate_ref_example` package.
45
-
46
- Raises
47
- ------
48
- climate_ref_core.exceptions.InvalidExecutorException
49
- If the executor cannot be imported
50
-
51
- If the attribute named by `fqn` does not exist in the imported module.
52
-
53
- Returns
54
- -------
55
- :
56
- The executor class (not an instance)
57
- """
58
- module, attribute_name = fqn.rsplit(".", 1)
59
-
60
- try:
61
- imp = importlib.import_module(module)
62
- executor: type[Executor] = getattr(imp, attribute_name)
63
-
64
- # We can't really check if the executor is a subclass of Executor here
65
- # Protocols can't be used with issubclass if they have non-method members
66
- # We have to check this at class instantiation time
67
-
68
- return executor
69
- except ModuleNotFoundError:
70
- logger.error(f"Package '{fqn}' not found")
71
- raise InvalidExecutorException(fqn, f"Module '{module}' not found")
72
- except AttributeError:
73
- logger.error(f"Provider '{fqn}' not found")
74
- raise InvalidExecutorException(fqn, f"Executor '{attribute_name}' not found in {module}")
75
-
76
-
77
- def _copy_file_to_results(
78
- scratch_directory: pathlib.Path,
79
- results_directory: pathlib.Path,
80
- fragment: pathlib.Path | str,
81
- filename: pathlib.Path | str,
82
- ) -> None:
83
- """
84
- Copy a file from the scratch directory to the executions directory
85
-
86
- Parameters
87
- ----------
88
- scratch_directory
89
- The directory where the file is currently located
90
- results_directory
91
- The directory where the file should be copied to
92
- fragment
93
- The fragment of the executions directory where the file should be copied
94
- filename
95
- The name of the file to be copied
96
- """
97
- assert results_directory != scratch_directory # noqa
98
- input_directory = scratch_directory / fragment
99
- output_directory = results_directory / fragment
100
-
101
- filename = ensure_relative_path(filename, input_directory)
102
-
103
- if not (input_directory / filename).exists():
104
- raise FileNotFoundError(f"Could not find {filename} in {input_directory}")
105
-
106
- output_filename = output_directory / filename
107
- output_filename.parent.mkdir(parents=True, exist_ok=True)
108
-
109
- shutil.copy(input_directory / filename, output_filename)
110
-
111
-
112
- def handle_execution_result(
113
- config: "Config",
114
- database: Database,
115
- execution: Execution,
116
- result: "ExecutionResult",
117
- ) -> None:
118
- """
119
- Handle the result of a diagnostic execution
120
-
121
- This will update the diagnostic execution result with the output of the diagnostic execution.
122
- The output will be copied from the scratch directory to the executions directory.
123
-
124
- Parameters
125
- ----------
126
- config
127
- The configuration to use
128
- database
129
- The active database session to use
130
- execution
131
- The diagnostic execution result DB object to update
132
- result
133
- The result of the diagnostic execution, either successful or failed
134
- """
135
- # Always copy log data
136
- _copy_file_to_results(
137
- config.paths.scratch,
138
- config.paths.results,
139
- execution.output_fragment,
140
- EXECUTION_LOG_FILENAME,
141
- )
142
-
143
- if result.successful and result.metric_bundle_filename is not None:
144
- logger.info(f"{execution} successful")
145
-
146
- _copy_file_to_results(
147
- config.paths.scratch,
148
- config.paths.results,
149
- execution.output_fragment,
150
- result.metric_bundle_filename,
151
- )
152
- execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))
153
-
154
- if result.output_bundle_filename:
155
- _copy_file_to_results(
156
- config.paths.scratch,
157
- config.paths.results,
158
- execution.output_fragment,
159
- result.output_bundle_filename,
160
- )
161
- _handle_output_bundle(
162
- config,
163
- database,
164
- execution,
165
- result.to_output_path(result.output_bundle_filename),
166
- )
167
-
168
- cmec_metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
169
-
170
- # Check that the diagnostic values conform with the controlled vocabulary
171
- try:
172
- cv = CV.load_from_file(config.paths.dimensions_cv)
173
- cv.validate_metrics(cmec_metric_bundle)
174
- except (ResultValidationError, AssertionError):
175
- logger.exception("Diagnostic values do not conform with the controlled vocabulary")
176
- # TODO: Mark the diagnostic execution result as failed once the CV has stabilised
177
- # execution.mark_failed()
178
-
179
- # Perform a bulk insert of a diagnostic bundle
180
- # TODO: The section below will likely fail until we have agreed on a controlled vocabulary
181
- # The current implementation will swallow the exception, but display a log message
182
- try:
183
- # Perform this in a nested transaction to (hopefully) gracefully rollback if something
184
- # goes wrong
185
- with database.session.begin_nested():
186
- database.session.execute(
187
- insert(MetricValue),
188
- [
189
- {
190
- "execution_id": execution.id,
191
- "value": result.value,
192
- "attributes": result.attributes,
193
- **result.dimensions,
194
- }
195
- for result in cmec_metric_bundle.iter_results()
196
- ],
197
- )
198
- except Exception:
199
- # TODO: Remove once we have settled on a controlled vocabulary
200
- logger.exception("Something went wrong when ingesting diagnostic values")
201
-
202
- # TODO: This should check if the result is the most recent for the execution,
203
- # if so then update the dirty fields
204
- # i.e. if there are outstanding executions don't mark as clean
205
- execution.execution_group.dirty = False
206
- else:
207
- logger.error(f"{execution} failed")
208
- execution.mark_failed()
209
-
210
-
211
- def _handle_output_bundle(
212
- config: "Config",
213
- database: Database,
214
- execution: Execution,
215
- cmec_output_bundle_filename: pathlib.Path,
216
- ) -> None:
217
- # Extract the registered outputs
218
- # Copy the content to the output directory
219
- # Track in the db
220
- cmec_output_bundle = CMECOutput.load_from_json(cmec_output_bundle_filename)
221
- _handle_outputs(
222
- cmec_output_bundle.plots,
223
- output_type=ResultOutputType.Plot,
224
- config=config,
225
- database=database,
226
- execution=execution,
227
- )
228
- _handle_outputs(
229
- cmec_output_bundle.data,
230
- output_type=ResultOutputType.Data,
231
- config=config,
232
- database=database,
233
- execution=execution,
234
- )
235
- _handle_outputs(
236
- cmec_output_bundle.html,
237
- output_type=ResultOutputType.HTML,
238
- config=config,
239
- database=database,
240
- execution=execution,
241
- )
242
-
243
-
244
- def _handle_outputs(
245
- outputs: dict[str, OutputDict] | None,
246
- output_type: ResultOutputType,
247
- config: "Config",
248
- database: Database,
249
- execution: Execution,
250
- ) -> None:
251
- if outputs is None:
252
- return
253
-
254
- for key, output_info in outputs.items():
255
- filename = ensure_relative_path(
256
- output_info.filename, config.paths.scratch / execution.output_fragment
257
- )
258
-
259
- _copy_file_to_results(
260
- config.paths.scratch,
261
- config.paths.results,
262
- execution.output_fragment,
263
- filename,
264
- )
265
- database.session.add(
266
- ExecutionOutput(
267
- execution_id=execution.id,
268
- output_type=output_type,
269
- filename=str(filename),
270
- description=output_info.description,
271
- short_name=key,
272
- long_name=output_info.long_name,
273
- )
274
- )
16
+ __all__ = ["LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
@@ -1,42 +1,128 @@
1
+ import concurrent.futures
2
+ import time
3
+ from concurrent.futures import Future, ProcessPoolExecutor
1
4
  from typing import Any
2
5
 
6
+ from attrs import define
3
7
  from loguru import logger
8
+ from tqdm import tqdm
4
9
 
5
10
  from climate_ref.config import Config
6
11
  from climate_ref.database import Database
7
- from climate_ref.executor import handle_execution_result
8
12
  from climate_ref.models import Execution
9
- from climate_ref_core.diagnostics import Diagnostic, ExecutionDefinition, ExecutionResult
10
- from climate_ref_core.logging import redirect_logs
11
- from climate_ref_core.providers import DiagnosticProvider
13
+ from climate_ref_core.diagnostics import ExecutionDefinition, ExecutionResult
14
+ from climate_ref_core.exceptions import ExecutionError
15
+ from climate_ref_core.executor import execute_locally
16
+ from climate_ref_core.logging import add_log_handler
17
+
18
+ from .result_handling import handle_execution_result
19
+
20
+
21
+ def process_result(
22
+ config: Config, database: Database, result: ExecutionResult, execution: Execution | None
23
+ ) -> None:
24
+ """
25
+ Process the result of a diagnostic execution
26
+
27
+ Parameters
28
+ ----------
29
+ config
30
+ The configuration object
31
+ database
32
+ The database object
33
+ result
34
+ The result of the diagnostic execution.
35
+
36
+ This could have either been a success or failure.
37
+ execution
38
+ A database model representing the execution of the diagnostic.
39
+ """
40
+ if not result.successful:
41
+ if execution is not None: # pragma: no branch
42
+ info_msg = (
43
+ f"\nAdditional information about this execution can be viewed using: "
44
+ f"ref executions inspect {execution.execution_group_id}"
45
+ )
46
+ else:
47
+ info_msg = ""
48
+
49
+ logger.exception(f"Error running {result.definition.execution_slug()}. {info_msg}")
50
+
51
+ if execution:
52
+ handle_execution_result(config, database, execution, result)
53
+
54
+
55
+ @define
56
+ class ExecutionFuture:
57
+ """
58
+ A container to hold the future and execution definition
59
+ """
60
+
61
+ future: Future[ExecutionResult]
62
+ definition: ExecutionDefinition
63
+ execution_id: int | None = None
64
+
65
+
66
+ def _process_initialiser() -> None:
67
+ # Setup the logging for the process
68
+ # This replaces the loguru default handler
69
+ try:
70
+ add_log_handler()
71
+ except Exception as e:
72
+ # Don't raise an exception here as that would kill the process pool
73
+ # We want to log the error and continue
74
+ logger.error(f"Failed to add log handler: {e}")
75
+
76
+
77
+ def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionResult:
78
+ # This is a catch-all for any exceptions that occur in the process
79
+ try:
80
+ return execute_locally(definition=definition, log_level=log_level)
81
+ except Exception: # pragma: no cover
82
+ # This isn't expected but if it happens we want to log the error before the process exits
83
+ logger.exception("Error running diagnostic")
84
+ # This will kill the process pool
85
+ raise
12
86
 
13
87
 
14
88
  class LocalExecutor:
15
89
  """
16
- Run a diagnostic locally, in-process.
90
+ Run a diagnostic locally using a process pool.
17
91
 
18
- This is mainly useful for debugging and testing.
19
- The production executor will run the diagnostic in a separate process or container,
20
- the exact manner of which is yet to be determined.
92
+ This performs the diagnostic executions in parallel using different processes.
93
+ The maximum number of processes is determined by the `n` parameter and defaults to the number of CPUs.
94
+
95
+ This executor is the default executor and is used when no other executor is specified.
21
96
  """
22
97
 
23
98
  name = "local"
24
99
 
25
100
  def __init__(
26
- self, *, database: Database | None = None, config: Config | None = None, **kwargs: Any
101
+ self,
102
+ *,
103
+ database: Database | None = None,
104
+ config: Config | None = None,
105
+ n: int | None = None,
106
+ pool: concurrent.futures.Executor | None = None,
107
+ **kwargs: Any,
27
108
  ) -> None:
28
109
  if config is None:
29
110
  config = Config.default()
30
111
  if database is None:
31
112
  database = Database.from_config(config, run_migrations=False)
113
+ self.n = n
32
114
 
33
115
  self.database = database
34
116
  self.config = config
35
117
 
118
+ if pool is not None:
119
+ self.pool = pool
120
+ else:
121
+ self.pool = ProcessPoolExecutor(max_workers=n, initializer=_process_initialiser)
122
+ self._results: list[ExecutionFuture] = []
123
+
36
124
  def run(
37
125
  self,
38
- provider: DiagnosticProvider,
39
- diagnostic: Diagnostic,
40
126
  definition: ExecutionDefinition,
41
127
  execution: Execution | None = None,
42
128
  ) -> None:
@@ -45,45 +131,92 @@ class LocalExecutor:
45
131
 
46
132
  Parameters
47
133
  ----------
48
- provider
49
- The provider of the diagnostic
50
- diagnostic
51
- Diagnostic to run
52
134
  definition
53
135
  A description of the information needed for this execution of the diagnostic
54
136
  execution
55
137
  A database model representing the execution of the diagnostic.
56
138
  If provided, the result will be updated in the database when completed.
57
139
  """
58
- definition.output_directory.mkdir(parents=True, exist_ok=True)
59
-
60
- try:
61
- with redirect_logs(definition, self.config.log_level):
62
- result = diagnostic.run(definition=definition)
63
- except Exception:
64
- if execution is not None: # pragma: no branch
65
- info_msg = (
66
- f"\nAdditional information about this execution can be viewed using: "
67
- f"ref executions inspect {execution.execution_group_id}"
68
- )
69
- else:
70
- info_msg = ""
71
-
72
- logger.exception(f"Error running diagnostic {diagnostic.slug}. {info_msg}")
73
- result = ExecutionResult.build_from_failure(definition)
74
-
75
- if execution:
76
- handle_execution_result(self.config, self.database, execution, result)
140
+ # Submit the execution to the process pool
141
+ # and track the future so we can wait for it to complete
142
+ future = self.pool.submit(
143
+ _process_run,
144
+ definition=definition,
145
+ log_level=self.config.log_level,
146
+ )
147
+ self._results.append(
148
+ ExecutionFuture(
149
+ future=future,
150
+ definition=definition,
151
+ execution_id=execution.id if execution else None,
152
+ )
153
+ )
77
154
 
78
155
  def join(self, timeout: float) -> None:
79
156
  """
80
157
  Wait for all diagnostics to finish
81
158
 
82
- This returns immediately because the local executor runs diagnostics synchronously.
159
+ This will block until all diagnostics have completed or the timeout is reached.
160
+ If the timeout is reached, a `TimeoutError` is raised instead of returning normally.
83
161
 
84
162
  Parameters
85
163
  ----------
86
164
  timeout
87
- Timeout in seconds (Not used)
165
+ Timeout in seconds
166
+
167
+ Raises
168
+ ------
169
+ TimeoutError
170
+ If the timeout is reached
88
171
  """
89
- return
172
+ start_time = time.time()
173
+ refresh_time = 0.5 # Time to wait between checking for completed tasks in seconds
174
+
175
+ results = self._results
176
+ t = tqdm(total=len(results), desc="Waiting for executions to complete", unit="execution")
177
+
178
+ try:
179
+ while results:
180
+ # Iterate over a copy of the list and remove finished tasks
181
+ for result in results[:]:
182
+ if result.future.done():
183
+ try:
184
+ execution_result = result.future.result(timeout=0)
185
+ except Exception as e:
186
+ # Something went wrong when attempting to run the execution
187
+ # This is likely a failure in the execution itself, not the diagnostic
188
+ raise ExecutionError(
189
+ f"Failed to execute {result.definition.execution_slug()!r}"
190
+ ) from e
191
+
192
+ assert execution_result is not None, "Execution result should not be None"
193
+ assert isinstance(execution_result, ExecutionResult), (
194
+ "Execution result should be of type ExecutionResult"
195
+ )
196
+
197
+ # Process the result in the main process
198
+ # The results should be committed after each execution
199
+ with self.database.session.begin():
200
+ execution = (
201
+ self.database.session.get(Execution, result.execution_id)
202
+ if result.execution_id
203
+ else None
204
+ )
205
+ process_result(self.config, self.database, result.future.result(), execution)
206
+ logger.debug(f"Execution completed: {result}")
207
+ t.update(n=1)
208
+ results.remove(result)
209
+
210
+ # Break early to avoid waiting for one more sleep cycle
211
+ if len(results) == 0:
212
+ break
213
+
214
+ elapsed_time = time.time() - start_time
215
+
216
+ if elapsed_time > timeout:
217
+ raise TimeoutError("Not all tasks completed within the specified timeout")
218
+
219
+ # Wait for a short time before checking for completed executions
220
+ time.sleep(refresh_time)
221
+ finally:
222
+ t.close()