climate-ref 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
climate_ref/cli/__init__.py CHANGED
@@ -88,7 +88,7 @@ def build_app() -> typer.Typer:
     :
         The CLI app
     """
-    app = typer.Typer(name="climate_ref", no_args_is_help=True)
+    app = typer.Typer(name="ref", no_args_is_help=True)
 
     app.command(name="solve")(solve.solve)
     app.add_typer(config.app, name="config")
@@ -136,10 +136,10 @@ def main(  # noqa: PLR0913
     ] = None,
 ) -> None:
     """
-    climate_ref: A CLI for the Assessment Fast Track Rapid Evaluation Framework
+    A CLI for the Assessment Fast Track Rapid Evaluation Framework
 
     This CLI provides a number of commands for managing and executing diagnostics.
-    """
+    """  # noqa: D401
     if quiet:
         log_level = LogLevel.Warning
     if verbose:
climate_ref/cli/config.py CHANGED
@@ -20,9 +20,9 @@ def list_(ctx: typer.Context) -> None:
     print(config.dumps(defaults=True))
 
 
-@app.command()
-def update() -> None:
-    """
-    Update a configuration value
-    """
-    print("config")
+# @app.command()
+# def update() -> None:
+#     """
+#     Update a configuration value
+#     """
+#     print("config")
climate_ref/cli/datasets.py CHANGED
@@ -1,5 +1,9 @@
 """
 View and ingest input datasets
+
+The metadata from these datasets is stored in the database so that it can be used to determine
+which executions are required for a given diagnostic without having to re-parse the datasets.
+
 """
 
 import errno
@@ -105,9 +109,12 @@ def ingest(  # noqa: PLR0913
     ] = False,
 ) -> None:
     """
-    Ingest a dataset
+    Ingest a directory of datasets into the database
+
+    Each dataset will be loaded and validated using the specified dataset adapter.
+    This will extract metadata from the datasets and store it in the database.
 
-    This will register a dataset in the database to be used for diagnostics calculations.
+    A table of the datasets will be printed to the console at the end of the operation.
     """
     config = ctx.obj.config
     db = ctx.obj.database
climate_ref/cli/executions.py CHANGED
@@ -1,5 +1,5 @@
 """
-View diagnostic executions
+View execution groups and their results
 """
 
 import pathlib
@@ -29,11 +29,19 @@ console = Console()
 @app.command()
 def list_groups(
     ctx: typer.Context,
-    column: Annotated[list[str] | None, typer.Option()] = None,
+    column: Annotated[
+        list[str] | None,
+        typer.Option(help="Only include specified columns in the output"),
+    ] = None,
     limit: int = typer.Option(100, help="Limit the number of rows to display"),
 ) -> None:
     """
     List the diagnostic execution groups that have been identified
+
+    The data catalog is sorted by the date that the execution group was created (first = newest).
+    If the `--column` option is provided, only the specified columns will be displayed.
+
+    The output will be in a tabular format.
     """
     session = ctx.obj.database.session
 
@@ -178,6 +186,8 @@ def _log_panel(result_directory: pathlib.Path) -> Panel | None:
 def inspect(ctx: typer.Context, execution_id: int) -> None:
     """
     Inspect a specific execution group by its ID
+
+    This will display the execution details, datasets, results directory, and logs if available.
     """
     config: Config = ctx.obj.config
     session = ctx.obj.database.session
climate_ref/cli/providers.py CHANGED
@@ -56,7 +56,10 @@ def create_env(
     ] = None,
 ) -> None:
     """
-    Create a virtual environment containing the provider software.
+    Create a conda environment containing the provider software.
+
+    If no provider is specified, all providers will be installed.
+    If the provider is up to date or does not use a virtual environment, it will be skipped.
     """
     config = ctx.obj.config
     db = ctx.obj.database
climate_ref/cli/solve.py CHANGED
@@ -49,6 +49,10 @@ def solve(  # noqa: PLR0913
 
     This may trigger a number of additional calculations depending on what data has been ingested
     since the last solve.
+    This command will block until all executions have been solved or the timeout is reached.
+
+    Filters can be applied to limit the diagnostics and providers that are considered; see the
+    `--diagnostic` and `--provider` options for more information.
     """
     config = ctx.obj.config
     db = ctx.obj.database
climate_ref/config.py CHANGED
@@ -15,6 +15,7 @@ which always take precedence over any other configuration values.
 # https://github.com/ESGF/esgf-download/blob/main/esgpull/config.py
 
 import importlib.resources
+import os
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
@@ -64,6 +65,7 @@ def ensure_absolute_path(path: str | Path) -> Path:
     """
     if isinstance(path, str):
         path = Path(path)
+    path = Path(*[os.path.expandvars(p) for p in path.parts])
     return path.resolve()
 
 
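With the added `os.path.expandvars` call, environment variables embedded in configured paths are now expanded component by component before the path is resolved. A minimal standalone sketch of the new behaviour, using a made-up `REF_ROOT` variable for illustration:

    import os
    from pathlib import Path

    def ensure_absolute_path(path: str | Path) -> Path:
        # Mirrors the 0.6.2 logic: expand $VARS in each path component, then resolve
        if isinstance(path, str):
            path = Path(path)
        path = Path(*[os.path.expandvars(p) for p in path.parts])
        return path.resolve()

    os.environ["REF_ROOT"] = "/data/ref"  # hypothetical variable for illustration
    print(ensure_absolute_path("$REF_ROOT/ingest"))  # -> /data/ref/ingest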
climate_ref/executor/__init__.py CHANGED
@@ -9,8 +9,9 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
 This is useful for local testing and debugging.
 """
 
+from .hpc import HPCExecutor
 from .local import LocalExecutor
 from .result_handling import handle_execution_result
 from .synchronous import SynchronousExecutor
 
-__all__ = ["LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
+__all__ = ["HPCExecutor", "LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
climate_ref/executor/hpc.py ADDED
@@ -0,0 +1,320 @@
+"""
+HPC-based executor that runs diagnostics via job schedulers.
+
+Use this executor if you want to:
+- run REF under HPC workflows
+- run REF across multiple nodes
+
+"""
+
+try:
+    import parsl
+except ImportError:  # pragma: no cover
+    raise ImportError("The HPCExecutor requires the `parsl` package")
+
+import os
+import time
+from typing import Any
+
+import parsl
+from loguru import logger
+from parsl import python_app
+from parsl.config import Config as ParslConfig
+from parsl.executors import HighThroughputExecutor
+from parsl.launchers import SrunLauncher
+from parsl.providers import SlurmProvider
+from tqdm import tqdm
+
+from climate_ref.config import Config
+from climate_ref.database import Database
+from climate_ref.models import Execution
+from climate_ref.slurm import HAS_REAL_SLURM, SlurmChecker
+from climate_ref_core.diagnostics import ExecutionDefinition, ExecutionResult
+from climate_ref_core.exceptions import DiagnosticError, ExecutionError
+from climate_ref_core.executor import execute_locally
+
+from .local import ExecutionFuture, process_result
+
+
+@python_app
+def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionResult:
+    """Run the function on compute nodes"""
+    # This is a catch-all for any exceptions that occur in the process and need to be
+    # raised for parsl retries to work
+    try:
+        return execute_locally(definition=definition, log_level=log_level, raise_error=True)
+    except DiagnosticError as e:  # pragma: no cover
+        # any diagnostic error will be caught here
+        logger.exception("Error running diagnostic")
+        raise e
+
+
+def _to_float(x: Any) -> float | None:
+    if x is None:
+        return None
+    if isinstance(x, int | float):
+        return float(x)
+    try:
+        return float(x)
+    except (ValueError, TypeError):
+        return None
+
+
+def _to_int(x: Any) -> int | None:
+    if x is None:
+        return None
+    if isinstance(x, int):
+        return x
+    try:
+        return int(float(x))  # Handles both "123" and "123.0"
+    except (ValueError, TypeError):
+        return None
+
+
+class HPCExecutor:
+    """
+    Run diagnostics by submitting jobs through an HPC scheduler
+    """
+
+    name = "hpc"
+
+    def __init__(
+        self,
+        *,
+        database: Database | None = None,
+        config: Config | None = None,
+        **executor_config: str | float | int,
+    ) -> None:
+        config = config or Config.default()
+        database = database or Database.from_config(config, run_migrations=False)
+
+        self.config = config
+        self.database = database
+
+        self.scheduler = executor_config.get("scheduler", "slurm")
+        self.account = str(executor_config.get("account", os.environ.get("USER")))
+        self.username = executor_config.get("username", os.environ.get("USER"))
+        self.partition = str(executor_config.get("partition")) if executor_config.get("partition") else None
+        self.qos = str(executor_config.get("qos")) if executor_config.get("qos") else None
+        self.req_nodes = int(executor_config.get("req_nodes", 1))
+        self.walltime = str(executor_config.get("walltime", "00:10:00"))
+        self.log_dir = str(executor_config.get("log_dir", "runinfo"))
+
+        self.cores_per_worker = _to_int(executor_config.get("cores_per_worker"))
+        self.mem_per_worker = _to_float(executor_config.get("mem_per_worker"))
+
+        hours, minutes, seconds = map(int, self.walltime.split(":"))
+        total_minutes = hours * 60 + minutes + seconds / 60
+        self.total_minutes = total_minutes
+
+        if executor_config.get("validation") and HAS_REAL_SLURM:
+            self._validate_slurm_params()
+
+        self._initialize_parsl()
+
+        self.parsl_results: list[ExecutionFuture] = []
+
+    def _validate_slurm_params(self) -> None:
+        """Validate the Slurm configuration using SlurmChecker.
+
+        Raises
+        ------
+        ValueError: If the account, partition or QOS are invalid or inaccessible.
+        """
+        slurm_checker = SlurmChecker()
+        if self.account and not slurm_checker.get_account_info(self.account):
+            raise ValueError(f"Account: {self.account} not valid")
+
+        partition_limits = None
+        node_info = None
+
+        if self.partition:
+            if not slurm_checker.get_partition_info(self.partition):
+                raise ValueError(f"Partition: {self.partition} not valid")
+
+            if not slurm_checker.can_account_use_partition(self.account, self.partition):
+                raise ValueError(f"Account: {self.account} cannot access partition: {self.partition}")
+
+            partition_limits = slurm_checker.get_partition_limits(self.partition)
+            node_info = slurm_checker.get_node_from_partition(self.partition)
+
+        qos_limits = None
+        if self.qos:
+            if not slurm_checker.get_qos_info(self.qos):
+                raise ValueError(f"QOS: {self.qos} not valid")
+
+            if not slurm_checker.can_account_use_qos(self.account, self.qos):
+                raise ValueError(f"Account: {self.account} cannot access qos: {self.qos}")
+
+            qos_limits = slurm_checker.get_qos_limits(self.qos)
+
+        max_cores_per_node = int(node_info["cpus"]) if node_info else None
+        if max_cores_per_node and self.cores_per_worker:
+            if self.cores_per_worker > max_cores_per_node:
+                raise ValueError(
+                    f"cores_per_worker: {self.cores_per_worker} "
+                    f"larger than the maximum in a node {max_cores_per_node}"
+                )
+
+        max_mem_per_node = float(node_info["real_memory"]) if node_info else None
+        if max_mem_per_node and self.mem_per_worker:
+            if self.mem_per_worker > max_mem_per_node:
+                raise ValueError(
+                    f"mem_per_worker: {self.mem_per_worker} "
+                    f"larger than the maximum memory in a node {max_mem_per_node}"
+                )
+
+        max_walltime_partition = (
+            partition_limits["max_time_minutes"] if partition_limits else self.total_minutes
+        )
+        max_walltime_qos = qos_limits["max_time_minutes"] if qos_limits else self.total_minutes
+
+        max_walltime_minutes = min(float(max_walltime_partition), float(max_walltime_qos))
+
+        if self.total_minutes > float(max_walltime_minutes):
+            raise ValueError(
+                f"Walltime: {self.walltime} exceeds the maximum time "
+                f"{max_walltime_minutes} allowed by {self.partition} and {self.qos}"
+            )
+
+    def _initialize_parsl(self) -> None:
+        executor_config = self.config.executor.config
+
+        provider = SlurmProvider(
+            account=self.account,
+            partition=self.partition,
+            qos=self.qos,
+            nodes_per_block=self.req_nodes,
+            max_blocks=int(executor_config.get("max_blocks", 1)),
+            scheduler_options=executor_config.get("scheduler_options", "#SBATCH -C cpu"),
+            worker_init=executor_config.get("worker_init", "source .venv/bin/activate"),
+            launcher=SrunLauncher(
+                debug=True,
+                overrides=executor_config.get("overrides", ""),
+            ),
+            walltime=self.walltime,
+            cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
+        )
+        executor = HighThroughputExecutor(
+            label="ref_hpc_executor",
+            cores_per_worker=self.cores_per_worker if self.cores_per_worker else 1,
+            mem_per_worker=self.mem_per_worker,
+            max_workers_per_node=_to_int(executor_config.get("max_workers_per_node", 16)),
+            cpu_affinity=str(executor_config.get("cpu_affinity")),
+            provider=provider,
+        )
+
+        hpc_config = ParslConfig(
+            run_dir=self.log_dir, executors=[executor], retries=int(executor_config.get("retries", 2))
+        )
+        parsl.load(hpc_config)
+
+    def run(
+        self,
+        definition: ExecutionDefinition,
+        execution: Execution | None = None,
+    ) -> None:
+        """
+        Submit a diagnostic execution to the scheduler
+
+        Parameters
+        ----------
+        definition
+            A description of the information needed for this execution of the diagnostic
+        execution
+            A database model representing the execution of the diagnostic.
+            If provided, the result will be updated in the database when completed.
+        """
+        # Submit the execution via parsl
+        # and track the future so we can wait for it to complete
+        future = _process_run(
+            definition=definition,
+            log_level=self.config.log_level,
+        )
+
+        self.parsl_results.append(
+            ExecutionFuture(
+                future=future,
+                definition=definition,
+                execution_id=execution.id if execution else None,
+            )
+        )
+
+    def join(self, timeout: float) -> None:
+        """
+        Wait for all diagnostics to finish
+
+        This will block until all diagnostics have completed or the timeout is reached.
+        If the timeout is reached, a TimeoutError is raised.
+
+        Parameters
+        ----------
+        timeout
+            Timeout in seconds (not used by the HPCExecutor)
+
+        Raises
+        ------
+        TimeoutError
+            If the timeout is reached
+        """
+        start_time = time.time()
+        refresh_time = 0.5
+
+        results = self.parsl_results
+        t = tqdm(total=len(results), desc="Waiting for executions to complete", unit="execution")
+
+        try:
+            while results:
+                # Iterate over a copy of the list and remove finished tasks
+                for result in results[:]:
+                    if result.future.done():
+                        # Cannot catch the exception raised by result.future.result
+                        if result.future.exception() is None:
+                            try:
+                                execution_result = result.future.result(timeout=0)
+                            except Exception as e:
+                                # Something went wrong when attempting to run the execution
+                                # This is likely a failure in the execution itself, not the diagnostic
+                                raise ExecutionError(
+                                    f"Failed to execute {result.definition.execution_slug()!r}"
+                                ) from e
+                        else:
+                            err = result.future.exception()
+                            if isinstance(err, DiagnosticError):
+                                execution_result = err.result
+                            else:
+                                execution_result = None
+
+                        assert execution_result is not None, "Execution result should not be None"
+                        assert isinstance(execution_result, ExecutionResult), (
+                            "Execution result should be of type ExecutionResult"
+                        )
+                        # Process the result in the main process
+                        # The results should be committed after each execution
+                        with self.database.session.begin():
+                            execution = (
+                                self.database.session.get(Execution, result.execution_id)
+                                if result.execution_id
+                                else None
+                            )
+                            process_result(self.config, self.database, execution_result, execution)
+                        logger.debug(f"Execution completed: {result}")
+                        t.update(n=1)
+                        results.remove(result)
+
+                # Break early to avoid waiting for one more sleep cycle
+                if len(results) == 0:
+                    break
+
+                elapsed_time = time.time() - start_time
+
+                if elapsed_time > self.total_minutes * 60:
+                    logger.debug(f"Time elapsed {elapsed_time} for joining the results")
+
+                # Wait for a short time before checking for completed executions
+                time.sleep(refresh_time)
+        finally:
+            t.close()
+            if parsl.dfk():
+                parsl.dfk().cleanup()
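For orientation, a sketch of constructing the new executor directly; the account, partition, and QOS values are placeholders, and the keyword arguments map onto the `executor_config` entries read in `__init__` above:

    from climate_ref.executor import HPCExecutor

    # Placeholder Slurm settings for illustration; real values are site-specific.
    # Constructing the executor loads the default Config, opens the database and
    # calls parsl.load(), so this is only meaningful on a configured HPC system.
    executor = HPCExecutor(
        account="m1234",
        partition="cpu",
        qos="regular",
        req_nodes=2,
        walltime="01:00:00",
        cores_per_worker=4,
        validation=True,  # only takes effect when pyslurm is importable (HAS_REAL_SLURM)
    )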
climate_ref/executor/local.py CHANGED
@@ -1,4 +1,5 @@
 import concurrent.futures
+import multiprocessing
 import time
 from concurrent.futures import Future, ProcessPoolExecutor
 from typing import Any
@@ -124,7 +125,12 @@ class LocalExecutor:
         if pool is not None:
             self.pool = pool
         else:
-            self.pool = ProcessPoolExecutor(max_workers=n, initializer=_process_initialiser)
+            self.pool = ProcessPoolExecutor(
+                max_workers=n,
+                initializer=_process_initialiser,
+                # Explicitly set the context to "spawn" to avoid issues with hanging on macOS
+                mp_context=multiprocessing.get_context("spawn"),
+            )
         self._results: list[ExecutionFuture] = []
 
     def run(
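A self-contained sketch of the same pattern, showing `ProcessPoolExecutor` with an explicit start-method context:

    import multiprocessing
    from concurrent.futures import ProcessPoolExecutor

    if __name__ == "__main__":
        # "spawn" starts each worker in a fresh interpreter instead of fork()ing,
        # sidestepping the hangs observed on macOS with fork-based workers.
        ctx = multiprocessing.get_context("spawn")
        with ProcessPoolExecutor(max_workers=2, mp_context=ctx) as pool:
            print(pool.submit(sum, [1, 2, 3]).result())  # prints 6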
climate_ref/slurm.py ADDED
@@ -0,0 +1,196 @@
+import importlib.util
+import os
+from typing import Any
+
+HAS_REAL_SLURM = importlib.util.find_spec("pyslurm") is not None
+
+
+class SlurmChecker:
+    """Check and get slurm settings."""
+
+    def __init__(self, intest: bool = False) -> None:
+        if HAS_REAL_SLURM:
+            import pyslurm  # type: ignore
+
+            self.slurm_association: dict[int, Any] | None = pyslurm.db.Associations.load()
+            self.slurm_partition: dict[str, Any] | None = pyslurm.Partitions.load()
+            self.slurm_qos: dict[str, Any] | None = pyslurm.qos().get()
+            self.slurm_node: dict[str, Any] | None = pyslurm.Nodes.load()
+        elif intest:
+            import pyslurm
+
+            self.slurm_association = pyslurm.db.Associations.load()  # dict [num -> Association]
+            self.slurm_partition = pyslurm.Partitions.load()  # collection
+            self.slurm_qos = pyslurm.qos().get()  # dict
+            self.slurm_node = pyslurm.Nodes.load()  # dict
+        else:
+            print("Warning: pyslurm not found. Skipping HPCExecutor config validations")
+            self.slurm_association = None
+            self.slurm_partition = None
+            self.slurm_qos = None
+            self.slurm_node = None
+
+    def get_partition_info(self, partition_name: str) -> Any:
+        """Check if a partition exists in the Slurm configuration."""
+        return self.slurm_partition.get(partition_name) if self.slurm_partition else None
+
+    def get_qos_info(self, qos_name: str) -> Any:
+        """Check if a QOS exists in the Slurm configuration."""
+        return self.slurm_qos.get(qos_name) if self.slurm_qos else None
+
+    def get_account_info(self, account_name: str) -> list[Any]:
+        """Get all associations for an account"""
+        if self.slurm_association:
+            return [a for a in self.slurm_association.values() if a.account == account_name]
+        else:
+            return [None]
+
+    def can_account_use_partition(self, account_name: str, partition_name: str) -> bool:
+        """
+        Check if an account has access to a specific partition.
+
+        Returns
+        -------
+        bool
+            True if accessible, False if not accessible or an error occurred
+        """
+        account_info = self.get_account_info(account_name)
+        if not account_info:
+            return False
+
+        partition_info = self.get_partition_info(partition_name)
+
+        if not partition_info:
+            return False
+
+        allowed_partitions = account_info[0].partition
+        if allowed_partitions is None:
+            return True
+        else:
+            return partition_name in allowed_partitions
+
+    def can_account_use_qos(self, account_name: str, qos_name: str) -> bool:
+        """
+        Check if an account has access to a specific QOS.
+
+        Returns
+        -------
+        bool
+            True if accessible, False if not accessible or an error occurred
+        """
+        account_info = self.get_account_info(account_name)
+
+        if not account_info:
+            return False
+
+        qos_info = self.get_qos_info(qos_name)
+        if not qos_info:
+            return False
+
+        sample_acc = account_info[0]
+        user_name = os.environ["USER"]
+
+        if user_name:
+            for acc in account_info:
+                if acc.user == user_name:
+                    sample_acc = acc
+                    break
+
+        allowed_qoss = sample_acc.qos
+        if allowed_qoss is None:
+            return True
+        else:
+            return qos_name in allowed_qoss
+
+    def get_partition_limits(self, partition_name: str) -> dict[str, str | int] | None:
+        """
+        Get time limits for a specific partition.
+
+        Returns
+        -------
+        Dict with time limits (in minutes) and node/CPU totals,
+        or None if the partition doesn't exist or an error occurred
+        """
+        partition_info = self.get_partition_info(partition_name)
+        if not partition_info:
+            return None
+
+        return {
+            "max_time_minutes": partition_info.to_dict().get("max_time", 0),  # in minutes
+            "default_time_minutes": partition_info.to_dict().get("default_time", 30),  # in minutes
+            "max_nodes": partition_info.to_dict().get("max_node", 1),
+            "total_nodes": partition_info.to_dict().get("total_nodes", 0),
+            "total_cpus": partition_info.to_dict().get("total_cpus", 0),
+        }
+
+    def get_node_from_partition(self, partition_name: str) -> dict[str, str | int] | None:
+        """
+        Get the node information for a specific partition.
+
+        Returns
+        -------
+        Dict describing a representative node in the partition,
+        or None if the partition doesn't exist
+        """
+        partition_info = self.get_partition_info(partition_name)
+        if not partition_info:
+            return None
+
+        sample_node = None
+
+        if self.slurm_node:
+            for node in self.slurm_node.values():
+                if partition_name in node.partitions and "cpu" in node.available_features:
+                    sample_node = node
+                    break
+
+        return {
+            "cpus": int(sample_node.total_cpus) if sample_node is not None else 1,
+            "cores_per_socket": int(sample_node.cores_per_socket) if sample_node is not None else 1,
+            "sockets": int(sample_node.sockets) if sample_node is not None else 1,
+            "threads_per_core": int(sample_node.threads_per_core) if sample_node is not None else 1,
+            "real_memory": int(sample_node.real_memory) if sample_node is not None else 215,
+            "node_names": sample_node.name if sample_node is not None else "unknown",
+        }
+
+    def get_qos_limits(self, qos_name: str) -> dict[str, str | int]:
+        """
+        Get time and job limits for a specific QOS.
+
+        Returns
+        -------
+        Dict with 'max_time_minutes', job-count limits and TRES limits
+        """
+        qos_info = self.get_qos_info(qos_name)
+
+        return {
+            "max_time_minutes": qos_info.get("max_wall_pj", 1.0e6),
+            "max_jobs_pu": qos_info.get("max_jobs_pu", 1.0e6),
+            "max_submit_jobs_pu": qos_info.get("max_submit_jobs_pu", 1.0e6),
+            "max_tres_pj": qos_info.get("max_tres_pj").split("=")[0],
+            "default_time_minutes": 120,
+        }
+
+    def check_account_partition_access_with_limits(
+        self, account_name: str, partition_name: str
+    ) -> dict[str, Any]:
+        """
+        Comprehensive check of account access and partition limits.
+
+        Returns a dictionary with all relevant information.
+        """
+        result = {
+            "account_exists": True if self.get_account_info(account_name) else False,
+            "partition_exists": True if self.get_partition_info(partition_name) else False,
+            "has_access": False,
+            "time_limits": None,
+            "error": "none",
+        }
+
+        try:
+            if result["account_exists"] and result["partition_exists"]:
+                result["has_access"] = self.can_account_use_partition(account_name, partition_name)
+                if result["has_access"]:
+                    result["time_limits"] = self.get_partition_info(partition_name).to_dict().get("max_time")
+        except Exception as e:
+            result["error"] = str(e)
+
+        return result
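A short usage sketch for the new checker; the account and partition names below are placeholders. Without `pyslurm` installed, the constructor prints a warning and the lookups degrade to None/empty results instead of raising:

    from climate_ref.slurm import HAS_REAL_SLURM, SlurmChecker

    checker = SlurmChecker()
    if HAS_REAL_SLURM:
        print(checker.can_account_use_partition("m1234", "cpu"))  # -> bool
        print(checker.get_partition_limits("cpu"))                # -> dict of limits or None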
climate_ref-0.6.0.dist-info/METADATA → climate_ref-0.6.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref
-Version: 0.6.0
+Version: 0.6.2
 Summary: Application which runs the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0
@@ -10,7 +10,8 @@ Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
@@ -25,6 +26,7 @@ Requires-Dist: climate-ref-core
 Requires-Dist: ecgtools>=2024.7.31
 Requires-Dist: environs>=11.0.0
 Requires-Dist: loguru>=0.7.2
+Requires-Dist: parsl>=2025.5.19; sys_platform != 'win32'
 Requires-Dist: platformdirs>=4.3.6
 Requires-Dist: sqlalchemy>=2.0.36
 Requires-Dist: tomlkit>=0.13.2
climate_ref-0.6.0.dist-info/RECORD → climate_ref-0.6.2.dist-info/RECORD
@@ -1,20 +1,21 @@
 climate_ref/__init__.py,sha256=M45QGfl0KCPK48A8MjI08weNvZHMYH__GblraQMxsoM,808
 climate_ref/_config_helpers.py,sha256=-atI5FX7SukhLE_jz_rL-EHQ7s0YYqKu3dSFYWxSyMU,6632
 climate_ref/alembic.ini,sha256=WRvbwSIFuZ7hWNMnR2-yHPJAwYUnwhvRYBzkJhtpGdg,3535
-climate_ref/config.py,sha256=SHxqdpzq-TIfAdhwk1Yt-ob96T2a3pqYcq-Wed4Ljgg,16882
+climate_ref/config.py,sha256=T1WzwFhzJ2-RKnOzyOmyUsdXrj_KDW2eycdPXZKnbf0,16954
 climate_ref/constants.py,sha256=9RaNLgUSuQva7ki4eRW3TjOKeVP6T81QNiu0veB1zVk,111
 climate_ref/database.py,sha256=b_6XHdr78Mo7KeLqQJ5DjLsySHPdQE83P8dRpdMfzfM,8661
 climate_ref/provider_registry.py,sha256=dyfj4vU6unKHNXtT03HafQtAi3LilL37uvu3paCnmNY,4159
 climate_ref/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+climate_ref/slurm.py,sha256=XWXVPdXP-4BDib3bxYW9uPcAJdPpo1ixYZAI_y1cZuw,7305
 climate_ref/solver.py,sha256=T5sQjweSvpUMG4q8MfbGjljxa5kBgKxNotT78PwyxqU,16804
 climate_ref/testing.py,sha256=1b9lVCJlKxjJ7JGq6zDD2gK3BEM9ZVv1dbA-j6yb4Yk,4256
-climate_ref/cli/__init__.py,sha256=q-JAiRmwTXqapJGwtfuZ2P-L1a4XAmWj3CoZKLWlP3A,4357
+climate_ref/cli/__init__.py,sha256=fvENOeL4j7CMPANVxrDlWfaB0QUvvYgrWcm5ptbL0P8,4350
 climate_ref/cli/_utils.py,sha256=6bIb8zEVvzXyKpv8MG58T-T2L2jH-G8WNrOOGpz3uCw,1918
-climate_ref/cli/config.py,sha256=8I6CLdqKgTu6yaASy-qG0T839Fc0lDZtLSZ6YCc4wOY,520
-climate_ref/cli/datasets.py,sha256=5fEh4VnQUcQKxSsFc8u6lWkOlpv7-ix-1eccK2TET9c,7890
-climate_ref/cli/executions.py,sha256=sZXyVFYWML5mD7dE8xlsqyunsrwOIweTBDEUKCjXEpo,6798
-climate_ref/cli/providers.py,sha256=eS9IaQxW8zGxidr8TWt7thdMU5JH53u4T3xbcIe2C_E,2455
-climate_ref/cli/solve.py,sha256=qc7yalXxqdcSZsoCh2ZSV7Mt6mxTKc4lg7zKpMA55Y8,2112
+climate_ref/cli/config.py,sha256=ak4Rn9S6fH23PkHHlI-pXuPiZYOvUB4r26eu3p525-M,532
+climate_ref/cli/datasets.py,sha256=4iYQZ0ceoF-Cd8eSpS4Q8b9aLt_vEDUw5slzGw02DsY,8277
+climate_ref/cli/executions.py,sha256=2MjwxCdRB-uVJUg7RluDIf_IsoclRn22ibJjk_nhfPo,7215
+climate_ref/cli/providers.py,sha256=-5hQkJc01jON1bc1dk--tSWTesyiHOzZuYMb9Vxge9k,2613
+climate_ref/cli/solve.py,sha256=ZTXrwDFDXNrX5GLMJTN9tFnpV3zlcZbEu2aF3JDJVxI,2367
 climate_ref/dataset_registry/obs4ref_reference.txt,sha256=2zJMbsAsQ49KaWziX3CqrlILq9yN7S2ygmfV3V5rsnw,8395
 climate_ref/dataset_registry/sample_data.txt,sha256=3JAHy14pRbLlo9-oNxUXLgZ_QOFJXUieEftBbapSY8E,20124
 climate_ref/datasets/__init__.py,sha256=PV3u5ZmhyfcHbKqySgwVA8m4-naZgxzydLXSBqdTGLM,1171
@@ -23,8 +24,9 @@ climate_ref/datasets/cmip6.py,sha256=3MVJ1kPdw6f6V3G4gdHIiqDGUyMqPs-_wttkw2YKAH0
 climate_ref/datasets/obs4mips.py,sha256=CmMm4kopfb0yFsMSgUlHUm8clGJImBaodSkh6lAv_Ug,5926
 climate_ref/datasets/pmp_climatology.py,sha256=goHDc_3B2Wdiy_hmpERNvWDdDYZACPOyFDt3Du6nGc0,534
 climate_ref/datasets/utils.py,sha256=iLJO7h4G3DWsRe9hIC4qkIyi5_zIW1ZMw-FDASLujtM,359
-climate_ref/executor/__init__.py,sha256=DooN4jQudmLHyw24IfqNfWynfa1vEolLs-mZ7uY8O0k,604
-climate_ref/executor/local.py,sha256=P_nGD4blrLavk-ISj73cakAQCeELM_hNIhs8yVWWSAQ,8353
+climate_ref/executor/__init__.py,sha256=PYtJs3oBS_GiUHbt8BF-6wJibpF6_vREm1Cg9TxVbLI,648
+climate_ref/executor/hpc.py,sha256=4o90sCyoC4jlkem3BXNo4uwFZpIvOUGfrqYucB6EtU8,12251
+climate_ref/executor/local.py,sha256=65LUl41YtURFb87YTWZQHjDpIRlIKJ5Ny51c9DZjy0s,8582
 climate_ref/executor/result_handling.py,sha256=i7ZMX5vvyPY5gW-WWd-JHLi1BLviB9FXhn4FE8C9d4w,7787
 climate_ref/executor/synchronous.py,sha256=o4TndsoKMu9AzJYLkusU9lRkgHCy6HcCP46tEs6o86U,1895
 climate_ref/migrations/README,sha256=xM5osYbyEbEFA2eh5kwary_oh-5VFWtDubA-vgWwvlE,935
@@ -39,9 +41,9 @@ climate_ref/models/diagnostic.py,sha256=YB6xzbEXdpz2j-Ddf19RV8mAiWBrkmtRmiAEUV3t
 climate_ref/models/execution.py,sha256=lRCpaKLSR7rZbuoL94GW76tm9wLMsSDoIOA7bIa6xgY,9848
 climate_ref/models/metric_value.py,sha256=44OLcZz-qLx-p_9w7YWDKpD5S7Y9HyTKKsvSb77RBro,10190
 climate_ref/models/provider.py,sha256=RAE2qAAxwObu-72CdK4kt5ACMmKYEn07WJm7DU9hF28,990
-climate_ref-0.6.0.dist-info/METADATA,sha256=Ov6ZLG2A0D78je48OtG4TOhaCczrwoknsHSSt0rwutE,4399
-climate_ref-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-climate_ref-0.6.0.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
-climate_ref-0.6.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-climate_ref-0.6.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
-climate_ref-0.6.0.dist-info/RECORD,,
+climate_ref-0.6.2.dist-info/METADATA,sha256=1yFQ4n5psYiBtO6jmhhEMdWkpOhM74sD_rE92XXm3Do,4505
+climate_ref-0.6.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+climate_ref-0.6.2.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
+climate_ref-0.6.2.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+climate_ref-0.6.2.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
+climate_ref-0.6.2.dist-info/RECORD,,