climate-ref 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
climate_ref/config.py CHANGED
@@ -17,7 +17,7 @@ which always take precedence over any other configuration values.
17
17
  import importlib.resources
18
18
  import os
19
19
  from pathlib import Path
20
- from typing import TYPE_CHECKING, Any
20
+ from typing import TYPE_CHECKING, Any, Literal
21
21
 
22
22
  import tomlkit
23
23
  from attr import Factory
@@ -215,17 +215,17 @@ class DiagnosticProviderConfig:
215
215
 
216
216
  ```toml
217
217
  [[diagnostic_providers]]
218
- provider = "climate_ref_esmvaltool.provider"
218
+ provider = "climate_ref_esmvaltool:provider"
219
219
 
220
220
  [diagnostic_providers.config]
221
221
 
222
222
  [[diagnostic_providers]]
223
- provider = "climate_ref_ilamb.provider"
223
+ provider = "climate_ref_ilamb:provider"
224
224
 
225
225
  [diagnostic_providers.config]
226
226
 
227
227
  [[diagnostic_providers]]
228
- provider = "climate_ref_pmp.provider"
228
+ provider = "climate_ref_pmp:provider"
229
229
 
230
230
  [diagnostic_providers.config]
231
231
  ```
@@ -311,10 +311,12 @@ def default_providers() -> list[DiagnosticProviderConfig]:
311
311
  if env_providers:
312
312
  return [DiagnosticProviderConfig(provider=provider) for provider in env_providers]
313
313
 
314
+ # Refer to https://setuptools.pypa.io/en/latest/userguide/entry_point.html#entry-points-for-plugins
315
+ # and https://packaging.python.org/en/latest/specifications/entry-points/
316
+ # to learn more about entry points.
314
317
  return [
315
- DiagnosticProviderConfig(provider="climate_ref_esmvaltool.provider", config={}),
316
- DiagnosticProviderConfig(provider="climate_ref_ilamb.provider", config={}),
317
- DiagnosticProviderConfig(provider="climate_ref_pmp.provider", config={}),
318
+ DiagnosticProviderConfig(provider=entry_point.value, config={})
319
+ for entry_point in importlib.metadata.entry_points(group="climate-ref.providers")
318
320
  ]
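The rewritten `default_providers` above replaces the hard-coded provider list with discovery via Python entry points in the `climate-ref.providers` group: a provider package declares an entry point such as `climate_ref_esmvaltool:provider` in its packaging metadata and is picked up automatically. A minimal sketch of that discovery step, assuming Python 3.10+ (where `entry_points` accepts the `group` keyword) and at least one provider package installed:

```python
# Minimal sketch of entry-point-based provider discovery (Python 3.10+).
# The group name "climate-ref.providers" is the one used by default_providers() above;
# each value is a "module:attribute" reference, e.g. "climate_ref_esmvaltool:provider".
import importlib.metadata

for entry_point in importlib.metadata.entry_points(group="climate-ref.providers"):
    print(entry_point.name, "->", entry_point.value)
```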
319
321
 
320
322
 
@@ -352,6 +354,16 @@ class Config:
352
354
  [loguru documentation](https://loguru.readthedocs.io/en/stable/api/logger.html#module-loguru._logger).
353
355
  """
354
356
 
357
+ cmip6_parser: Literal["drs", "complete"] = env_field("CMIP6_PARSER", default="complete")
358
+ """
359
+ Parser to use for CMIP6 datasets
360
+
361
+ This can be either `drs` or `complete`.
362
+
363
+ - `drs`: Use the DRS parser, which extracts metadata from the DRS-compliant directory and file names.
364
+ - `complete`: Use the complete parser, which opens each file and extracts all available metadata.
365
+ """
366
+
355
367
  paths: PathConfig = Factory(PathConfig) # noqa
356
368
  db: DbConfig = Factory(DbConfig) # noqa
357
369
  executor: ExecutorConfig = Factory(ExecutorConfig) # noqa
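The new `cmip6_parser` field is declared with `env_field("CMIP6_PARSER", default="complete")`, so it can be overridden via the environment as well as the configuration file. A hedged sketch of reading it; the `REF_CMIP6_PARSER` variable name is an assumption about the prefix `env_field` applies (see `climate_ref/_config_helpers.py`), while `Config.default()` appears elsewhere in this release:

```python
# Hedged sketch: selecting and inspecting the CMIP6 parser choice.
# The exact environment-variable name produced by env_field("CMIP6_PARSER") is an
# assumption here (a REF_ prefix is guessed); check climate_ref/_config_helpers.py.
import os

# Set before importing so the override is seen regardless of when env_field reads it.
os.environ.setdefault("REF_CMIP6_PARSER", "drs")  # "complete" is the default

from climate_ref.config import Config

config = Config.default()
print(config.cmip6_parser)  # "drs" or "complete"
```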
climate_ref/datasets/base.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import Protocol, cast
2
+ from typing import Any, Protocol, cast
3
3
 
4
4
  import pandas as pd
5
5
  from loguru import logger
@@ -35,6 +35,31 @@ def _log_duplicate_metadata(
35
35
  )
36
36
 
37
37
 
38
+ class DatasetParsingFunction(Protocol):
39
+ """
40
+ Protocol for a function that parses metadata from a file or directory
41
+ """
42
+
43
+ def __call__(self, file: str, **kwargs: Any) -> dict[str, Any]:
44
+ """
45
+ Parse a file or directory and return metadata for the dataset
46
+
47
+ Parameters
48
+ ----------
49
+ file
50
+ File or directory to parse
51
+
52
+ kwargs
53
+ Additional keyword arguments to pass to the parsing function.
54
+
55
+ Returns
56
+ -------
57
+ :
58
+ Dictionary containing the metadata for the dataset
59
+ """
60
+ ...
61
+
62
+
38
63
  class DatasetAdapter(Protocol):
39
64
  """
40
65
  An adapter to provide a common interface for different dataset types
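For reference, any callable of this shape satisfies the new `DatasetParsingFunction` protocol; no subclassing is required. A toy example (the returned keys are illustrative, not the full set a real parser produces):

```python
from typing import Any

from climate_ref.datasets.base import DatasetParsingFunction


def parse_minimal(file: str, **kwargs: Any) -> dict[str, Any]:
    # Accepts a path plus arbitrary keyword arguments and returns flat metadata,
    # which is the per-record shape ecgtools' Builder.build(parsing_func=...) expects.
    return {"path": str(file), "finalised": False}


parser: DatasetParsingFunction = parse_minimal  # structural typing: this assignment type-checks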
@@ -173,7 +198,7 @@ class DatasetAdapter(Protocol):
173
198
  slug = unique_slugs[0]
174
199
 
175
200
  dataset_metadata = data_catalog_dataset[list(self.dataset_specific_metadata)].iloc[0].to_dict()
176
- dataset, created = db.get_or_create(DatasetModel, slug=slug, **dataset_metadata)
201
+ dataset, created = db.get_or_create(DatasetModel, defaults=dataset_metadata, slug=slug)
177
202
  if not created:
178
203
  logger.warning(f"{dataset} already exists in the database. Skipping")
179
204
  return None
@@ -212,6 +237,7 @@ class DatasetAdapter(Protocol):
212
237
  {
213
238
  **{k: getattr(file, k) for k in self.file_specific_metadata},
214
239
  **{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
240
+ "finalised": file.dataset.finalised,
215
241
  }
216
242
  for file in result
217
243
  ],
climate_ref/datasets/cmip6.py CHANGED
@@ -1,18 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- import traceback
4
3
  import warnings
5
4
  from datetime import datetime
6
5
  from pathlib import Path
7
6
  from typing import Any
8
7
 
9
8
  import pandas as pd
10
- import xarray as xr
11
9
  from ecgtools import Builder
12
- from ecgtools.parsers.utilities import extract_attr_with_regex # type: ignore
13
10
  from loguru import logger
14
11
 
15
- from climate_ref.datasets.base import DatasetAdapter
12
+ from climate_ref.config import Config
13
+ from climate_ref.datasets.base import DatasetAdapter, DatasetParsingFunction
14
+ from climate_ref.datasets.cmip6_parsers import parse_cmip6_complete, parse_cmip6_drs
16
15
  from climate_ref.models.dataset import CMIP6Dataset
17
16
 
18
17
 
@@ -22,16 +21,19 @@ def _parse_datetime(dt_str: pd.Series[str]) -> pd.Series[datetime | Any]:
22
21
  """
23
22
 
24
23
  def _inner(date_string: str | None) -> datetime | None:
25
- if not date_string:
24
+ if not date_string or pd.isnull(date_string):
26
25
  return None
27
26
 
28
27
  # Try to parse the date string with and without milliseconds
29
- try:
30
- dt = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
31
- except ValueError:
32
- dt = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S.%f")
28
+ for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"):
29
+ try:
30
+ return datetime.strptime(date_string, fmt)
31
+ except ValueError:
32
+ continue
33
33
 
34
- return dt
34
+ # If all parsing attempts fail, log an error and return None
35
+ logger.error(f"Failed to parse date string: {date_string}")
36
+ return None
35
37
 
36
38
  return pd.Series(
37
39
  [_inner(dt) for dt in dt_str],
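A brief sketch of the new behaviour: `_parse_datetime` now also accepts plain dates and returns `None` (logging an error) instead of raising when no format matches. The import path is taken from this diff (the obs4MIPs adapter imports it from `climate_ref.datasets.cmip6`); the sample values are illustrative:

```python
import pandas as pd

from climate_ref.datasets.cmip6 import _parse_datetime

raw = pd.Series(
    ["2015-01-16", "2015-01-16 12:00:00", "2015-01-16 12:00:00.5", "not-a-date", None]
)
parsed = _parse_datetime(raw)
# The first three values match the three supported formats, "not-a-date" is logged
# as an error and becomes None, and the missing value stays missing.
```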
@@ -44,15 +46,16 @@ def _apply_fixes(data_catalog: pd.DataFrame) -> pd.DataFrame:
44
46
  def _fix_parent_variant_label(group: pd.DataFrame) -> pd.DataFrame:
45
47
  if group["parent_variant_label"].nunique() == 1:
46
48
  return group
47
- group["parent_variant_label"] = group["variant_label"].iloc[0]
49
+ group["parent_variant_label"] = group["parent_variant_label"].iloc[0]
48
50
 
49
51
  return group
50
52
 
51
- data_catalog = (
52
- data_catalog.groupby("instance_id")
53
- .apply(_fix_parent_variant_label, include_groups=False)
54
- .reset_index(level="instance_id")
55
- )
53
+ if "parent_variant_label" in data_catalog:
54
+ data_catalog = (
55
+ data_catalog.groupby("instance_id")
56
+ .apply(_fix_parent_variant_label, include_groups=False)
57
+ .reset_index(level="instance_id")
58
+ )
56
59
 
57
60
  if "branch_time_in_child" in data_catalog:
58
61
  data_catalog["branch_time_in_child"] = _clean_branch_time(data_catalog["branch_time_in_child"])
@@ -68,88 +71,6 @@ def _clean_branch_time(branch_time: pd.Series[str]) -> pd.Series[float]:
68
71
  return pd.to_numeric(branch_time.astype(str).str.replace("D", ""), errors="coerce")
69
72
 
70
73
 
71
- def parse_cmip6(file: str) -> dict[str, Any]:
72
- """
73
- Parser for CMIP6
74
-
75
- This function parses the CMIP6 dataset and returns a dictionary with the metadata.
76
- This was copied from the ecgtools package, but we want to log the exception when it fails.
77
- """
78
- keys = sorted(
79
- {
80
- "activity_id",
81
- "branch_method",
82
- "branch_time_in_child",
83
- "branch_time_in_parent",
84
- "experiment",
85
- "experiment_id",
86
- "frequency",
87
- "grid",
88
- "grid_label",
89
- "institution_id",
90
- "nominal_resolution",
91
- "parent_activity_id",
92
- "parent_experiment_id",
93
- "parent_source_id",
94
- "parent_time_units",
95
- "parent_variant_label",
96
- "realm",
97
- "product",
98
- "source_id",
99
- "source_type",
100
- "sub_experiment",
101
- "sub_experiment_id",
102
- "table_id",
103
- "variable_id",
104
- "variant_label",
105
- }
106
- )
107
-
108
- try:
109
- with xr.open_dataset(file, chunks={}, use_cftime=True) as ds:
110
- info = {key: ds.attrs.get(key) for key in keys}
111
- info["member_id"] = info["variant_label"]
112
-
113
- variable_id = info["variable_id"]
114
- if variable_id: # pragma: no branch
115
- attrs = ds[variable_id].attrs
116
- for attr in ["standard_name", "long_name", "units"]:
117
- info[attr] = attrs.get(attr)
118
-
119
- # Set the default of # of vertical levels to 1
120
- vertical_levels = 1
121
- start_time, end_time = None, None
122
- init_year = None
123
- try:
124
- vertical_levels = ds[ds.cf["vertical"].name].size
125
- except (KeyError, AttributeError, ValueError):
126
- ...
127
-
128
- try:
129
- start_time, end_time = str(ds.cf["T"][0].data), str(ds.cf["T"][-1].data)
130
- except (KeyError, AttributeError, ValueError):
131
- ...
132
- if info.get("sub_experiment_id"): # pragma: no branch
133
- init_year = extract_attr_with_regex(info["sub_experiment_id"], r"\d{4}")
134
- if init_year: # pragma: no cover
135
- init_year = int(init_year)
136
- info["vertical_levels"] = vertical_levels
137
- info["init_year"] = init_year
138
- info["start_time"] = start_time
139
- info["end_time"] = end_time
140
- if not (start_time and end_time):
141
- info["time_range"] = None
142
- else:
143
- info["time_range"] = f"{start_time}-{end_time}"
144
- info["path"] = str(file)
145
- info["version"] = extract_attr_with_regex(str(file), regex=r"v\d{4}\d{2}\d{2}|v\d{1}") or "v0"
146
- return info
147
-
148
- except Exception:
149
- logger.exception(f"Failed to parse {file}")
150
- return {"INVALID_ASSET": file, "TRACEBACK": traceback.format_exc()}
151
-
152
-
153
74
  class CMIP6DatasetAdapter(DatasetAdapter):
154
75
  """
155
76
  Adapter for CMIP6 datasets
@@ -191,6 +112,7 @@ class CMIP6DatasetAdapter(DatasetAdapter):
191
112
  "standard_name",
192
113
  "long_name",
193
114
  "units",
115
+ "finalised",
194
116
  slug_column,
195
117
  )
196
118
 
@@ -208,8 +130,30 @@ class CMIP6DatasetAdapter(DatasetAdapter):
208
130
  "grid_label",
209
131
  )
210
132
 
211
- def __init__(self, n_jobs: int = 1):
133
+ def __init__(self, n_jobs: int = 1, config: Config | None = None):
212
134
  self.n_jobs = n_jobs
135
+ self.config = config or Config.default()
136
+
137
+ def get_parsing_function(self) -> DatasetParsingFunction:
138
+ """
139
+ Get the parsing function for CMIP6 datasets based on configuration
140
+
141
+ The parsing function used is determined by the `cmip6_parser` configuration value:
142
+ - "drs": Use the DRS parser (default)
143
+ - "complete": Use the complete parser that extracts all available metadata
144
+
145
+ Returns
146
+ -------
147
+ :
148
+ The appropriate parsing function based on configuration
149
+ """
150
+ parser_type = self.config.cmip6_parser
151
+ if parser_type == "complete":
152
+ logger.info("Using complete CMIP6 parser")
153
+ return parse_cmip6_complete
154
+ else:
155
+ logger.info(f"Using DRS CMIP6 parser (config value: {parser_type})")
156
+ return parse_cmip6_drs
213
157
 
214
158
  def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
215
159
  """
@@ -228,6 +172,8 @@ class CMIP6DatasetAdapter(DatasetAdapter):
228
172
  :
229
173
  Data catalog containing the metadata for the dataset
230
174
  """
175
+ parsing_function = self.get_parsing_function()
176
+
231
177
  with warnings.catch_warnings():
232
178
  # Ignore the DeprecationWarning from xarray
233
179
  warnings.simplefilter("ignore", DeprecationWarning)
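A short usage sketch of the parser selection wired in above; `CMIP6DatasetAdapter(n_jobs=..., config=...)` and `Config.default()` are taken from this diff, the rest is illustrative. Note that any value other than `complete` currently falls through to the DRS parser:

```python
from climate_ref.config import Config
from climate_ref.datasets.cmip6 import CMIP6DatasetAdapter

config = Config.default()
adapter = CMIP6DatasetAdapter(n_jobs=4, config=config)

# Resolves to parse_cmip6_complete or parse_cmip6_drs based on config.cmip6_parser.
parsing_function = adapter.get_parsing_function()
```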
@@ -237,7 +183,7 @@ class CMIP6DatasetAdapter(DatasetAdapter):
237
183
  depth=10,
238
184
  include_patterns=["*.nc"],
239
185
  joblib_parallel_kwargs={"n_jobs": self.n_jobs},
240
- ).build(parsing_func=parse_cmip6) # type: ignore
186
+ ).build(parsing_func=parsing_function)
241
187
 
242
188
  datasets: pd.DataFrame = builder.df.drop(["init_year"], axis=1)
243
189
 
@@ -254,6 +200,14 @@ class CMIP6DatasetAdapter(DatasetAdapter):
254
200
  lambda row: "CMIP6." + ".".join([row[item] for item in drs_items]), axis=1
255
201
  )
256
202
 
203
+ # Add in any missing metadata columns
204
+ missing_columns = set(self.dataset_specific_metadata + self.file_specific_metadata) - set(
205
+ datasets.columns
206
+ )
207
+ if missing_columns:
208
+ for column in missing_columns:
209
+ datasets[column] = pd.NA
210
+
257
211
  # Temporary fix for some datasets
258
212
  # TODO: Replace with a standalone package that contains metadata fixes for CMIP6 datasets
259
213
  datasets = _apply_fixes(datasets)
climate_ref/datasets/cmip6_parsers.py ADDED
@@ -0,0 +1,189 @@
1
+ """
2
+ CMIP6 parser functions for extracting metadata from netCDF files
3
+
4
+ Additional, non-official DRS layouts may be added in the future.
5
+ """
6
+
7
+ import traceback
8
+ from typing import Any
9
+
10
+ import xarray as xr
11
+ from ecgtools.parsers.cmip import parse_cmip6_using_directories # type: ignore
12
+ from ecgtools.parsers.utilities import extract_attr_with_regex # type: ignore
13
+ from loguru import logger
14
+
15
+
16
+ def _parse_daterange(date_range: str) -> tuple[str | None, str | None]:
17
+ """
18
+ Parse a date range string into start and end dates
19
+
20
+ The output from this is an estimated date range until the file is completely parsed.
21
+
22
+ Parameters
23
+ ----------
24
+ date_range
25
+ Date range string in the format "YYYYMM-YYYYMM"
26
+
27
+ Returns
28
+ -------
29
+ :
30
+ Tuple containing start and end dates as strings in the format "YYYY-MM-DD"
31
+ """
32
+ try:
33
+ start, end = date_range.split("-")
34
+ if len(start) != 6 or len(end) != 6: # noqa: PLR2004
35
+ raise ValueError("Date range must be in the format 'YYYYMM-YYYYMM'")
36
+
37
+ start = f"{start[:4]}-{start[4:6]}-01"
38
+ # Up to the 30th of the month, assuming a 30-day month
39
+ # These values will be corrected later when the file is parsed
40
+ end = f"{end[:4]}-{end[4:6]}-30"
41
+
42
+ return start, end
43
+ except ValueError:
44
+ logger.error(f"Invalid date range format: {date_range}")
45
+ return None, None
46
+
47
+
48
+ def parse_cmip6_complete(file: str, **kwargs: Any) -> dict[str, Any]:
49
+ """
50
+ Complete parser for CMIP6 files
51
+
52
+ This parser loads each file and extracts all available metadata.
53
+
54
+ For some filesystems this may be slow, as it involves a lot of I/O operations.
55
+
56
+ Parameters
57
+ ----------
58
+ file
59
+ File to parse
60
+ kwargs
61
+ Additional keyword arguments (not used, but required for compatibility)
62
+
63
+ Returns
64
+ -------
65
+ :
66
+ Dictionary with extracted metadata
67
+ """
68
+ keys = sorted(
69
+ {
70
+ "activity_id",
71
+ "branch_method",
72
+ "branch_time_in_child",
73
+ "branch_time_in_parent",
74
+ "experiment",
75
+ "experiment_id",
76
+ "frequency",
77
+ "grid",
78
+ "grid_label",
79
+ "institution_id",
80
+ "nominal_resolution",
81
+ "parent_activity_id",
82
+ "parent_experiment_id",
83
+ "parent_source_id",
84
+ "parent_time_units",
85
+ "parent_variant_label",
86
+ "realm",
87
+ "product",
88
+ "source_id",
89
+ "source_type",
90
+ "sub_experiment",
91
+ "sub_experiment_id",
92
+ "table_id",
93
+ "variable_id",
94
+ "variant_label",
95
+ }
96
+ )
97
+
98
+ try:
99
+ with xr.open_dataset(file, chunks={}, use_cftime=True) as ds:
100
+ info = {key: ds.attrs.get(key) for key in keys}
101
+ info["member_id"] = info["variant_label"]
102
+
103
+ variable_id = info["variable_id"]
104
+ if variable_id: # pragma: no branch
105
+ attrs = ds[variable_id].attrs
106
+ for attr in ["standard_name", "long_name", "units"]:
107
+ info[attr] = attrs.get(attr)
108
+
109
+ # Set the default of # of vertical levels to 1
110
+ vertical_levels = 1
111
+ start_time, end_time = None, None
112
+ init_year = None
113
+ try:
114
+ vertical_levels = ds[ds.cf["vertical"].name].size
115
+ except (KeyError, AttributeError, ValueError):
116
+ ...
117
+
118
+ try:
119
+ start_time, end_time = str(ds.cf["T"][0].data), str(ds.cf["T"][-1].data)
120
+ except (KeyError, AttributeError, ValueError):
121
+ ...
122
+ if info.get("sub_experiment_id"): # pragma: no branch
123
+ init_year = extract_attr_with_regex(info["sub_experiment_id"], r"\d{4}")
124
+ if init_year: # pragma: no cover
125
+ init_year = int(init_year)
126
+ info["vertical_levels"] = vertical_levels
127
+ info["init_year"] = init_year
128
+ info["start_time"] = start_time
129
+ info["end_time"] = end_time
130
+ if not (start_time and end_time):
131
+ info["time_range"] = None
132
+ else:
133
+ info["time_range"] = f"{start_time}-{end_time}"
134
+ info["path"] = str(file)
135
+ info["version"] = extract_attr_with_regex(str(file), regex=r"v\d{4}\d{2}\d{2}|v\d{1}") or "v0"
136
+
137
+ # Mark the dataset as finalised
138
+ # This is used to indicate that the dataset has been fully parsed and is ready for use
139
+ info["finalised"] = True
140
+
141
+ return info
142
+
143
+ except Exception:
144
+ logger.exception(f"Failed to parse {file}")
145
+ return {"INVALID_ASSET": file, "TRACEBACK": traceback.format_exc()}
146
+
147
+
148
+ def parse_cmip6_drs(file: str, **kwargs: Any) -> dict[str, Any]:
149
+ """
150
+ DRS parser for CMIP6 files
151
+
152
+ This parser extracts metadata according to the CMIP6 Data Reference Syntax (DRS).
153
+ This covers the essential metadata required to identify the dataset, which is encoded in the directory structure and filename.
154
+
155
+ Parameters
156
+ ----------
157
+ file
158
+ File to parse
159
+ kwargs
160
+ Additional keyword arguments (not used, but required for compatibility)
161
+
162
+ Returns
163
+ -------
164
+ :
165
+ Dictionary with extracted metadata
166
+ """
167
+ info: dict[str, Any] = parse_cmip6_using_directories(file)
168
+
169
+ if "INVALID_ASSET" in info:
170
+ logger.warning(f"Failed to parse {file}: {info['INVALID_ASSET']}")
171
+ return info
172
+
173
+ # The member_id is technically incorrect
174
+ # but for simplicity we are going to ignore sub-experiments for the DRS parser
175
+ info["variant_label"] = info["member_id"]
176
+
177
+ # Rename the `dcpp_init_year` key to `init_year` if it exists
178
+ if "dcpp_init_year" in info:
179
+ info["init_year"] = info.pop("dcpp_init_year")
180
+
181
+ if info.get("time_range"):
182
+ # Parse the time range if it exists
183
+ start_time, end_time = _parse_daterange(info["time_range"])
184
+ info["start_time"] = start_time
185
+ info["end_time"] = end_time
186
+
187
+ info["finalised"] = False
188
+
189
+ return info
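To make the estimated ranges concrete, a small sketch of `_parse_daterange` from the new module (the example range is hypothetical):

```python
from climate_ref.datasets.cmip6_parsers import _parse_daterange

start, end = _parse_daterange("185001-201412")
# ("1850-01-01", "2014-12-30"): month start plus an assumed 30-day month end,
# corrected later once the complete parser opens the file itself.

assert _parse_daterange("1850-2014") == (None, None)  # malformed input logs an error and yields None
```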
climate_ref/datasets/obs4mips.py CHANGED
@@ -15,8 +15,17 @@ from climate_ref.datasets.cmip6 import _parse_datetime
15
15
  from climate_ref.models.dataset import Dataset, Obs4MIPsDataset
16
16
 
17
17
 
18
- def parse_obs4mips(file: str) -> dict[str, Any | None]:
19
- """Parser for obs4mips"""
18
+ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
19
+ """
20
+ Parser for obs4mips
21
+
22
+ Parameters
23
+ ----------
24
+ file
25
+ File to parse
26
+ kwargs
27
+ Additional keyword arguments (not used, but required for protocol compatibility)
28
+ """
20
29
  keys = sorted(
21
30
  list(
22
31
  {
@@ -106,6 +115,7 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
106
115
 
107
116
  dataset_specific_metadata = (
108
117
  "activity_id",
118
+ "finalised",
109
119
  "frequency",
110
120
  "grid",
111
121
  "grid_label",
@@ -159,7 +169,7 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
159
169
  depth=10,
160
170
  include_patterns=["*.nc"],
161
171
  joblib_parallel_kwargs={"n_jobs": self.n_jobs},
162
- ).build(parsing_func=parse_obs4mips) # type: ignore[arg-type]
172
+ ).build(parsing_func=parse_obs4mips)
163
173
 
164
174
  datasets = builder.df
165
175
  if datasets.empty:
@@ -178,4 +188,5 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
178
188
  datasets["instance_id"] = datasets.apply(
179
189
  lambda row: "obs4MIPs." + ".".join([row[item] for item in drs_items]), axis=1
180
190
  )
191
+ datasets["finalised"] = True
181
192
  return datasets
climate_ref/executor/hpc.py CHANGED
@@ -21,7 +21,7 @@ from loguru import logger
21
21
  from parsl import python_app
22
22
  from parsl.config import Config as ParslConfig
23
23
  from parsl.executors import HighThroughputExecutor
24
- from parsl.launchers import SrunLauncher
24
+ from parsl.launchers import SimpleLauncher, SrunLauncher
25
25
  from parsl.providers import SlurmProvider
26
26
  from tqdm import tqdm
27
27
 
@@ -34,6 +34,7 @@ from climate_ref_core.exceptions import DiagnosticError, ExecutionError
34
34
  from climate_ref_core.executor import execute_locally
35
35
 
36
36
  from .local import ExecutionFuture, process_result
37
+ from .pbs_scheduler import SmartPBSProvider
37
38
 
38
39
 
39
40
  @python_app
@@ -96,8 +97,9 @@ class HPCExecutor:
96
97
  self.account = str(executor_config.get("account", os.environ.get("USER")))
97
98
  self.username = executor_config.get("username", os.environ.get("USER"))
98
99
  self.partition = str(executor_config.get("partition")) if executor_config.get("partition") else None
100
+ self.queue = str(executor_config.get("queue")) if executor_config.get("queue") else None
99
101
  self.qos = str(executor_config.get("qos")) if executor_config.get("qos") else None
100
- self.req_nodes = int(executor_config.get("req_nodes", 1))
102
+ self.req_nodes = int(executor_config.get("req_nodes", 1)) if self.scheduler == "slurm" else 1
101
103
  self.walltime = str(executor_config.get("walltime", "00:10:00"))
102
104
  self.log_dir = str(executor_config.get("log_dir", "runinfo"))
103
105
 
@@ -181,21 +183,47 @@ class HPCExecutor:
181
183
  def _initialize_parsl(self) -> None:
182
184
  executor_config = self.config.executor.config
183
185
 
184
- provider = SlurmProvider(
185
- account=self.account,
186
- partition=self.partition,
187
- qos=self.qos,
188
- nodes_per_block=self.req_nodes,
189
- max_blocks=int(executor_config.get("max_blocks", 1)),
190
- scheduler_options=executor_config.get("scheduler_options", "#SBATCH -C cpu"),
191
- worker_init=executor_config.get("worker_init", "source .venv/bin/activate"),
192
- launcher=SrunLauncher(
193
- debug=True,
194
- overrides=executor_config.get("overrides", ""),
195
- ),
196
- walltime=self.walltime,
197
- cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
198
- )
186
+ provider: SlurmProvider | SmartPBSProvider
187
+ if self.scheduler == "slurm":
188
+ provider = SlurmProvider(
189
+ account=self.account,
190
+ partition=self.partition,
191
+ qos=self.qos,
192
+ nodes_per_block=self.req_nodes,
193
+ max_blocks=int(executor_config.get("max_blocks", 1)),
194
+ scheduler_options=executor_config.get("scheduler_options", "#SBATCH -C cpu"),
195
+ worker_init=executor_config.get("worker_init", "source .venv/bin/activate"),
196
+ launcher=SrunLauncher(
197
+ debug=True,
198
+ overrides=executor_config.get("overrides", ""),
199
+ ),
200
+ walltime=self.walltime,
201
+ cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
202
+ )
203
+
204
+ elif self.scheduler == "pbs":
205
+ provider = SmartPBSProvider(
206
+ account=self.account,
207
+ queue=self.queue,
208
+ worker_init=executor_config.get("worker_init", "source .venv/bin/activate"),
209
+ nodes_per_block=_to_int(executor_config.get("nodes_per_block", 1)),
210
+ cpus_per_node=_to_int(executor_config.get("cpus_per_node", None)),
211
+ ncpus=_to_int(executor_config.get("ncpus", None)),
212
+ mem=executor_config.get("mem", "4GB"),
213
+ jobfs=executor_config.get("jobfs", "10GB"),
214
+ storage=executor_config.get("storage", ""),
215
+ init_blocks=executor_config.get("init_blocks", 1),
216
+ min_blocks=executor_config.get("min_blocks", 0),
217
+ max_blocks=executor_config.get("max_blocks", 1),
218
+ parallelism=executor_config.get("parallelism", 1),
219
+ scheduler_options=executor_config.get("scheduler_options", ""),
220
+ launcher=SimpleLauncher(),
221
+ walltime=self.walltime,
222
+ cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
223
+ )
224
+ else:
225
+ raise ValueError(f"Unsupported scheduler: {self.scheduler}")
226
+
199
227
  executor = HighThroughputExecutor(
200
228
  label="ref_hpc_executor",
201
229
  cores_per_worker=self.cores_per_worker if self.cores_per_worker else 1,
@@ -206,8 +234,11 @@ class HPCExecutor:
206
234
  )
207
235
 
208
236
  hpc_config = ParslConfig(
209
- run_dir=self.log_dir, executors=[executor], retries=int(executor_config.get("retries", 2))
237
+ run_dir=self.log_dir,
238
+ executors=[executor],
239
+ retries=int(executor_config.get("retries", 2)),
210
240
  )
241
+
211
242
  parsl.load(hpc_config)
212
243
 
213
244
  def run(
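For orientation, the PBS branch added to `HPCExecutor` above consumes the executor configuration keys sketched below. The key names and defaults come from this diff; how `executor.config` is populated, and the presence of a `scheduler` key, are assumptions, so treat this as a sketch rather than a reference:

```python
# Hedged sketch of an executor configuration for the PBS path (values illustrative).
executor_config = {
    "scheduler": "pbs",        # assumed selector between SlurmProvider and SmartPBSProvider
    "account": "ab12",
    "queue": "normal",
    "walltime": "01:00:00",
    "worker_init": "source .venv/bin/activate",
    "nodes_per_block": 1,
    "cpus_per_node": 4,
    "mem": "16GB",             # the diff's default is "4GB"
    "jobfs": "10GB",
    "storage": "gdata/ab12",
    "max_blocks": 1,
    "cmd_timeout": 120,
}
```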
climate_ref/executor/pbs_scheduler.py ADDED
@@ -0,0 +1,152 @@
1
+ import re
2
+ import shutil
3
+ import subprocess
4
+ import textwrap
5
+ from typing import Any
6
+
7
+ from parsl.launchers import SimpleLauncher
8
+ from parsl.providers import PBSProProvider
9
+
10
+
11
+ class SmartPBSProvider(PBSProProvider):
12
+ """
13
+ A PBSProProvider subclass that adapts to systems where `-l select` is not supported.
14
+
15
+ Falls back to individual resource requests (ncpus, mem, jobfs, storage) if needed.
16
+ """
17
+
18
+ def __init__( # noqa: PLR0913
19
+ self,
20
+ account: str | None = None,
21
+ queue: str | None = None,
22
+ scheduler_options: str = "",
23
+ worker_init: str = "",
24
+ nodes_per_block: int | None = 1,
25
+ cpus_per_node: int | None = 1,
26
+ ncpus: int | None = None,
27
+ mem: str = "4GB",
28
+ jobfs: str = "10GB",
29
+ storage: str = "",
30
+ init_blocks: int = 1,
31
+ min_blocks: int = 0,
32
+ max_blocks: int = 1,
33
+ parallelism: int = 1,
34
+ launcher: SimpleLauncher = SimpleLauncher(),
35
+ walltime: str = "00:20:00",
36
+ cmd_timeout: int = 120,
37
+ ) -> None:
38
+ self.ncpus = ncpus
39
+ self.mem = mem
40
+ self.jobfs = jobfs
41
+ self.storage = storage
42
+ self._select_supported = self._detect_select_support()
43
+
44
+ # Prepare fallback resource dictionary
45
+ self._fallback_resources = {"mem": mem, "jobfs": jobfs, "storage": storage}
46
+
47
+ # Parse and strip select if present in scheduler_options
48
+ if not self._select_supported and "-l select=" in scheduler_options:
49
+ scheduler_options = self._parse_select_from_scheduler_options(scheduler_options)
50
+
51
+ # Determine fallback ncpus
52
+ if "ncpus" not in self._fallback_resources:
53
+ self._fallback_resources["ncpus"] = str(ncpus if ncpus is not None else (cpus_per_node or 1))
54
+
55
+ # Map ncpus to cpus_per_node if needed (select mode only)
56
+ if self._select_supported:
57
+ if not ncpus and cpus_per_node:
58
+ cpus_per_node = ncpus
59
+ elif ncpus and cpus_per_node and int(ncpus) != int(cpus_per_node):
60
+ print(f"Warning: ncpus={ncpus} and cpus_per_node={cpus_per_node} differ.")
61
+ print(f"Using cpus_per_node={cpus_per_node}.")
62
+ else:
63
+ cpus_per_node = int(self._fallback_resources["ncpus"])
64
+
65
+ super().__init__(
66
+ account=account,
67
+ queue=queue,
68
+ scheduler_options=scheduler_options,
69
+ select_options="", # Not used; we handle resources ourselves
70
+ worker_init=worker_init,
71
+ nodes_per_block=nodes_per_block,
72
+ cpus_per_node=cpus_per_node,
73
+ init_blocks=init_blocks,
74
+ min_blocks=min_blocks,
75
+ max_blocks=max_blocks,
76
+ parallelism=parallelism,
77
+ launcher=launcher,
78
+ walltime=walltime,
79
+ cmd_timeout=cmd_timeout,
80
+ ) # type: ignore
81
+
82
+ if not self._select_supported:
83
+ self.template_string = self._fallback_template()
84
+
85
+ def _detect_select_support(self) -> bool:
86
+ """Detect whether `-l select` is supported by the underlying PBS system."""
87
+ qsub_path = shutil.which("qsub")
88
+ if qsub_path is None:
89
+ raise RuntimeError("qsub command not found. Ensure PBS is installed and in PATH.")
90
+
91
+ try:
92
+ result = subprocess.run( # noqa: S603
93
+ [qsub_path, "-l", "wd,select=1:ncpus=1", "--version"],
94
+ capture_output=True,
95
+ timeout=5,
96
+ check=False,
97
+ )
98
+ stderr = result.stderr.decode().lower()
99
+ return "unknown" not in stderr and result.returncode == 0
100
+ except Exception:
101
+ return False
102
+
103
+ def _parse_select_from_scheduler_options(self, scheduler_options: str) -> str:
104
+ """
105
+ Parse `-l select=...` from scheduler_options and update fallback resources.
106
+
107
+ Removes the select line from scheduler_options.
108
+ """
109
+ select_pattern = r"-l\s+select=([^\s]+)"
110
+ match = re.search(select_pattern, scheduler_options)
111
+ if match:
112
+ select_string = match.group(1)
113
+ scheduler_options = re.sub(select_pattern, "", scheduler_options).strip()
114
+
115
+ parts = select_string.split(":")[1:] # skip the initial `select=1`
116
+ for part in parts:
117
+ if "=" in part:
118
+ key, val = part.split("=")
119
+ self._fallback_resources[key.strip()] = val.strip()
120
+ return scheduler_options
121
+
122
+ def _fallback_template(self) -> str:
123
+ """Submit script template used if `select` is not supported."""
124
+ return textwrap.dedent("""\
125
+ #!/bin/bash
126
+ #PBS -N ${jobname}
127
+ #PBS -l ncpus=${ncpus}
128
+ #PBS -l mem=${mem}
129
+ #PBS -l jobfs=${jobfs}
130
+ #PBS -l walltime=${walltime}
131
+ #PBS -l storage=${storage}
132
+ #PBS -o ${job_stdout_path}
133
+ #PBS -e ${job_stderr_path}
134
+ ${scheduler_options}
135
+
136
+ ${worker_init}
137
+
138
+ export JOBNAME="${jobname}"
139
+ ${user_script}
140
+
141
+ """)
142
+
143
+ def _write_submit_script(
144
+ self, template: str, script_filename: str, job_name: str, configs: dict[str, Any]
145
+ ) -> str:
146
+ """Inject fallback values into the submit script if `select` is not supported."""
147
+ if not self._select_supported:
148
+ configs.setdefault("ncpus", self._fallback_resources.get("ncpus", "1"))
149
+ configs.setdefault("mem", self._fallback_resources.get("mem", "4GB"))
150
+ configs.setdefault("jobfs", self._fallback_resources.get("jobfs", "10GB"))
151
+ configs.setdefault("storage", self._fallback_resources.get("storage", "gdata1"))
152
+ return super()._write_submit_script(template, script_filename, job_name, configs) # type: ignore
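A hedged construction example for the new provider; it needs `qsub` on `PATH`, since `__init__` probes for `-l select` support, and the resource values are illustrative:

```python
from parsl.launchers import SimpleLauncher

from climate_ref.executor.pbs_scheduler import SmartPBSProvider

provider = SmartPBSProvider(
    account="ab12",
    queue="normal",
    worker_init="source .venv/bin/activate",
    ncpus=4,
    mem="16GB",
    jobfs="10GB",
    storage="gdata/ab12",
    walltime="01:00:00",
    launcher=SimpleLauncher(),
)
# Where `-l select` is unsupported, the provider swaps in a submit-script template that
# requests ncpus/mem/jobfs/storage individually instead of a single select statement.
```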
climate_ref/migrations/versions/2025-07-03T1505_795c1e6cf496_drop_unique_requirement_on_slug.py ADDED
@@ -0,0 +1,34 @@
1
+ """Drop unique requirement on slug
2
+
3
+ Revision ID: 795c1e6cf496
4
+ Revises: 03dbb4998e49
5
+ Create Date: 2025-07-03 15:05:28.517124
6
+
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+ from typing import Union
11
+
12
+ from alembic import op
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = "795c1e6cf496"
16
+ down_revision: Union[str, None] = "03dbb4998e49"
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ with op.batch_alter_table("diagnostic", schema=None) as batch_op:
24
+ batch_op.drop_constraint("uq_diagnostic_slug", type_="unique")
25
+
26
+ # ### end Alembic commands ###
27
+
28
+
29
+ def downgrade() -> None:
30
+ # ### commands auto generated by Alembic - please adjust! ###
31
+ with op.batch_alter_table("diagnostic", schema=None) as batch_op:
32
+ batch_op.create_unique_constraint("uq_diagnostic_slug", ["slug"])
33
+
34
+ # ### end Alembic commands ###
climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py ADDED
@@ -0,0 +1,57 @@
1
+ """cmip6-finalised
2
+
3
+ Revision ID: 94beace57a9c
4
+ Revises: 795c1e6cf496
5
+ Create Date: 2025-07-20 15:21:17.132458
6
+
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+ from typing import Union
11
+
12
+ import sqlalchemy as sa
13
+ from alembic import op
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "94beace57a9c"
17
+ down_revision: Union[str, None] = "795c1e6cf496"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
+ def upgrade() -> None:
23
+ # ### commands auto generated by Alembic - please adjust! ###
24
+ with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
25
+ batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=False))
26
+ batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=True)
27
+ batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=True)
28
+ batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=True)
29
+ batch_op.alter_column("nominal_resolution", existing_type=sa.VARCHAR(), nullable=True)
30
+ batch_op.alter_column("realm", existing_type=sa.VARCHAR(), nullable=True)
31
+ batch_op.alter_column("product", existing_type=sa.VARCHAR(), nullable=True)
32
+ batch_op.alter_column("standard_name", existing_type=sa.VARCHAR(), nullable=True)
33
+ batch_op.alter_column("source_type", existing_type=sa.VARCHAR(), nullable=True)
34
+ batch_op.alter_column("sub_experiment", existing_type=sa.VARCHAR(), nullable=True)
35
+ batch_op.alter_column("sub_experiment_id", existing_type=sa.VARCHAR(), nullable=True)
36
+ batch_op.alter_column("units", existing_type=sa.VARCHAR(), nullable=True)
37
+
38
+ # ### end Alembic commands ###
39
+
40
+
41
+ def downgrade() -> None:
42
+ # ### commands auto generated by Alembic - please adjust! ###
43
+ with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
44
+ batch_op.alter_column("units", existing_type=sa.VARCHAR(), nullable=False)
45
+ batch_op.alter_column("sub_experiment_id", existing_type=sa.VARCHAR(), nullable=False)
46
+ batch_op.alter_column("sub_experiment", existing_type=sa.VARCHAR(), nullable=False)
47
+ batch_op.alter_column("source_type", existing_type=sa.VARCHAR(), nullable=False)
48
+ batch_op.alter_column("standard_name", existing_type=sa.VARCHAR(), nullable=False)
49
+ batch_op.alter_column("product", existing_type=sa.VARCHAR(), nullable=False)
50
+ batch_op.alter_column("realm", existing_type=sa.VARCHAR(), nullable=False)
51
+ batch_op.alter_column("nominal_resolution", existing_type=sa.VARCHAR(), nullable=False)
52
+ batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=False)
53
+ batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=False)
54
+ batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=False)
55
+ batch_op.drop_column("finalised")
56
+
57
+ # ### end Alembic commands ###
climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py ADDED
@@ -0,0 +1,57 @@
1
+ """finalised-on-base-dataset
2
+
3
+ Move finalised from cmip6_dataset to base dataset table and default all existing rows to True.
4
+
5
+ Revision ID: a1b2c3d4e5f6
6
+ Revises: 94beace57a9c
7
+ Create Date: 2025-08-05 03:27:00
8
+
9
+ """
10
+
11
+ from collections.abc import Sequence
12
+ from typing import Union
13
+
14
+ import sqlalchemy as sa
15
+ from alembic import op
16
+
17
+ # revision identifiers, used by Alembic.
18
+ revision: str = "ba5e"
19
+ down_revision: Union[str, None] = "94beace57a9c"
20
+ branch_labels: Union[str, Sequence[str], None] = None
21
+ depends_on: Union[str, Sequence[str], None] = None
22
+
23
+
24
+ def upgrade() -> None:
25
+ # Add finalised to base dataset with default True, non-null
26
+ with op.batch_alter_table("dataset", schema=None) as batch_op:
27
+ batch_op.add_column(
28
+ sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("true"))
29
+ )
30
+
31
+ # Backfill: ensure all existing rows are True
32
+ op.execute("UPDATE dataset SET finalised = TRUE WHERE finalised IS NULL")
33
+
34
+ # Enforce NOT NULL after backfill
35
+ with op.batch_alter_table("dataset", schema=None) as batch_op:
36
+ batch_op.alter_column("finalised", nullable=False)
37
+
38
+ # Drop column from cmip6_dataset if it exists
39
+ conn = op.get_bind()
40
+ inspector = sa.inspect(conn)
41
+ cmip6_cols = {col["name"] for col in inspector.get_columns("cmip6_dataset")}
42
+ if "finalised" in cmip6_cols:
43
+ with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
44
+ batch_op.drop_column("finalised")
45
+
46
+
47
+ def downgrade() -> None:
48
+ # Re-create cmip6_dataset.finalised as non-nullable boolean default False
49
+ # Note: Original migration 94beace57a9c added cmip6_dataset.finalised NOT NULL, with no default.
50
+ with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
51
+ batch_op.add_column(
52
+ sa.Column("finalised", sa.Boolean(), nullable=False, server_default=sa.text("false"))
53
+ )
54
+
55
+ # Drop base dataset finalised
56
+ with op.batch_alter_table("dataset", schema=None) as batch_op:
57
+ batch_op.drop_column("finalised")
climate_ref/models/dataset.py CHANGED
@@ -45,6 +45,16 @@ class Dataset(Base):
45
45
  Updating a dataset will trigger a new diagnostic calculation.
46
46
  """
47
47
 
48
+ # Universal finalisation flag for all dataset types
49
+ # Only CMIP6 currently uses unfinalised datasets in practice; other types should be finalised on creation.
50
+ finalised: Mapped[bool] = mapped_column(default=True, nullable=False)
51
+ """
52
+ Whether the complete set of metadata for the dataset has been finalised.
53
+
54
+ For CMIP6, ingestion may initially create unfinalised datasets (False) until all metadata is extracted.
55
+ For other dataset types (e.g., obs4MIPs, PMP climatology), this should be True upon creation.
56
+ """
57
+
48
58
  def __repr__(self) -> str:
49
59
  return f"<Dataset slug={self.slug} dataset_type={self.dataset_type} >"
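As a small illustration of how the new flag can be used downstream, a hedged SQLAlchemy sketch for finding datasets whose metadata still needs completing (session handling omitted; this query is not part of the release itself):

```python
from sqlalchemy import select

from climate_ref.models.dataset import Dataset

# Datasets ingested with the DRS parser start with finalised=False and can be
# re-parsed later with the complete parser.
stmt = select(Dataset).where(Dataset.finalised.is_(False))
```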
50
60
 
@@ -90,9 +100,7 @@ class CMIP6Dataset(Dataset):
90
100
  """
91
101
  Represents a CMIP6 dataset
92
102
 
93
- Fields that are not marked as required in
94
- https://wcrp-cmip.github.io/WGCM_Infrastructure_Panel/Papers/CMIP6_global_attributes_filenames_CVs_v6.2.7.pdf
95
- are optional.
103
+ Fields that are not in the DRS are marked optional.
96
104
  """
97
105
 
98
106
  __tablename__ = "cmip6_dataset"
@@ -102,29 +110,29 @@ class CMIP6Dataset(Dataset):
102
110
  branch_method: Mapped[str] = mapped_column(nullable=True)
103
111
  branch_time_in_child: Mapped[float] = mapped_column(nullable=True)
104
112
  branch_time_in_parent: Mapped[float] = mapped_column(nullable=True)
105
- experiment: Mapped[str] = mapped_column()
113
+ experiment: Mapped[str] = mapped_column(nullable=True)
106
114
  experiment_id: Mapped[str] = mapped_column()
107
- frequency: Mapped[str] = mapped_column()
108
- grid: Mapped[str] = mapped_column()
115
+ frequency: Mapped[str] = mapped_column(nullable=True)
116
+ grid: Mapped[str] = mapped_column(nullable=True)
109
117
  grid_label: Mapped[str] = mapped_column()
110
118
  institution_id: Mapped[str] = mapped_column()
111
119
  long_name: Mapped[str] = mapped_column(nullable=True)
112
120
  member_id: Mapped[str] = mapped_column()
113
- nominal_resolution: Mapped[str] = mapped_column()
121
+ nominal_resolution: Mapped[str] = mapped_column(nullable=True)
114
122
  parent_activity_id: Mapped[str] = mapped_column(nullable=True)
115
123
  parent_experiment_id: Mapped[str] = mapped_column(nullable=True)
116
124
  parent_source_id: Mapped[str] = mapped_column(nullable=True)
117
125
  parent_time_units: Mapped[str] = mapped_column(nullable=True)
118
126
  parent_variant_label: Mapped[str] = mapped_column(nullable=True)
119
- realm: Mapped[str] = mapped_column()
120
- product: Mapped[str] = mapped_column()
127
+ realm: Mapped[str] = mapped_column(nullable=True)
128
+ product: Mapped[str] = mapped_column(nullable=True)
121
129
  source_id: Mapped[str] = mapped_column()
122
- standard_name: Mapped[str] = mapped_column()
123
- source_type: Mapped[str] = mapped_column()
124
- sub_experiment: Mapped[str] = mapped_column()
125
- sub_experiment_id: Mapped[str] = mapped_column()
130
+ standard_name: Mapped[str] = mapped_column(nullable=True)
131
+ source_type: Mapped[str] = mapped_column(nullable=True)
132
+ sub_experiment: Mapped[str] = mapped_column(nullable=True)
133
+ sub_experiment_id: Mapped[str] = mapped_column(nullable=True)
126
134
  table_id: Mapped[str] = mapped_column()
127
- units: Mapped[str] = mapped_column()
135
+ units: Mapped[str] = mapped_column(nullable=True)
128
136
  variable_id: Mapped[str] = mapped_column()
129
137
  variant_label: Mapped[str] = mapped_column()
130
138
  vertical_levels: Mapped[int] = mapped_column(nullable=True)
@@ -132,7 +140,7 @@ class CMIP6Dataset(Dataset):
132
140
 
133
141
  instance_id: Mapped[str] = mapped_column()
134
142
  """
135
- Unique identifier for the dataset.
143
+ Unique identifier for the dataset (including the version).
136
144
  """
137
145
 
138
146
  __mapper_args__: ClassVar[Any] = {"polymorphic_identity": SourceDatasetType.CMIP6} # type: ignore
climate_ref/models/diagnostic.py CHANGED
@@ -19,7 +19,7 @@ class Diagnostic(CreatedUpdatedMixin, Base):
19
19
  __table_args__ = (UniqueConstraint("provider_id", "slug", name="diagnostic_ident"),)
20
20
 
21
21
  id: Mapped[int] = mapped_column(primary_key=True)
22
- slug: Mapped[str] = mapped_column(unique=True)
22
+ slug: Mapped[str] = mapped_column()
23
23
  """
24
24
  Unique identifier for the diagnostic
25
25
 
climate_ref-0.6.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: Application which runs the CMIP Rapid Evaluation Framework
5
5
  Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
6
6
  License-Expression: Apache-2.0
climate_ref-0.6.5.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
1
1
  climate_ref/__init__.py,sha256=M45QGfl0KCPK48A8MjI08weNvZHMYH__GblraQMxsoM,808
2
2
  climate_ref/_config_helpers.py,sha256=-atI5FX7SukhLE_jz_rL-EHQ7s0YYqKu3dSFYWxSyMU,6632
3
3
  climate_ref/alembic.ini,sha256=WRvbwSIFuZ7hWNMnR2-yHPJAwYUnwhvRYBzkJhtpGdg,3535
4
- climate_ref/config.py,sha256=T1WzwFhzJ2-RKnOzyOmyUsdXrj_KDW2eycdPXZKnbf0,16954
4
+ climate_ref/config.py,sha256=WW6R7RLwEDuI11XYLYO57FwvmQz1psq9bNM3WVL3e_s,17481
5
5
  climate_ref/constants.py,sha256=9RaNLgUSuQva7ki4eRW3TjOKeVP6T81QNiu0veB1zVk,111
6
6
  climate_ref/database.py,sha256=b_6XHdr78Mo7KeLqQJ5DjLsySHPdQE83P8dRpdMfzfM,8661
7
7
  climate_ref/provider_registry.py,sha256=dyfj4vU6unKHNXtT03HafQtAi3LilL37uvu3paCnmNY,4159
@@ -19,14 +19,16 @@ climate_ref/cli/solve.py,sha256=ZTXrwDFDXNrX5GLMJTN9tFnpV3zlcZbEu2aF3JDJVxI,2367
19
19
  climate_ref/dataset_registry/obs4ref_reference.txt,sha256=2zJMbsAsQ49KaWziX3CqrlILq9yN7S2ygmfV3V5rsnw,8395
20
20
  climate_ref/dataset_registry/sample_data.txt,sha256=3JAHy14pRbLlo9-oNxUXLgZ_QOFJXUieEftBbapSY8E,20124
21
21
  climate_ref/datasets/__init__.py,sha256=PV3u5ZmhyfcHbKqySgwVA8m4-naZgxzydLXSBqdTGLM,1171
22
- climate_ref/datasets/base.py,sha256=yoip8UCcTCUPn2xVlsJ1If9zXw_476dDYViH5iMgcIE,10352
23
- climate_ref/datasets/cmip6.py,sha256=3MVJ1kPdw6f6V3G4gdHIiqDGUyMqPs-_wttkw2YKAH0,8425
24
- climate_ref/datasets/obs4mips.py,sha256=CmMm4kopfb0yFsMSgUlHUm8clGJImBaodSkh6lAv_Ug,5926
22
+ climate_ref/datasets/base.py,sha256=uZ55u625ckRNjsn-AqJg4_xO5uvHchqYvwBZIt4iHtY,11017
23
+ climate_ref/datasets/cmip6.py,sha256=KO761ConHvX40n9X0xLrxjhzN7wmighNWL2JyYygRAA,7049
24
+ climate_ref/datasets/cmip6_parsers.py,sha256=wH4WKQAR2_aniXwsW7nch6nIpXk2pSpPxkT4unjV4hQ,6041
25
+ climate_ref/datasets/obs4mips.py,sha256=q0_erQb4k5KBaGMvEGgUtVSDvXQjuftqDmvW4QZpWZI,6138
25
26
  climate_ref/datasets/pmp_climatology.py,sha256=goHDc_3B2Wdiy_hmpERNvWDdDYZACPOyFDt3Du6nGc0,534
26
27
  climate_ref/datasets/utils.py,sha256=iLJO7h4G3DWsRe9hIC4qkIyi5_zIW1ZMw-FDASLujtM,359
27
28
  climate_ref/executor/__init__.py,sha256=PYtJs3oBS_GiUHbt8BF-6wJibpF6_vREm1Cg9TxVbLI,648
28
- climate_ref/executor/hpc.py,sha256=4o90sCyoC4jlkem3BXNo4uwFZpIvOUGfrqYucB6EtU8,12251
29
+ climate_ref/executor/hpc.py,sha256=ZhGtzM0skH_ojnkSc6UNYIetXoyBRCwfXJusuezBZGw,13876
29
30
  climate_ref/executor/local.py,sha256=65LUl41YtURFb87YTWZQHjDpIRlIKJ5Ny51c9DZjy0s,8582
31
+ climate_ref/executor/pbs_scheduler.py,sha256=WoH1sTmDl7bdmYodpcxZjkUSvInYUcWR4x7buIgBxqk,5807
30
32
  climate_ref/executor/result_handling.py,sha256=i7ZMX5vvyPY5gW-WWd-JHLi1BLviB9FXhn4FE8C9d4w,7787
31
33
  climate_ref/executor/synchronous.py,sha256=o4TndsoKMu9AzJYLkusU9lRkgHCy6HcCP46tEs6o86U,1895
32
34
  climate_ref/migrations/README,sha256=xM5osYbyEbEFA2eh5kwary_oh-5VFWtDubA-vgWwvlE,935
@@ -34,16 +36,19 @@ climate_ref/migrations/env.py,sha256=8GvBLhGTuQy6MKYMj7QszJEQ2LNewf1Z9kB9dBHQs9I
34
36
  climate_ref/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
35
37
  climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py,sha256=S8Q4THCI4TPnlaQHgQJUCiNW5LAyQClaiTB-0dwhtXU,14050
36
38
  climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py,sha256=s9nZ_l64pSF7sWN53rRPCQlqW_xHqR8tlWhU-ovmsME,2043
39
+ climate_ref/migrations/versions/2025-07-03T1505_795c1e6cf496_drop_unique_requirement_on_slug.py,sha256=TfBHJkm3oPlz0P5Z1tiY6LBp2B1oDvdyL_OOYoV-OiI,984
40
+ climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py,sha256=NSCMMV65v48B8_OoEf4X4bRthAlhzbDo0UlC6nqW3qs,2908
41
+ climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py,sha256=G-SZKdU9dx9WyMh4JLwPKcud4gtFrxu-tULXG9vXGAU,2034
37
42
  climate_ref/models/__init__.py,sha256=rUDKRANeAEAHVOrzJVIZoZ99dDG5O4AGzHmOpC876Nc,801
38
43
  climate_ref/models/base.py,sha256=YMyovT2Z_tRv59zz6qC9YCCDodhO3x6OLnFdBtPJkho,1271
39
- climate_ref/models/dataset.py,sha256=Rpwrx0HqOJBHs4Sb4n6B0In__Uo0PqXSZKvZR-juGCg,7491
40
- climate_ref/models/diagnostic.py,sha256=YB6xzbEXdpz2j-Ddf19RV8mAiWBrkmtRmiAEUV3tl4Q,1762
44
+ climate_ref/models/dataset.py,sha256=in9FNLR4K_bpVSlWlk6A6IyFtkFy2v8ZFNcDXbwSMWI,8078
45
+ climate_ref/models/diagnostic.py,sha256=0mKVvASEWNxx41R2Y-5VxplarZ4JAP6q0oaO14FKZuk,1751
41
46
  climate_ref/models/execution.py,sha256=lRCpaKLSR7rZbuoL94GW76tm9wLMsSDoIOA7bIa6xgY,9848
42
47
  climate_ref/models/metric_value.py,sha256=44OLcZz-qLx-p_9w7YWDKpD5S7Y9HyTKKsvSb77RBro,10190
43
48
  climate_ref/models/provider.py,sha256=RAE2qAAxwObu-72CdK4kt5ACMmKYEn07WJm7DU9hF28,990
44
- climate_ref-0.6.3.dist-info/METADATA,sha256=ToCGnsUks6eGMQtZWrcPAkQI-5IqWK3mQ77s2dgCyak,4505
45
- climate_ref-0.6.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
46
- climate_ref-0.6.3.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
47
- climate_ref-0.6.3.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- climate_ref-0.6.3.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
49
- climate_ref-0.6.3.dist-info/RECORD,,
49
+ climate_ref-0.6.5.dist-info/METADATA,sha256=uKVSOC5iN1SGV3eoQ9uClB1UC_FpDbbM2ArYL0EHd0U,4505
50
+ climate_ref-0.6.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
51
+ climate_ref-0.6.5.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
52
+ climate_ref-0.6.5.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
+ climate_ref-0.6.5.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
54
+ climate_ref-0.6.5.dist-info/RECORD,,