climate-ref 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +18 -8
- climate_ref/cli/datasets.py +31 -27
- climate_ref/cli/executions.py +1 -1
- climate_ref/cli/providers.py +2 -4
- climate_ref/cli/solve.py +1 -2
- climate_ref/config.py +5 -6
- climate_ref/constants.py +1 -1
- climate_ref/database.py +1 -0
- climate_ref/dataset_registry/sample_data.txt +14 -0
- climate_ref/datasets/base.py +43 -39
- climate_ref/executor/__init__.py +4 -262
- climate_ref/executor/local.py +170 -37
- climate_ref/executor/result_handling.py +231 -0
- climate_ref/executor/synchronous.py +62 -0
- climate_ref/migrations/env.py +1 -0
- climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -21
- climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +57 -0
- climate_ref/models/__init__.py +3 -1
- climate_ref/models/base.py +2 -0
- climate_ref/models/metric_value.py +138 -13
- climate_ref/provider_registry.py +1 -1
- climate_ref/solver.py +18 -30
- climate_ref/testing.py +11 -7
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.1.dist-info}/METADATA +3 -1
- climate_ref-0.5.1.dist-info/RECORD +47 -0
- climate_ref-0.5.0.dist-info/RECORD +0 -44
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.1.dist-info}/WHEEL +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.1.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.1.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.1.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/__init__.py
CHANGED
@@ -12,7 +12,7 @@ from loguru import logger
 from climate_ref import __version__
 from climate_ref.cli import config, datasets, executions, providers, solve
 from climate_ref.config import Config
-from climate_ref.constants import
+from climate_ref.constants import CONFIG_FILENAME
 from climate_ref.database import Database
 from climate_ref_core import __version__ as __core_version__
 from climate_ref_core.logging import add_log_handler
@@ -23,7 +23,8 @@ class LogLevel(str, Enum):
     Log levels for the CLI
     """
 
-
+    Error = "ERROR"
+    Warning = "WARNING"
     Debug = "DEBUG"
     Info = "INFO"
 
@@ -65,7 +66,7 @@ def _load_config(configuration_directory: Path | None = None) -> Config:
     """
     try:
         if configuration_directory:
-            config = Config.load(configuration_directory /
+            config = Config.load(configuration_directory / CONFIG_FILENAME, allow_missing=False)
         else:
            config = Config.default()
    except FileNotFoundError:
@@ -109,19 +110,28 @@ app = build_app()
 
 
 @app.callback()
-def main(
+def main(  # noqa: PLR0913
     ctx: typer.Context,
     configuration_directory: Annotated[Path | None, typer.Option(help="Configuration directory")] = None,
-    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
-
+    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Set the log level to DEBUG")] = False,
+    quiet: Annotated[bool, typer.Option("--quiet", "-q", help="Set the log level to WARNING")] = False,
+    log_level: Annotated[
+        LogLevel, typer.Option(case_sensitive=False, help="Set the level of logging information to display")
+    ] = LogLevel.Info,
     version: Annotated[
         Optional[bool],
-        typer.Option(
+        typer.Option(
+            "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
+        ),
     ] = None,
 ) -> None:
     """
-    climate_ref: A CLI for the
+    climate_ref: A CLI for the Assessment Fast Track Rapid Evaluation Framework
+
+    This CLI provides a number of commands for managing and executing diagnostics.
     """
+    if quiet:
+        log_level = LogLevel.Warning
     if verbose:
         log_level = LogLevel.Debug
 
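An illustrative sketch of the new logging flags, using Typer's test runner; per the callback above, --quiet maps to WARNING, --verbose maps to DEBUG, and --verbose wins if both are given. The `app` object is the Typer application built in this module:

    # Sketch only; exact output depends on the installed package
    from typer.testing import CliRunner

    from climate_ref.cli import app

    runner = CliRunner()
    result = runner.invoke(app, ["--quiet", "--version"])
    print(result.output)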
climate_ref/cli/datasets.py
CHANGED
@@ -90,7 +90,7 @@ def list_columns(
 @app.command()
 def ingest(  # noqa: PLR0913
     ctx: typer.Context,
-    file_or_directory: Path,
+    file_or_directory: list[Path],
     source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
     solve: Annotated[bool, typer.Option(help="Solve for new diagnostic executions after ingestion")] = False,
     dry_run: Annotated[bool, typer.Option(help="Do not ingest datasets into the database")] = False,
@@ -107,40 +107,44 @@ def ingest(  # noqa: PLR0913
     config = ctx.obj.config
     db = ctx.obj.database
 
-    file_or_directory = Path(file_or_directory).expanduser()
-    logger.info(f"ingesting {file_or_directory}")
-
     kwargs = {}
 
     if n_jobs is not None:
         kwargs["n_jobs"] = n_jobs
 
+    # Create a data catalog from the specified file or directory
     adapter = get_dataset_adapter(source_type.value, **kwargs)
 
-
-
-    logger.
-
-
-
-
-
-
-
-
-
-
-
-
-        dataset
-        if not dataset:
-            logger.info(f"Would save dataset {instance_id} to the database")
-            continue
-        else:
+    for _dir in file_or_directory:
+        _dir = Path(_dir).expanduser()
+        logger.info(f"Ingesting {_dir}")
+
+        if not _dir.exists():
+            logger.error(f"File or directory {_dir} does not exist")
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
+
+        data_catalog = adapter.find_local_datasets(_dir)
+        data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+
+        logger.info(
+            f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
+        )
+        pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
+
+        for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
+            logger.debug(f"Processing dataset {instance_id}")
             with db.session.begin():
-
+                if dry_run:
+                    dataset = (
+                        db.session.query(Dataset)
+                        .filter_by(slug=instance_id, dataset_type=source_type)
+                        .first()
+                    )
+                    if not dataset:
+                        logger.info(f"Would save dataset {instance_id} to the database")
+                        continue
+                else:
+                    adapter.register_dataset(config, db, data_catalog_dataset)
 
     if solve:
         solve_required_executions(
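A hedged sketch of the reworked command, which now ingests several paths in one invocation. The --source-type and --dry-run option names come from the signature above; the `datasets` sub-app name and the "cmip6" source-type value are assumptions:

    from typer.testing import CliRunner

    from climate_ref.cli import app

    runner = CliRunner()
    result = runner.invoke(
        app,
        # Each path is ingested in turn; a missing path now raises FileNotFoundError
        ["datasets", "ingest", "--source-type", "cmip6", "/data/CMIP6-a", "/data/CMIP6-b", "--dry-run"],
    )
    print(result.output)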
climate_ref/cli/executions.py
CHANGED
@@ -20,7 +20,7 @@ from climate_ref.cli._utils import df_to_table, pretty_print_df
 from climate_ref.config import Config
 from climate_ref.models import Execution, ExecutionGroup
 from climate_ref.models.execution import get_execution_group_and_latest
-from climate_ref_core.
+from climate_ref_core.logging import EXECUTION_LOG_FILENAME
 
 app = typer.Typer(help=__doc__)
 console = Console()
climate_ref/cli/providers.py
CHANGED
@@ -24,8 +24,7 @@ def list_(ctx: typer.Context) -> None:
     """
     config = ctx.obj.config
     db = ctx.obj.database
-
-    provider_registry = ProviderRegistry.build_from_config(config, db)
+    provider_registry = ProviderRegistry.build_from_config(config, db)
 
     def get_env(provider: DiagnosticProvider) -> str:
         env = ""
@@ -61,8 +60,7 @@ def create_env(
     """
     config = ctx.obj.config
     db = ctx.obj.database
-
-    providers = ProviderRegistry.build_from_config(config, db).providers
+    providers = ProviderRegistry.build_from_config(config, db).providers
 
     if provider is not None:
         available = ", ".join([f'"{p.slug}"' for p in providers])
climate_ref/cli/solve.py
CHANGED
@@ -19,5 +19,4 @@ def solve(
     """
     config = ctx.obj.config
     db = ctx.obj.database
-
-    solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
+    solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
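The solver can also be driven programmatically with the same keyword arguments; a sketch, assuming solve_required_executions lives in climate_ref.solver (the CLI imports it, and climate_ref/solver.py is part of this diff):

    from climate_ref.config import Config
    from climate_ref.database import Database
    from climate_ref.solver import solve_required_executions

    config = Config.default()
    db = Database("sqlite:///climate_ref.db")  # assumed constructor argument
    # dry_run reports what would be executed without running anything
    solve_required_executions(config=config, db=db, dry_run=True, timeout=60)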
climate_ref/config.py
CHANGED
@@ -34,11 +34,10 @@ from climate_ref._config_helpers import (
     env_field,
     transform_error,
 )
-from climate_ref.constants import
-from climate_ref.executor import import_executor_cls
+from climate_ref.constants import CONFIG_FILENAME
 from climate_ref_core.env import env
 from climate_ref_core.exceptions import InvalidExecutorException
-from climate_ref_core.executor import Executor
+from climate_ref_core.executor import Executor, import_executor_cls
 
 if TYPE_CHECKING:
     from climate_ref.database import Database
@@ -156,12 +155,12 @@ class ExecutorConfig:
     Configuration to define the executor to use for running diagnostics
     """
 
-    executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.
+    executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.LocalExecutor")
     """
     Executor to use for running diagnostics
 
     This should be the fully qualified name of the executor class
-    (e.g. `climate_ref.executor.
+    (e.g. `climate_ref.executor.LocalExecutor`).
     The default is to use the local executor.
     The environment variable `REF_EXECUTOR` takes precedence over this configuration value.
 
@@ -405,7 +404,7 @@ class Config:
     The default configuration
     """
     root = env.path("REF_CONFIGURATION")
-    path_to_load = root /
+    path_to_load = root / CONFIG_FILENAME
 
     logger.debug(f"Loading default configuration from {path_to_load}")
     return cls.load(path_to_load)
climate_ref/constants.py
CHANGED
climate_ref/database.py
CHANGED
@@ -106,6 +106,7 @@ class Database:
         logger.info(f"Connecting to database at {url}")
         self.url = url
         self._engine = sqlalchemy.create_engine(self.url)
+        # TODO: Set autobegin=False
         self.session = Session(self._engine)
 
     def alembic_config(self, config: "Config") -> AlembicConfig:
climate_ref/dataset_registry/sample_data.txt
CHANGED
@@ -18,12 +18,26 @@ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/abrupt-4xCO2/r1i1p1f1/fx/areacella/gn/v20191115/a
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/pr/gn/v20191115/pr_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc acc821dc400f53166379d2e23095bc2690d7ca7db6c7a6f88ae29a8771b3c65a
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/psl/gn/v20191115/psl_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b63a3d4051cf17568df808836b189826da580ca8e1db949b1e93a71c80756c8d
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 44a3c90a41744101afb00344f50947fe46444fe5d6bd3623c0c19aa02a378c86
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlutcs/gn/v20191115/rlutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 50b54337ffcf581236929c4eb904bc8240f848a5fa646de75129ed5cbddbbc23
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc a4e1fc3a4a4d00c2fa18ec616338426eb3d91165db3bc57e565ffdc8d6bd9d34
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 8d492ef1f2bb654220fe64977d9942a33af0962ee9afa4017dcc75b6f0103015
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsutcs/gn/v20191115/rsutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 8bae5124b8fe5040853e9b02f2942ee4c81cfd972b314f2828c04a9965804357
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 38e055e57aea5a9ae76ed3fc5325be6783b5694a9edc28aafd24dd462b32e5ce
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/ts/gn/v20191115/ts_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc e02530449c92e0ffc72e9edeba57f5d38ab8652a28486c1c2b9ddada1f38fbd9
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Emon/cSoil/gn/v20191115/cSoil_Emon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc bebda54ca0518630e90b79585910fc38b7edfe118ecf1cf4cb4a8de0950a911e
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/gpp/gn/v20191115/gpp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc da36ed1653f7aafe40a4fc9b99004a46cb45231697ce6b3413dfc171980c37df
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/lai/gn/v20191115/lai_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 972c11880af2cf06c2e1489968b1ac4829d8b00afd7586499814c3ddcfd95ed2
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrro/gn/v20191115/mrro_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc aaaf89f7a1dedf79e8cda71ab345a6809cfb698a63dcc638ccf7a316d13e6920
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrsos/gn/v20191115/mrsos_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 4b78059c4f899a48ae6f3a3cf68e95d76e3603044744521c4aadd992dec93995
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 75e559842e9081ea7563f2590d2c1e8a22af72abc78e37a012b9d56da532569e
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/areacello/gn/v20191115/areacello_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 6808b64c7328bd118537bfb7cfd35748b4e84cae3f6a5586403aa9d8040e4d0b
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/sftof/gn/v20191115/sftof_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 9bc037566546b8a65d063c4e8225b43b56151856f5a8adde5992f44c85b7c727
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-197912.nc f068351200f0afb451a0e39e13d5e3ddeb48b1f1812b97a1a786d802659c969c
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_198001-198912.nc 703b495bf2effa5cae369893e2868ae1f38b69510366404e236a4605e6560ae6
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_199001-199912.nc 79a7ef5b61962da66abd14598412ad4f1ba0c9b531e7ecaf5a6190e0a4f9589c
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200912.nc 001b3e7c639cae224e4de4f14601f492bec7aeb028cd02e9f07812c1db05abb7
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_201001-201412.nc 6c6cc432feb33552d643b2a57d0552ac563ec5a90ad462b6daeacdf3e7a9158f
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 92384dba703a6575d696bf8c04337c3a1d4f538bcd9ca28bf61ab058f8038b30
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 94121a2233aff78ef8799c5d59b6b6f3e7d3f2fb7ceb3a4a1645943ef3e88040
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/SImon/siconc/gn/v20200817/siconc_SImon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 886c62a24797e893fe11b8de4d16c8a277bdee931b692d533f2fb3fa39820aa1
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/v20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 4587a7b0110a226a805f681ee9fe456d20ec310302b2c120334c21595e4e96cb
climate_ref/datasets/base.py
CHANGED
@@ -156,6 +156,43 @@ class DatasetAdapter(Protocol):
         )
         return dataset
 
+    def _get_dataset_files(self, db: Database, limit: int | None = None) -> pd.DataFrame:
+        dataset_type = self.dataset_cls.__mapper_args__["polymorphic_identity"]
+
+        result = (
+            db.session.query(DatasetFile)
+            # The join is necessary to be able to order by the dataset columns
+            .join(DatasetFile.dataset)
+            .where(Dataset.dataset_type == dataset_type)
+            # The joinedload is necessary to avoid N+1 queries (one for each dataset)
+            # https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
+            .options(joinedload(DatasetFile.dataset.of_type(self.dataset_cls)))
+            .order_by(Dataset.updated_at.desc())
+            .limit(limit)
+            .all()
+        )
+
+        return pd.DataFrame(
+            [
+                {
+                    **{k: getattr(file, k) for k in self.file_specific_metadata},
+                    **{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
+                }
+                for file in result
+            ],
+            index=[file.dataset.id for file in result],
+        )
+
+    def _get_datasets(self, db: Database, limit: int | None = None) -> pd.DataFrame:
+        result_datasets = (
+            db.session.query(self.dataset_cls).order_by(Dataset.updated_at.desc()).limit(limit).all()
+        )
+
+        return pd.DataFrame(
+            [{k: getattr(dataset, k) for k in self.dataset_specific_metadata} for dataset in result_datasets],
+            index=[file.id for file in result_datasets],
+        )
+
     def load_catalog(
         self, db: Database, include_files: bool = True, limit: int | None = None
     ) -> pd.DataFrame:
@@ -173,42 +210,9 @@
         :
             Data catalog containing the metadata for the currently ingested datasets
         """
-
-
-
-
-
-
-            # The join is necessary to be able to order by the dataset columns
-            .join(DatasetFile.dataset)
-            .where(Dataset.dataset_type == dataset_type)
-            # The joinedload is necessary to avoid N+1 queries (one for each dataset)
-            # https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
-            .options(joinedload(DatasetFile.dataset.of_type(DatasetModel)))
-            .order_by(Dataset.updated_at.desc())
-            .limit(limit)
-            .all()
-            )
-
-            return pd.DataFrame(
-                [
-                    {
-                        **{k: getattr(file, k) for k in self.file_specific_metadata},
-                        **{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
-                    }
-                    for file in result
-                ],
-                index=[file.dataset.id for file in result],
-            )
-        else:
-            result_datasets = (
-                db.session.query(DatasetModel).order_by(Dataset.updated_at.desc()).limit(limit).all()
-            )
-
-            return pd.DataFrame(
-                [
-                    {k: getattr(dataset, k) for k in self.dataset_specific_metadata}
-                    for dataset in result_datasets
-                ],
-                index=[file.id for file in result_datasets],
-            )
+        with db.session.begin():
+            # TODO: Paginate this query to avoid loading all the data at once
+            if include_files:
+                return self._get_dataset_files(db, limit)
+            else:
+                return self._get_datasets(db, limit)
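A usage sketch of the refactored catalog loading; the get_dataset_adapter import path, the "cmip6" slug, and the Database constructor argument are assumptions based on the CLI code earlier in this diff:

    from climate_ref.database import Database
    from climate_ref.datasets import get_dataset_adapter

    db = Database("sqlite:///climate_ref.db")  # assumed constructor argument
    adapter = get_dataset_adapter("cmip6")

    # One row per file, eagerly joined to the dataset to avoid N+1 queries
    files = adapter.load_catalog(db, include_files=True, limit=100)
    # One row per dataset, skipping the per-file metadata
    datasets_only = adapter.load_catalog(db, include_files=False)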
climate_ref/executor/__init__.py
CHANGED
@@ -9,266 +9,8 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
 This is useful for local testing and debugging.
 """
 
-import importlib
-import pathlib
-import shutil
-from typing import TYPE_CHECKING
+from .local import LocalExecutor
+from .result_handling import handle_execution_result
+from .synchronous import SynchronousExecutor
 
-
-from sqlalchemy import insert
-
-from climate_ref.database import Database
-from climate_ref.models.execution import Execution, ExecutionOutput, ResultOutputType
-from climate_ref.models.metric_value import MetricValue
-from climate_ref_core.diagnostics import ExecutionResult, ensure_relative_path
-from climate_ref_core.exceptions import InvalidExecutorException, ResultValidationError
-from climate_ref_core.executor import EXECUTION_LOG_FILENAME, Executor
-from climate_ref_core.pycmec.controlled_vocabulary import CV
-from climate_ref_core.pycmec.metric import CMECMetric
-from climate_ref_core.pycmec.output import CMECOutput, OutputDict
-
-if TYPE_CHECKING:
-    from climate_ref.config import Config
-
-
-def import_executor_cls(fqn: str) -> type[Executor]:
-    """
-    Import an executor using a fully qualified module path
-
-    Parameters
-    ----------
-    fqn
-        Full package and attribute name of the executor to import
-
-        For example: `climate_ref_example.executor` will use the `executor` attribute from the
-        `climate_ref_example` package.
-
-    Raises
-    ------
-    climate_ref_core.exceptions.InvalidExecutorException
-        If the executor cannot be imported
-
-        If the executor isn't a valid `DiagnosticProvider`.
-
-    Returns
-    -------
-    :
-        Executor instance
-    """
-    module, attribute_name = fqn.rsplit(".", 1)
-
-    try:
-        imp = importlib.import_module(module)
-        executor: type[Executor] = getattr(imp, attribute_name)
-
-        # We can't really check if the executor is a subclass of Executor here
-        # Protocols can't be used with issubclass if they have non-method members
-        # We have to check this at class instantiation time
-
-        return executor
-    except ModuleNotFoundError:
-        logger.error(f"Package '{fqn}' not found")
-        raise InvalidExecutorException(fqn, f"Module '{module}' not found")
-    except AttributeError:
-        logger.error(f"Provider '{fqn}' not found")
-        raise InvalidExecutorException(fqn, f"Executor '{attribute_name}' not found in {module}")
-
-
-def _copy_file_to_results(
-    scratch_directory: pathlib.Path,
-    results_directory: pathlib.Path,
-    fragment: pathlib.Path | str,
-    filename: pathlib.Path | str,
-) -> None:
-    """
-    Copy a file from the scratch directory to the executions directory
-
-    Parameters
-    ----------
-    scratch_directory
-        The directory where the file is currently located
-    results_directory
-        The directory where the file should be copied to
-    fragment
-        The fragment of the executions directory where the file should be copied
-    filename
-        The name of the file to be copied
-    """
-    assert results_directory != scratch_directory  # noqa
-    input_directory = scratch_directory / fragment
-    output_directory = results_directory / fragment
-
-    filename = ensure_relative_path(filename, input_directory)
-
-    if not (input_directory / filename).exists():
-        raise FileNotFoundError(f"Could not find {filename} in {input_directory}")
-
-    output_filename = output_directory / filename
-    output_filename.parent.mkdir(parents=True, exist_ok=True)
-
-    shutil.copy(input_directory / filename, output_filename)
-
-
-def handle_execution_result(
-    config: "Config",
-    database: Database,
-    execution: Execution,
-    result: "ExecutionResult",
-) -> None:
-    """
-    Handle the result of a diagnostic execution
-
-    This will update the diagnostic execution result with the output of the diagnostic execution.
-    The output will be copied from the scratch directory to the executions directory.
-
-    Parameters
-    ----------
-    config
-        The configuration to use
-    database
-        The active database session to use
-    execution
-        The diagnostic execution result DB object to update
-    result
-        The result of the diagnostic execution, either successful or failed
-    """
-    # Always copy log data
-    _copy_file_to_results(
-        config.paths.scratch,
-        config.paths.results,
-        execution.output_fragment,
-        EXECUTION_LOG_FILENAME,
-    )
-
-    if result.successful and result.metric_bundle_filename is not None:
-        logger.info(f"{execution} successful")
-
-        _copy_file_to_results(
-            config.paths.scratch,
-            config.paths.results,
-            execution.output_fragment,
-            result.metric_bundle_filename,
-        )
-        execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))
-
-        if result.output_bundle_filename:
-            _copy_file_to_results(
-                config.paths.scratch,
-                config.paths.results,
-                execution.output_fragment,
-                result.output_bundle_filename,
-            )
-            _handle_output_bundle(
-                config,
-                database,
-                execution,
-                result.to_output_path(result.output_bundle_filename),
-            )
-
-        cmec_metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
-
-        # Check that the diagnostic values conform with the controlled vocabulary
-        try:
-            cv = CV.load_from_file(config.paths.dimensions_cv)
-            cv.validate_metrics(cmec_metric_bundle)
-        except (ResultValidationError, AssertionError):
-            logger.exception("Diagnostic values do not conform with the controlled vocabulary")
-            # TODO: Mark the diagnostic execution result as failed once the CV has stabilised
-            # execution.mark_failed()
-
-        # Perform a bulk insert of a diagnostic bundle
-        # TODO: The section below will likely fail until we have agreed on a controlled vocabulary
-        # The current implementation will swallow the exception, but display a log message
-        try:
-            # Perform this in a nested transaction to (hopefully) gracefully rollback if something
-            # goes wrong
-            with database.session.begin_nested():
-                database.session.execute(
-                    insert(MetricValue),
-                    [
-                        {
-                            "execution_id": execution.id,
-                            "value": result.value,
-                            "attributes": result.attributes,
-                            **result.dimensions,
-                        }
-                        for result in cmec_metric_bundle.iter_results()
-                    ],
-                )
-        except Exception:
-            # TODO: Remove once we have settled on a controlled vocabulary
-            logger.exception("Something went wrong when ingesting diagnostic values")
-
-        # TODO: This should check if the result is the most recent for the execution,
-        # if so then update the dirty fields
-        # i.e. if there are outstanding executions don't make as clean
-        execution.execution_group.dirty = False
-    else:
-        logger.error(f"{execution} failed")
-        execution.mark_failed()
-
-
-def _handle_output_bundle(
-    config: "Config",
-    database: Database,
-    execution: Execution,
-    cmec_output_bundle_filename: pathlib.Path,
-) -> None:
-    # Extract the registered outputs
-    # Copy the content to the output directory
-    # Track in the db
-    cmec_output_bundle = CMECOutput.load_from_json(cmec_output_bundle_filename)
-    _handle_outputs(
-        cmec_output_bundle.plots,
-        output_type=ResultOutputType.Plot,
-        config=config,
-        database=database,
-        execution=execution,
-    )
-    _handle_outputs(
-        cmec_output_bundle.data,
-        output_type=ResultOutputType.Data,
-        config=config,
-        database=database,
-        execution=execution,
-    )
-    _handle_outputs(
-        cmec_output_bundle.html,
-        output_type=ResultOutputType.HTML,
-        config=config,
-        database=database,
-        execution=execution,
-    )
-
-
-def _handle_outputs(
-    outputs: dict[str, OutputDict] | None,
-    output_type: ResultOutputType,
-    config: "Config",
-    database: Database,
-    execution: Execution,
-) -> None:
-    if outputs is None:
-        return
-
-    for key, output_info in outputs.items():
-        filename = ensure_relative_path(
-            output_info.filename, config.paths.scratch / execution.output_fragment
-        )
-
-        _copy_file_to_results(
-            config.paths.scratch,
-            config.paths.results,
-            execution.output_fragment,
-            filename,
-        )
-        database.session.add(
-            ExecutionOutput(
-                execution_id=execution.id,
-                output_type=output_type,
-                filename=str(filename),
-                description=output_info.description,
-                short_name=key,
-                long_name=output_info.long_name,
-            )
-        )
+__all__ = ["LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
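The executor package is now split into submodules (local, result_handling, synchronous), with the public surface re-exported per the __all__ above. A sketch (LocalExecutor's constructor arguments, if any, are an assumption):

    from climate_ref.executor import (
        LocalExecutor,
        SynchronousExecutor,
        handle_execution_result,
    )

    executor = LocalExecutor()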