climate-ref 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +18 -8
- climate_ref/cli/datasets.py +31 -27
- climate_ref/cli/executions.py +1 -1
- climate_ref/cli/providers.py +2 -4
- climate_ref/cli/solve.py +1 -2
- climate_ref/config.py +50 -19
- climate_ref/constants.py +1 -1
- climate_ref/database.py +1 -0
- climate_ref/dataset_registry/sample_data.txt +14 -0
- climate_ref/datasets/base.py +43 -39
- climate_ref/executor/__init__.py +4 -262
- climate_ref/executor/local.py +170 -37
- climate_ref/executor/result_handling.py +231 -0
- climate_ref/executor/synchronous.py +62 -0
- climate_ref/migrations/env.py +5 -0
- climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -21
- climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +57 -0
- climate_ref/models/__init__.py +3 -1
- climate_ref/models/base.py +2 -0
- climate_ref/models/metric_value.py +138 -13
- climate_ref/provider_registry.py +1 -1
- climate_ref/solver.py +18 -30
- climate_ref/testing.py +11 -7
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/METADATA +12 -6
- climate_ref-0.5.2.dist-info/RECORD +47 -0
- climate_ref-0.5.0.dist-info/RECORD +0 -44
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/WHEEL +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/__init__.py
CHANGED
|
@@ -12,7 +12,7 @@ from loguru import logger
|
|
|
12
12
|
from climate_ref import __version__
|
|
13
13
|
from climate_ref.cli import config, datasets, executions, providers, solve
|
|
14
14
|
from climate_ref.config import Config
|
|
15
|
-
from climate_ref.constants import
|
|
15
|
+
from climate_ref.constants import CONFIG_FILENAME
|
|
16
16
|
from climate_ref.database import Database
|
|
17
17
|
from climate_ref_core import __version__ as __core_version__
|
|
18
18
|
from climate_ref_core.logging import add_log_handler
|
|
@@ -23,7 +23,8 @@ class LogLevel(str, Enum):
|
|
|
23
23
|
Log levels for the CLI
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
Error = "ERROR"
|
|
27
|
+
Warning = "WARNING"
|
|
27
28
|
Debug = "DEBUG"
|
|
28
29
|
Info = "INFO"
|
|
29
30
|
|
|
@@ -65,7 +66,7 @@ def _load_config(configuration_directory: Path | None = None) -> Config:
|
|
|
65
66
|
"""
|
|
66
67
|
try:
|
|
67
68
|
if configuration_directory:
|
|
68
|
-
config = Config.load(configuration_directory /
|
|
69
|
+
config = Config.load(configuration_directory / CONFIG_FILENAME, allow_missing=False)
|
|
69
70
|
else:
|
|
70
71
|
config = Config.default()
|
|
71
72
|
except FileNotFoundError:
|
|
@@ -109,19 +110,28 @@ app = build_app()
|
|
|
109
110
|
|
|
110
111
|
|
|
111
112
|
@app.callback()
|
|
112
|
-
def main(
|
|
113
|
+
def main( # noqa: PLR0913
|
|
113
114
|
ctx: typer.Context,
|
|
114
115
|
configuration_directory: Annotated[Path | None, typer.Option(help="Configuration directory")] = None,
|
|
115
|
-
verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
|
|
116
|
-
|
|
116
|
+
verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Set the log level to DEBUG")] = False,
|
|
117
|
+
quiet: Annotated[bool, typer.Option("--quiet", "-q", help="Set the log level to WARNING")] = False,
|
|
118
|
+
log_level: Annotated[
|
|
119
|
+
LogLevel, typer.Option(case_sensitive=False, help="Set the level of logging information to display")
|
|
120
|
+
] = LogLevel.Info,
|
|
117
121
|
version: Annotated[
|
|
118
122
|
Optional[bool],
|
|
119
|
-
typer.Option(
|
|
123
|
+
typer.Option(
|
|
124
|
+
"--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
|
|
125
|
+
),
|
|
120
126
|
] = None,
|
|
121
127
|
) -> None:
|
|
122
128
|
"""
|
|
123
|
-
climate_ref: A CLI for the
|
|
129
|
+
climate_ref: A CLI for the Assessment Fast Track Rapid Evaluation Framework
|
|
130
|
+
|
|
131
|
+
This CLI provides a number of commands for managing and executing diagnostics.
|
|
124
132
|
"""
|
|
133
|
+
if quiet:
|
|
134
|
+
log_level = LogLevel.Warning
|
|
125
135
|
if verbose:
|
|
126
136
|
log_level = LogLevel.Debug
|
|
127
137
|
|
climate_ref/cli/datasets.py
CHANGED
|
@@ -90,7 +90,7 @@ def list_columns(
|
|
|
90
90
|
@app.command()
|
|
91
91
|
def ingest( # noqa: PLR0913
|
|
92
92
|
ctx: typer.Context,
|
|
93
|
-
file_or_directory: Path,
|
|
93
|
+
file_or_directory: list[Path],
|
|
94
94
|
source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
|
|
95
95
|
solve: Annotated[bool, typer.Option(help="Solve for new diagnostic executions after ingestion")] = False,
|
|
96
96
|
dry_run: Annotated[bool, typer.Option(help="Do not ingest datasets into the database")] = False,
|
|
@@ -107,40 +107,44 @@ def ingest( # noqa: PLR0913
|
|
|
107
107
|
config = ctx.obj.config
|
|
108
108
|
db = ctx.obj.database
|
|
109
109
|
|
|
110
|
-
file_or_directory = Path(file_or_directory).expanduser()
|
|
111
|
-
logger.info(f"ingesting {file_or_directory}")
|
|
112
|
-
|
|
113
110
|
kwargs = {}
|
|
114
111
|
|
|
115
112
|
if n_jobs is not None:
|
|
116
113
|
kwargs["n_jobs"] = n_jobs
|
|
117
114
|
|
|
115
|
+
# Create a data catalog from the specified file or directory
|
|
118
116
|
adapter = get_dataset_adapter(source_type.value, **kwargs)
|
|
119
117
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
logger.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
dataset
|
|
138
|
-
if not dataset:
|
|
139
|
-
logger.info(f"Would save dataset {instance_id} to the database")
|
|
140
|
-
continue
|
|
141
|
-
else:
|
|
118
|
+
for _dir in file_or_directory:
|
|
119
|
+
_dir = Path(_dir).expanduser()
|
|
120
|
+
logger.info(f"Ingesting {_dir}")
|
|
121
|
+
|
|
122
|
+
if not _dir.exists():
|
|
123
|
+
logger.error(f"File or directory {_dir} does not exist")
|
|
124
|
+
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
|
|
125
|
+
|
|
126
|
+
data_catalog = adapter.find_local_datasets(_dir)
|
|
127
|
+
data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
|
|
128
|
+
|
|
129
|
+
logger.info(
|
|
130
|
+
f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
|
|
131
|
+
)
|
|
132
|
+
pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
|
|
133
|
+
|
|
134
|
+
for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
|
|
135
|
+
logger.debug(f"Processing dataset {instance_id}")
|
|
142
136
|
with db.session.begin():
|
|
143
|
-
|
|
137
|
+
if dry_run:
|
|
138
|
+
dataset = (
|
|
139
|
+
db.session.query(Dataset)
|
|
140
|
+
.filter_by(slug=instance_id, dataset_type=source_type)
|
|
141
|
+
.first()
|
|
142
|
+
)
|
|
143
|
+
if not dataset:
|
|
144
|
+
logger.info(f"Would save dataset {instance_id} to the database")
|
|
145
|
+
continue
|
|
146
|
+
else:
|
|
147
|
+
adapter.register_dataset(config, db, data_catalog_dataset)
|
|
144
148
|
|
|
145
149
|
if solve:
|
|
146
150
|
solve_required_executions(
|
climate_ref/cli/executions.py
CHANGED
|
@@ -20,7 +20,7 @@ from climate_ref.cli._utils import df_to_table, pretty_print_df
|
|
|
20
20
|
from climate_ref.config import Config
|
|
21
21
|
from climate_ref.models import Execution, ExecutionGroup
|
|
22
22
|
from climate_ref.models.execution import get_execution_group_and_latest
|
|
23
|
-
from climate_ref_core.
|
|
23
|
+
from climate_ref_core.logging import EXECUTION_LOG_FILENAME
|
|
24
24
|
|
|
25
25
|
app = typer.Typer(help=__doc__)
|
|
26
26
|
console = Console()
|
climate_ref/cli/providers.py
CHANGED
|
@@ -24,8 +24,7 @@ def list_(ctx: typer.Context) -> None:
|
|
|
24
24
|
"""
|
|
25
25
|
config = ctx.obj.config
|
|
26
26
|
db = ctx.obj.database
|
|
27
|
-
|
|
28
|
-
provider_registry = ProviderRegistry.build_from_config(config, db)
|
|
27
|
+
provider_registry = ProviderRegistry.build_from_config(config, db)
|
|
29
28
|
|
|
30
29
|
def get_env(provider: DiagnosticProvider) -> str:
|
|
31
30
|
env = ""
|
|
@@ -61,8 +60,7 @@ def create_env(
|
|
|
61
60
|
"""
|
|
62
61
|
config = ctx.obj.config
|
|
63
62
|
db = ctx.obj.database
|
|
64
|
-
|
|
65
|
-
providers = ProviderRegistry.build_from_config(config, db).providers
|
|
63
|
+
providers = ProviderRegistry.build_from_config(config, db).providers
|
|
66
64
|
|
|
67
65
|
if provider is not None:
|
|
68
66
|
available = ", ".join([f'"{p.slug}"' for p in providers])
|
climate_ref/cli/solve.py
CHANGED
|
@@ -19,5 +19,4 @@ def solve(
|
|
|
19
19
|
"""
|
|
20
20
|
config = ctx.obj.config
|
|
21
21
|
db = ctx.obj.database
|
|
22
|
-
|
|
23
|
-
solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
|
|
22
|
+
solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
|
climate_ref/config.py
CHANGED
|
@@ -34,11 +34,10 @@ from climate_ref._config_helpers import (
|
|
|
34
34
|
env_field,
|
|
35
35
|
transform_error,
|
|
36
36
|
)
|
|
37
|
-
from climate_ref.constants import
|
|
38
|
-
from climate_ref.executor import import_executor_cls
|
|
37
|
+
from climate_ref.constants import CONFIG_FILENAME
|
|
39
38
|
from climate_ref_core.env import env
|
|
40
39
|
from climate_ref_core.exceptions import InvalidExecutorException
|
|
41
|
-
from climate_ref_core.executor import Executor
|
|
40
|
+
from climate_ref_core.executor import Executor, import_executor_cls
|
|
42
41
|
|
|
43
42
|
if TYPE_CHECKING:
|
|
44
43
|
from climate_ref.database import Database
|
|
@@ -75,11 +74,13 @@ class PathConfig:
|
|
|
75
74
|
/// admonition | Warning
|
|
76
75
|
type: warning
|
|
77
76
|
|
|
78
|
-
These paths must be common across all systems that the REF is being run
|
|
77
|
+
These paths must be common across all systems that the REF is being run on.
|
|
78
|
+
Generally, this means that they should be mounted in the same location on all systems.
|
|
79
79
|
///
|
|
80
80
|
|
|
81
81
|
If any of these paths are specified as relative paths,
|
|
82
82
|
they will be resolved to absolute paths.
|
|
83
|
+
These absolute paths will be used for all operations in the REF.
|
|
83
84
|
"""
|
|
84
85
|
|
|
85
86
|
log: Path = env_field(name="LOG_ROOT", converter=ensure_absolute_path)
|
|
@@ -156,14 +157,14 @@ class ExecutorConfig:
|
|
|
156
157
|
Configuration to define the executor to use for running diagnostics
|
|
157
158
|
"""
|
|
158
159
|
|
|
159
|
-
executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.
|
|
160
|
+
executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.LocalExecutor")
|
|
160
161
|
"""
|
|
161
|
-
Executor to use for running diagnostics
|
|
162
|
+
Executor class to use for running diagnostics
|
|
162
163
|
|
|
163
164
|
This should be the fully qualified name of the executor class
|
|
164
|
-
(e.g. `climate_ref.executor.
|
|
165
|
-
The default is to use the local executor
|
|
166
|
-
|
|
165
|
+
(e.g. `climate_ref.executor.LocalExecutor`).
|
|
166
|
+
The default is to use the local executor, which runs the executions locally, in parallel
|
|
167
|
+
using a process pool.
|
|
167
168
|
|
|
168
169
|
This class will be used for all executions of diagnostics.
|
|
169
170
|
"""
|
|
@@ -173,6 +174,7 @@ class ExecutorConfig:
|
|
|
173
174
|
Additional configuration for the executor.
|
|
174
175
|
|
|
175
176
|
See the documentation for the executor for the available configuration options.
|
|
177
|
+
These options will be passed to the executor class when it is created.
|
|
176
178
|
"""
|
|
177
179
|
|
|
178
180
|
def build(self, config: "Config", database: "Database") -> Executor:
|
|
@@ -200,7 +202,30 @@ class ExecutorConfig:
|
|
|
200
202
|
@define
|
|
201
203
|
class DiagnosticProviderConfig:
|
|
202
204
|
"""
|
|
203
|
-
|
|
205
|
+
Configuration for the diagnostic providers used by the REF.
|
|
206
|
+
|
|
207
|
+
Each diagnostic provider is a package that contains the logic for running a specific
|
|
208
|
+
set of diagnostics.
|
|
209
|
+
This configuration determines which diagnostic providers are loaded and used when solving.
|
|
210
|
+
|
|
211
|
+
Multiple diagnostic providers can be specified as shown in the example below.
|
|
212
|
+
|
|
213
|
+
```toml
|
|
214
|
+
[[diagnostic_providers]]
|
|
215
|
+
provider = "climate_ref_esmvaltool.provider"
|
|
216
|
+
|
|
217
|
+
[diagnostic_providers.config]
|
|
218
|
+
|
|
219
|
+
[[diagnostic_providers]]
|
|
220
|
+
provider = "climate_ref_ilamb.provider"
|
|
221
|
+
|
|
222
|
+
[diagnostic_providers.config]
|
|
223
|
+
|
|
224
|
+
[[diagnostic_providers]]
|
|
225
|
+
provider = "climate_ref_pmp.provider"
|
|
226
|
+
|
|
227
|
+
[diagnostic_providers.config]
|
|
228
|
+
```
|
|
204
229
|
"""
|
|
205
230
|
|
|
206
231
|
provider: str
|
|
@@ -225,21 +250,29 @@ class DbConfig:
|
|
|
225
250
|
"""
|
|
226
251
|
Database configuration
|
|
227
252
|
|
|
228
|
-
We
|
|
229
|
-
|
|
253
|
+
We support SQLite and PostgreSQL databases.
|
|
254
|
+
The default is to use SQLite, which is a file-based database that is stored in the
|
|
255
|
+
`REF_CONFIGURATION` directory.
|
|
256
|
+
This is a good option for testing and development, but not recommended for production use.
|
|
257
|
+
|
|
258
|
+
For production use, we recommend using PostgreSQL.
|
|
230
259
|
"""
|
|
231
260
|
|
|
232
261
|
database_url: str = env_field(name="DATABASE_URL")
|
|
233
262
|
"""
|
|
234
263
|
Database URL that describes the connection to the database.
|
|
235
264
|
|
|
236
|
-
Defaults to sqlite:///{config.paths.db}/climate_ref.db
|
|
265
|
+
Defaults to `sqlite:///{config.paths.db}/climate_ref.db`.
|
|
237
266
|
This configuration value will be overridden by the `REF_DATABASE_URL` environment variable.
|
|
238
267
|
|
|
239
|
-
|
|
268
|
+
**Schemas**
|
|
240
269
|
|
|
270
|
+
The following schemas are supported:
|
|
271
|
+
```
|
|
241
272
|
postgresql://USER:PASSWORD@HOST:PORT/NAME
|
|
273
|
+
|
|
242
274
|
sqlite:///RELATIVE_PATH or sqlite:////ABS_PATH or sqlite:///:memory:
|
|
275
|
+
```
|
|
243
276
|
"""
|
|
244
277
|
run_migrations: bool = field(default=True)
|
|
245
278
|
|
|
@@ -286,12 +319,10 @@ def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
|
|
|
286
319
|
return _converter_defaults_relaxed.structure(doc, Config)
|
|
287
320
|
|
|
288
321
|
|
|
289
|
-
@define
|
|
322
|
+
@define(auto_attribs=True)
|
|
290
323
|
class Config:
|
|
291
324
|
"""
|
|
292
|
-
REF
|
|
293
|
-
|
|
294
|
-
This class is used to store the configuration of the REF application.
|
|
325
|
+
Configuration that is used by the REF
|
|
295
326
|
"""
|
|
296
327
|
|
|
297
328
|
log_level: str = field(default="INFO")
|
|
@@ -405,7 +436,7 @@ class Config:
|
|
|
405
436
|
The default configuration
|
|
406
437
|
"""
|
|
407
438
|
root = env.path("REF_CONFIGURATION")
|
|
408
|
-
path_to_load = root /
|
|
439
|
+
path_to_load = root / CONFIG_FILENAME
|
|
409
440
|
|
|
410
441
|
logger.debug(f"Loading default configuration from {path_to_load}")
|
|
411
442
|
return cls.load(path_to_load)
|
climate_ref/constants.py
CHANGED
climate_ref/database.py
CHANGED
|
@@ -106,6 +106,7 @@ class Database:
|
|
|
106
106
|
logger.info(f"Connecting to database at {url}")
|
|
107
107
|
self.url = url
|
|
108
108
|
self._engine = sqlalchemy.create_engine(self.url)
|
|
109
|
+
# TODO: Set autobegin=False
|
|
109
110
|
self.session = Session(self._engine)
|
|
110
111
|
|
|
111
112
|
def alembic_config(self, config: "Config") -> AlembicConfig:
|
|
@@ -18,12 +18,26 @@ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/abrupt-4xCO2/r1i1p1f1/fx/areacella/gn/v20191115/a
|
|
|
18
18
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/pr/gn/v20191115/pr_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc acc821dc400f53166379d2e23095bc2690d7ca7db6c7a6f88ae29a8771b3c65a
|
|
19
19
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/psl/gn/v20191115/psl_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b63a3d4051cf17568df808836b189826da580ca8e1db949b1e93a71c80756c8d
|
|
20
20
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 44a3c90a41744101afb00344f50947fe46444fe5d6bd3623c0c19aa02a378c86
|
|
21
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlutcs/gn/v20191115/rlutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 50b54337ffcf581236929c4eb904bc8240f848a5fa646de75129ed5cbddbbc23
|
|
21
22
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc a4e1fc3a4a4d00c2fa18ec616338426eb3d91165db3bc57e565ffdc8d6bd9d34
|
|
22
23
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 8d492ef1f2bb654220fe64977d9942a33af0962ee9afa4017dcc75b6f0103015
|
|
24
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsutcs/gn/v20191115/rsutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 8bae5124b8fe5040853e9b02f2942ee4c81cfd972b314f2828c04a9965804357
|
|
23
25
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 38e055e57aea5a9ae76ed3fc5325be6783b5694a9edc28aafd24dd462b32e5ce
|
|
24
26
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/ts/gn/v20191115/ts_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc e02530449c92e0ffc72e9edeba57f5d38ab8652a28486c1c2b9ddada1f38fbd9
|
|
27
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Emon/cSoil/gn/v20191115/cSoil_Emon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc bebda54ca0518630e90b79585910fc38b7edfe118ecf1cf4cb4a8de0950a911e
|
|
25
28
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/gpp/gn/v20191115/gpp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc da36ed1653f7aafe40a4fc9b99004a46cb45231697ce6b3413dfc171980c37df
|
|
29
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/lai/gn/v20191115/lai_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 972c11880af2cf06c2e1489968b1ac4829d8b00afd7586499814c3ddcfd95ed2
|
|
30
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrro/gn/v20191115/mrro_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc aaaf89f7a1dedf79e8cda71ab345a6809cfb698a63dcc638ccf7a316d13e6920
|
|
31
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrsos/gn/v20191115/mrsos_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 4b78059c4f899a48ae6f3a3cf68e95d76e3603044744521c4aadd992dec93995
|
|
32
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 75e559842e9081ea7563f2590d2c1e8a22af72abc78e37a012b9d56da532569e
|
|
26
33
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/areacello/gn/v20191115/areacello_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 6808b64c7328bd118537bfb7cfd35748b4e84cae3f6a5586403aa9d8040e4d0b
|
|
34
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/sftof/gn/v20191115/sftof_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 9bc037566546b8a65d063c4e8225b43b56151856f5a8adde5992f44c85b7c727
|
|
35
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-197912.nc f068351200f0afb451a0e39e13d5e3ddeb48b1f1812b97a1a786d802659c969c
|
|
36
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_198001-198912.nc 703b495bf2effa5cae369893e2868ae1f38b69510366404e236a4605e6560ae6
|
|
37
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_199001-199912.nc 79a7ef5b61962da66abd14598412ad4f1ba0c9b531e7ecaf5a6190e0a4f9589c
|
|
38
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200912.nc 001b3e7c639cae224e4de4f14601f492bec7aeb028cd02e9f07812c1db05abb7
|
|
39
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_201001-201412.nc 6c6cc432feb33552d643b2a57d0552ac563ec5a90ad462b6daeacdf3e7a9158f
|
|
40
|
+
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 92384dba703a6575d696bf8c04337c3a1d4f538bcd9ca28bf61ab058f8038b30
|
|
27
41
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 94121a2233aff78ef8799c5d59b6b6f3e7d3f2fb7ceb3a4a1645943ef3e88040
|
|
28
42
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/SImon/siconc/gn/v20200817/siconc_SImon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 886c62a24797e893fe11b8de4d16c8a277bdee931b692d533f2fb3fa39820aa1
|
|
29
43
|
CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/v20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 4587a7b0110a226a805f681ee9fe456d20ec310302b2c120334c21595e4e96cb
|
climate_ref/datasets/base.py
CHANGED
|
@@ -156,6 +156,43 @@ class DatasetAdapter(Protocol):
|
|
|
156
156
|
)
|
|
157
157
|
return dataset
|
|
158
158
|
|
|
159
|
+
def _get_dataset_files(self, db: Database, limit: int | None = None) -> pd.DataFrame:
|
|
160
|
+
dataset_type = self.dataset_cls.__mapper_args__["polymorphic_identity"]
|
|
161
|
+
|
|
162
|
+
result = (
|
|
163
|
+
db.session.query(DatasetFile)
|
|
164
|
+
# The join is necessary to be able to order by the dataset columns
|
|
165
|
+
.join(DatasetFile.dataset)
|
|
166
|
+
.where(Dataset.dataset_type == dataset_type)
|
|
167
|
+
# The joinedload is necessary to avoid N+1 queries (one for each dataset)
|
|
168
|
+
# https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
|
|
169
|
+
.options(joinedload(DatasetFile.dataset.of_type(self.dataset_cls)))
|
|
170
|
+
.order_by(Dataset.updated_at.desc())
|
|
171
|
+
.limit(limit)
|
|
172
|
+
.all()
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
return pd.DataFrame(
|
|
176
|
+
[
|
|
177
|
+
{
|
|
178
|
+
**{k: getattr(file, k) for k in self.file_specific_metadata},
|
|
179
|
+
**{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
|
|
180
|
+
}
|
|
181
|
+
for file in result
|
|
182
|
+
],
|
|
183
|
+
index=[file.dataset.id for file in result],
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
def _get_datasets(self, db: Database, limit: int | None = None) -> pd.DataFrame:
|
|
187
|
+
result_datasets = (
|
|
188
|
+
db.session.query(self.dataset_cls).order_by(Dataset.updated_at.desc()).limit(limit).all()
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
return pd.DataFrame(
|
|
192
|
+
[{k: getattr(dataset, k) for k in self.dataset_specific_metadata} for dataset in result_datasets],
|
|
193
|
+
index=[file.id for file in result_datasets],
|
|
194
|
+
)
|
|
195
|
+
|
|
159
196
|
def load_catalog(
|
|
160
197
|
self, db: Database, include_files: bool = True, limit: int | None = None
|
|
161
198
|
) -> pd.DataFrame:
|
|
@@ -173,42 +210,9 @@ class DatasetAdapter(Protocol):
|
|
|
173
210
|
:
|
|
174
211
|
Data catalog containing the metadata for the currently ingested datasets
|
|
175
212
|
"""
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
# The join is necessary to be able to order by the dataset columns
|
|
183
|
-
.join(DatasetFile.dataset)
|
|
184
|
-
.where(Dataset.dataset_type == dataset_type)
|
|
185
|
-
# The joinedload is necessary to avoid N+1 queries (one for each dataset)
|
|
186
|
-
# https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
|
|
187
|
-
.options(joinedload(DatasetFile.dataset.of_type(DatasetModel)))
|
|
188
|
-
.order_by(Dataset.updated_at.desc())
|
|
189
|
-
.limit(limit)
|
|
190
|
-
.all()
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
return pd.DataFrame(
|
|
194
|
-
[
|
|
195
|
-
{
|
|
196
|
-
**{k: getattr(file, k) for k in self.file_specific_metadata},
|
|
197
|
-
**{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
|
|
198
|
-
}
|
|
199
|
-
for file in result
|
|
200
|
-
],
|
|
201
|
-
index=[file.dataset.id for file in result],
|
|
202
|
-
)
|
|
203
|
-
else:
|
|
204
|
-
result_datasets = (
|
|
205
|
-
db.session.query(DatasetModel).order_by(Dataset.updated_at.desc()).limit(limit).all()
|
|
206
|
-
)
|
|
207
|
-
|
|
208
|
-
return pd.DataFrame(
|
|
209
|
-
[
|
|
210
|
-
{k: getattr(dataset, k) for k in self.dataset_specific_metadata}
|
|
211
|
-
for dataset in result_datasets
|
|
212
|
-
],
|
|
213
|
-
index=[file.id for file in result_datasets],
|
|
214
|
-
)
|
|
213
|
+
with db.session.begin():
|
|
214
|
+
# TODO: Paginate this query to avoid loading all the data at once
|
|
215
|
+
if include_files:
|
|
216
|
+
return self._get_dataset_files(db, limit)
|
|
217
|
+
else:
|
|
218
|
+
return self._get_datasets(db, limit)
|