climate-ref 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. climate_ref/__init__.py +30 -0
  2. climate_ref/_config_helpers.py +214 -0
  3. climate_ref/alembic.ini +114 -0
  4. climate_ref/cli/__init__.py +138 -0
  5. climate_ref/cli/_utils.py +68 -0
  6. climate_ref/cli/config.py +28 -0
  7. climate_ref/cli/datasets.py +205 -0
  8. climate_ref/cli/executions.py +201 -0
  9. climate_ref/cli/providers.py +84 -0
  10. climate_ref/cli/solve.py +23 -0
  11. climate_ref/config.py +475 -0
  12. climate_ref/constants.py +8 -0
  13. climate_ref/database.py +223 -0
  14. climate_ref/dataset_registry/obs4ref_reference.txt +2 -0
  15. climate_ref/dataset_registry/sample_data.txt +60 -0
  16. climate_ref/datasets/__init__.py +40 -0
  17. climate_ref/datasets/base.py +214 -0
  18. climate_ref/datasets/cmip6.py +202 -0
  19. climate_ref/datasets/obs4mips.py +224 -0
  20. climate_ref/datasets/pmp_climatology.py +15 -0
  21. climate_ref/datasets/utils.py +16 -0
  22. climate_ref/executor/__init__.py +274 -0
  23. climate_ref/executor/local.py +89 -0
  24. climate_ref/migrations/README +22 -0
  25. climate_ref/migrations/env.py +139 -0
  26. climate_ref/migrations/script.py.mako +26 -0
  27. climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +292 -0
  28. climate_ref/models/__init__.py +33 -0
  29. climate_ref/models/base.py +42 -0
  30. climate_ref/models/dataset.py +206 -0
  31. climate_ref/models/diagnostic.py +61 -0
  32. climate_ref/models/execution.py +306 -0
  33. climate_ref/models/metric_value.py +195 -0
  34. climate_ref/models/provider.py +39 -0
  35. climate_ref/provider_registry.py +146 -0
  36. climate_ref/py.typed +0 -0
  37. climate_ref/solver.py +395 -0
  38. climate_ref/testing.py +109 -0
  39. climate_ref-0.5.0.dist-info/METADATA +97 -0
  40. climate_ref-0.5.0.dist-info/RECORD +44 -0
  41. climate_ref-0.5.0.dist-info/WHEEL +4 -0
  42. climate_ref-0.5.0.dist-info/entry_points.txt +2 -0
  43. climate_ref-0.5.0.dist-info/licenses/LICENCE +201 -0
  44. climate_ref-0.5.0.dist-info/licenses/NOTICE +3 -0
climate_ref/solver.py ADDED
@@ -0,0 +1,395 @@
+"""
+Solver to determine which diagnostics need to be calculated
+
+This module provides a solver to determine which diagnostics need to be calculated.
+"""
+
+import itertools
+import pathlib
+import typing
+from collections.abc import Sequence
+
+import pandas as pd
+from attrs import define, frozen
+from loguru import logger
+
+from climate_ref.config import Config
+from climate_ref.database import Database
+from climate_ref.datasets import get_dataset_adapter
+from climate_ref.datasets.cmip6 import CMIP6DatasetAdapter
+from climate_ref.datasets.obs4mips import Obs4MIPsDatasetAdapter
+from climate_ref.datasets.pmp_climatology import PMPClimatologyDatasetAdapter
+from climate_ref.models import Diagnostic as DiagnosticModel
+from climate_ref.models import ExecutionGroup
+from climate_ref.models import Provider as ProviderModel
+from climate_ref.models.execution import Execution
+from climate_ref.provider_registry import ProviderRegistry
+from climate_ref_core.constraints import apply_constraint
+from climate_ref_core.datasets import DatasetCollection, ExecutionDatasetCollection, SourceDatasetType
+from climate_ref_core.diagnostics import DataRequirement, Diagnostic, ExecutionDefinition
+from climate_ref_core.exceptions import InvalidDiagnosticException
+from climate_ref_core.providers import DiagnosticProvider
+
+SelectorKey = tuple[tuple[str, str], ...]
+"""
+Type describing the key used to identify a group of datasets
+
+This is a tuple of tuples, where each inner tuple contains a dimension name and the value
+that were used to group the datasets together.
+
+This SelectorKey type must be hashable, as it is used as a key in a dictionary.
+"""
+
+
+@frozen
+class DiagnosticExecution:
+    """
+    Class to hold information about the execution of a diagnostic
+
+    This is a temporary class used by the solver to hold information about an execution that might
+    be required.
+    """
+
+    provider: DiagnosticProvider
+    diagnostic: Diagnostic
+    datasets: ExecutionDatasetCollection
+
+    @property
+    def dataset_key(self) -> str:
+        """
+        Key used to uniquely identify the execution group
+
+        This key is unique to an execution group and uses the unique set of metadata (selectors)
+        that defines the group.
+        The key combines the selectors from each source dataset type into a single key
+        and should be stable if new datasets are added or removed.
+        """
+        key_values = []
+
+        for source_type in SourceDatasetType.ordered():
+            # Ensure the selector is sorted using the dimension names
+            # This will ensure a stable key even if the groupby order changes
+            if source_type not in self.datasets:
+                continue
+
+            selector = self.datasets[source_type].selector
+            selector_sorted = sorted(selector, key=lambda item: item[0])
+
+            source_key = f"{source_type.value}_" + "_".join(value for _, value in selector_sorted)
+            key_values.append(source_key)
+
+        return "__".join(key_values)
+
+    @property
+    def selectors(self) -> dict[str, SelectorKey]:
+        """
+        Collection of selectors used to identify the datasets
+
+        These are the key, value pairs that were selected during the initial group-by,
+        for each data requirement.
+        """
+        # The "value" of SourceType is used here so this can be stored in the db
+        s = {}
+        for source_type in SourceDatasetType.ordered():
+            if source_type not in self.datasets:
+                continue
+            s[source_type.value] = self.datasets[source_type].selector
+        return s
+
+    def build_execution_definition(self, output_root: pathlib.Path) -> ExecutionDefinition:
+        """
+        Build the execution definition for the current diagnostic execution
+        """
+        # Ensure that the output root is always an absolute path
+        output_root = output_root.resolve()
+
+        # This is the desired path relative to the output directory
+        fragment = pathlib.Path() / self.provider.slug / self.diagnostic.slug / self.datasets.hash
+
+        return ExecutionDefinition(
+            root_directory=output_root,
+            output_directory=output_root / fragment,
+            key=self.dataset_key,
+            datasets=self.datasets,
+        )
+
+
+def extract_covered_datasets(
+    data_catalog: pd.DataFrame, requirement: DataRequirement
+) -> dict[SelectorKey, pd.DataFrame]:
+    """
+    Determine the different diagnostic executions that should be performed with the current data catalog
+    """
+    if len(data_catalog) == 0:
+        logger.error(f"No datasets found in the data catalog: {requirement.source_type.value}")
+        return {}
+
+    subset = requirement.apply_filters(data_catalog)
+
+    if len(subset) == 0:
+        logger.debug(f"No datasets found for requirement {requirement}")
+        return {}
+
+    if requirement.group_by is None:
+        # Use a single group
+        groups = [((), subset)]
+    else:
+        groups = list(subset.groupby(list(requirement.group_by)))
+
+    results = {}
+
+    for name, group in groups:
+        if requirement.group_by is None:
+            assert len(groups) == 1  # noqa: S101
+            group_keys: SelectorKey = ()
+        else:
+            group_keys = tuple(zip(requirement.group_by, name))
+        constrained_group = _process_group_constraints(data_catalog, group, requirement)
+
+        if constrained_group is not None:
+            results[group_keys] = constrained_group
+
+    return results
+
+
+def _process_group_constraints(
+    data_catalog: pd.DataFrame, group: pd.DataFrame, requirement: DataRequirement
+) -> pd.DataFrame | None:
+    for constraint in requirement.constraints or []:
+        constrained_group = apply_constraint(group, constraint, data_catalog)
+        if constrained_group is None:
+            return None
+
+        group = constrained_group
+    return group
+
+
+def solve_executions(
+    data_catalog: dict[SourceDatasetType, pd.DataFrame], diagnostic: Diagnostic, provider: DiagnosticProvider
+) -> typing.Generator["DiagnosticExecution", None, None]:
+    """
+    Calculate the diagnostic executions that need to be performed for a given diagnostic
+
+    Parameters
+    ----------
+    data_catalog
+        Data catalogs for each source dataset type
+    diagnostic
+        Diagnostic of interest
+    provider
+        Provider of the diagnostic
+
+    Returns
+    -------
+    :
+        A generator that yields the diagnostic executions that need to be performed
+
+    """
+    if not diagnostic.data_requirements:
+        raise ValueError(f"Diagnostic {diagnostic.slug!r} has no data requirements")
+
+    first_item = next(iter(diagnostic.data_requirements))
+
+    if isinstance(first_item, DataRequirement):
+        # We have a single collection of data requirements
+        yield from _solve_from_data_requirements(
+            data_catalog,
+            diagnostic,
+            typing.cast(Sequence[DataRequirement], diagnostic.data_requirements),
+            provider,
+        )
+    elif isinstance(first_item, Sequence):
+        # We have a sequence of collections of data requirements
+        for requirement_collection in diagnostic.data_requirements:
+            if not isinstance(requirement_collection, Sequence):
+                raise TypeError(f"Expected a sequence of DataRequirement, got {type(requirement_collection)}")
+            yield from _solve_from_data_requirements(
+                data_catalog, diagnostic, requirement_collection, provider
+            )
+    else:
+        raise TypeError(f"Expected a DataRequirement, got {type(first_item)}")
+
+
+def _solve_from_data_requirements(
+    data_catalog: dict[SourceDatasetType, pd.DataFrame],
+    diagnostic: Diagnostic,
+    data_requirements: Sequence[DataRequirement],
+    provider: DiagnosticProvider,
+) -> typing.Generator["DiagnosticExecution", None, None]:
+    # Collect up the different data groups that can be used to calculate the diagnostic
+    dataset_groups = {}
+
+    for requirement in data_requirements:
+        if not isinstance(requirement, DataRequirement):
+            raise TypeError(f"Expected a DataRequirement, got {type(requirement)}")
+        if requirement.source_type not in data_catalog:
+            raise InvalidDiagnosticException(
+                diagnostic, f"No data catalog for source type {requirement.source_type}"
+            )
+
+        dataset_groups[requirement.source_type] = extract_covered_datasets(
+            data_catalog[requirement.source_type], requirement
+        )
+
+    # Calculate the product across each of the source types
+    for items in itertools.product(*dataset_groups.values()):
+        yield DiagnosticExecution(
+            provider=provider,
+            diagnostic=diagnostic,
+            datasets=ExecutionDatasetCollection(
+                {
+                    key: DatasetCollection(
+                        datasets=dataset_groups[key][dataset_group_key],
+                        slug_column=get_dataset_adapter(key.value).slug_column,
+                        selector=dataset_group_key,
+                    )
+                    for key, dataset_group_key in zip(dataset_groups.keys(), items)
+                }
+            ),
+        )
+
+
+@define
+class ExecutionSolver:
+    """
+    A solver to determine which executions need to be calculated.
+    """
+
+    provider_registry: ProviderRegistry
+    data_catalog: dict[SourceDatasetType, pd.DataFrame]
+
+    @staticmethod
+    def build_from_db(config: Config, db: Database) -> "ExecutionSolver":
+        """
+        Initialise the solver using information from the database
+
+        Parameters
+        ----------
+        config
+            Configuration to use
+        db
+            Database instance
+
+        Returns
+        -------
+        :
+            A new ExecutionSolver instance
+        """
+        return ExecutionSolver(
+            provider_registry=ProviderRegistry.build_from_config(config, db),
+            data_catalog={
+                SourceDatasetType.CMIP6: CMIP6DatasetAdapter().load_catalog(db),
+                SourceDatasetType.obs4MIPs: Obs4MIPsDatasetAdapter().load_catalog(db),
+                SourceDatasetType.PMPClimatology: PMPClimatologyDatasetAdapter().load_catalog(db),
+            },
+        )
+
+    def solve(self) -> typing.Generator[DiagnosticExecution, None, None]:
+        """
+        Solve which executions need to be calculated for a dataset
+
+        The solving scheme is iterative:
+        for each iteration we find all diagnostics that can be solved and calculate them.
+        After each iteration we check whether there are any more diagnostics to solve.
+
+        Yields
+        ------
+        DiagnosticExecution
+            A class containing the information related to the execution of a diagnostic
+        """
+        for provider in self.provider_registry.providers:
+            for diagnostic in provider.diagnostics():
+                yield from solve_executions(self.data_catalog, diagnostic, provider)
+
+
+def solve_required_executions(
+    db: Database,
+    dry_run: bool = False,
+    solver: ExecutionSolver | None = None,
+    config: Config | None = None,
+    timeout: int = 60,
+) -> None:
+    """
+    Solve for executions that require recalculation
+
+    This may trigger a number of additional calculations depending on what data has been ingested
+    since the last solve.
+
+    Raises
+    ------
+    TimeoutError
+        If the execution isn't completed within the specified timeout
+    """
+    if config is None:
+        config = Config.default()
+    if solver is None:
+        solver = ExecutionSolver.build_from_db(config, db)
+
+    logger.info("Solving for diagnostics that require recalculation...")
+
+    executor = config.executor.build(config, db)
+
+    for potential_execution in solver.solve():
+        # The diagnostic output is first written to the scratch directory
+        definition = potential_execution.build_execution_definition(output_root=config.paths.scratch)
+
+        logger.debug(
+            f"Identified candidate execution {definition.key} "
+            f"for {potential_execution.diagnostic.full_slug()}"
+        )
+
+        if dry_run:
+            continue
+
+        # Use a transaction to make sure that the models
+        # are created correctly before potentially executing out of process
+        with db.session.begin(nested=True):
+            diagnostic = (
+                db.session.query(DiagnosticModel)
+                .join(DiagnosticModel.provider)
+                .filter(
+                    ProviderModel.slug == potential_execution.provider.slug,
+                    ProviderModel.version == potential_execution.provider.version,
+                    DiagnosticModel.slug == potential_execution.diagnostic.slug,
+                )
+                .one()
+            )
+            execution_group, created = db.get_or_create(
+                ExecutionGroup,
+                key=definition.key,
+                diagnostic_id=diagnostic.id,
+                defaults={
+                    "selectors": potential_execution.selectors,
+                    "dirty": True,
+                },
+            )
+
+            if created:
+                logger.info(
+                    f"Created new execution group: "
+                    f"{definition.key!r} for {potential_execution.diagnostic.full_slug()}"
+                )
+            db.session.flush()
+
+            if execution_group.should_run(definition.datasets.hash):
+                logger.info(
+                    f"Running new execution for execution group: "
+                    f"{definition.key!r} for {potential_execution.diagnostic.full_slug()}"
+                )
+                execution = Execution(
+                    execution_group=execution_group,
+                    dataset_hash=definition.datasets.hash,
+                    output_fragment=str(definition.output_fragment()),
+                )
+                db.session.add(execution)
+                db.session.flush()
+
+                # Add links to the datasets used in the execution
+                execution.register_datasets(db, definition.datasets)
+
+                executor.run(
+                    provider=potential_execution.provider,
+                    diagnostic=potential_execution.diagnostic,
+                    definition=definition,
+                    execution=execution,
+                )
+    if timeout > 0:
+        executor.join(timeout=timeout)
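
To make the solver's grouping step concrete, here is a minimal, self-contained sketch of the pattern used by `extract_covered_datasets` and `DiagnosticExecution.dataset_key`. It uses a toy pandas catalog with made-up columns rather than the real dataset adapters, so treat it as an illustration of the group-by and key construction, not the package API:

```python
import pandas as pd

# Toy catalog standing in for a CMIP6 data catalog; the columns are
# illustrative, not the actual climate_ref schema.
catalog = pd.DataFrame(
    {
        "source_id": ["ACCESS-ESM1-5", "ACCESS-ESM1-5", "CanESM5"],
        "variable_id": ["tas", "pr", "tas"],
        "path": ["a.nc", "b.nc", "c.nc"],
    }
)

group_by = ("source_id", "variable_id")

# One candidate execution per group, keyed by a hashable
# SelectorKey-style tuple of (dimension, value) pairs
groups = {
    tuple(zip(group_by, name)): group
    for name, group in catalog.groupby(list(group_by))
}

for selector, group in groups.items():
    # Stable key construction, analogous to DiagnosticExecution.dataset_key:
    # sort by dimension name, then join the values
    selector_sorted = sorted(selector, key=lambda item: item[0])
    key = "cmip6_" + "_".join(value for _, value in selector_sorted)
    print(key, "->", len(group), "dataset(s)")
```

Because the selector is sorted by dimension name before the key is built, the key stays stable even if the group-by column order changes between solves.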
climate_ref/testing.py ADDED
@@ -0,0 +1,109 @@
+"""
+Testing utilities
+"""
+
+import shutil
+from pathlib import Path
+
+from loguru import logger
+
+from climate_ref.config import Config
+from climate_ref.database import Database
+from climate_ref.executor import handle_execution_result
+from climate_ref.models import Execution
+from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_all_files
+from climate_ref_core.diagnostics import Diagnostic, ExecutionResult
+from climate_ref_core.pycmec.metric import CMECMetric
+from climate_ref_core.pycmec.output import CMECOutput
+
+
+def _determine_test_directory() -> Path | None:
+    expected = Path(__file__).parents[4] / "tests" / "test-data"
+
+    if not expected.exists():  # pragma: no cover
+        return None
+    return expected
+
+
+TEST_DATA_DIR = _determine_test_directory()
+SAMPLE_DATA_VERSION = "v0.5.0"
+
+
+def fetch_sample_data(force_cleanup: bool = False, symlink: bool = False) -> None:
+    """
+    Fetch the sample data for the given version.
+
+    The sample data is produced in the [Climate-REF/ref-sample-data](https://github.com/Climate-REF/ref-sample-data)
+    repository.
+    This repository contains decimated versions of key datasets used by the diagnostics packages.
+    Decimating these data greatly reduces the data volumes needed to run the test-suite.
+
+    Parameters
+    ----------
+    force_cleanup
+        If True, remove any existing files
+    symlink
+        If True, symlink in the data, otherwise copy the files
+
+        The symlink approach is faster, but will fail when running with a non-local executor
+        because the symlinks can't be followed.
+    """
+
+    if TEST_DATA_DIR is None:  # pragma: no cover
+        logger.warning("Test data directory not found, skipping sample data fetch")
+        return
+
+    sample_data_registry = dataset_registry_manager["sample-data"]
+
+    output_dir = TEST_DATA_DIR / "sample-data"
+    version_file = output_dir / "version.txt"
+    existing_version = None
+
+    if output_dir.exists():  # pragma: no branch
+        if version_file.exists():  # pragma: no branch
+            with open(version_file) as fh:
+                existing_version = fh.read().strip()
+
+        if force_cleanup or existing_version != SAMPLE_DATA_VERSION:  # pragma: no branch
+            logger.warning("Removing existing sample data")
+            shutil.rmtree(output_dir)
+
+    fetch_all_files(sample_data_registry, "sample", output_dir, symlink)
+
+    # Write out the current sample data version to mark the copying as complete
+    with open(output_dir / "version.txt", "w") as fh:
+        fh.write(SAMPLE_DATA_VERSION)
+
+
+def validate_result(diagnostic: Diagnostic, config: Config, result: ExecutionResult) -> None:
+    """
+    Asserts the correctness of the result of a diagnostic execution
+
+    This should only be used by the test suite as it will create a fake
+    database entry for the diagnostic execution result.
+    """
+    # Add a fake item in the Database
+    database = Database.from_config(config)
+    execution = Execution(
+        execution_group_id=1,
+        dataset_hash=result.definition.datasets.hash,
+        output_fragment=str(result.definition.output_fragment()),
+    )
+    database.session.add(execution)
+    database.session.flush()
+
+    assert result.successful
+
+    # Validate bundles
+    metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
+    assert diagnostic.facets == tuple(metric_bundle.DIMENSIONS.root["json_structure"]), (
+        metric_bundle.DIMENSIONS.root["json_structure"]
+    )
+    CMECOutput.load_from_json(result.to_output_path(result.output_bundle_filename))
+
+    # Create a fake log file if one doesn't exist
+    if not result.to_output_path("out.log").exists():
+        result.to_output_path("out.log").touch()
+
+    # This checks if the bundles are valid
+    handle_execution_result(config, database=database, execution=execution, result=result)
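
`fetch_sample_data` above uses a version-stamp file to decide whether a cached copy is stale. Here is a minimal sketch of that pattern, with a hypothetical `fetch` callable standing in for `fetch_all_files`:

```python
import shutil
from pathlib import Path
from typing import Callable

EXPECTED_VERSION = "v0.5.0"


def refresh_cache(output_dir: Path, fetch: Callable[[Path], None]) -> None:
    """Re-fetch cached files when the recorded version stamp doesn't match."""
    version_file = output_dir / "version.txt"
    existing = version_file.read_text().strip() if version_file.exists() else None

    if output_dir.exists() and existing != EXPECTED_VERSION:
        # Stale or unknown version: start from a clean slate
        shutil.rmtree(output_dir)

    output_dir.mkdir(parents=True, exist_ok=True)
    fetch(output_dir)  # hypothetical callable that populates output_dir

    # Write the stamp last, so an interrupted fetch is treated as stale next time
    version_file.write_text(EXPECTED_VERSION)
```

Writing the stamp only after the fetch completes is what makes the pattern safe: a crash mid-download leaves no matching version file, so the next run cleans up and retries.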
climate_ref-0.5.0.dist-info/METADATA ADDED
@@ -0,0 +1,97 @@
+Metadata-Version: 2.4
+Name: climate-ref
+Version: 0.5.0
+Summary: Application which runs the CMIP Rapid Evaluation Framework
+Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
+License: Apache-2.0
+License-File: LICENCE
+License-File: NOTICE
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering
+Requires-Python: >=3.11
+Requires-Dist: alembic>=1.13.3
+Requires-Dist: attrs>=24.2.0
+Requires-Dist: cattrs>=24.1.2
+Requires-Dist: climate-ref-core
+Requires-Dist: ecgtools>=2024.7.31
+Requires-Dist: environs>=11.0.0
+Requires-Dist: loguru>=0.7.2
+Requires-Dist: platformdirs>=4.3.6
+Requires-Dist: setuptools>=75.8.0
+Requires-Dist: sqlalchemy>=2.0.36
+Requires-Dist: tomlkit>=0.13.2
+Requires-Dist: typer>=0.12.5
+Provides-Extra: celery
+Requires-Dist: climate-ref-celery>=0.5.0; extra == 'celery'
+Provides-Extra: metrics
+Requires-Dist: climate-ref-esmvaltool>=0.5.0; extra == 'metrics'
+Requires-Dist: climate-ref-ilamb>=0.5.0; extra == 'metrics'
+Requires-Dist: climate-ref-pmp>=0.5.0; extra == 'metrics'
+Provides-Extra: postgres
+Requires-Dist: psycopg2-binary>=2.9.2; extra == 'postgres'
+Description-Content-Type: text/markdown
+
+# Climate REF (Rapid Evaluation Framework)
+
+[![PyPI version](https://badge.fury.io/py/climate-ref.svg)](https://badge.fury.io/py/climate-ref)
+[![Documentation Status](https://readthedocs.org/projects/climate-ref/badge/?version=latest)](https://climate-ref.readthedocs.io/en/latest/?badge=latest)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Python Version](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/downloads/)
+
+**Status**: This project is in active development. We expect to be ready for beta releases in Q2 2025.
+
+The Rapid Evaluation Framework (REF) is a set of Python packages that provide the ability to manage the execution of calculations against climate datasets.
+The aim is to be able to evaluate climate data against a set of reference data in near-real time as datasets are published,
+and to update any produced data and figures as new datasets become available.
+This is somewhat analogous to a CI/CD pipeline for climate data.
+
+## Installation
+
+```bash
+pip install climate-ref
+```
+
+If you want to use the diagnostic providers for the Assessment Fast Track, you can install them with:
+
+```bash
+pip install climate-ref[metrics]
+```
+
+## Quick Start
+
+```bash
+# Ingest some observation datasets
+ref datasets fetch-data --registry obs4ref --output-dir datasets/obs4ref
+ref datasets fetch-data --registry sample-data --output-dir datasets/sample-data
+
+# Run metrics against your climate data
+ref solve
+```
+
+## Features
+
+- Real-time evaluation of climate datasets
+- Support for multiple metrics providers (PMP, ILAMB, ESMValTool)
+- Distributed processing capabilities
+- Extensible architecture for adding new metrics providers
+- Command-line interface for easy interaction
+
+## Documentation
+
+For detailed documentation, please visit [https://climate-ref.readthedocs.io/](https://climate-ref.readthedocs.io/)
+
+## Contributing
+
+REF is a community project, and we welcome contributions from anyone. Please see our [Contributing Guide](https://climate-ref.readthedocs.io/en/latest/contributing/) for more information.
+
+## License
+
+This project is licensed under the Apache License 2.0 - see the [LICENCE](LICENCE) file for details.
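
For reference, the solve shown in the Quick Start can also be driven programmatically. The sketch below uses only names visible in this diff (`Config.default`, `Database.from_config`, `solve_required_executions`) and assumes the REF configuration and database have already been initialised:

```python
# A sketch of driving a solve from Python instead of the `ref solve` CLI
from climate_ref.config import Config
from climate_ref.database import Database
from climate_ref.solver import solve_required_executions

config = Config.default()
db = Database.from_config(config)

# dry_run=True logs the candidate executions without creating database
# rows or running any diagnostics
solve_required_executions(db, dry_run=True, config=config)
```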
climate_ref-0.5.0.dist-info/RECORD ADDED
@@ -0,0 +1,44 @@
+climate_ref/__init__.py,sha256=OJl5EnjLyEoCQpa0zQ8edV8EcU2YxBJ0xjermIlm9Bw,820
+climate_ref/_config_helpers.py,sha256=-atI5FX7SukhLE_jz_rL-EHQ7s0YYqKu3dSFYWxSyMU,6632
+climate_ref/alembic.ini,sha256=WRvbwSIFuZ7hWNMnR2-yHPJAwYUnwhvRYBzkJhtpGdg,3535
+climate_ref/config.py,sha256=QW1HOLajC2Gc5xZnrGQ8YLAver3BAlVBrfl1kVd_IyM,15072
+climate_ref/constants.py,sha256=rFk3XxNuP0lkzTvUneIhNLq16uadXsT45aUFIlSiBmg,111
+climate_ref/database.py,sha256=RCffNHbJcxxukN6PIOXBTW9TALE2rRsxU0chJHxyNK4,7257
+climate_ref/provider_registry.py,sha256=P35H4VFcsxJ8-Ly4Czwi_gelDU_nF5RiqSC-iBcx_Ws,4116
+climate_ref/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+climate_ref/solver.py,sha256=Hpb_3g-hYWopuBYyqaEtOgfuLOHfZpWbwP1rco802uE,14310
+climate_ref/testing.py,sha256=rUdtU8a4p2OV35IO87veW0mEB9C4Bqwe7BBQU6BILhs,3889
+climate_ref/cli/__init__.py,sha256=RSzaFkgwn1qWRBVbWlDKtNrIxLvQ2T7IWDBIqptpjjU,3658
+climate_ref/cli/_utils.py,sha256=6bIb8zEVvzXyKpv8MG58T-T2L2jH-G8WNrOOGpz3uCw,1918
+climate_ref/cli/config.py,sha256=8I6CLdqKgTu6yaASy-qG0T839Fc0lDZtLSZ6YCc4wOY,520
+climate_ref/cli/datasets.py,sha256=SLl88S3BxKPRbHy9OJ1ymhMnxDmkadFO_BZTIeLR0k8,7367
+climate_ref/cli/executions.py,sha256=6cnMxPK4ZydscUw_Mk9RMISNjP2Yr98BgsOsei8fQ7w,6799
+climate_ref/cli/providers.py,sha256=XVZQsZoEqiCBvgSmp6cNf0mCTxeq_Ycoc6DwVxWDYKg,2521
+climate_ref/cli/solve.py,sha256=D6rAivfm_yl1TTey_zc4KKwZ96LGGF8N1wHjcJ_0XpE,703
+climate_ref/dataset_registry/obs4ref_reference.txt,sha256=1NodZd3tOS9Z1Afpb_Oq4obp4OGAFDSAwEl3FssPwAQ,251
+climate_ref/dataset_registry/sample_data.txt,sha256=aKl9tfO4vknZ5X2mmdyxKOv-nyWhkPDXnpDoNLLTzE8,11892
+climate_ref/datasets/__init__.py,sha256=PV3u5ZmhyfcHbKqySgwVA8m4-naZgxzydLXSBqdTGLM,1171
+climate_ref/datasets/base.py,sha256=XplxCu4bfFmNHp2q8tHT26lB0RHv5swK0QqfUmuMO-c,8154
+climate_ref/datasets/cmip6.py,sha256=Dhq97ow8OmTshDCaL7vfrwn83Nfi6SY8uxJHeY4ZDHk,6083
+climate_ref/datasets/obs4mips.py,sha256=PQhI3QKlYA9L2d_MpnlcVrUn4irMG7Iu-II8l1ncjUs,7032
+climate_ref/datasets/pmp_climatology.py,sha256=goHDc_3B2Wdiy_hmpERNvWDdDYZACPOyFDt3Du6nGc0,534
+climate_ref/datasets/utils.py,sha256=iLJO7h4G3DWsRe9hIC4qkIyi5_zIW1ZMw-FDASLujtM,359
+climate_ref/executor/__init__.py,sha256=vUkE5Izfietvc57gA8LTdaD5IErKVebcE6qO7M7sCRo,9286
+climate_ref/executor/local.py,sha256=3icom02FCHiN0tIpsXR9tvn8-cQrUyoY-LlbHapbTx4,2920
+climate_ref/migrations/README,sha256=xM5osYbyEbEFA2eh5kwary_oh-5VFWtDubA-vgWwvlE,935
+climate_ref/migrations/env.py,sha256=b8om-LvFhVo_2BgaRsR8LcPQ-YcevjWikaWE6uhScAs,4213
+climate_ref/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
+climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py,sha256=349kbd58NdFHqqUAPDX1kR9RUOcxT2zXh9v9yg9-Je8,15533
+climate_ref/models/__init__.py,sha256=dFyqfhTffZz4m06xD4SyvRL9kIBRyVYetHwOxFGy4VM,713
+climate_ref/models/base.py,sha256=cMjNpGNU7pxRi9A5KXEmQIA9pvQDwqGCwo539yndpGY,1199
+climate_ref/models/dataset.py,sha256=Rpwrx0HqOJBHs4Sb4n6B0In__Uo0PqXSZKvZR-juGCg,7491
+climate_ref/models/diagnostic.py,sha256=YB6xzbEXdpz2j-Ddf19RV8mAiWBrkmtRmiAEUV3tl4Q,1762
+climate_ref/models/execution.py,sha256=lRCpaKLSR7rZbuoL94GW76tm9wLMsSDoIOA7bIa6xgY,9848
+climate_ref/models/metric_value.py,sha256=Sfjem65ih9g6WDpjGsiOphSjhYQ1ZAYUPZmsKyb_psU,6452
+climate_ref/models/provider.py,sha256=RAE2qAAxwObu-72CdK4kt5ACMmKYEn07WJm7DU9hF28,990
+climate_ref-0.5.0.dist-info/METADATA,sha256=pTjBsQveKvV8KGgvD9fy8LoxQ5CS-1ruBoFM4ReeLvY,4028
+climate_ref-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+climate_ref-0.5.0.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
+climate_ref-0.5.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+climate_ref-0.5.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
+climate_ref-0.5.0.dist-info/RECORD,,
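
The hashes in RECORD are URL-safe base64-encoded SHA-256 digests with the padding stripped. A short sketch for verifying one entry against an installed file (the example digest is the empty-file hash recorded for `py.typed` above):

```python
import base64
import hashlib
from pathlib import Path


def record_digest(path: Path) -> str:
    """Compute a file's digest in the format used by wheel RECORD files."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    # RECORD uses URL-safe base64 without the trailing '=' padding
    return base64.urlsafe_b64encode(digest).decode().rstrip("=")


# e.g. for the zero-byte py.typed file listed above:
# record_digest(Path("climate_ref/py.typed"))
# == "47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"
```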
climate_ref-0.5.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.27.0
+Root-Is-Purelib: true
+Tag: py3-none-any
climate_ref-0.5.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+[console_scripts]
+ref = climate_ref.cli:app