climate-ref 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/__init__.py +30 -0
- climate_ref/_config_helpers.py +214 -0
- climate_ref/alembic.ini +114 -0
- climate_ref/cli/__init__.py +138 -0
- climate_ref/cli/_utils.py +68 -0
- climate_ref/cli/config.py +28 -0
- climate_ref/cli/datasets.py +205 -0
- climate_ref/cli/executions.py +201 -0
- climate_ref/cli/providers.py +84 -0
- climate_ref/cli/solve.py +23 -0
- climate_ref/config.py +475 -0
- climate_ref/constants.py +8 -0
- climate_ref/database.py +223 -0
- climate_ref/dataset_registry/obs4ref_reference.txt +2 -0
- climate_ref/dataset_registry/sample_data.txt +60 -0
- climate_ref/datasets/__init__.py +40 -0
- climate_ref/datasets/base.py +214 -0
- climate_ref/datasets/cmip6.py +202 -0
- climate_ref/datasets/obs4mips.py +224 -0
- climate_ref/datasets/pmp_climatology.py +15 -0
- climate_ref/datasets/utils.py +16 -0
- climate_ref/executor/__init__.py +274 -0
- climate_ref/executor/local.py +89 -0
- climate_ref/migrations/README +22 -0
- climate_ref/migrations/env.py +139 -0
- climate_ref/migrations/script.py.mako +26 -0
- climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +292 -0
- climate_ref/models/__init__.py +33 -0
- climate_ref/models/base.py +42 -0
- climate_ref/models/dataset.py +206 -0
- climate_ref/models/diagnostic.py +61 -0
- climate_ref/models/execution.py +306 -0
- climate_ref/models/metric_value.py +195 -0
- climate_ref/models/provider.py +39 -0
- climate_ref/provider_registry.py +146 -0
- climate_ref/py.typed +0 -0
- climate_ref/solver.py +395 -0
- climate_ref/testing.py +109 -0
- climate_ref-0.5.0.dist-info/METADATA +97 -0
- climate_ref-0.5.0.dist-info/RECORD +44 -0
- climate_ref-0.5.0.dist-info/WHEEL +4 -0
- climate_ref-0.5.0.dist-info/entry_points.txt +2 -0
- climate_ref-0.5.0.dist-info/licenses/LICENCE +201 -0
- climate_ref-0.5.0.dist-info/licenses/NOTICE +3 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
from sqlalchemy import Column, ForeignKey, Text
|
|
5
|
+
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
6
|
+
|
|
7
|
+
from climate_ref.models.base import Base, CreatedUpdatedMixin
|
|
8
|
+
from climate_ref_core.pycmec.controlled_vocabulary import CV, Dimension
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from climate_ref.models.execution import Execution
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MetricValue(CreatedUpdatedMixin, Base):
    """
    A single scalar value produced by a diagnostic execution

    Every value is tagged with a set of dimensions which are used to look the value up.
    Dimensions capture things such as which statistic was computed,
    the region it applies to or the model the statistic was derived from.

    The dimension columns are not declared statically on this table because the REF can track
    an arbitrary set of dimensions, determined by the controlled vocabulary in use.
    `register_cv_dimensions` must be called before this class is used.
    """

    __tablename__ = "metric_value"

    id: Mapped[int] = mapped_column(primary_key=True)
    execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"))

    value: Mapped[float] = mapped_column()
    attributes: Mapped[dict[str, Any]] = mapped_column()

    execution: Mapped["Execution"] = relationship(back_populates="values")

    # Names of the dimensions registered so far.
    # This list is shared at the class level and is only ever mutated in place.
    _cv_dimensions: ClassVar[list[str]] = []

    @property
    def dimensions(self) -> dict[str, str]:
        """
        Collect the registered dimensions that have a value set on this row

        The result is a freshly built dictionary,
        so modifying it has no effect on the object itself.

        Returns
        -------
            Mapping of dimension name to value, omitting dimensions whose value is None
        """
        return {
            key: value
            for key in self._cv_dimensions
            if (value := getattr(self, key)) is not None
        }

    def __repr__(self) -> str:
        return (
            f"<MetricValue id={self.id} execution={self.execution} "
            f"value={self.value} dimensions={self.dimensions}>"
        )

    @staticmethod
    def build_dimension_column(dimension: Dimension) -> Column[str]:
        """
        Build the ORM column that backs a CV dimension

        Alembic revisions do not pick these columns up automatically.
        Changing how they are built will likely require a hand-written database migration
        for the columns that already exist.

        Parameters
        ----------
        dimension
            Definition of the dimension the column is for.

            Only its "name" field is used at present.

        Returns
        -------
            A sqlalchemy Column instance

            The column is not created in the database by this call,
            it only makes the column accessible through the ORM.
        """
        column: Column[str] = Column(
            dimension.name,
            Text,
            index=True,
            nullable=True,
            # Marks the column as one to leave out of alembic autogeneration
            info={"skip_autogenerate": True},
        )
        return column

    @classmethod
    def register_cv_dimensions(cls, cv: CV) -> None:
        """
        Attach the dimensions of a controlled vocabulary to this class

        Registration happens at run-time so that custom CVs can be supported.
        Columns that exist in the database but are absent from the CV are left untouched.

        Parameters
        ----------
        cv
            Controlled vocabulary used by the application.

            Its `dimensions` define which dimension columns become available.
        """
        for dimension in cv.dimensions:
            target_attribute = dimension.name

            if target_attribute not in cls._cv_dimensions:
                cls._cv_dimensions.append(target_attribute)
                logger.debug(f"Registered MetricValue dimension: {target_attribute}")

                if not hasattr(cls, target_attribute):
                    setattr(cls, target_attribute, cls.build_dimension_column(dimension))
                else:
                    # Expected only in the test suite, removing dimensions at runtime isn't supported
                    logger.warning("Column attribute already exists on MetricValue. Ignoring")

                # TODO: Check if the underlying table already contains columns

    @classmethod
    def _reset_cv_dimensions(cls) -> None:
        """
        Forget every dimension that has been registered

        Intended for the test suite only, do not call this at runtime.

        Column definitions added earlier stay on the class because columns cannot be removed
        from a declarative class.
        As a result `hasattr(MetricValue, "old_attribute")` remains True after a reset,
        but those values are no longer included in any executions.
        """
        logger.warning(f"Removing MetricValue dimensions: {cls._cv_dimensions}")

        # Empty the shared list in place rather than rebinding it
        cls._cv_dimensions.clear()

        assert not cls._cv_dimensions  # noqa

    @classmethod
    def build(
        cls,
        *,
        execution_id: int,
        value: float,
        dimensions: dict[str, str],
        attributes: dict[str, Any] | None,
    ) -> "MetricValue":
        """
        Create a MetricValue from a value and the dimensions that describe it

        The supplied dimensions are validated against the registered ones.
        The interface mirrors [climate_ref_core.pycmec.metric.MetricValue][].

        Parameters
        ----------
        execution_id
            Execution that produced the diagnostic value
        value
            The diagnostic value itself
        dimensions
            Dimensions describing the diagnostic execution result
        attributes
            Optional extra attributes that describe the value
            but are not part of the controlled vocabulary.

        Raises
        ------
        KeyError
            If a dimension is supplied that has not been registered.

            Dimensions must exist in the controlled vocabulary.

        Returns
        -------
            Newly created MetricValue
        """
        registered = cls._cv_dimensions
        for k in dimensions:
            if k in registered:
                continue
            raise KeyError(f"Unknown dimension column '{k}'")

        # Each dimension is passed through as a keyword argument named after the dimension
        return MetricValue(
            execution_id=execution_id,
            value=value,
            attributes=attributes,
            **dimensions,
        )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
4
|
+
|
|
5
|
+
from climate_ref.models.base import Base, CreatedUpdatedMixin
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from climate_ref.models.diagnostic import Diagnostic
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Provider(CreatedUpdatedMixin, Base):
    """
    A provider that supplies diagnostic calculations
    """

    __tablename__ = "provider"

    id: Mapped[int] = mapped_column(primary_key=True)
    slug: Mapped[str] = mapped_column(unique=True)
    """
    Identifier for the provider, unique across all providers.
    """

    name: Mapped[str] = mapped_column()
    """
    Long-form name of the provider
    """

    version: Mapped[str] = mapped_column(nullable=False)
    """
    Provider version.

    Expected to correspond to the version of the provider's package.
    """

    diagnostics: Mapped[list["Diagnostic"]] = relationship(back_populates="provider")

    def __repr__(self) -> str:
        slug, version = self.slug, self.version
        return f"<Provider slug={slug} version={version}>"
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Registry of the currently active providers in the REF
|
|
3
|
+
|
|
4
|
+
This module provides a registry for the currently active providers.
|
|
5
|
+
Often, we can't directly import a provider and its diagnostics
|
|
6
|
+
as each provider maintains its own virtual environment to avoid dependency conflicts.
|
|
7
|
+
|
|
8
|
+
For remote providers, a proxy is used to access the metadata associated with the diagnostics.
|
|
9
|
+
These diagnostics cannot be run locally, but can be executed using other executors.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from attrs import field, frozen
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from climate_ref.config import Config
|
|
16
|
+
from climate_ref.database import Database
|
|
17
|
+
from climate_ref_core.diagnostics import Diagnostic
|
|
18
|
+
from climate_ref_core.providers import DiagnosticProvider, import_provider
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _register_provider(db: Database, provider: DiagnosticProvider) -> None:
    """
    Make sure a provider and each of its diagnostics has a row in the database

    This is a stop-gap until there is a proper flow for registering providers

    Parameters
    ----------
    db
        Database instance used to look up and create the rows
    provider
        DiagnosticProvider instance
    """
    from climate_ref.models import Diagnostic, Provider

    # NOTE(review): the lookup keys on both slug and version, while the Provider model declares
    # slug as unique. Presumably a new version of an already-registered slug would attempt to
    # insert a second row with the same slug. Confirm against Database.get_or_create.
    provider_model, provider_created = db.get_or_create(
        Provider,
        slug=provider.slug,
        version=provider.version,
        defaults={"name": provider.name},
    )
    if provider_created:
        logger.info(f"Created provider {provider.slug}")
        # Presumably flushed so provider_model.id is populated before it is used below; TODO confirm
        db.session.flush()

    for diagnostic in provider.diagnostics():
        diagnostic_model, diagnostic_created = db.get_or_create(
            Diagnostic,
            slug=diagnostic.slug,
            provider_id=provider_model.id,
            defaults={"name": diagnostic.name},
        )
        if not diagnostic_created:
            # Already registered, nothing to flush or report
            continue

        db.session.flush()
        logger.info(f"Created diagnostic {diagnostic_model.full_slug()}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@frozen
class ProviderRegistry:
    """
    Collection of the providers that are currently active

    A provider and its diagnostics cannot always be imported directly,
    in which case the diagnostics have to be proxied.
    """

    providers: list[DiagnosticProvider] = field(factory=list)

    def get(self, slug: str) -> DiagnosticProvider:
        """
        Look up a provider by its slug

        Parameters
        ----------
        slug
            Slug of the provider of interest

        Raises
        ------
        KeyError
            No registered provider has a matching slug

        Returns
        -------
            The first provider whose slug matches
        """
        match = next((p for p in self.providers if p.slug == slug), None)
        if match is None:
            raise KeyError(f"No provider with slug matching: {slug}")

        return match

    def get_metric(self, provider_slug: str, diagnostic_slug: str) -> "Diagnostic":
        """
        Look up a diagnostic by provider slug and diagnostic slug

        Convenience wrapper that finds the provider and then asks it for the diagnostic

        Parameters
        ----------
        provider_slug
            Slug of the provider of interest
        diagnostic_slug
            Slug of the diagnostic of interest

        Raises
        ------
        KeyError
            If the provider/diagnostic with the given slugs is not found.

        Returns
        -------
            The requested diagnostic.
        """
        provider = self.get(provider_slug)
        return provider.get(diagnostic_slug)

    @staticmethod
    def build_from_config(config: Config, db: Database) -> "ProviderRegistry":
        """
        Create a ProviderRegistry from the providers listed in the configuration

        Each configured provider is imported and configured,
        then all of them are registered with the database.

        Parameters
        ----------
        config
            Configuration object
        db
            Database instance

        Returns
        -------
        :
            A new ProviderRegistry instance
        """
        # Import and configure every provider before touching the database
        providers = []
        for entry in config.diagnostic_providers:
            loaded = import_provider(entry.provider)
            loaded.configure(config)
            providers.append(loaded)

        # All registrations share a single nested transaction on the session
        with db.session.begin_nested():
            for loaded in providers:
                _register_provider(db, loaded)

        return ProviderRegistry(providers=providers)
|
climate_ref/py.typed
ADDED
|
File without changes
|