climate-ref 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +49 -12
- climate_ref/cli/executions.py +333 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +67 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +10 -19
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/hpc.py +149 -53
- climate_ref/executor/local.py +1 -2
- climate_ref/executor/result_handling.py +17 -7
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -20
- climate_ref/models/dataset.py +2 -0
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +219 -7
- climate_ref/models/metric_value.py +25 -110
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/solver.py +17 -6
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/METADATA +1 -1
- climate_ref-0.8.0.dist-info/RECORD +58 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/WHEEL +1 -1
- climate_ref-0.6.6.dist-info/RECORD +0 -55
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/models/execution.py
CHANGED
@@ -1,19 +1,22 @@
 import enum
 import pathlib
-from
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar

 from loguru import logger
-from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func
+from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func, or_
 from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
 from sqlalchemy.orm.query import RowReturningQuery

-from climate_ref.models import
-from climate_ref.models.
+from climate_ref.models.base import Base
+from climate_ref.models.dataset import Dataset
+from climate_ref.models.diagnostic import Diagnostic
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin
+from climate_ref.models.provider import Provider
 from climate_ref_core.datasets import ExecutionDatasetCollection

 if TYPE_CHECKING:
     from climate_ref.database import Database
-    from climate_ref.models.diagnostic import Diagnostic
     from climate_ref.models.metric_value import MetricValue

@@ -217,16 +220,21 @@ class ResultOutputType(enum.Enum):
     HTML = "html"


-class ExecutionOutput(CreatedUpdatedMixin, Base):
+class ExecutionOutput(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     An output generated as part of an execution.

     This output may be a plot, data file or HTML file.
-    These outputs are defined in the CMEC output bundle
+    These outputs are defined in the CMEC output bundle.
+
+    Outputs can be tagged with dimensions from the controlled vocabulary
+    to enable filtering and organization.
     """

     __tablename__ = "execution_output"

+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)

     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)
@@ -264,6 +272,65 @@ class ExecutionOutput(CreatedUpdatedMixin, Base):

     execution: Mapped["Execution"] = relationship(back_populates="outputs")

+    @classmethod
+    def build(  # noqa: PLR0913
+        cls,
+        *,
+        execution_id: int,
+        output_type: ResultOutputType,
+        dimensions: dict[str, str],
+        filename: str | None = None,
+        short_name: str | None = None,
+        long_name: str | None = None,
+        description: str | None = None,
+    ) -> "ExecutionOutput":
+        """
+        Build an ExecutionOutput from dimensions and metadata
+
+        This is a helper method that validates the dimensions supplied.
+
+        Parameters
+        ----------
+        execution_id
+            Execution that created the output
+        output_type
+            Type of the output
+        dimensions
+            Dimensions that describe the output
+        filename
+            Path to the output
+        short_name
+            Short key of the output
+        long_name
+            Human readable name
+        description
+            Long description
+
+        Raises
+        ------
+        KeyError
+            If an unknown dimension was supplied.
+
+            Dimensions must exist in the controlled vocabulary.
+
+        Returns
+        -------
+        Newly created ExecutionOutput
+        """
+        for k in dimensions:
+            if k not in cls._cv_dimensions:
+                raise KeyError(f"Unknown dimension column '{k}'")
+
+        return ExecutionOutput(
+            execution_id=execution_id,
+            output_type=output_type,
+            filename=filename,
+            short_name=short_name,
+            long_name=long_name,
+            description=description,
+            **dimensions,
+        )
+

 def get_execution_group_and_latest(
     session: Session,
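Illustrative usage of the new ExecutionOutput.build helper (a sketch, not part of this diff): it assumes the controlled vocabulary has already been registered via ExecutionOutput.register_cv_dimensions, so the dimension keys used below exist as columns; the keys "region" and "metric" and the surrounding values are placeholders.

    # Sketch only: `execution.id`, the dimension keys and the filenames are hypothetical.
    output = ExecutionOutput.build(
        execution_id=execution.id,
        output_type=ResultOutputType.HTML,
        dimensions={"region": "global", "metric": "rmse"},  # must be registered CV dimensions
        filename="index.html",
        short_name="summary",
        long_name="Diagnostic summary page",
        description="Landing page for the execution's outputs",
    )
    # Any key missing from ExecutionOutput._cv_dimensions raises
    # KeyError("Unknown dimension column '<key>'").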
@@ -305,3 +372,148 @@ def get_execution_group_and_latest(
     )

     return query  # type: ignore
+
+
+def _filter_executions_by_facets(
+    results: Sequence[tuple[ExecutionGroup, Execution | None]],
+    facet_filters: dict[str, str],
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Filter execution groups and their latest executions based on facet key-value pairs.
+
+    This is a relatively expensive operation as it requires iterating over all results.
+    This should be replaced once we have normalised the selectors into a separate table.
+
+    Parameters
+    ----------
+    results
+        List of tuples containing ExecutionGroup and its latest Execution (or None)
+    facet_filters
+        Dictionary of facet key-value pairs to filter by (AND logic, exact match)
+
+    Returns
+    -------
+    Filtered list of tuples containing ExecutionGroup and its latest Execution (or None)
+
+    Notes
+    -----
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    - Key=value filters search across all dataset types
+    - dataset_type.key=value filters only search within the specified dataset type
+    - Multiple values within same filter type use OR logic
+    - All specified facets must match for an execution group to be included (AND logic)
+    """
+    filtered_results = []
+    for eg, execution in results:
+        all_filters_match = True
+        for facet_key, facet_value in facet_filters.items():
+            filter_match = False
+            if "." in facet_key:
+                # Handle dataset_type.key=value format
+                dataset_type, key = facet_key.split(".", 1)
+                if dataset_type in eg.selectors:
+                    if [key, facet_value] in eg.selectors[dataset_type]:
+                        filter_match = True
+                        break
+            else:
+                # Handle key=value format (search across all dataset types)
+                for ds_type_selectors in eg.selectors.values():
+                    if [facet_key, facet_value] in ds_type_selectors:
+                        filter_match = True
+                        break
+
+            if not filter_match:
+                all_filters_match = False
+                break
+        if all_filters_match:
+            filtered_results.append((eg, execution))
+    return filtered_results
+
+
+def get_execution_group_and_latest_filtered(  # noqa: PLR0913
+    session: Session,
+    diagnostic_filters: list[str] | None = None,
+    provider_filters: list[str] | None = None,
+    facet_filters: dict[str, str] | None = None,
+    dirty: bool | None = None,
+    successful: bool | None = None,
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Query execution groups with filtering capabilities.
+
+    Parameters
+    ----------
+    session
+        Database session
+    diagnostic_filters
+        List of diagnostic slug substrings (OR logic, case-insensitive)
+    provider_filters
+        List of provider slug substrings (OR logic, case-insensitive)
+    facet_filters
+        Dictionary of facet key-value pairs (AND logic, exact match)
+    dirty
+        If True, only return dirty execution groups.
+        If False, only return clean execution groups.
+        If None, do not filter by dirty status.
+    successful
+        If True, only return execution groups whose latest execution was successful.
+        If False, only return execution groups whose latest execution was unsuccessful or has no executions.
+        If None, do not filter by execution success.
+
+    Returns
+    -------
+    Query returning tuples of (ExecutionGroup, latest Execution or None)
+
+    Notes
+    -----
+    - Diagnostic and provider filters use substring matching (case-insensitive)
+    - Multiple values within same filter type use OR logic
+    - Different filter types use AND logic
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    """
+    # Start with base query
+    query = get_execution_group_and_latest(session)
+
+    if diagnostic_filters or provider_filters:
+        # Join through to the Diagnostic table
+        query = query.join(Diagnostic, ExecutionGroup.diagnostic_id == Diagnostic.id)
+
+        # Apply diagnostic filter (OR logic for multiple values)
+        if diagnostic_filters:
+            diagnostic_conditions = [
+                Diagnostic.slug.ilike(f"%{filter_value.lower()}%") for filter_value in diagnostic_filters
+            ]
+            query = query.filter(or_(*diagnostic_conditions))
+
+        # Apply provider filter (OR logic for multiple values)
+        if provider_filters:
+            # Need to join through Diagnostic to Provider
+            query = query.join(Provider, Diagnostic.provider_id == Provider.id)
+
+            provider_conditions = [
+                Provider.slug.ilike(f"%{filter_value.lower()}%") for filter_value in provider_filters
+            ]
+            query = query.filter(or_(*provider_conditions))
+
+    if successful is not None:
+        if successful:
+            query = query.filter(Execution.successful.is_(True))
+        else:
+            query = query.filter(or_(Execution.successful.is_(False), Execution.successful.is_(None)))
+
+    if dirty is not None:
+        if dirty:
+            query = query.filter(ExecutionGroup.dirty.is_(True))
+        else:
+            query = query.filter(or_(ExecutionGroup.dirty.is_(False), ExecutionGroup.dirty.is_(None)))
+
+    if facet_filters:
+        # Load all results into memory for Python-based filtering
+        # TODO: Update once we have normalised the selector
+        results = [r._tuple() for r in query.all()]
+        return _filter_executions_by_facets(results, facet_filters)
+    else:
+        return [r._tuple() for r in query.all()]
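A usage sketch (not from the package) of the new get_execution_group_and_latest_filtered query. The filter values shown ("enso", "pmp", "cmip6.source_id", "ACCESS-ESM1-5") are illustrative assumptions; per the docstring above, diagnostic and provider filters are case-insensitive substring matches combined with OR, facet filters are exact matches combined with AND, and a facet key may optionally be prefixed with a dataset type.

    # `session` is assumed to be an open SQLAlchemy Session from the REF database.
    rows = get_execution_group_and_latest_filtered(
        session,
        diagnostic_filters=["enso"],                          # OR within the list, substring match
        provider_filters=["pmp"],                             # OR within the list, substring match
        facet_filters={"cmip6.source_id": "ACCESS-ESM1-5"},   # exact match, dataset_type.key form
        successful=True,                                      # latest execution must have succeeded
        dirty=False,                                          # only clean execution groups
    )
    for execution_group, latest_execution in rows:
        print(execution_group.selectors, latest_execution)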
climate_ref/models/metric_value.py
CHANGED
@@ -2,12 +2,11 @@ import enum
 from collections.abc import Mapping
 from typing import TYPE_CHECKING, Any, ClassVar

-from
-from sqlalchemy import Column, ForeignKey, Text, event
+from sqlalchemy import ForeignKey, event
 from sqlalchemy.orm import Mapped, mapped_column, relationship

-from climate_ref.models.base import Base
-from
+from climate_ref.models.base import Base
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin

 if TYPE_CHECKING:
     from climate_ref.models.execution import Execution
@@ -27,11 +26,14 @@ class MetricValueType(enum.Enum):
     SERIES = "series"


-class MetricValue(CreatedUpdatedMixin, Base):
+class MetricValue(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     Represents a single metric value

-    This
+    This is a base class for different types of metric values (e.g. scalar, series) which
+    are stored in a single table using single table inheritance.
+
+    This value has a number of dimensions which are used to query the diagnostic values.
     These dimensions describe aspects such as the type of statistic being measured,
     the region of interest or the model from which the statistic is being measured.

@@ -46,6 +48,8 @@ class MetricValue(CreatedUpdatedMixin, Base):
         "polymorphic_on": "type",
     }

+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)
     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)

@@ -60,111 +64,9 @@ class MetricValue(CreatedUpdatedMixin, Base):
     This value is used to determine how the metric value should be interpreted.
     """

-    _cv_dimensions: ClassVar[list[str]] = []
-
-    @property
-    def dimensions(self) -> dict[str, str]:
-        """
-        Get the non-null dimensions and their values
-
-        Any changes to the resulting dictionary are not reflected in the object
-
-        Returns
-        -------
-        Collection of dimensions names and their values
-        """
-        dims = {}
-        for key in self._cv_dimensions:
-            value = getattr(self, key)
-            if value is not None:
-                dims[key] = value
-        return dims
-
     def __repr__(self) -> str:
         return f"<MetricValue id={self.id} execution={self.execution} dimensions={self.dimensions}>"

-    @staticmethod
-    def build_dimension_column(dimension: Dimension) -> Column[str]:
-        """
-        Create a column representing a CV dimension
-
-        These columns are not automatically generated with alembic revisions.
-        Any changes to this functionality likely require a manual database migration
-        of the existing columns.
-
-        Parameters
-        ----------
-        dimension
-            Dimension definition to create the column for.
-
-            Currently only the "name" field is being used.
-
-        Returns
-        -------
-        An instance of a sqlalchemy Column
-
-        This doesn't create the column in the database,
-        but enables the ORM to access it.
-
-        """
-        return Column(
-            dimension.name,
-            Text,
-            index=True,
-            nullable=True,
-            info={"skip_autogenerate": True},
-        )
-
-    @classmethod
-    def register_cv_dimensions(cls, cv: CV) -> None:
-        """
-        Register the dimensions supplied in the controlled vocabulary
-
-        This has to be done at run-time to support custom CVs.
-        Any extra columns already in the database, but not in the CV are ignored.
-
-        Parameters
-        ----------
-        cv
-            Controlled vocabulary being used by the application.
-            This controlled vocabulary contains the definitions of the dimensions that can be used.
-        """
-        for dimension in cv.dimensions:
-            target_attribute = dimension.name
-            if target_attribute in cls._cv_dimensions:
-                continue
-
-            cls._cv_dimensions.append(target_attribute)
-            logger.debug(f"Registered MetricValue dimension: {target_attribute}")
-
-            if hasattr(cls, target_attribute):
-                # This should only occur in test suite as we don't support removing dimensions at runtime
-                logger.warning("Column attribute already exists on MetricValue. Ignoring")
-            else:
-                setattr(cls, target_attribute, cls.build_dimension_column(dimension))
-
-        # TODO: Check if the underlying table already contains columns
-
-    @classmethod
-    def _reset_cv_dimensions(cls) -> None:
-        """
-        Remove any previously registered dimensions
-
-        Used by the test suite and should not be called at runtime.
-
-        This doesn't remove any previous column definitions due to a limitation that columns in
-        declarative classes cannot be removed.
-        This means that `hasattr(MetricValue, "old_attribute")`
-        will still return True after resetting, but the values will not be included in any executions.
-        """
-        logger.warning(f"Removing MetricValue dimensions: {cls._cv_dimensions}")
-
-        keys = list(cls._cv_dimensions)
-        for key in keys:
-            cls._cv_dimensions.remove(key)
-
-        assert not len(cls._cv_dimensions)
-

 class ScalarMetricValue(MetricValue):
     """
@@ -180,6 +82,12 @@ class ScalarMetricValue(MetricValue):
     # This is a scalar value
     value: Mapped[float] = mapped_column(nullable=True)

+    def __repr__(self) -> str:
+        return (
+            f"<ScalarMetricValue "
+            f"id={self.id} execution={self.execution} dimensions={self.dimensions} value={self.value}>"
+        )
+
     @classmethod
     def build(
         cls,
@@ -232,9 +140,10 @@ class ScalarMetricValue(MetricValue):

 class SeriesMetricValue(MetricValue):
     """
-    A
+    A 1d series with associated dimensions

-    This is a subclass of MetricValue that is used to represent a
+    This is a subclass of MetricValue that is used to represent a series.
+    This can be used to represent time series, vertical profiles or other 1d data.
     """

     __mapper_args__: ClassVar[Mapping[str, Any]] = {  # type: ignore
@@ -246,6 +155,12 @@ class SeriesMetricValue(MetricValue):
     index: Mapped[list[float | int | str]] = mapped_column(nullable=True)
     index_name: Mapped[str] = mapped_column(nullable=True)

+    def __repr__(self) -> str:
+        return (
+            f"<SeriesMetricValue id={self.id} execution={self.execution} "
+            f"dimensions={self.dimensions} index_name={self.index_name}>"
+        )
+
     @classmethod
     def build(  # noqa: PLR0913
         cls,
climate_ref/models/mixins.py
ADDED
@@ -0,0 +1,144 @@
+"""Model mixins for shared functionality"""
+
+import datetime
+from typing import TYPE_CHECKING, ClassVar
+
+from loguru import logger
+from sqlalchemy import Column, Text, func
+from sqlalchemy.orm import Mapped, mapped_column
+
+if TYPE_CHECKING:
+    from climate_ref_core.pycmec.controlled_vocabulary import CV, Dimension
+
+
+class CreatedUpdatedMixin:
+    """
+    Mixin for models that have a created_at and updated_at fields
+    """
+
+    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
+    """
+    When the dataset was added to the database
+    """
+
+    updated_at: Mapped[datetime.datetime] = mapped_column(
+        server_default=func.now(), onupdate=func.now(), index=True
+    )
+    """
+    When the dataset was updated.
+    """
+
+
+class DimensionMixin:
+    """
+    Mixin that adds controlled vocabulary dimension support to a model
+
+    This mixin provides methods and properties for managing CV dimensions
+    on database models. Dimensions are stored as individual indexed columns
+    that are registered at runtime based on the controlled vocabulary.
+
+    Classes using this mixin must:
+    - Define _cv_dimensions as a ClassVar[list[str]] = []
+    - Have a __tablename__ attribute (SQLAlchemy requirement)
+    """
+
+    _cv_dimensions: ClassVar[list[str]]
+
+    @property
+    def dimensions(self) -> dict[str, str]:
+        """
+        Get the non-null dimensions and their values
+
+        Any changes to the resulting dictionary are not reflected in the object
+
+        Returns
+        -------
+        Collection of dimension names and their values
+        """
+        dims = {}
+        for key in self._cv_dimensions:
+            value = getattr(self, key)
+            if value is not None:
+                dims[key] = value
+        return dims
+
+    @staticmethod
+    def build_dimension_column(dimension: "Dimension") -> Column[str]:
+        """
+        Create a column representing a CV dimension
+
+        These columns are not automatically generated with alembic revisions.
+        Any changes to this functionality likely require a manual database migration
+        of the existing columns.
+
+        Parameters
+        ----------
+        dimension
+            Dimension definition to create the column for.
+            Currently only the "name" field is being used.
+
+        Returns
+        -------
+        An instance of a sqlalchemy Column
+
+        This doesn't create the column in the database,
+        but enables the ORM to access it.
+        """
+        return Column(
+            dimension.name,
+            Text,
+            index=True,
+            nullable=True,
+            info={"skip_autogenerate": True},
+        )
+
+    @classmethod
+    def register_cv_dimensions(cls, cv: "CV") -> None:
+        """
+        Register the dimensions supplied in the controlled vocabulary
+
+        This has to be done at run-time to support custom CVs.
+        Any extra columns already in the database, but not in the CV are ignored.
+
+        Parameters
+        ----------
+        cv
+            Controlled vocabulary being used by the application.
+            This controlled vocabulary contains the definitions of the dimensions that can be used.
+        """
+        model_name = cls.__name__
+
+        for dimension in cv.dimensions:
+            target_attribute = dimension.name
+            if target_attribute in cls._cv_dimensions:
+                continue
+
+            cls._cv_dimensions.append(target_attribute)
+            logger.debug(f"Registered {model_name} dimension: {target_attribute}")
+
+            if hasattr(cls, target_attribute):
+                # This should only occur in test suite as we don't support removing dimensions at runtime
+                logger.warning(f"Column attribute already exists on {model_name}. Ignoring")
+            else:
+                setattr(cls, target_attribute, cls.build_dimension_column(dimension))
+
+    @classmethod
+    def _reset_cv_dimensions(cls) -> None:
+        """
+        Remove any previously registered dimensions
+
+        Used by the test suite and should not be called at runtime.
+
+        This doesn't remove any previous column definitions due to a limitation that columns in
+        declarative classes cannot be removed.
+        This means that `hasattr(cls, "old_attribute")`
+        will still return True after resetting, but the values will not be included in any executions.
+        """
+        model_name = cls.__name__
+        logger.warning(f"Removing {model_name} dimensions: {cls._cv_dimensions}")
+
+        keys = list(cls._cv_dimensions)
+        for key in keys:
+            cls._cv_dimensions.remove(key)
+
+        assert not len(cls._cv_dimensions)
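A minimal sketch (not part of the package) of how a model opts in to controlled-vocabulary dimensions via the new DimensionMixin, mirroring the pattern now used by MetricValue and ExecutionOutput; the model name MyValue and the `cv` variable are hypothetical, with `cv` assumed to come from climate_ref_core.pycmec.controlled_vocabulary.

    from typing import ClassVar

    from sqlalchemy.orm import Mapped, mapped_column

    from climate_ref.models.base import Base
    from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin


    class MyValue(DimensionMixin, CreatedUpdatedMixin, Base):  # hypothetical model
        __tablename__ = "my_value"  # required by SQLAlchemy and the mixin

        # Required by DimensionMixin; populated at runtime by register_cv_dimensions()
        _cv_dimensions: ClassVar[list[str]] = []

        id: Mapped[int] = mapped_column(primary_key=True)


    # At application start-up, register the dimensions defined in the controlled vocabulary;
    # each becomes a nullable, indexed Text column, and non-null values are exposed through
    # the `.dimensions` property provided by the mixin.
    MyValue.register_cv_dimensions(cv)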
climate_ref/models/provider.py
CHANGED
@@ -2,7 +2,8 @@ from typing import TYPE_CHECKING

 from sqlalchemy.orm import Mapped, mapped_column, relationship

-from
+from .base import Base
+from .mixins import CreatedUpdatedMixin

 if TYPE_CHECKING:
     from climate_ref.models.diagnostic import Diagnostic
climate_ref/provider_registry.py
CHANGED
@@ -14,6 +14,8 @@ from loguru import logger

 from climate_ref.config import Config
 from climate_ref.database import Database
+from climate_ref.models import Diagnostic as DiagnosticModel
+from climate_ref.models import Provider as ProviderModel
 from climate_ref_core.diagnostics import Diagnostic
 from climate_ref_core.providers import DiagnosticProvider, import_provider

@@ -29,10 +31,8 @@ def _register_provider(db: Database, provider: DiagnosticProvider) -> None:
     provider
         DiagnosticProvider instance
     """
-    from climate_ref.models import Diagnostic, Provider
-
     provider_model, created = db.get_or_create(
-
+        ProviderModel,
         slug=provider.slug,
         defaults={
             "name": provider.name,
@@ -46,7 +46,7 @@ def _register_provider(db: Database, provider: DiagnosticProvider) -> None:

     for diagnostic in provider.diagnostics():
         diagnostic_model, created = db.get_or_create(
-
+            DiagnosticModel,
             slug=diagnostic.slug,
             provider_id=provider_model.id,
             defaults={
climate_ref/slurm.py
CHANGED
@@ -10,14 +10,14 @@ class SlurmChecker:


     def __init__(self, intest: bool = False) -> None:
         if HAS_REAL_SLURM:
-            import pyslurm  # type: ignore
+            import pyslurm  # type: ignore # noqa: PLC0415

             self.slurm_association: dict[int, Any] | None = pyslurm.db.Associations.load()
             self.slurm_partition: dict[str, Any] | None = pyslurm.Partitions.load()
             self.slurm_qos: dict[str, Any] | None = pyslurm.qos().get()
             self.slurm_node: dict[str, Any] | None = pyslurm.Nodes.load()
         elif intest:
-            import pyslurm
+            import pyslurm  # noqa: PLC0415

             self.slurm_association = pyslurm.db.Associations.load()  # dict [num -> Association]
             self.slurm_partition = pyslurm.Partitions.load()  # collection