climate-ref-core 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ import pathlib
2
+ from typing import Any
3
+
4
+ from attrs import field, frozen, validators
5
+ from cattrs import Converter, transform_error
6
+ from loguru import logger
7
+ from ruamel.yaml import YAML
8
+
9
+ from climate_ref_core.exceptions import ResultValidationError
10
+ from climate_ref_core.pycmec.metric import CMECMetric
11
+
12
# Module-level YAML parser; used by `CV.load_from_file` to read CV files.
yaml = YAML()


RESERVED_DIMENSION_NAMES = {
    "id",
    "value",
    "created_at",
    "updated_at",
    "attributes",
    "json_structure",
}
"""
Names that must not be used as dimension names.

They are reserved for internal use and already carry other meanings,
so accepting them would conflict with the controlled vocabulary.
"""
21
+
22
+
23
@frozen
class DimensionValue:
    """
    One of the controlled values that a dimension may take
    """

    # Identifier of the value; CV validation compares result values against this
    name: str
    # Longer name for the value
    long_name: str
    # Optional description of the value
    description: str | None
    # Units of the value (the bundled CV file uses e.g. "dimensionless")
    units: str
33
+
34
+
35
@frozen
class Dimension:
    """
    Describes a single dimension of a diagnostic bundle

    The frontend also uses this information for presentation purposes.
    """

    name: str = field(
        validator=validators.not_(validators.in_(RESERVED_DIMENSION_NAMES)),
    )
    """
    Short identifier of the dimension.

    It is the key used in the diagnostic bundle, so it must be unique
    and must not be one of the reserved names.
    """
    long_name: str
    """
    Longer name, used for presentation
    """
    description: str
    """
    Short description of the dimension.

    Used for presentation
    """
    allow_extra_values: bool
    """
    Whether values outside the controlled list are accepted.

    Set this to True for dimensions whose values are not all known at run time,
    for example, the model dimension.
    """
    required: bool
    """
    Whether this dimension must be specified in the executions.
    """
    values: list[DimensionValue] = field(factory=list)
    """
    Controlled values for the dimension.

    When `allow_extra_values` is False,
    these are the only values that are valid for the dimension.
    """
76
+
77
+
78
@frozen
class CV:
    """
    A collection of controlled dimensions and values used to validate executions.

    A diagnostic bundle does not have to specify all dimensions,
    but any dimensions not in the CV are not permitted.
    """

    # TODO: There might be some additional fields in future if this CV is project-specific

    dimensions: tuple[Dimension, ...] = field()

    @dimensions.validator
    def _validate_dimensions(self, _: Any, value: tuple[Dimension, ...]) -> None:
        """
        Validate that all dimension names are unique and do not conflict with reserved names

        Raises
        ------
        ValueError
            If a dimension name is duplicated or is one of the reserved names
        """
        seen: set[str] = set()
        for dim in value:
            if dim.name in seen:
                raise ValueError(f"Duplicate dimension name: {dim.name}")
            if dim.name in RESERVED_DIMENSION_NAMES:
                raise ValueError(f"Reserved dimension name: {dim.name}")
            seen.add(dim.name)

    def get_dimension_by_name(self, name: str) -> Dimension:
        """
        Get a dimension by name

        Parameters
        ----------
        name
            The name of the dimension

        Returns
        -------
        Dimension
            The dimension with the given name

        Raises
        ------
        KeyError
            If the dimension is not found
        """
        for dim in self.dimensions:
            if dim.name == name:
                return dim
        raise KeyError(f"Dimension {name} not found")

    def validate_metrics(self, metric_bundle: CMECMetric) -> None:
        """
        Validate a diagnostic bundle against a CV

        The CV describes the accepted dimensions and values within a bundle.
        Every dimension used by a result must be present in the CV, and,
        unless the dimension allows extra values, the value must be one of
        the controlled values. The `required` flag of a dimension is not
        checked by this method.

        Parameters
        ----------
        metric_bundle
            The diagnostic bundle whose results are checked

        Raises
        ------
        ResultValidationError
            If the validation of the dimensions or values fails
        """
        for result in metric_bundle.iter_results():
            for k, v in result.dimensions.items():
                try:
                    dimension = self.get_dimension_by_name(k)
                except KeyError as exc:
                    # Chain explicitly so the traceback shows the failed lookup as the cause
                    raise ResultValidationError(f"Unknown dimension: {k!r}") from exc
                if dimension.allow_extra_values:
                    continue
                # Scan lazily instead of building a list of names for every result
                if not any(dv.name == v for dv in dimension.values):
                    raise ResultValidationError(f"Unknown value {v!r} for dimension {k!r}")
            if not isinstance(result.value, float):  # pragma: no cover
                # This may not be possible with the current CMECMetric implementation
                raise ResultValidationError(f"Unexpected value: {result.value!r}")

    @staticmethod
    def load_from_file(filename: pathlib.Path | str) -> "CV":
        """
        Load a CV from disk

        Parameters
        ----------
        filename
            Path of the YAML file describing the CV

        Returns
        -------
            A new CV instance

        Raises
        ------
        Exception
            Any error raised while structuring the file contents into a CV.
            The error details are logged before the exception is re-raised.
            Errors raised while reading or parsing the YAML file itself
            propagate without being logged here.
        """
        # Keys that do not correspond to a field are rejected rather than ignored
        convertor = Converter(forbid_extra_keys=True)
        contents = yaml.load(pathlib.Path(filename))

        try:
            return convertor.structure(contents, CV)
        except Exception as exc:
            logger.error(f"Error loading CV from {filename}")
            for error in transform_error(exc):
                logger.error(error)
            raise
@@ -0,0 +1,44 @@
1
+ dimensions:
2
+ - name: model
3
+ long_name: model_id
4
+ description: ""
5
+ allow_extra_values: true
6
+ required: false
7
+ - name: source_id
8
+ long_name: source_id
9
+ description: ""
10
+ allow_extra_values: true
11
+ required: false
12
+ - name: variant_label
13
+ long_name: Variant Label
14
+ description: ""
15
+ allow_extra_values: true
16
+ required: false
17
+ - name: metric
18
+ long_name: ""
19
+ description: ""
20
+ required: true
21
+ allow_extra_values: true
22
+ - name: region
23
+ long_name: ""
24
+ description: ""
25
+ required: true
26
+ allow_extra_values: true
27
+ - name: statistic
28
+ long_name: ""
29
+ description: ""
30
+ required: true
31
+ allow_extra_values: true
32
+ values:
33
+ - name: rmse
34
+ long_name: Root Mean Square Error
35
+ description: ""
36
+ units: dimensionless
37
+ - name: overall score
38
+ long_name: Overall Score
39
+ description: ""
40
+ units: dimensionless
41
+ - name: bias
42
+ long_name: Bias
43
+ description: ""
44
+ units: dimensionless
@@ -0,0 +1,437 @@
1
+ """
2
+ CMEC diagnostic bundle class
3
+
4
+ Following the CMEC diagnostic bundle standards at
5
+ https://github.com/Earth-System-Diagnostics-Standards/EMDS
6
+
7
+ To validate that a dictionary is compatible with the CMEC
8
+ diagnostic bundle standards, please use:
9
+ - class instantiation: cmec = CMECMetric(**result_dict)
10
+ - class model_validate method: cmec = CMECMetric.model_validate(result_dict)
11
+ Both ways will create the CMECMetric instance (cmec)
12
+ """
13
+
14
+ import json
15
+ import pathlib
16
+ from collections import Counter
17
+ from collections.abc import Generator
18
+ from enum import Enum
19
+ from typing import Any, cast
20
+
21
+ from pydantic import (
22
+ BaseModel,
23
+ ConfigDict,
24
+ Field,
25
+ FilePath,
26
+ RootModel,
27
+ ValidationInfo,
28
+ field_validator,
29
+ model_validator,
30
+ validate_call,
31
+ )
32
+ from pydantic.json_schema import GenerateJsonSchema, JsonSchemaMode, JsonSchemaValue
33
+ from pydantic_core import CoreSchema
34
+ from typing_extensions import Self
35
+
36
+
37
class MetricCV(Enum):
    """
    Keywords of the CMEC diagnostic bundle controlled vocabulary

    The upper-case values name the top-level sections of a bundle,
    the lower-case values are keywords used inside those sections.
    """

    DIMENSIONS = "DIMENSIONS"
    # Key inside DIMENSIONS that lists the dimension names in nesting order
    JSON_STRUCTURE = "json_structure"
    RESULTS = "RESULTS"
    PROVENANCE = "PROVENANCE"
    DISCLAIMER = "DISCLAIMER"
    NOTES = "NOTES"
    # Key inside RESULTS that is skipped when checking or walking dimension values
    ATTRIBUTES = "attributes"
49
+
50
+
51
class MetricDimensions(RootModel[Any]):
    """
    CMEC diagnostic bundle DIMENSIONS object

    This describes the order of the dimensions and their possible values.
    The order of the dimensions matter as that determines how the executions are nested.
    """

    root: dict[str, Any] = Field(
        default={
            MetricCV.JSON_STRUCTURE.value: ["model", "metric"],
            "model": {},
            "metric": {},
        }
    )

    @model_validator(mode="after")
    def _validate_dimensions(self) -> Self:
        """
        Validate a MetricDimensions object

        The `json_structure` entry must be present and must list exactly
        the other keys of the object.

        Raises
        ------
        ValueError
            If `json_structure` is missing or does not match the other keys
        """
        if MetricCV.JSON_STRUCTURE.value not in self.root.keys():
            raise ValueError(f"{MetricCV.JSON_STRUCTURE.value} is required keyword")

        # Compare as multisets so that the order is ignored but duplicates are detected
        declared = Counter(self.root[MetricCV.JSON_STRUCTURE.value])
        present = Counter([k for k in self.root.keys() if k != MetricCV.JSON_STRUCTURE.value])
        if declared != present:
            raise ValueError("json_structure items are not in the keys of the DIMENSIONS")

        return self

    @validate_call
    def add_dimension(self, dim_name: str, dim_content: dict[str, Any]) -> None:
        """
        Add or update one dimension to MetricDimensions object

        The object is modified in place. If the dimension already exists,
        its content is updated with `dim_content`; otherwise the dimension
        is appended to `json_structure` and stored.

        Parameters
        ----------
        dim_name
            Name of new dimension to be added
        dim_content
            Dictionary contains contents associated with dim_name
        """
        if dim_name in self.root[MetricCV.JSON_STRUCTURE.value]:
            self.root[dim_name].update(dim_content)

        else:
            self.root[MetricCV.JSON_STRUCTURE.value].append(dim_name)
            self.root[dim_name] = dim_content

    @classmethod
    def merge_dimension(cls, metric_dim1: Any, metric_dim2: Any) -> Self:
        """
        Merge two MetricDimensions objects

        Both inputs must have the same `json_structure`. For each dimension,
        the merged content holds the entries of the first input followed by
        any entries of the second input whose key is not already present.
        The inputs are not modified.

        Parameters
        ----------
        metric_dim1
            First CMEC MetricDimensions object to be merged
        metric_dim2
            Second CMEC MetricDimensions object to be merged

        Returns
        -------
        :
            Return a merged CMEC MetricDimensions object

        Raises
        ------
        ValueError
            If the two inputs do not share the same `json_structure`
        """
        mdim1 = cls.model_validate(metric_dim1)
        mdim2 = cls.model_validate(metric_dim2)

        structure = mdim1.root[MetricCV.JSON_STRUCTURE.value]
        if not (structure == mdim2.root[MetricCV.JSON_STRUCTURE.value]):
            raise ValueError("JSON_STRUCTURES are not same")

        # Copy the structure list and each per-dimension dict: the validated
        # models can share these containers with the caller's inputs, so
        # writing into them directly would silently modify the first input.
        merged_dim: dict[str, Any] = {MetricCV.JSON_STRUCTURE.value: list(structure)}

        for dim in structure:
            values = dict(mdim1.root[dim])
            for key, content in mdim2.root[dim].items():
                # Entries of the first input take precedence
                values.setdefault(key, content)
            merged_dim[dim] = values

        return cls(merged_dim)

    def __getitem__(self, item: str) -> Any:
        """Return the entry stored under `item` in the underlying dictionary"""
        return self.root[item]
143
+
144
+
145
class MetricResults(RootModel[Any]):
    """
    CMEC diagnostic bundle RESULTS object

    Validation requires a `MetricDimensions` instance to be passed as the
    validation context, as the nesting of the results is checked against it.
    """

    model_config = ConfigDict(strict=True)
    root: dict[str, dict[Any, Any]]

    @classmethod
    def _check_nested_dict_keys(cls, nested: dict[Any, Any], metdims: dict[Any, Any], level: int = 0) -> None:
        """
        Recursively check one level of the nested results against the dimensions

        Parameters
        ----------
        nested
            The results dictionary at the current nesting level
        metdims
            The dimensions dictionary, including the `json_structure` entry
        level
            Index into `json_structure` of the dimension being checked

        Raises
        ------
        ValueError
            If the keys do not agree with the dimension,
            or a non-dictionary is found above the deepest level
        """
        attributes_key = MetricCV.ATTRIBUTES.value
        structure = metdims[MetricCV.JSON_STRUCTURE.value]
        dim_name = structure[level]

        # The attributes entry is not a dimension value, so it is ignored here
        dict_keys = set(nested.keys())
        dict_keys.discard(attributes_key)

        if not (level < len(structure) - 1):
            # Deepest dimension: keys may be a subset of the declared values
            expected_keys = set(metdims[dim_name].keys())
            if not dict_keys.issubset(expected_keys):
                raise ValueError(f"Unknown dimension values: {dict_keys - expected_keys}")

            # Constructing StrNumDict validates the leaf values
            leaves = {k: v for k, v in nested.items() if k != attributes_key}
            StrNumDict(leaves)
            return

        # Intermediate dimension: keys must match the declared values exactly
        if Counter(list(metdims[dim_name].keys())) != Counter(dict_keys):
            raise ValueError(
                f"Dimension key mismatch in '{dim_name}' and level {level}\n"
                f"Actual keys: {sorted(dict_keys)}\n"
                f"Expected keys: {sorted(metdims[dim_name].keys())}\n"
                "Full actual structure:\n" + json.dumps(list(dict_keys), indent=2) + "\n\n"
                "Full expected structure:\n" + json.dumps(metdims[dim_name], indent=2)
            )

        for key, value in nested.items():
            if key == attributes_key:
                continue
            if not isinstance(value, dict):
                raise ValueError(
                    f"{dim_name} is not the last/deepest dimension, \n"
                    f"a dictionary is expected for the key {key}"
                )
            cls._check_nested_dict_keys(value, metdims, level + 1)

    @field_validator("root", mode="after")
    @classmethod
    def _validate_results(cls, rlt: Any, info: ValidationInfo) -> Any:
        """
        Validate a MetricResults object

        Raises
        ------
        ValueError
            If no MetricDimensions was supplied as validation context,
            or the results do not agree with the dimensions
        """
        if not isinstance(info.context, MetricDimensions):
            raise ValueError(
                "\nTo validate MetricResults object, MetricDimensions is needed,\n"
                "please use model_validate(Results, context=MetricDimensions) to instantiate\n"
            )

        cls._check_nested_dict_keys(rlt, info.context.root, level=0)
        return rlt
208
+
209
+
210
class StrNumDict(RootModel[Any]):
    """
    Dictionary with string keys and numeric values

    A value may also be a list of strings and numbers.
    Strict validation is enabled for this model.
    """

    model_config = ConfigDict(strict=True)
    root: dict[str, float | int | list[str | float | int]]
215
+
216
+
217
class MetricValue(BaseModel):
    """
    Flattened view of a single diagnostic value

    Holds the value together with the dimensions that identify it
    """

    # Maps each dimension name to the key selected for this value
    dimensions: dict[str, str]
    # The diagnostic value itself
    value: float | str
    # Content of the "attributes" entry found next to the value, if any
    attributes: dict[str, str | float | int] | None = None
227
+
228
+
229
class CMECMetric(BaseModel):
    """
    CMEC diagnostic bundle object

    Contains the diagnostics calculated during a diagnostic execution, in a standardised format.
    """

    model_config = ConfigDict(strict=True, extra="allow")

    DIMENSIONS: MetricDimensions
    """
    Describes the dimensionality of the diagnostics produced.

    This includes the order of dimensions in `RESULTS`
    """
    RESULTS: dict[str, Any]
    """
    The diagnostic values.

    Results is a nested dictionary of values.
    The order of the nested dictionaries corresponds to the order of the dimensions.
    """
    PROVENANCE: dict[str, Any] | None = None
    """
    Provenance information

    Not currently used in the REF.
    The provenance information from the output bundle is used instead
    """
    DISCLAIMER: dict[str, Any] | None = None
    """
    Disclaimer information

    Not currently used in the REF.
    """
    NOTES: dict[str, Any] | None = None
    """
    Additional notes.

    Not currently used in the REF.
    """

    @model_validator(mode="after")
    def _validate_metrics(self) -> Self:
        """
        Validate a CMECMetric object

        The results are validated against the dimensions of this bundle.
        """
        MetricResults.model_validate(self.RESULTS, context=self.DIMENSIONS)
        return self

    @validate_call
    def dump_to_json(self, json_file: str | pathlib.Path = "./cmec.json") -> None:
        """
        Save the CMECMetric object to a file in JSON format

        Parameters
        ----------
        json_file
            JSON file path in the CMEC format to be saved

        Returns
        -------
        :
            None
        """
        # JSON is exchanged as UTF-8; do not depend on the platform's locale encoding
        pathlib.Path(json_file).write_text(self.model_dump_json(indent=2), encoding="utf-8")

    @classmethod
    @validate_call
    def load_from_json(cls, json_file: FilePath) -> Self:
        """
        Create CMECMetric object from a compatible json file

        Parameters
        ----------
        json_file
            JSON file path to be read

        Returns
        -------
        :
            CMEC Diagnostic object if the file is CMEC-compatible
        """
        # Read as UTF-8 to match `dump_to_json`, independent of the locale encoding
        json_str = pathlib.Path(json_file).read_text(encoding="utf-8")
        return cls.model_validate_json(json_str)

    @classmethod
    def _merge(cls, dict_a: dict[Any, Any], dict_b: dict[Any, Any]) -> dict[Any, Any]:
        """
        Merge the values from dict_b into dict_a inplace

        Nested dictionaries present in both inputs are merged recursively;
        for any other key the value from dict_b wins.

        Returns
        -------
        :
            dict_a, after it has been updated
        """
        for key, value_b in dict_b.items():
            if key in dict_a and isinstance(dict_a[key], dict) and isinstance(value_b, dict):
                cls._merge(dict_a[key], value_b)
            else:
                dict_a[key] = value_b
        return dict_a

    @classmethod
    def _fill(cls, mdict: dict[Any, Any], mdims: dict[Any, Any], level: int = 0) -> None:
        """
        Add an empty dictionary for every declared dimension value missing from mdict

        Only levels above the deepest dimension are filled; mdict is modified in place.
        """
        dim_name = mdims[MetricCV.JSON_STRUCTURE.value][level]
        has_deeper_level = level < len(mdims[MetricCV.JSON_STRUCTURE.value]) - 1

        for key in mdims[dim_name].keys():
            if key not in mdict and has_deeper_level:
                mdict[key] = {}

        for key, value in mdict.items():
            if isinstance(value, dict) and has_deeper_level and key != MetricCV.ATTRIBUTES.value:
                cls._fill(value, mdims, level + 1)

    @classmethod
    @validate_call
    def merge(cls, metric_obj1: Any, metric_obj2: Any) -> Self:
        """
        Merge two CMECMetric objects with the same json_structure

        Where both objects provide a value for the same entry,
        the value from the second object is used.
        Neither input is modified.

        Parameters
        ----------
        metric_obj1
            First CMECMetric object to be merged
        metric_obj2
            Second CMECMetric object to be merged

        Returns
        -------
        :
            Merged CMEC Diagnostic object
        """
        import copy  # only needed by this method

        mobj1 = cls.model_validate(metric_obj1)
        mobj2 = cls.model_validate(metric_obj2)

        # Work on deep copies throughout: `_merge` and `_fill` update nested
        # dictionaries in place, and the validated models may share those
        # dictionaries with the caller's inputs.
        merged_obj_dims = MetricDimensions.merge_dimension(
            copy.deepcopy(mobj1.DIMENSIONS), copy.deepcopy(mobj2.DIMENSIONS)
        )

        merged_obj_rlts = cls._merge(copy.deepcopy(mobj1.RESULTS), copy.deepcopy(mobj2.RESULTS))

        cls._fill(merged_obj_rlts, merged_obj_dims.root)

        MetricResults.model_validate(merged_obj_rlts, context=merged_obj_dims)

        return cls(DIMENSIONS=merged_obj_dims, RESULTS=merged_obj_rlts)

    @staticmethod
    def create_template() -> dict[str, Any]:
        """
        Return an empty dictionary in CMEC diagnostic bundle format
        """
        default_dimensions = MetricDimensions()

        return {
            MetricCV.DIMENSIONS.value: default_dimensions.root,
            MetricCV.RESULTS.value: {},
            MetricCV.PROVENANCE.value: None,
            MetricCV.DISCLAIMER.value: None,
            MetricCV.NOTES.value: None,
        }

    def iter_results(self) -> Generator[MetricValue]:
        """
        Iterate over the executions in the diagnostic bundle

        This will yield a `MetricValue` for each result, with the dimensions and the value

        Returns
        -------
            A generator of diagnostic values

        """
        dimensions = cast(list[str], self.DIMENSIONS[MetricCV.JSON_STRUCTURE.value])

        yield from _walk_results(dimensions, self.RESULTS, {})
408
+
409
+
410
def _walk_results(
    dimensions: list[str], results: dict[str, Any], metadata: dict[str, str]
) -> Generator[MetricValue]:
    """
    Walk the nested results and yield one value per scalar entry

    Parameters
    ----------
    dimensions
        Names of the dimensions that remain to be descended, outermost first
    results
        The results dictionary at the current nesting level
    metadata
        Dimension values selected so far; updated in place at this level

    Returns
    -------
        A generator of the values found at and below this level
    """
    assert len(dimensions), "Not enough dimensions"  # noqa: S101
    current = dimensions[0]
    attributes_key = MetricCV.ATTRIBUTES.value

    for name, entry in results.items():
        # The attributes entry is not a dimension value
        if name == attributes_key:
            continue

        metadata[current] = name
        if not isinstance(entry, (str, float, int)):
            # Descend with a copy so deeper levels cannot alter this level's metadata
            yield from _walk_results(dimensions[1:], entry, dict(metadata))
            continue

        yield MetricValue(dimensions=metadata, value=entry, attributes=results.get(attributes_key))
425
+
426
+
427
class CMECGenerateJsonSchema(GenerateJsonSchema):
    """
    JSON schema generator customised for CMEC
    """

    def generate(self: Self, schema: CoreSchema, mode: JsonSchemaMode = "validation") -> JsonSchemaValue:
        """
        Generate customized json schema

        The schema produced by the parent class is given the title "CMEC"
        and a "$schema" entry set to this generator's schema dialect.
        """
        generated = super().generate(schema, mode=mode)
        generated.update({"title": "CMEC", "$schema": self.schema_dialect})
        return generated