climate-ref-core 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,7 +100,7 @@ def apply_constraint(
100
100
 
101
101
  def _to_tuple(value: None | str | tuple[str, ...]) -> tuple[str, ...]:
102
102
  """
103
- Clean the value of group_by to a tuple of strings
103
+ Normalize value to a tuple of strings.
104
104
  """
105
105
  if value is None:
106
106
  return ()
@@ -109,6 +109,13 @@ def _to_tuple(value: None | str | tuple[str, ...]) -> tuple[str, ...]:
109
109
  return tuple(value)
110
110
 
111
111
 
112
+ def _to_tuple_dict(value: dict[str, str | tuple[str, ...]]) -> dict[str, tuple[str, ...]]:
113
+ """
114
+ Normalize value to a dict of tuples of strings.
115
+ """
116
+ return {k: _to_tuple(v) for k, v in value.items()}
117
+
118
+
112
119
  @frozen
113
120
  class RequireFacets:
114
121
  """
@@ -153,6 +160,27 @@ class RequireFacets:
153
160
  return group[select]
154
161
 
155
162
 
163
@frozen
class IgnoreFacets:
    """
    A constraint that ignores certain facet values.

    Datasets with these facet values are removed from the selection.
    A dataset is removed only when, for every facet listed in `facets`,
    its value is one of the values given for that facet.
    """

    facets: dict[str, str | tuple[str, ...]] = field(converter=_to_tuple_dict)
    """The facet values to ignore."""

    def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
        """
        Filter out datasets with the ignored facets.

        Parameters
        ----------
        group
            The datasets to filter. It is indexed by the names in `facets`,
            and its ``path`` column is read for the debug message.
        data_catalog
            Not used by this constraint.

        Returns
        -------
        :
            The rows of `group` that do not match the ignored facet values.
        """
        if not self.facets:
            # `all` over zero columns is vacuously True for every row, which
            # would silently drop the whole group; nothing is configured to
            # be ignored, so keep everything.
            return group

        mask = group[list(self.facets)].isin(self.facets).all(axis="columns")
        if mask.any():
            logger.debug(f"Ignoring files {', '.join(group.loc[mask, 'path'])} because of {self}")
        return group[~mask]
182
+
183
+
156
184
  @frozen
157
185
  class AddSupplementaryDataset:
158
186
  """
@@ -3,7 +3,8 @@ from collections.abc import Sequence
3
3
  from pathlib import Path
4
4
  from typing import Any, Self
5
5
 
6
- from pydantic import BaseModel, model_validator
6
+ import numpy as np
7
+ from pydantic import BaseModel, field_validator, model_validator
7
8
 
8
9
  Value = float | int
9
10
 
@@ -64,20 +65,35 @@ class SeriesMetricValue(BaseModel):
64
65
  This is used for presentation purposes and is not used in the controlled vocabulary.
65
66
  """
66
67
 
67
- attributes: dict[str, str | Value] | None = None
68
+ attributes: dict[str, str | Value | None] | None = None
68
69
  """
69
70
  Additional unstructured attributes associated with the metric value
70
71
  """
71
72
 
72
73
  @model_validator(mode="after")
73
- def validate_index_length(self) -> Self:
74
- """Validate that index has the same length as values"""
74
+ def validate_index(self) -> Self:
75
+ """Validate that index has the same length as values and contains no NaNs"""
75
76
  if len(self.index) != len(self.values):
76
77
  raise ValueError(
77
78
  f"Index length ({len(self.index)}) must match values length ({len(self.values)})"
78
79
  )
80
+ for v in self.index:
81
+ if isinstance(v, float) and not np.isfinite(v):
82
+ raise ValueError("NaN or Inf values are not allowed in the index")
79
83
  return self
80
84
 
85
@field_validator("values", mode="before")
@classmethod
def validate_values(cls, value: Any) -> Any:
    """
    Replace ``None`` entries of the raw ``values`` input with NaN.

    Raises
    ------
    ValueError
        If the input is neither a list nor a tuple.
    """
    if isinstance(value, (list, tuple)):
        # Transform None values to NaN; every other entry is passed through.
        return [item if item is not None else float("nan") for item in value]

    raise ValueError("`values` must be a list or tuple.")
96
+
81
97
  @classmethod
82
98
  def dump_to_json(cls, path: Path, series: Sequence["SeriesMetricValue"]) -> None:
83
99
  """
@@ -94,7 +110,13 @@ class SeriesMetricValue(BaseModel):
94
110
  The series values to dump.
95
111
  """
96
112
  with open(path, "w") as f:
97
- json.dump([s.model_dump() for s in series], f, indent=2)
113
+ json.dump(
114
+ [s.model_dump(mode="json") for s in series],
115
+ f,
116
+ indent=2,
117
+ allow_nan=False,
118
+ sort_keys=True,
119
+ )
98
120
 
99
121
  @classmethod
100
122
  def load_from_json(
@@ -102,7 +124,7 @@ class SeriesMetricValue(BaseModel):
102
124
  path: Path,
103
125
  ) -> list["SeriesMetricValue"]:
104
126
  """
105
- Dump a sequence of SeriesMetricValue to a JSON file.
127
+ Load a sequence of SeriesMetricValue from a JSON file.
106
128
 
107
129
  Parameters
108
130
  ----------
@@ -115,7 +137,7 @@ class SeriesMetricValue(BaseModel):
115
137
  if not isinstance(data, list):
116
138
  raise ValueError(f"Expected a list of series values, got {type(data)}")
117
139
 
118
- return [cls.model_validate(s) for s in data]
140
+ return [cls.model_validate(s, strict=True) for s in data]
119
141
 
120
142
 
121
143
  class ScalarMetricValue(BaseModel):
@@ -16,14 +16,18 @@ import os
16
16
  import stat
17
17
  import subprocess
18
18
  from abc import abstractmethod
19
- from collections.abc import Iterable
19
+ from collections.abc import Iterable, Sequence
20
20
  from contextlib import AbstractContextManager
21
21
  from pathlib import Path
22
22
  from typing import TYPE_CHECKING
23
23
 
24
24
  import requests
25
+ import yaml
26
+ from attrs import evolve
25
27
  from loguru import logger
26
28
 
29
+ from climate_ref_core.constraints import IgnoreFacets
30
+ from climate_ref_core.datasets import SourceDatasetType
27
31
  from climate_ref_core.diagnostics import Diagnostic
28
32
  from climate_ref_core.exceptions import InvalidDiagnosticException, InvalidProviderException
29
33
 
@@ -74,6 +78,51 @@ class DiagnosticProvider:
74
78
  config :
75
79
  A configuration.
76
80
  """
81
+ logger.debug(
82
+ f"Configuring provider {self.slug} using ignore_datasets_file {config.ignore_datasets_file}"
83
+ )
84
+ # The format of the configuration file is:
85
+ # provider:
86
+ # diagnostic:
87
+ # source_type:
88
+ # - facet: value
89
+ # - other_facet: [other_value1, other_value2]
90
+ ignore_datasets_all = yaml.safe_load(config.ignore_datasets_file.read_text(encoding="utf-8")) or {}
91
+ ignore_datasets = ignore_datasets_all.get(self.slug, {})
92
+ if unknown_slugs := {slug for slug in ignore_datasets} - {d.slug for d in self.diagnostics()}:
93
+ logger.warning(
94
+ f"Unknown diagnostics found in {config.ignore_datasets_file} "
95
+ f"for provider {self.slug}: {', '.join(sorted(unknown_slugs))}"
96
+ )
97
+
98
+ known_source_types = {s.value for s in iter(SourceDatasetType)}
99
+ for diagnostic in self.diagnostics():
100
+ if diagnostic.slug in ignore_datasets:
101
+ if unknown_source_types := set(ignore_datasets[diagnostic.slug]) - known_source_types:
102
+ logger.warning(
103
+ f"Unknown source types found in {config.ignore_datasets_file} for "
104
+ f"diagnostic '{diagnostic.slug}' by provider {self.slug}: "
105
+ f"{', '.join(sorted(unknown_source_types))}"
106
+ )
107
+ data_requirements = (
108
+ r if isinstance(r, Sequence) else (r,) for r in diagnostic.data_requirements
109
+ )
110
+ diagnostic.data_requirements = tuple(
111
+ tuple(
112
+ evolve(
113
+ data_requirement,
114
+ constraints=tuple(
115
+ IgnoreFacets(facets)
116
+ for facets in ignore_datasets[diagnostic.slug].get(
117
+ data_requirement.source_type.value, []
118
+ )
119
+ )
120
+ + data_requirement.constraints,
121
+ )
122
+ for data_requirement in requirement_collection
123
+ )
124
+ for requirement_collection in data_requirements
125
+ )
77
126
 
78
127
  def diagnostics(self) -> list[Diagnostic]:
79
128
  """
@@ -287,6 +336,7 @@ class CondaDiagnosticProvider(CommandLineDiagnosticProvider):
287
336
 
288
337
  def configure(self, config: Config) -> None:
289
338
  """Configure the provider."""
339
+ super().configure(config)
290
340
  self.prefix = config.paths.software / "conda"
291
341
 
292
342
  def _install_conda(self, update: bool) -> Path:
@@ -153,7 +153,7 @@ class MetricResults(RootModel[Any]):
153
153
  CMEC diagnostic bundle RESULTS object
154
154
  """
155
155
 
156
- model_config = ConfigDict(strict=True)
156
+ model_config = ConfigDict(strict=True, allow_inf_nan=False)
157
157
  root: dict[str, dict[Any, Any]]
158
158
 
159
159
  @classmethod
@@ -284,7 +284,7 @@ class CMECMetric(BaseModel):
284
284
  Contains the diagnostics calculated during a diagnostic execution, in a standardised format.
285
285
  """
286
286
 
287
- model_config = ConfigDict(strict=True, extra="allow")
287
+ model_config = ConfigDict(strict=True, extra="allow", allow_inf_nan=False)
288
288
 
289
289
  DIMENSIONS: MetricDimensions
290
290
  """
@@ -342,7 +342,15 @@ class CMECMetric(BaseModel):
342
342
  :
343
343
  None
344
344
  """
345
- pathlib.Path(json_file).write_text(self.model_dump_json(indent=2))
345
+ pathlib.Path(json_file).write_text(
346
+ json.dumps(
347
+ self.model_dump(mode="json"),
348
+ indent=2,
349
+ allow_nan=False,
350
+ sort_keys=True,
351
+ ),
352
+ encoding="utf-8",
353
+ )
346
354
 
347
355
  @classmethod
348
356
  @validate_call
@@ -360,7 +368,7 @@ class CMECMetric(BaseModel):
360
368
  :
361
369
  CMEC Diagnostic object if the file is CMEC-compatible
362
370
  """
363
- json_str = pathlib.Path(json_file).read_text()
371
+ json_str = pathlib.Path(json_file).read_text(encoding="utf-8")
364
372
  metric_obj = cls.model_validate_json(json_str)
365
373
 
366
374
  return metric_obj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-core
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Core library for the CMIP Rapid Evaluation Framework
5
5
  Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  climate_ref_core/__init__.py,sha256=MtmPThF2F9_2UODEN6rt1x30LDxrHIZ0wyRN_wsHx5I,127
2
- climate_ref_core/constraints.py,sha256=GsACa7mwVC7XAo58qkx6ViKu-iBC8dWwVTK8el1fbZI,16879
2
+ climate_ref_core/constraints.py,sha256=OlSpXwLRENS4-2LKOhDq-uZ7QznymMwKhtL4Lf6uhso,17761
3
3
  climate_ref_core/dataset_registry.py,sha256=sQp2VT9xSVAaWsf0tF4E_VQxuEsvIxU2MZm5uNX1ynw,7172
4
4
  climate_ref_core/datasets.py,sha256=cx1L-CDf8uv7_MYtnhx3xu2oYKVeZTK72nLKw0ZImL0,6472
5
5
  climate_ref_core/diagnostics.py,sha256=sXDyUwJZWr03Wk8an-hbpLAmQKz6M8jXtmJ6e7duWl4,19689
@@ -7,18 +7,18 @@ climate_ref_core/env.py,sha256=Ph2dejVxTELfP3bL0xES086WLGvV5H6KvsOwCkL6m-k,753
7
7
  climate_ref_core/exceptions.py,sha256=7Mkz22P-kbiL-ZevAhlOuQaaeTio6zpwE9YA45OTGvs,1909
8
8
  climate_ref_core/executor.py,sha256=9mKVkm0S7ikub3_FP7CrgdC4Qj9ynOi0r_DIfzCDS-0,5459
9
9
  climate_ref_core/logging.py,sha256=xO0j7OKkuO9JoMtMTnMc62yLO2mJZmhQKAvj-CojblI,7396
10
- climate_ref_core/providers.py,sha256=z5oD7EErIWprH5zv8I5yuU0IavEiSsi6SFkYMtiqE7g,13448
10
+ climate_ref_core/providers.py,sha256=klM-pOUAdBcU0lj1a5r-nKPKzq4QojAE_nb9iOZ-y9w,15942
11
11
  climate_ref_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  climate_ref_core/metric_values/__init__.py,sha256=aHfwRrqzLOmmaBKf1-4q97DnHb8KwmW0Dhwd79ZQiNQ,634
13
- climate_ref_core/metric_values/typing.py,sha256=4Qmr1LJQxQd2qkndwOzKTHq-hihiIBfWDZ_vzRWNbsI,3880
13
+ climate_ref_core/metric_values/typing.py,sha256=n4I3kcEBYPhoWL8aPRCBAHjwNLUrDz40c289iLsOMRM,4677
14
14
  climate_ref_core/pycmec/README.md,sha256=PzkovlPpsXqFopsYzz5GRvCAipNRGO1Wo-0gc17qr2Y,36
15
15
  climate_ref_core/pycmec/__init__.py,sha256=hXvKGEJQWyAp1i-ndr3D4zuYxkRhcR2LfXgFXlhYOk4,28
16
16
  climate_ref_core/pycmec/controlled_vocabulary.py,sha256=kgMEvQ1P6EwXC7sFgdC77IQDo8I0DnnQ2CPXXQaavjE,5944
17
17
  climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=gx5QyW88pZQVUfiYXmsJtJO6AJg6NbIZgdU4vDIa3fE,4390
18
- climate_ref_core/pycmec/metric.py,sha256=zymXoutnjbdcxvG_fMJugFLLcBrfSPG0XoV-2tA0ujA,18499
18
+ climate_ref_core/pycmec/metric.py,sha256=k6kB_tvXbaixmTPGiUVZyCn5kvyoKwzHEMk7GCNNY4M,18742
19
19
  climate_ref_core/pycmec/output.py,sha256=Il4j6sjGrChBioiQS7lQ_CJmofT1BEesmSZbuZDnXN8,6102
20
- climate_ref_core-0.7.0.dist-info/METADATA,sha256=OLUdTWE6F70mVvm9Uo47DGYdHVrVLtoQvbBuUsFn3GQ,2925
21
- climate_ref_core-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
- climate_ref_core-0.7.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
- climate_ref_core-0.7.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
24
- climate_ref_core-0.7.0.dist-info/RECORD,,
20
+ climate_ref_core-0.8.0.dist-info/METADATA,sha256=C3q17g9WyPdI2YBl4SDpLiM1LCCYhAgFoLhq2ccKUOo,2925
21
+ climate_ref_core-0.8.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
22
+ climate_ref_core-0.8.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
+ climate_ref_core-0.8.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
24
+ climate_ref_core-0.8.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any