climate-ref-core 0.6.5__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/PKG-INFO +2 -2
  2. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/pyproject.toml +2 -2
  3. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/constraints.py +198 -80
  4. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/datasets.py +20 -8
  5. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/diagnostics.py +38 -14
  6. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/executor.py +4 -1
  7. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/logging.py +7 -3
  8. climate_ref_core-0.7.0/src/climate_ref_core/metric_values/typing.py +139 -0
  9. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/providers.py +25 -0
  10. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/controlled_vocabulary.py +33 -15
  11. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/output.py +12 -0
  12. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/metric_values/test_typing.py +32 -0
  13. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/cmec_testdata/test_output_json_schema.yml +8 -0
  14. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/test_cmec_metric.py +1 -2
  15. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/test_cmec_output.py +1 -4
  16. climate_ref_core-0.7.0/tests/unit/test_constraints.py +765 -0
  17. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_dataset_registry/test_dataset_registry.py +2 -2
  18. climate_ref_core-0.7.0/tests/unit/test_datasets/dataset_collection_hash.yml +2 -0
  19. climate_ref_core-0.7.0/tests/unit/test_datasets/dataset_collection_obs4mips_hash.yml +2 -0
  20. climate_ref_core-0.7.0/tests/unit/test_datasets/execution_dataset_hash.yml +2 -0
  21. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_datasets.py +29 -11
  22. climate_ref_core-0.6.5/tests/unit/test_metrics.py → climate_ref_core-0.7.0/tests/unit/test_diagnostics.py +39 -76
  23. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_logging.py +4 -7
  24. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_providers.py +9 -2
  25. climate_ref_core-0.6.5/src/climate_ref_core/metric_values/typing.py +0 -74
  26. climate_ref_core-0.6.5/tests/unit/test_constraints.py +0 -438
  27. climate_ref_core-0.6.5/tests/unit/test_datasets/dataset_collection_hash.yml +0 -2
  28. climate_ref_core-0.6.5/tests/unit/test_datasets/dataset_collection_obs4mips_hash.yml +0 -2
  29. climate_ref_core-0.6.5/tests/unit/test_datasets/metric_dataset_hash.yml +0 -2
  30. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/.gitignore +0 -0
  31. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/LICENCE +0 -0
  32. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/NOTICE +0 -0
  33. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/README.md +0 -0
  34. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/__init__.py +0 -0
  35. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/dataset_registry.py +0 -0
  36. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/env.py +0 -0
  37. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/exceptions.py +0 -0
  38. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/metric_values/__init__.py +0 -0
  39. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/py.typed +0 -0
  40. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/README.md +0 -0
  41. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/__init__.py +0 -0
  42. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/cv_cmip7_aft.yaml +0 -0
  43. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/pycmec/metric.py +0 -0
  44. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/cmec_testdata/cmec_metric_sample.json +0 -0
  45. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/cmec_testdata/cmec_output_sample.json +0 -0
  46. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/cmec_testdata/cv_sample.yaml +0 -0
  47. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/cmec_testdata/test_metric_json_schema.yml +0 -0
  48. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/conftest.py +0 -0
  49. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/pycmec/test_controlled_vocabulary.py +0 -0
  50. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_exceptions.py +0 -0
  51. {climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/tests/unit/test_executor.py +0 -0

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref-core
-Version: 0.6.5
+Version: 0.7.0
 Summary: Core library for the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0
@@ -29,7 +29,7 @@ Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: requests
 Requires-Dist: rich
-Requires-Dist: setuptools>=75.8.0
+Requires-Dist: setuptools<81
 Requires-Dist: typing-extensions
 Description-Content-Type: text/markdown
 

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref-core"
-version = "0.6.5"
+version = "0.7.0"
 description = "Core library for the CMIP Rapid Evaluation Framework"
 readme = "README.md"
 authors = [
@@ -39,7 +39,7 @@ dependencies = [
     "environs>=11",
     "pyyaml>=6.0.2",
     # Not used directly, but required to support some installations
-    "setuptools>=75.8.0",
+    "setuptools<81",
 
     # SPEC 0000 constraints
     # We follow [SPEC-0000](https://scientific-python.org/specs/spec-0000/)

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/constraints.py
@@ -6,7 +6,9 @@ import sys
 import warnings
 from collections import defaultdict
 from collections.abc import Mapping
-from typing import Protocol, runtime_checkable
+from datetime import datetime
+from functools import total_ordering
+from typing import Literal, Protocol, runtime_checkable
 
 if sys.version_info < (3, 11):
     from typing_extensions import Self
@@ -15,45 +17,21 @@ else:
 
 import numpy as np
 import pandas as pd
-from attrs import frozen
+from attrs import field, frozen
 from loguru import logger
 
 from climate_ref_core.datasets import SourceDatasetType
-from climate_ref_core.exceptions import ConstraintNotSatisfied
 
 
 @runtime_checkable
-class GroupValidator(Protocol):
+class GroupConstraint(Protocol):
     """
-    A constraint that must be satisfied when executing a given diagnostic run.
-
-    All constraints must be satisfied for a given group to be run.
-    """
-
-    def validate(self, group: pd.DataFrame) -> bool:
-        """
-        Validate if the constraint is satisfied by the dataset.
-
-        This is executed after the apply method to determine if the constraint is satisfied.
-        If the constraint is not satisfied, the group will not be executed.
-
-        Parameters
-        ----------
-        group
-            A group of datasets that is being validated.
-
-        Returns
-        -------
-        :
-            Whether the constraint is satisfied
-        """
-        ...
+    An operation to perform on a group of datasets resulting in a new group of datasets.
 
+    This is applied to a group of datasets representing the inputs to a potential diagnostic execution.
 
-@runtime_checkable
-class GroupOperation(Protocol):
-    """
-    An operation to perform on a group of datasets resulting in a new group of datasets.
+    If the operation results in an empty group, the constraint is considered not satisfied.
+    The group must satisfy all constraints to be processed.
 
     !! warning
 
@@ -90,18 +68,6 @@ class GroupOperation(Protocol):
         ...
 
 
-GroupConstraint = GroupOperation | GroupValidator
-"""
-A constraint that must be satisfied for a group of datasets to be executed.
-
-This is applied to a group of datasets representing the inputs to a potential diagnostic execution.
-The group must satisfy all constraints to be processed.
-
-This can include operations that are applied to a group of datasets which may modify the group,
-but may also include validators that check if the group satisfies a certain condition.
-"""
-
-
 def apply_constraint(
     dataframe: pd.DataFrame,
     constraint: GroupConstraint,
@@ -124,39 +90,67 @@
     :
         The updated group of datasets or None if the constraint was not satisfied
     """
-    try:
-        updated_group = (
-            constraint.apply(dataframe, data_catalog) if isinstance(constraint, GroupOperation) else dataframe
-        )
-
-        valid = constraint.validate(updated_group) if isinstance(constraint, GroupValidator) else True
-        if not valid:
-            logger.debug(f"Constraint {constraint} not satisfied for {dataframe}")
-            raise ConstraintNotSatisfied(f"Constraint {constraint} not satisfied for {dataframe}")
-    except ConstraintNotSatisfied:
+    updated_group = constraint.apply(dataframe, data_catalog)
+    if updated_group.empty:
         logger.debug(f"Constraint {constraint} not satisfied for {dataframe}")
         return None
 
     return updated_group
 
 
+def _to_tuple(value: None | str | tuple[str, ...]) -> tuple[str, ...]:
+    """
+    Clean the value of group_by to a tuple of strings
+    """
+    if value is None:
+        return ()
+    if isinstance(value, str):
+        return (value,)
+    return tuple(value)
+
+
 @frozen
 class RequireFacets:
     """
-    A constraint that requires a dataset to have certain facets.
+    A constraint that requires datasets to have certain facet values.
     """
 
     dimension: str
-    required_facets: tuple[str, ...]
+    """The name of the facet to filter on."""
 
-    def validate(self, group: pd.DataFrame) -> bool:
+    required_facets: tuple[str, ...] = field(converter=_to_tuple)
+    "The required facet values."
+
+    operator: Literal["all", "any"] = "all"
+    """Whether all or any of the required facets must be present."""
+
+    group_by: tuple[str, ...] | None = field(converter=_to_tuple, default=None)
+    """
+    The facets to group the datasets by.
+
+    Each group created by `group_by` must contain at least one dataset where the
+    value of the given dimension is in the list of required facet values.
+
+    For example, if there are multiple models and variables in the selection,
+    `group_by` can be used to make sure that only those models are selected that
+    provide all required variables.
+    """
+
+    def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
         """
-        Check that the required facets are present in the group
+        Filter out groups of datasets that do not provide the required facets
         """
-        if self.dimension not in group:
-            logger.warning(f"Dimension {self.dimension} not present in group {group}")
-            return False
-        return all(value in group[self.dimension].values for value in self.required_facets)
+        op = all if self.operator == "all" else any
+        select = pd.Series(True, index=group.index)
+        groups = [group] if not self.group_by else (g[1] for g in group.groupby(list(self.group_by)))
+        for subgroup in groups:
+            if not op(value in subgroup[self.dimension].values for value in self.required_facets):
+                logger.debug(
                    f"Constraint {self} not satisfied because required facet values "
                    f"not found for group {', '.join(subgroup['path'])}"
                )
+                select.loc[subgroup.index] = False
+        return group[select]
 
 
 @frozen
@@ -271,6 +265,123 @@ class AddSupplementaryDataset:
         return cls(supplementary_facets, **kwargs[source_type])
 
 
+@frozen
+@total_ordering
+class PartialDateTime:  # noqa: PLW1641
+    """
+    A partial datetime object that can be used to compare datetimes.
+
+    Only the specified fields are used for comparison.
+    """
+
+    year: int | None = None
+    month: int | None = None
+    day: int | None = None
+    hour: int | None = None
+    minute: int | None = None
+    second: int | None = None
+
+    @property
+    def _attrs(self) -> dict[str, int]:
+        """The attributes that are set."""
+        return {
+            a: v
+            for a in self.__slots__  # type: ignore[attr-defined]
+            if not a.startswith("_") and (v := getattr(self, a)) is not None
+        }
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({', '.join(f'{a}={v}' for a, v in self._attrs.items())})"
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, datetime):
+            msg = (
+                f"Can only compare PartialDateTime with `datetime.datetime` "
+                f"objects, got object {other} of type {type(other)}"
+            )
+            raise TypeError(msg)
+
+        for attr, value in self._attrs.items():
+            other_value = getattr(other, attr)
+            if value != other_value:
+                return False
+        return True
+
+    def __lt__(self, other: object) -> bool:
+        if not isinstance(other, datetime):
+            msg = (
+                f"Can only compare PartialDateTime with `datetime.datetime` "
+                f"objects, got object {other} of type {type(other)}"
+            )
+            raise TypeError(msg)
+
+        for attr, value in self._attrs.items():
+            other_value = getattr(other, attr)
+            if value != other_value:
+                return value < other_value  # type: ignore[no-any-return]
+        return False
+
+
+@frozen
+class RequireTimerange:
+    """
+    A constraint that requires datasets to have a specific timerange.
+
+    Specify the start and/or end of the required timerange using a precision
+    that matches the frequency of the datasets.
+
+    For example, to ensure that datasets at monthly frequency cover the period
+    from 2000 to 2010, use start=PartialDateTime(year=2000, month=1) and
+    end=PartialDateTime(year=2010, month=12).
+    """
+
+    group_by: tuple[str, ...]
+    """
+    The fields to group the datasets by. Groups that do not cover the timerange
+    will be removed.
+    """
+
+    start: PartialDateTime | None = None
+    """
+    The start time of the required timerange. If None, no start time is required.
+    """
+
+    end: PartialDateTime | None = None
+    """
+    The end time of the required timerange. If None, no end time is required.
+    """
+
+    def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
+        """
+        Check that all subgroups of the group have a contiguous timerange.
+        """
+        select = pd.Series(True, index=group.index)
+        for _, subgroup in group.dropna(subset=["start_time", "end_time"]).groupby(list(self.group_by)):
+            start = subgroup["start_time"].min()
+            end = subgroup["end_time"].max()
+            result = True
+            if self.start is not None and start > self.start:
+                logger.debug(
+                    f"Constraint {self} not satisfied because start time {start} "
+                    f"is after required start time for {', '.join(subgroup['path'])}"
+                )
+                result = False
+            if self.end is not None and end < self.end:
+                logger.debug(
+                    f"Constraint {self} not satisfied because end time {end} "
+                    f"is before required end time for {', '.join(subgroup['path'])}"
+                )
+                result = False
+            if result:
+                contiguous_subgroup = RequireContiguousTimerange(group_by=self.group_by).apply(
+                    subgroup, data_catalog
+                )
+                result = len(contiguous_subgroup) == len(subgroup)
+            if not result:
+                select.loc[subgroup.index] = False
+        return group[select]
+
+
 @frozen
 class RequireContiguousTimerange:
     """
@@ -279,11 +390,11 @@ class RequireContiguousTimerange:
 
     group_by: tuple[str, ...]
     """
-    The fields to group the datasets by. Each group must be contiguous in time
-    to fulfill the constraint.
+    The fields to group the datasets by. Groups that are not be contiguous in time
+    are removed.
    """
 
-    def validate(self, group: pd.DataFrame) -> bool:
+    def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
         """
         Check that all subgroups of the group have a contiguous timerange.
         """
@@ -293,11 +404,10 @@
             days=31,  # Maximum number of days in a month.
             hours=1,  # Allow for potential rounding errors.
         )
-        group = group.dropna(subset=["start_time", "end_time"])
-        if len(group) < 2:  # noqa: PLR2004
-            return True
 
-        for _, subgroup in group.groupby(list(self.group_by)):
+        select = pd.Series(True, index=group.index)
+
+        for _, subgroup in group.dropna(subset=["start_time", "end_time"]).groupby(list(self.group_by)):
             if len(subgroup) < 2:  # noqa: PLR2004
                 continue
             sorted_group = subgroup.sort_values("start_time", kind="stable")
@@ -325,12 +435,13 @@
             paths = sorted_group["path"]
             for gap_idx in np.flatnonzero(gap_indices):
                 logger.debug(
-                    f"Constraint {self.__class__.__name__} not satisfied "
-                    f"because gap larger than {max_timedelta} found between "
+                    f"Constraint {self} not satisfied because gap larger "
+                    f"than {max_timedelta} found between "
                     f"{paths.iloc[gap_idx]} and {paths.iloc[gap_idx + 1]}"
                 )
-                return False
-        return True
+                select.loc[subgroup.index] = False
+
+        return group[select]
 
 
 @frozen
@@ -345,17 +456,24 @@
     the groups to fulfill the constraint.
     """
 
-    def validate(self, group: pd.DataFrame) -> bool:
+    def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
         """
         Check that all subgroups of the group have an overlapping timerange.
         """
-        group = group.dropna(subset=["start_time", "end_time"])
-        if len(group) < 2:  # noqa: PLR2004
-            return True
-
-        starts = group.groupby(list(self.group_by))["start_time"].min()
-        ends = group.groupby(list(self.group_by))["end_time"].max()
-        return starts.max() < ends.min()  # type: ignore[no-any-return]
+        group_with_time = group.dropna(subset=["start_time", "end_time"])
+        if len(group_with_time) < 2:  # noqa: PLR2004
+            return group
+
+        starts = group_with_time.groupby(list(self.group_by))["start_time"].min()
+        ends = group_with_time.groupby(list(self.group_by))["end_time"].max()
+        result = starts.max() < ends.min()
+        if not result:
+            logger.debug(
+                f"Constraint {self} not satisfied because no overlapping timerange "
+                f"found for groups in {', '.join(group['path'])}"
+            )
+            return group.loc[[]]
+        return group
 
 
 @frozen
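
The constraint API in constraints.py is now apply-only: every constraint returns a filtered group and an empty result means "not satisfied", replacing `GroupValidator.validate` and the `ConstraintNotSatisfied` exception. A minimal sketch of the reworked `RequireFacets` and the new `PartialDateTime`, using a hypothetical toy catalog that doubles as the data catalog:

```python
from datetime import datetime

import pandas as pd

from climate_ref_core.constraints import PartialDateTime, RequireFacets, apply_constraint

# Hypothetical toy catalog: ModelB does not provide the "pr" variable.
catalog = pd.DataFrame(
    {
        "source_id": ["ModelA", "ModelA", "ModelB"],
        "variable_id": ["tas", "pr", "tas"],
        "path": ["a_tas.nc", "a_pr.nc", "b_tas.nc"],
    }
)

constraint = RequireFacets(
    dimension="variable_id",
    required_facets=("tas", "pr"),
    group_by="source_id",  # each model must provide all required variables
)

# apply() filters the group; apply_constraint() returns None if nothing survives.
filtered = apply_constraint(catalog, constraint, catalog)
assert filtered is not None
assert set(filtered["source_id"]) == {"ModelA"}

# PartialDateTime compares only the fields that are set.
assert PartialDateTime(year=2000) == datetime(2000, 5, 17)
assert datetime(1999, 12, 31) < PartialDateTime(year=2000)
```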

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/datasets.py
@@ -5,7 +5,7 @@ Dataset management and filtering
 import enum
 import functools
 import hashlib
-from collections.abc import Collection, Iterable
+from collections.abc import Collection, Iterable, Iterator
 from typing import Any, Self
 
 import pandas as pd
@@ -76,12 +76,6 @@ class FacetFilter:
     The result will only contain datasets where for all fields,
     the value of the field is one of the given values.
     """
-    keep: bool = True
-    """
-    Whether to keep or remove datasets that match the filter.
-
-    If true (default), datasets that match the filter will be kept else they will be removed.
-    """
 
 
 def sort_selector(inp: Selector) -> Selector:
@@ -159,6 +153,9 @@ class ExecutionDatasetCollection:
     def __init__(self, collection: dict[SourceDatasetType | str, DatasetCollection]):
         self._collection = {SourceDatasetType(k): v for k, v in collection.items()}
 
+    def __repr__(self) -> str:
+        return f"ExecutionDatasetCollection({self._collection})"
+
     def __contains__(self, key: SourceDatasetType | str) -> bool:
         if isinstance(key, str):
             key = SourceDatasetType(key)
@@ -172,9 +169,24 @@
     def __hash__(self) -> int:
         return hash(self.hash)
 
+    def __iter__(self) -> Iterator[SourceDatasetType]:
+        return iter(self._collection)
+
+    def keys(self) -> Iterable[SourceDatasetType]:
+        """
+        Iterate over the source types in the collection.
+        """
+        return self._collection.keys()
+
+    def values(self) -> Iterable[DatasetCollection]:
+        """
+        Iterate over the datasets in the collection.
+        """
+        return self._collection.values()
+
     def items(self) -> Iterable[tuple[SourceDatasetType, DatasetCollection]]:
         """
-        Iterate over the datasets in the collection
+        Iterate over the items in the collection.
         """
         return self._collection.items()
 

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/diagnostics.py
@@ -14,6 +14,7 @@ from attrs import field, frozen
 from climate_ref_core.constraints import GroupConstraint
 from climate_ref_core.datasets import ExecutionDatasetCollection, FacetFilter, SourceDatasetType
 from climate_ref_core.metric_values import SeriesMetricValue
+from climate_ref_core.metric_values.typing import SeriesDefinition
 from climate_ref_core.pycmec.metric import CMECMetric
 from climate_ref_core.pycmec.output import CMECOutput
 
@@ -182,9 +183,11 @@ class ExecutionResult:
     Whether the diagnostic execution ran successfully.
     """
 
-    series: Sequence[SeriesMetricValue] = field(factory=tuple)
+    series_filename: pathlib.Path | None = None
     """
     A collection of series metric values that were extracted from the execution.
+
+    These are written to a CSV file in the output directory.
     """
 
     @staticmethod
@@ -193,6 +196,7 @@
         *,
         cmec_output_bundle: CMECOutput | dict[str, Any],
         cmec_metric_bundle: CMECMetric | dict[str, Any],
+        series: Sequence[SeriesMetricValue] = tuple(),
     ) -> ExecutionResult:
         """
         Build a ExecutionResult from a CMEC output bundle.
@@ -205,6 +209,8 @@
             An output bundle in the CMEC format.
         cmec_metric_bundle
             An diagnostic bundle in the CMEC format.
+        series
+            Series metric values extracted from the execution.
 
         Returns
         -------
@@ -223,17 +229,21 @@
            cmec_metric = cmec_metric_bundle
 
         definition.to_output_path(filename=None).mkdir(parents=True, exist_ok=True)
-        bundle_path = definition.to_output_path("output.json")
-        cmec_output.dump_to_json(bundle_path)
 
-        definition.to_output_path(filename=None).mkdir(parents=True, exist_ok=True)
-        bundle_path = definition.to_output_path("diagnostic.json")
-        cmec_metric.dump_to_json(bundle_path)
+        output_filename = "output.json"
+        metric_filename = "diagnostic.json"
+        series_filename = "series.json"
+
+        cmec_output.dump_to_json(definition.to_output_path(output_filename))
+        cmec_metric.dump_to_json(definition.to_output_path(metric_filename))
+        SeriesMetricValue.dump_to_json(definition.to_output_path(series_filename), series)
 
+        # We are using relative paths for the output files for portability of the results
         return ExecutionResult(
             definition=definition,
-            output_bundle_filename=pathlib.Path("output.json"),
-            metric_bundle_filename=pathlib.Path("diagnostic.json"),
+            output_bundle_filename=pathlib.Path(output_filename),
+            metric_bundle_filename=pathlib.Path(metric_filename),
+            series_filename=pathlib.Path(series_filename),
             successful=True,
         )
 
@@ -311,7 +321,12 @@ class DataRequirement:
     Filters to apply to the data catalog of datasets.
 
     This is used to reduce the set of datasets to only those that are required by the diagnostic.
-    The filters are applied iteratively to reduce the set of datasets.
+
+    Each FacetFilter contains one or more facet values that must all be satisfied
+    for a dataset to match that filter. The overall selection keeps any dataset
+    that matches at least one of the provided filters.
+
+    If no filters are specified, all datasets in the data catalog are used.
     """
 
     group_by: tuple[str, ...] | None
@@ -351,6 +366,10 @@
         :
            Filtered data catalog
         """
+        if not self.filters or any(not f.facets for f in self.filters):
+            return data_catalog
+
+        select = pd.Series(False, index=data_catalog.index)
         for facet_filter in self.filters:
             values = {}
             for facet, value in facet_filter.facets.items():
@@ -362,11 +381,9 @@
                 )
                 values[facet] = clean_value
 
-            mask = data_catalog[list(values)].isin(values).all(axis="columns")
-            if not facet_filter.keep:
-                mask = ~mask
-            data_catalog = data_catalog[mask]
-        return data_catalog
+            select |= data_catalog[list(values)].isin(values).all(axis="columns")
+
+        return data_catalog[select]
 
 
 @runtime_checkable
@@ -432,6 +449,11 @@ class AbstractDiagnostic(Protocol):
     is raised.
     """
 
+    series: Sequence[SeriesDefinition]
+    """
+    Definition of the series that are produced by the diagnostic.
+    """
+
     provider: DiagnosticProvider
     """
     The provider that provides the diagnostic.
@@ -493,6 +515,8 @@ class Diagnostic(AbstractDiagnostic):
     See (climate_ref_example.example.ExampleDiagnostic)[] for an example implementation.
     """
 
+    series: Sequence[SeriesDefinition] = tuple()
+
     def __init__(self) -> None:
         super().__init__()
         self._provider: DiagnosticProvider | None = None
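
With `FacetFilter.keep` removed, the filter selection in diagnostics.py changes from iteratively AND-ing filters to keeping any dataset that matches at least one filter, while the facet values inside a single filter must still all match. A standalone pandas sketch of that selection logic, with illustrative column names:

```python
import pandas as pd

# Illustrative catalog; a real data catalog has many more facet columns.
data_catalog = pd.DataFrame(
    {
        "variable_id": ["tas", "pr", "tos"],
        "table_id": ["Amon", "Amon", "Omon"],
    }
)

# Facet values inside one filter must all match (AND) ...
filters = [
    {"variable_id": ["tas"], "table_id": ["Amon"]},
    {"variable_id": ["tos"]},
]

# ... while a dataset matching any filter is kept (OR), mirroring the new implementation.
select = pd.Series(False, index=data_catalog.index)
for values in filters:
    select |= data_catalog[list(values)].isin(values).all(axis="columns")

print(data_catalog[select])  # keeps the tas/Amon row and the tos row
```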

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/executor.py
@@ -160,12 +160,15 @@ def import_executor_cls(fqn: str) -> type[Executor]:
         imp = importlib.import_module(module)
         executor: type[Executor] = getattr(imp, attribute_name)
 
+        if isinstance(executor, Exception):
+            raise executor
+
         # We can't really check if the executor is a subclass of Executor here
         # Protocols can't be used with issubclass if they have non-method members
         # We have to check this at class instantiation time
 
         return executor
-    except ModuleNotFoundError:
+    except (ModuleNotFoundError, ImportError):
         logger.error(f"Package '{fqn}' not found")
         raise InvalidExecutorException(fqn, f"Module '{module}' not found")
     except AttributeError:

{climate_ref_core-0.6.5 → climate_ref_core-0.7.0}/src/climate_ref_core/logging.py
@@ -9,6 +9,7 @@ import contextlib
 import inspect
 import logging
 import multiprocessing
+import os
 import sys
 from collections.abc import Generator
 from pathlib import Path
@@ -94,7 +95,10 @@ def initialise_logging(level: int | str, format: str, log_directory: str | Path)
     logger.info("Starting REF logging")
     logger.info(f"arguments: {sys.argv}")
 
-    add_log_handler(level=level, format=format, colorize=True)
+    # LOGURU_COLORIZE is the default env var used by loguru to determine if color should be used
+    # We override this to use NO_COLOR which is more widely supported
+    no_color = os.environ.get("NO_COLOR") is not None
+    add_log_handler(level=level, format=format, colorize=not no_color)
 
 
 def capture_logging() -> None:
@@ -150,12 +154,12 @@ def remove_log_handler() -> None:
     """
     if hasattr(logger, "default_handler_id"):
         try:
-            logger.remove(logger.default_handler_id)
+            logger.remove(logger.default_handler_id)  # pyright: ignore[reportAttributeAccessIssue]
         except ValueError:
             # This can happen if the handler has already been removed
             # or if the logger was never configured
             pass
-        del logger.default_handler_id
+        del logger.default_handler_id  # pyright: ignore[reportAttributeAccessIssue]
     else:
         raise AssertionError("No default log handler to remove.")
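
The logging change above swaps hard-coded colouring for the widely supported `NO_COLOR` convention. A small sketch of disabling colour before configuring logging; the format string and log directory are placeholders:

```python
import os

from climate_ref_core.logging import initialise_logging

# Any value works; only the presence of NO_COLOR is checked.
os.environ["NO_COLOR"] = "1"

initialise_logging(
    level="INFO",
    format="{time} | {level} | {message}",  # loguru-style format string (placeholder)
    log_directory="logs",  # placeholder directory
)
```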