climate-ref-core 0.5.4__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,15 +12,68 @@ import pathlib
12
12
  import shutil
13
13
 
14
14
  import pooch
15
+ import pooch.hashes
15
16
  from loguru import logger
16
17
  from rich.progress import track
17
18
 
19
+ from climate_ref_core.env import env
20
+
21
+ DATASET_URL = env.str("REF_DATASET_URL", default="https://pub-b093171261094c4ea9adffa01f94ee06.r2.dev")
22
+
23
+
24
+ def _verify_hash_matches(fname: str | pathlib.Path, known_hash: str) -> bool:
25
+ """
26
+ Check if the hash of a file matches a known hash.
27
+
28
+ Converts hashes to lowercase before comparison to avoid system-specific
29
+ mismatches between hashes in the registry and computed hashes.
30
+
31
+ This is a tweaked version of the `pooch.hashes.hash_matches` function with a custom error message.
32
+
33
+ Parameters
34
+ ----------
35
+ fname
36
+ The path to the file.
37
+ known_hash
38
+ The known hash. Optionally, prepend ``alg:`` to the hash to specify the
39
+ hashing algorithm. Default is SHA256.
40
+
41
+ Raises
42
+ ------
43
+ ValueError
44
+ If the hash does not match.
45
+ FileNotFoundError
46
+ If the file does not exist.
47
+
48
+ Returns
49
+ -------
50
+ bool
51
+ True if the hash matches.
52
+ """
53
+ fname = pathlib.Path(fname)
54
+
55
+ if not fname.exists():
56
+ raise FileNotFoundError(f"File {fname!s} does not exist. Cannot verify hash.")
57
+
58
+ algorithm = pooch.hashes.hash_algorithm(known_hash)
59
+ new_hash = pooch.hashes.file_hash(str(fname), alg=algorithm)
60
+ matches = new_hash.lower() == known_hash.split(":")[-1].lower()
61
+ if not matches:
62
+ raise ValueError(
63
+ f"{algorithm.upper()} hash of downloaded file ({fname!s}) does not match"
64
+ f" the known hash: expected {known_hash} but got {new_hash}. "
65
+ f"The file may have been corrupted or the known hash may be outdated. "
66
+ f"Delete the file and try again."
67
+ )
68
+ return matches
69
+
18
70
 
19
71
  def fetch_all_files(
20
72
  registry: pooch.Pooch,
21
73
  name: str,
22
74
  output_dir: pathlib.Path | None,
23
75
  symlink: bool = False,
76
+ verify: bool = True,
24
77
  ) -> None:
25
78
  """
26
79
  Fetch all files associated with a pooch registry and write them to an output directory.
@@ -45,12 +98,17 @@ def fetch_all_files(
45
98
  symlink
46
99
  If True, symlink all files to this directory.
47
100
  Otherwise, perform a copy.
101
+ verify
102
+ If True, verify the checksums of the local files against the registry.
48
103
  """
49
104
  if output_dir:
50
105
  output_dir.mkdir(parents=True, exist_ok=True)
51
106
 
52
107
  for key in track(registry.registry.keys(), description=f"Fetching {name} data"):
53
108
  fetch_file = registry.fetch(key)
109
+ expected_hash = registry.registry[key]
110
+ if not isinstance(expected_hash, str) or not expected_hash: # pragma: no cover
111
+ raise ValueError(f"Expected a hash for {key} but got {expected_hash}")
54
112
 
55
113
  if output_dir is None:
56
114
  # Just warm the cache and move onto the next file
@@ -68,6 +126,8 @@ def fetch_all_files(
68
126
  shutil.copy(fetch_file, linked_file)
69
127
  else:
70
128
  logger.info(f"File {linked_file} already exists. Skipping.")
129
+ if verify:
130
+ _verify_hash_matches(linked_file, expected_hash)
71
131
 
72
132
 
73
133
  class DatasetRegistryManager:
@@ -5,7 +5,7 @@ Dataset management and filtering
5
5
  import enum
6
6
  import functools
7
7
  import hashlib
8
- from collections.abc import Iterable
8
+ from collections.abc import Collection, Iterable
9
9
  from typing import Any, Self
10
10
 
11
11
  import pandas as pd
@@ -48,19 +48,17 @@ class SourceDatasetType(enum.Enum):
48
48
  return sorted(cls, key=lambda x: x.value)
49
49
 
50
50
 
51
- def _clean_facets(raw_values: dict[str, str | tuple[str, ...] | list[str]]) -> dict[str, tuple[str, ...]]:
51
+ def _clean_facets(raw_values: dict[str, str | Collection[str]]) -> dict[str, tuple[str, ...]]:
52
52
  """
53
53
  Clean the value of a facet filter to a tuple of strings
54
54
  """
55
- result = {}
55
+ result: dict[str, tuple[str, ...]] = {}
56
56
 
57
57
  for key, value in raw_values.items():
58
- if isinstance(value, list):
59
- result[key] = tuple(value)
60
- elif isinstance(value, str):
58
+ if isinstance(value, str):
61
59
  result[key] = (value,)
62
- elif isinstance(value, tuple):
63
- result[key] = value
60
+ else:
61
+ result[key] = tuple(value)
64
62
  return result
65
63
 
66
64
 
@@ -2,13 +2,16 @@
2
2
  Logging utilities
3
3
 
4
4
  The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework.
5
+ The log level and format are configured via the REF configuration file.
5
6
  """
6
7
 
7
8
  import contextlib
8
9
  import inspect
9
10
  import logging
11
+ import multiprocessing
10
12
  import sys
11
13
  from collections.abc import Generator
14
+ from pathlib import Path
12
15
  from typing import Any
13
16
 
14
17
  import pooch
@@ -24,6 +27,28 @@ Filename for the execution log.
24
27
  This file is written via [climate_ref_core.logging.redirect_logs][].
25
28
  """
26
29
 
30
+ DEFAULT_LOG_FORMAT = (
31
+ "<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z}</green> | <level>{level: <8}</level> | "
32
+ "<cyan>{name}</cyan> - <level>{message}</level>"
33
+ )
34
+ """
35
+ Default log format used by the REF
36
+ """
37
+
38
+ VERBOSE_LOG_FORMAT = (
39
+ "<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z} e{elapsed}s</green> | "
40
+ "<level>{level: <8}</level> | "
41
+ "{process.name}:{process.id} | "
42
+ "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
43
+ "<level>{message}</level>"
44
+ )
45
+ """
46
+ The verbose log format is used for debugging and development.
47
+
48
+ This is the format that is used when writing the log messages to file for later debugging.
49
+ It contains information about the process and function that the log message was generated in.
50
+ """
51
+
27
52
 
28
53
  class _InterceptHandler(logging.Handler):
29
54
  def emit(self, record: logging.LogRecord) -> None:
@@ -43,6 +68,35 @@ class _InterceptHandler(logging.Handler):
43
68
  logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
44
69
 
45
70
 
71
+ def initialise_logging(level: int | str, format: str, log_directory: str | Path) -> None: # noqa: A002 # pragma: no cover
72
+ """
73
+ Initialise the logging for the REF
74
+
75
+ This sets up the default log handler and configures the REF logger.
76
+ """
77
+ capture_logging()
78
+ log_directory = Path(log_directory)
79
+ process_name = multiprocessing.current_process().name
80
+
81
+ # Remove any existing handlers
82
+ logger.remove()
83
+
84
+ # Write out debug logs to a file
85
+ log_directory.mkdir(parents=True, exist_ok=True)
86
+ filename = f"climate-ref_{{time:YYYY-MM-DD_HH-mm}}_{process_name}.log"
87
+ logger.add(
88
+ sink=log_directory / filename,
89
+ retention=10,
90
+ level="DEBUG",
91
+ format=VERBOSE_LOG_FORMAT,
92
+ colorize=False,
93
+ )
94
+ logger.info("Starting REF logging")
95
+ logger.info(f"arguments: {sys.argv}")
96
+
97
+ add_log_handler(level=level, format=format, colorize=True)
98
+
99
+
46
100
  def capture_logging() -> None:
47
101
  """
48
102
  Capture logging from the standard library and redirect it to Loguru
@@ -56,6 +110,7 @@ def capture_logging() -> None:
56
110
  logging.basicConfig(handlers=[_InterceptHandler()], level=0, force=True)
57
111
 
58
112
  # Disable some overly verbose logs
113
+ logger.disable("alembic.runtime.migration")
59
114
  logger.disable("matplotlib.colorbar")
60
115
  logger.disable("matplotlib.ticker")
61
116
  logger.disable("matplotlib.font_manager")
@@ -154,4 +209,4 @@ def redirect_logs(definition: ExecutionDefinition, log_level: str) -> Generator[
154
209
  add_log_handler(**logger.default_handler_kwargs) # type: ignore[attr-defined]
155
210
 
156
211
 
157
- __all__ = ["EXECUTION_LOG_FILENAME", "add_log_handler", "capture_logging", "logger", "redirect_logs"]
212
+ __all__ = ["EXECUTION_LOG_FILENAME", "capture_logging", "initialise_logging", "redirect_logs"]
@@ -28,8 +28,8 @@ class DimensionValue:
28
28
 
29
29
  name: str
30
30
  long_name: str
31
- description: str | None
32
- units: str
31
+ description: str | None = None
32
+ units: str | None = None
33
33
 
34
34
 
35
35
  @frozen
@@ -24,6 +24,11 @@ dimensions:
24
24
  description: "Variable ID for the reference dataset (e.g., tas, pr, etc.)"
25
25
  allow_extra_values: true
26
26
  required: false
27
+ - name: grid_label
28
+ long_name: Grid Label
29
+ description: "The grid label of the output (e.g., gn)"
30
+ allow_extra_values: true
31
+ required: false
27
32
  - name: member_id
28
33
  long_name: Member ID
29
34
  description: "Unique identifier for each ensemble member, includes the variant label and sub-experiment if present"
@@ -48,7 +53,15 @@ dimensions:
48
53
  - name: global
49
54
  long_name: Global
50
55
  description: "Global aggregate"
51
- units: dimensionless
56
+ - name: NHEX
57
+ long_name: Northern Hemisphere Extra-tropics
58
+ description: "Northern Hemisphere Extra-tropics (30N-90N)"
59
+ - name: SHEX
60
+ long_name: Southern Hemisphere Extra-tropics
61
+ description: "Southern Hemisphere Extra-tropics (30S-90S)"
62
+ - name: Tropics
63
+ long_name: Tropics
64
+ description: "Tropics (30N-30S)"
52
65
  - name: season
53
66
  long_name: Season
54
67
  description: "Parts of the year from which the metric values are calculated"
@@ -58,23 +71,57 @@ dimensions:
58
71
  - name: ann
59
72
  long_name: Annual
60
73
  description: ""
61
- units: dimensionless
62
74
  - name: djf
63
75
  long_name: Dec,Jan,Feb
64
76
  description: "December, January, February"
65
- units: dimensionless
66
77
  - name: mam
67
78
  long_name: Mar,Apr,May
68
79
  description: "March, April, May"
69
- units: dimensionless
70
80
  - name: jja
71
81
  long_name: Jun,Jul,Aug
72
82
  description: "June, July, August"
73
- units: dimensionless
74
83
  - name: son
75
84
  long_name: Sep,Oct,Nov
76
85
  description: "September, October, November"
77
- units: dimensionless
86
+ - name: mode
87
+ long_name: Mode of variability
88
+ description: "Different modes of variability that can be calculated"
89
+ required: false
90
+ allow_extra_values: false
91
+ values:
92
+ - name: NAM
93
+ long_name: Northern Annular Mode
94
+ description: Northern Annular Mode
95
+ - name: NAO
96
+ long_name: North Atlantic Oscillation
97
+ description: North Atlantic Oscillation
98
+ - name: PNA
99
+ long_name: Pacific–North America pattern
100
+ description: Pacific–North America pattern
101
+ - name: SAM
102
+ long_name: Southern Annular Mode
103
+ description: Southern Annular Mode
104
+ - name: PDO
105
+ long_name: Pacific decadal oscillation
106
+ description: Pacific decadal oscillation
107
+ - name: NPO
108
+ long_name: North Pacific Oscillation
109
+ description: North Pacific Oscillation
110
+ - name: NPGO
111
+ long_name: North Pacific Gyre Oscillation
112
+ description: North Pacific Gyre Oscillation
113
+ - name: method
114
+ long_name: EOF Method
115
+ description: "Method for calculating the EOFs in PMP's mode of variability diagnostic"
116
+ required: false
117
+ allow_extra_values: false
118
+ values:
119
+ - name: cbf
120
+ long_name: Common Basis Function
121
+ description: "A projection of the leading EOFs of the reference dataset onto the model data"
122
+ - name: eof1
123
+ long_name: EOF1
124
+ description: "The leading EOF of the reference dataset"
78
125
  - name: statistic
79
126
  long_name: Statistic
80
127
  description: ""
@@ -20,7 +20,7 @@ from copy import deepcopy
20
20
  from enum import Enum
21
21
  from typing import Any, cast
22
22
 
23
- from loguru import logger
23
+ import numpy as np
24
24
  from pydantic import (
25
25
  BaseModel,
26
26
  ConfigDict,
@@ -190,7 +190,6 @@ class MetricResults(RootModel[Any]):
190
190
  expected_keys = set(metdims[dim_name].keys())
191
191
  if not (dict_keys.issubset(expected_keys)):
192
192
  msg = f"Unknown dimension values: {dict_keys - expected_keys} for {dim_name}"
193
- logger.error(msg)
194
193
  if not ALLOW_EXTRA_KEYS: # pragma: no cover
195
194
  raise ValueError(f"{msg}\nExpected keys: {expected_keys}")
196
195
  else:
@@ -228,7 +227,7 @@ class StrNumDict(RootModel[Any]):
228
227
  """A class contains string key and numeric value"""
229
228
 
230
229
  model_config = ConfigDict(strict=True)
231
- root: dict[str, float | int]
230
+ root: dict[str, float | int | None]
232
231
 
233
232
 
234
233
  def remove_dimensions(raw_metric_bundle: dict[str, Any], dimensions: str | list[str]) -> dict[str, Any]:
@@ -542,6 +541,13 @@ def _walk_results(
542
541
  yield ScalarMetricValue(
543
542
  dimensions=metadata, value=value, attributes=results.get(MetricCV.ATTRIBUTES.value)
544
543
  )
544
+ elif value is None:
545
+ # Replace any None values with NaN
546
+ # This translates null values in JSON to Python NaNs
547
+ # Missing values are different from NaN values
548
+ yield ScalarMetricValue(
549
+ dimensions=metadata, value=np.nan, attributes=results.get(MetricCV.ATTRIBUTES.value)
550
+ )
545
551
  else:
546
552
  yield from _walk_results(dimensions[1:], value, {**metadata})
547
553
 
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-core
3
- Version: 0.5.4
3
+ Version: 0.6.0
4
4
  Summary: Core library for the CMIP Rapid Evaluation Framework
5
5
  Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
6
- License: Apache-2.0
6
+ License-Expression: Apache-2.0
7
7
  License-File: LICENCE
8
8
  License-File: NOTICE
9
- Classifier: Development Status :: 2 - Pre-Alpha
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
10
11
  Classifier: Intended Audience :: Science/Research
11
12
  Classifier: License :: OSI Approved :: Apache Software License
12
13
  Classifier: Operating System :: OS Independent
@@ -18,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.13
18
19
  Classifier: Topic :: Scientific/Engineering
19
20
  Requires-Python: >=3.11
20
21
  Requires-Dist: attrs>=23.2.0
22
+ Requires-Dist: cattrs>=24.1
21
23
  Requires-Dist: environs>=11
22
24
  Requires-Dist: loguru>=0.7.0
23
25
  Requires-Dist: numpy>=1.25.0
@@ -27,6 +29,7 @@ Requires-Dist: pydantic>=2.10.6
27
29
  Requires-Dist: requests
28
30
  Requires-Dist: rich
29
31
  Requires-Dist: ruamel-yaml>=0.18
32
+ Requires-Dist: setuptools>=75.8.0
30
33
  Requires-Dist: typing-extensions
31
34
  Description-Content-Type: text/markdown
32
35
 
@@ -1,24 +1,24 @@
1
1
  climate_ref_core/__init__.py,sha256=MtmPThF2F9_2UODEN6rt1x30LDxrHIZ0wyRN_wsHx5I,127
2
2
  climate_ref_core/constraints.py,sha256=QOqMh5jDBxdWTnQw2HNBizJQDF6Uu97rfJp9WudQWHc,11819
3
- climate_ref_core/dataset_registry.py,sha256=UU62h2xjt_K0Z8Md1uKsxJEvdsfv_fg6WfYRmtqcZks,5104
4
- climate_ref_core/datasets.py,sha256=bX86XPD1Z5zl3E4_56zUU9cjwNOdurU-HiYx7h1PmN4,6191
3
+ climate_ref_core/dataset_registry.py,sha256=mkes7Pgz_zaS_t_BidiDzSd8dmqAvjx2MlgoFMAhn20,7192
4
+ climate_ref_core/datasets.py,sha256=TK50WQwTfbase26s8wPEGEN1BwcedrOd8nk6IlEf3Ww,6124
5
5
  climate_ref_core/diagnostics.py,sha256=5KCtHuhToSpATqjW4HBi56PsOxT5WX4VkqoZPUvYR60,18769
6
6
  climate_ref_core/env.py,sha256=Ph2dejVxTELfP3bL0xES086WLGvV5H6KvsOwCkL6m-k,753
7
7
  climate_ref_core/exceptions.py,sha256=psdipWURLyMq5hmloGxt-8kyqEe0IsENfraok7KTi8I,1437
8
8
  climate_ref_core/executor.py,sha256=NIXIU2rwMnTOR-ztlPlCD-poZO4vxzKQPWYk8veTVkk,5195
9
- climate_ref_core/logging.py,sha256=EBe5WAk1dtosr8MLkG-i7iDNZTI9ufxI4xsvbq3Gdt8,5260
9
+ climate_ref_core/logging.py,sha256=cg6CK2DHGjyLaoRJm75p-Ja82hnVhBBQ4riOKk3l9XY,7063
10
10
  climate_ref_core/providers.py,sha256=by_ZtoLQgg9A60CbFor2-i5EixtZTZ0z8jQqOGRfvA8,12461
11
11
  climate_ref_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  climate_ref_core/metric_values/__init__.py,sha256=aHfwRrqzLOmmaBKf1-4q97DnHb8KwmW0Dhwd79ZQiNQ,634
13
13
  climate_ref_core/metric_values/typing.py,sha256=2DpzmjqQ7tqOPAyjthZ_O14c0-MhiYt-A_n9p6-bOao,1903
14
14
  climate_ref_core/pycmec/README.md,sha256=PzkovlPpsXqFopsYzz5GRvCAipNRGO1Wo-0gc17qr2Y,36
15
15
  climate_ref_core/pycmec/__init__.py,sha256=hXvKGEJQWyAp1i-ndr3D4zuYxkRhcR2LfXgFXlhYOk4,28
16
- climate_ref_core/pycmec/controlled_vocabulary.py,sha256=xio_4jl6mM_WMrwyxo70d0G5dUeIal4IW7eV-EMW4mU,5093
17
- climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=FflwP71JFdnp-N5_OQ9_g4KE_I16fxn1Zn96yybenW4,2706
18
- climate_ref_core/pycmec/metric.py,sha256=XXM5DMk0BhpKcPvvCHCcgA6jKoVGMqXcwiG1UerYYps,18181
16
+ climate_ref_core/pycmec/controlled_vocabulary.py,sha256=ThdTBsSmUemyXSQG5RycN31qp93LfqIyPqvI8x5cdLI,5114
17
+ climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=gx5QyW88pZQVUfiYXmsJtJO6AJg6NbIZgdU4vDIa3fE,4390
18
+ climate_ref_core/pycmec/metric.py,sha256=zymXoutnjbdcxvG_fMJugFLLcBrfSPG0XoV-2tA0ujA,18499
19
19
  climate_ref_core/pycmec/output.py,sha256=4-RQ439sfgNLeQZVDPB1pewF_kTwX7nCK0Z4U6bvbd0,5709
20
- climate_ref_core-0.5.4.dist-info/METADATA,sha256=-U-9rs_xmyD3sgOzP7RcOq5c15Bk5dUz7zOMQnLlXLY,2821
21
- climate_ref_core-0.5.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
- climate_ref_core-0.5.4.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
- climate_ref_core-0.5.4.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
24
- climate_ref_core-0.5.4.dist-info/RECORD,,
20
+ climate_ref_core-0.6.0.dist-info/METADATA,sha256=pVHPIwK0q0j1KyG7ui9FeMa2wFLpoC8zWrBKJ58WG5c,2934
21
+ climate_ref_core-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
+ climate_ref_core-0.6.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
+ climate_ref_core-0.6.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
24
+ climate_ref_core-0.6.0.dist-info/RECORD,,