climate-ref-core 0.5.5-py3-none-any.whl → 0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- climate_ref_core/dataset_registry.py
+++ climate_ref_core/dataset_registry.py
@@ -12,6 +12,7 @@ import pathlib
 import shutil
 
 import pooch
+import pooch.hashes
 from loguru import logger
 from rich.progress import track
 
@@ -20,11 +21,59 @@ from climate_ref_core.env import env
 DATASET_URL = env.str("REF_DATASET_URL", default="https://pub-b093171261094c4ea9adffa01f94ee06.r2.dev")
 
 
+def _verify_hash_matches(fname: str | pathlib.Path, known_hash: str) -> bool:
+    """
+    Check if the hash of a file matches a known hash.
+
+    Converts hashes to lowercase before comparison to avoid system-specific
+    mismatches between hashes in the registry and computed hashes.
+
+    This is a tweaked version of the `pooch.hashes.hash_matches` function with a custom error message.
+
+    Parameters
+    ----------
+    fname
+        The path to the file.
+    known_hash
+        The known hash. Optionally, prepend ``alg:`` to the hash to specify the
+        hashing algorithm. Default is SHA256.
+
+    Raises
+    ------
+    ValueError
+        If the hash does not match.
+    FileNotFoundError
+        If the file does not exist.
+
+    Returns
+    -------
+    bool
+        True if the hash matches.
+    """
+    fname = pathlib.Path(fname)
+
+    if not fname.exists():
+        raise FileNotFoundError(f"File {fname!s} does not exist. Cannot verify hash.")
+
+    algorithm = pooch.hashes.hash_algorithm(known_hash)
+    new_hash = pooch.hashes.file_hash(str(fname), alg=algorithm)
+    matches = new_hash.lower() == known_hash.split(":")[-1].lower()
+    if not matches:
+        raise ValueError(
+            f"{algorithm.upper()} hash of downloaded file ({fname!s}) does not match"
+            f" the known hash: expected {known_hash} but got {new_hash}. "
+            f"The file may have been corrupted or the known hash may be outdated. "
+            f"Delete the file and try again."
+        )
+    return matches
+
+
 def fetch_all_files(
     registry: pooch.Pooch,
     name: str,
     output_dir: pathlib.Path | None,
     symlink: bool = False,
+    verify: bool = True,
 ) -> None:
     """
     Fetch all files associated with a pooch registry and write them to an output directory.
@@ -49,12 +98,17 @@ def fetch_all_files
     symlink
         If True, symlink all files to this directory.
         Otherwise, perform a copy.
+    verify
+        If True, verify the checksums of the local files against the registry.
     """
     if output_dir:
         output_dir.mkdir(parents=True, exist_ok=True)
 
     for key in track(registry.registry.keys(), description=f"Fetching {name} data"):
         fetch_file = registry.fetch(key)
+        expected_hash = registry.registry[key]
+        if not isinstance(expected_hash, str) or not expected_hash:  # pragma: no cover
+            raise ValueError(f"Expected a hash for {key} but got {expected_hash}")
 
         if output_dir is None:
             # Just warm the cache and move onto the next file
@@ -72,6 +126,8 @@ def fetch_all_files
                 shutil.copy(fetch_file, linked_file)
         else:
             logger.info(f"File {linked_file} already exists. Skipping.")
+        if verify:
+            _verify_hash_matches(linked_file, expected_hash)
 
 
 class DatasetRegistryManager:
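
The dataset_registry changes above add an optional checksum verification step to `fetch_all_files`. A minimal sketch of how the new `verify` flag might be used follows; the `climate_ref_core.dataset_registry` import path and the registry contents are assumptions for illustration, not documented API.

    # Sketch only: a hypothetical pooch registry with one file; the hash is a placeholder.
    import pathlib

    import pooch

    from climate_ref_core.dataset_registry import fetch_all_files

    registry = pooch.create(
        path=pooch.os_cache("climate_ref"),
        base_url="https://example.invalid/data/",
        registry={"sample.nc": "sha256:<known-hash>"},  # filename -> expected hash
    )

    # Downloads (or reuses cached) files, copies them into ./data and, because
    # verify=True, re-hashes each local copy against the registry entry.
    fetch_all_files(registry, name="sample", output_dir=pathlib.Path("data"), symlink=False, verify=True)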

--- climate_ref_core/exceptions.py
+++ climate_ref_core/exceptions.py
@@ -53,3 +53,11 @@ class ExecutionError(RefException):
 
     def __init__(self, message: str) -> None:
         super().__init__(message)
+
+
+class DiagnosticError(RefException):
+    """Error from diagnostic computing"""
+
+    def __init__(self, message: str, result: Any):
+        super().__init__(message)
+        self.result = result

--- climate_ref_core/executor.py
+++ climate_ref_core/executor.py
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
 from loguru import logger
 
 from climate_ref_core.diagnostics import ExecutionDefinition, ExecutionResult
-from climate_ref_core.exceptions import InvalidExecutorException
+from climate_ref_core.exceptions import DiagnosticError, InvalidExecutorException
 from climate_ref_core.logging import redirect_logs
 
 if TYPE_CHECKING:
@@ -20,6 +20,7 @@ if TYPE_CHECKING:
 def execute_locally(
     definition: ExecutionDefinition,
     log_level: str,
+    raise_error: bool = False,
 ) -> ExecutionResult:
     """
     Run a diagnostic execution
@@ -46,10 +47,15 @@ def execute_locally(
 
         with redirect_logs(definition, log_level):
             return definition.diagnostic.run(definition=definition)
-    except Exception:
+    except Exception as e:
         # If the diagnostic fails, we want to log the error and return a failure result
         logger.exception(f"Error running {definition.execution_slug()!r}")
-        return ExecutionResult.build_from_failure(definition)
+        result = ExecutionResult.build_from_failure(definition)
+
+        if raise_error:
+            raise DiagnosticError(str(e), result) from e
+        else:
+            return result
 
 
 @runtime_checkable
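
The executor changes let a caller opt into an exception that still carries the failure result instead of a silently returned failure. A hedged sketch, assuming `execute_locally` is importable from `climate_ref_core.executor` and that `definition` is an already-built `ExecutionDefinition`:

    from climate_ref_core.exceptions import DiagnosticError
    from climate_ref_core.executor import execute_locally

    try:
        result = execute_locally(definition, log_level="INFO", raise_error=True)
    except DiagnosticError as exc:
        # The failed ExecutionResult is attached to the exception
        result = exc.result
        print(f"Diagnostic failed: {exc}")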

--- climate_ref_core/logging.py
+++ climate_ref_core/logging.py
@@ -2,13 +2,16 @@
 Logging utilities
 
 The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework.
+The log level and format are configured via the REF configuration file.
 """
 
 import contextlib
 import inspect
 import logging
+import multiprocessing
 import sys
 from collections.abc import Generator
+from pathlib import Path
 from typing import Any
 
 import pooch
@@ -24,6 +27,28 @@ Filename for the execution log.
 This file is written via [climate_ref_core.logging.redirect_logs][].
 """
 
+DEFAULT_LOG_FORMAT = (
+    "<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z}</green> | <level>{level: <8}</level> | "
+    "<cyan>{name}</cyan> - <level>{message}</level>"
+)
+"""
+Default log format used by the REF
+"""
+
+VERBOSE_LOG_FORMAT = (
+    "<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z} e{elapsed}s</green> | "
+    "<level>{level: <8}</level> | "
+    "{process.name}:{process.id} | "
+    "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
+    "<level>{message}</level>"
+)
+"""
+The verbose log format is used for debugging and development.
+
+This is the format that is used when writing the log messages to file for later debugging.
+It contains information about the process and function that the log message was generated in.
+"""
+
 
 class _InterceptHandler(logging.Handler):
     def emit(self, record: logging.LogRecord) -> None:
@@ -43,6 +68,35 @@ class _InterceptHandler(logging.Handler):
         logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
 
 
+def initialise_logging(level: int | str, format: str, log_directory: str | Path) -> None:  # noqa: A002 # pragma: no cover
+    """
+    Initialise the logging for the REF
+
+    This sets up the default log handler and configures the REF logger.
+    """
+    capture_logging()
+    log_directory = Path(log_directory)
+    process_name = multiprocessing.current_process().name
+
+    # Remove any existing handlers
+    logger.remove()
+
+    # Write out debug logs to a file
+    log_directory.mkdir(parents=True, exist_ok=True)
+    filename = f"climate-ref_{{time:YYYY-MM-DD_HH-mm}}_{process_name}.log"
+    logger.add(
+        sink=log_directory / filename,
+        retention=10,
+        level="DEBUG",
+        format=VERBOSE_LOG_FORMAT,
+        colorize=False,
+    )
+    logger.info("Starting REF logging")
+    logger.info(f"arguments: {sys.argv}")
+
+    add_log_handler(level=level, format=format, colorize=True)
+
+
 def capture_logging() -> None:
     """
     Capture logging from the standard library and redirect it to Loguru
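
A minimal sketch of wiring up the new `initialise_logging` entry point; the `climate_ref_core.logging` import path is taken from this diff, while the level and log directory are illustrative assumptions:

    from climate_ref_core.logging import DEFAULT_LOG_FORMAT, initialise_logging

    # Console output at INFO using the default format; a per-process DEBUG log file
    # is also written into the given directory using VERBOSE_LOG_FORMAT.
    initialise_logging(level="INFO", format=DEFAULT_LOG_FORMAT, log_directory="logs")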
@@ -56,6 +110,7 @@ def capture_logging() -> None:
     logging.basicConfig(handlers=[_InterceptHandler()], level=0, force=True)
 
     # Disable some overly verbose logs
+    logger.disable("alembic.runtime.migration")
    logger.disable("matplotlib.colorbar")
     logger.disable("matplotlib.ticker")
     logger.disable("matplotlib.font_manager")
@@ -154,4 +209,4 @@ def redirect_logs(definition: ExecutionDefinition, log_level: str) -> Generator[
         add_log_handler(**logger.default_handler_kwargs)  # type: ignore[attr-defined]
 
 
-__all__ = ["EXECUTION_LOG_FILENAME", "add_log_handler", "capture_logging", "logger", "redirect_logs"]
+__all__ = ["EXECUTION_LOG_FILENAME", "capture_logging", "initialise_logging", "redirect_logs"]

--- climate_ref_core/pycmec/controlled_vocabulary.py
+++ climate_ref_core/pycmec/controlled_vocabulary.py
@@ -28,8 +28,8 @@ class DimensionValue:
 
     name: str
     long_name: str
-    description: str | None
-    units: str
+    description: str | None = None
+    units: str | None = None
 
 
 @frozen
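
With `description` and `units` now defaulting to `None`, a dimension value can be declared without units, which the controlled-vocabulary YAML below relies on. A speculative sketch, assuming `DimensionValue` is importable from `climate_ref_core.pycmec.controlled_vocabulary`:

    from climate_ref_core.pycmec.controlled_vocabulary import DimensionValue

    # units and description may now be omitted; both default to None
    nhex = DimensionValue(name="NHEX", long_name="Northern Hemisphere Extra-tropics")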

--- climate_ref_core/pycmec/cv_cmip7_aft.yaml
+++ climate_ref_core/pycmec/cv_cmip7_aft.yaml
@@ -53,7 +53,15 @@ dimensions:
       - name: global
         long_name: Global
         description: "Global aggregate"
-        units: dimensionless
+      - name: NHEX
+        long_name: Northern Hemisphere Extra-tropics
+        description: "Northern Hemisphere Extra-tropics (30N-90N)"
+      - name: SHEX
+        long_name: Southern Hemisphere Extra-tropics
+        description: "Southern Hemisphere Extra-tropics (30S-90S)"
+      - name: Tropics
+        long_name: Tropics
+        description: "Tropics (30N-30S)"
   - name: season
     long_name: Season
     description: "Parts of the year from which the metric values are calculated"
@@ -63,23 +71,57 @@ dimensions:
       - name: ann
         long_name: Annual
        description: ""
-        units: dimensionless
       - name: djf
         long_name: Dec,Jan,Feb
         description: "December, January, February"
-        units: dimensionless
       - name: mam
         long_name: Mar,Apr,May
         description: "March, April, May"
-        units: dimensionless
       - name: jja
         long_name: Jun,Jul,Aug
         description: "June, July, August"
-        units: dimensionless
       - name: son
         long_name: Sep,Oct,Nov
         description: "September, October, November"
-        units: dimensionless
+  - name: mode
+    long_name: Mode of variability
+    description: "Different modes of variability that can be calculated"
+    required: false
+    allow_extra_values: false
+    values:
+      - name: NAM
+        long_name: Northern Annular Mode
+        description: Northern Annular Mode
+      - name: NAO
+        long_name: North Atlantic Oscillation
+        description: North Atlantic Oscillation
+      - name: PNA
+        long_name: Pacific–North America pattern
+        description: Pacific–North America pattern
+      - name: SAM
+        long_name: Southern Annular Mode
+        description: Southern Annular Mode
+      - name: PDO
+        long_name: Pacific decadal oscillation
+        description: Pacific decadal oscillation
+      - name: NPO
+        long_name: North Pacific Oscillation
+        description: North Pacific Oscillation
+      - name: NPGO
+        long_name: North Pacific Gyre Oscillation
+        description: North Pacific Gyre Oscillation
+  - name: method
+    long_name: EOF Method
+    description: "Method for calculating the EOFs in PMP's mode of variability diagnostic"
+    required: false
+    allow_extra_values: false
+    values:
+      - name: cbf
+        long_name: Common Basis Function
+        description: "A projection of the leading EOFs of the reference dataset onto the model data"
+      - name: eof1
+        long_name: EOF1
+        description: "The leading EOF of the reference dataset"
   - name: statistic
     long_name: Statistic
     description: ""

--- climate_ref_core/pycmec/metric.py
+++ climate_ref_core/pycmec/metric.py
@@ -20,7 +20,7 @@ from copy import deepcopy
 from enum import Enum
 from typing import Any, cast
 
-from loguru import logger
+import numpy as np
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -190,7 +190,6 @@ class MetricResults(RootModel[Any]):
         expected_keys = set(metdims[dim_name].keys())
         if not (dict_keys.issubset(expected_keys)):
             msg = f"Unknown dimension values: {dict_keys - expected_keys} for {dim_name}"
-            logger.error(msg)
             if not ALLOW_EXTRA_KEYS:  # pragma: no cover
                 raise ValueError(f"{msg}\nExpected keys: {expected_keys}")
             else:
@@ -228,7 +227,7 @@ class StrNumDict(RootModel[Any]):
     """A class contains string key and numeric value"""
 
     model_config = ConfigDict(strict=True)
-    root: dict[str, float | int]
+    root: dict[str, float | int | None]
 
 
 def remove_dimensions(raw_metric_bundle: dict[str, Any], dimensions: str | list[str]) -> dict[str, Any]:
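
The relaxed `StrNumDict` type above now admits `None` values, and the `_walk_results` hunk that follows maps those JSON `null` entries to `NaN` rather than rejecting them. A standalone illustration of that convention, independent of the package internals:

    import json
    import math

    # JSON null parses to Python None; the metric bundle walker now emits NaN for it
    raw = json.loads('{"rmse": null, "bias": 0.2}')
    values = {k: (math.nan if v is None else v) for k, v in raw.items()}
    assert math.isnan(values["rmse"]) and values["bias"] == 0.2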
@@ -542,6 +541,13 @@ def _walk_results(
             yield ScalarMetricValue(
                 dimensions=metadata, value=value, attributes=results.get(MetricCV.ATTRIBUTES.value)
             )
+        elif value is None:
+            # Replace any None values with NaN
+            # This translates null values in JSON to Python NaN's
+            # Missing values are different from NaN values
+            yield ScalarMetricValue(
+                dimensions=metadata, value=np.nan, attributes=results.get(MetricCV.ATTRIBUTES.value)
+            )
         else:
             yield from _walk_results(dimensions[1:], value, {**metadata})
 

--- climate_ref_core-0.5.5.dist-info/METADATA
+++ climate_ref_core-0.6.1.dist-info/METADATA
@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: climate-ref-core
-Version: 0.5.5
+Version: 0.6.1
 Summary: Core library for the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
-License: Apache-2.0
+License-Expression: Apache-2.0
 License-File: LICENCE
 License-File: NOTICE
 Classifier: Development Status :: 3 - Alpha
@@ -29,6 +29,7 @@ Requires-Dist: pydantic>=2.10.6
 Requires-Dist: requests
 Requires-Dist: rich
 Requires-Dist: ruamel-yaml>=0.18
+Requires-Dist: setuptools>=75.8.0
 Requires-Dist: typing-extensions
 Description-Content-Type: text/markdown
 

--- climate_ref_core-0.5.5.dist-info/RECORD
+++ climate_ref_core-0.6.1.dist-info/RECORD
@@ -1,24 +1,24 @@
 climate_ref_core/__init__.py,sha256=MtmPThF2F9_2UODEN6rt1x30LDxrHIZ0wyRN_wsHx5I,127
 climate_ref_core/constraints.py,sha256=QOqMh5jDBxdWTnQw2HNBizJQDF6Uu97rfJp9WudQWHc,11819
-climate_ref_core/dataset_registry.py,sha256=cSzSKcz2juXNQAHO1SEl6e576ZWUuNbLTzrANyDnX3o,5247
+climate_ref_core/dataset_registry.py,sha256=mkes7Pgz_zaS_t_BidiDzSd8dmqAvjx2MlgoFMAhn20,7192
 climate_ref_core/datasets.py,sha256=TK50WQwTfbase26s8wPEGEN1BwcedrOd8nk6IlEf3Ww,6124
 climate_ref_core/diagnostics.py,sha256=5KCtHuhToSpATqjW4HBi56PsOxT5WX4VkqoZPUvYR60,18769
 climate_ref_core/env.py,sha256=Ph2dejVxTELfP3bL0xES086WLGvV5H6KvsOwCkL6m-k,753
-climate_ref_core/exceptions.py,sha256=psdipWURLyMq5hmloGxt-8kyqEe0IsENfraok7KTi8I,1437
-climate_ref_core/executor.py,sha256=NIXIU2rwMnTOR-ztlPlCD-poZO4vxzKQPWYk8veTVkk,5195
-climate_ref_core/logging.py,sha256=EBe5WAk1dtosr8MLkG-i7iDNZTI9ufxI4xsvbq3Gdt8,5260
+climate_ref_core/exceptions.py,sha256=aC_wohLCjOUarZM2VvpGdJzPvYdT31h_MRVbqVk-MIk,1633
+climate_ref_core/executor.py,sha256=QiVOca-d9JxKIktQIinQQYZGr3ecV5mL3nvUwCdMiJQ,5372
+climate_ref_core/logging.py,sha256=cg6CK2DHGjyLaoRJm75p-Ja82hnVhBBQ4riOKk3l9XY,7063
 climate_ref_core/providers.py,sha256=by_ZtoLQgg9A60CbFor2-i5EixtZTZ0z8jQqOGRfvA8,12461
 climate_ref_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 climate_ref_core/metric_values/__init__.py,sha256=aHfwRrqzLOmmaBKf1-4q97DnHb8KwmW0Dhwd79ZQiNQ,634
 climate_ref_core/metric_values/typing.py,sha256=2DpzmjqQ7tqOPAyjthZ_O14c0-MhiYt-A_n9p6-bOao,1903
 climate_ref_core/pycmec/README.md,sha256=PzkovlPpsXqFopsYzz5GRvCAipNRGO1Wo-0gc17qr2Y,36
 climate_ref_core/pycmec/__init__.py,sha256=hXvKGEJQWyAp1i-ndr3D4zuYxkRhcR2LfXgFXlhYOk4,28
-climate_ref_core/pycmec/controlled_vocabulary.py,sha256=xio_4jl6mM_WMrwyxo70d0G5dUeIal4IW7eV-EMW4mU,5093
-climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=nf7T1S8WJ8ja31DG3E5mxKq3aluF_mwICf38mlFtRDM,2849
-climate_ref_core/pycmec/metric.py,sha256=XXM5DMk0BhpKcPvvCHCcgA6jKoVGMqXcwiG1UerYYps,18181
+climate_ref_core/pycmec/controlled_vocabulary.py,sha256=ThdTBsSmUemyXSQG5RycN31qp93LfqIyPqvI8x5cdLI,5114
+climate_ref_core/pycmec/cv_cmip7_aft.yaml,sha256=gx5QyW88pZQVUfiYXmsJtJO6AJg6NbIZgdU4vDIa3fE,4390
+climate_ref_core/pycmec/metric.py,sha256=zymXoutnjbdcxvG_fMJugFLLcBrfSPG0XoV-2tA0ujA,18499
 climate_ref_core/pycmec/output.py,sha256=4-RQ439sfgNLeQZVDPB1pewF_kTwX7nCK0Z4U6bvbd0,5709
-climate_ref_core-0.5.5.dist-info/METADATA,sha256=Hevjf3ZdlpIGHKhFFJDOtDX7BtEOUys35nM42Jru2E4,2889
-climate_ref_core-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-climate_ref_core-0.5.5.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-climate_ref_core-0.5.5.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
-climate_ref_core-0.5.5.dist-info/RECORD,,
+climate_ref_core-0.6.1.dist-info/METADATA,sha256=fVjjH-4zZEbuoFw47Sqo1TRFAWwwiz6BZetTD88xQLE,2934
+climate_ref_core-0.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+climate_ref_core-0.6.1.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+climate_ref_core-0.6.1.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
+climate_ref_core-0.6.1.dist-info/RECORD,,