climate-ref-core 0.5.4__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/PKG-INFO +6 -3
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/pyproject.toml +10 -8
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/dataset_registry.py +60 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/datasets.py +6 -8
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/logging.py +56 -1
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/controlled_vocabulary.py +2 -2
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/cv_cmip7_aft.yaml +53 -6
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/metric.py +9 -3
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_dataset_registry/test_dataset_registry.py +55 -7
- climate_ref_core-0.6.0/tests/unit/test_datasets/dataset_collection_hash.yml +2 -0
- climate_ref_core-0.6.0/tests/unit/test_datasets/metric_dataset_hash.yml +2 -0
- climate_ref_core-0.5.4/tests/unit/test_datasets/dataset_collection_hash.yml +0 -2
- climate_ref_core-0.5.4/tests/unit/test_datasets/metric_dataset_hash.yml +0 -2
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/.gitignore +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/LICENCE +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/NOTICE +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/README.md +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/__init__.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/constraints.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/diagnostics.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/env.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/exceptions.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/executor.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/metric_values/__init__.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/metric_values/typing.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/providers.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/py.typed +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/README.md +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/__init__.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/output.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/metric_values/test_typing.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/cmec_metric_sample.json +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/cmec_output_sample.json +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/cv_sample.yaml +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/test_metric_json_schema.yml +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/test_output_json_schema.yml +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/conftest.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/test_cmec_metric.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/test_cmec_output.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/test_controlled_vocabulary.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_constraints.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_datasets/dataset_collection_obs4mips_hash.yml +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_datasets.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_exceptions.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_executor.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_logging.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_metrics.py +0 -0
- {climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/test_providers.py +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: climate-ref-core
|
|
3
|
-
Version: 0.5.4
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Core library for the CMIP Rapid Evaluation Framework
|
|
5
5
|
Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
|
|
6
|
-
License: Apache-2.0
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
7
|
License-File: LICENCE
|
|
8
8
|
License-File: NOTICE
|
|
9
|
-
Classifier: Development Status ::
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
10
11
|
Classifier: Intended Audience :: Science/Research
|
|
11
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
13
|
Classifier: Operating System :: OS Independent
|
|
@@ -18,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
18
19
|
Classifier: Topic :: Scientific/Engineering
|
|
19
20
|
Requires-Python: >=3.11
|
|
20
21
|
Requires-Dist: attrs>=23.2.0
|
|
22
|
+
Requires-Dist: cattrs>=24.1
|
|
21
23
|
Requires-Dist: environs>=11
|
|
22
24
|
Requires-Dist: loguru>=0.7.0
|
|
23
25
|
Requires-Dist: numpy>=1.25.0
|
|
@@ -27,6 +29,7 @@ Requires-Dist: pydantic>=2.10.6
|
|
|
27
29
|
Requires-Dist: requests
|
|
28
30
|
Requires-Dist: rich
|
|
29
31
|
Requires-Dist: ruamel-yaml>=0.18
|
|
32
|
+
Requires-Dist: setuptools>=75.8.0
|
|
30
33
|
Requires-Dist: typing-extensions
|
|
31
34
|
Description-Content-Type: text/markdown
|
|
32
35
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "climate-ref-core"
|
|
3
|
-
version = "0.5.4"
|
|
3
|
+
version = "0.6.0"
|
|
4
4
|
description = "Core library for the CMIP Rapid Evaluation Framework"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -12,21 +12,24 @@ authors = [
|
|
|
12
12
|
{ name = "Nathan Collier", email = "collierno@ornl.gov" },
|
|
13
13
|
{ name = "Dora Hegedus", email = "dora.hegedus@stfc.ac.uk" },
|
|
14
14
|
]
|
|
15
|
+
license = "Apache-2.0"
|
|
15
16
|
requires-python = ">=3.11"
|
|
16
17
|
classifiers = [
|
|
17
|
-
"Development Status ::
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
18
19
|
"Operating System :: OS Independent",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
19
21
|
"Intended Audience :: Science/Research",
|
|
20
|
-
"License :: OSI Approved :: Apache Software License",
|
|
21
22
|
"Programming Language :: Python",
|
|
22
23
|
"Programming Language :: Python :: 3",
|
|
23
24
|
"Programming Language :: Python :: 3.11",
|
|
24
25
|
"Programming Language :: Python :: 3.12",
|
|
25
26
|
"Programming Language :: Python :: 3.13",
|
|
26
27
|
"Topic :: Scientific/Engineering",
|
|
28
|
+
"License :: OSI Approved :: Apache Software License",
|
|
27
29
|
]
|
|
28
30
|
dependencies = [
|
|
29
31
|
"attrs>=23.2.0",
|
|
32
|
+
"cattrs>=24.1",
|
|
30
33
|
"pydantic>=2.10.6",
|
|
31
34
|
"typing_extensions",
|
|
32
35
|
"requests",
|
|
@@ -35,6 +38,8 @@ dependencies = [
|
|
|
35
38
|
"pooch>=1.8.0,<2",
|
|
36
39
|
"ruamel.yaml>=0.18",
|
|
37
40
|
"environs>=11",
|
|
41
|
+
# Not used directly, but required to support some installations
|
|
42
|
+
"setuptools>=75.8.0",
|
|
38
43
|
|
|
39
44
|
# SPEC 0000 constraints
|
|
40
45
|
# We follow [SPEC-0000](https://scientific-python.org/specs/spec-0000/)
|
|
@@ -43,11 +48,8 @@ dependencies = [
|
|
|
43
48
|
"numpy>=1.25.0"
|
|
44
49
|
]
|
|
45
50
|
|
|
46
|
-
[
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
[tool.uv]
|
|
50
|
-
dev-dependencies = [
|
|
51
|
+
[dependency-groups]
|
|
52
|
+
dev = [
|
|
51
53
|
"types-requests",
|
|
52
54
|
]
|
|
53
55
|
|
|
@@ -12,15 +12,68 @@ import pathlib
|
|
|
12
12
|
import shutil
|
|
13
13
|
|
|
14
14
|
import pooch
|
|
15
|
+
import pooch.hashes
|
|
15
16
|
from loguru import logger
|
|
16
17
|
from rich.progress import track
|
|
17
18
|
|
|
19
|
+
from climate_ref_core.env import env
|
|
20
|
+
|
|
21
|
+
DATASET_URL = env.str("REF_DATASET_URL", default="https://pub-b093171261094c4ea9adffa01f94ee06.r2.dev")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _verify_hash_matches(fname: str | pathlib.Path, known_hash: str) -> bool:
|
|
25
|
+
"""
|
|
26
|
+
Check if the hash of a file matches a known hash.
|
|
27
|
+
|
|
28
|
+
Converts hashes to lowercase before comparison to avoid system-specific
|
|
29
|
+
mismatches between hashes in the registry and computed hashes.
|
|
30
|
+
|
|
31
|
+
This is a tweaked version of the `pooch.hashes.hash_matches` function with a custom error message.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
fname
|
|
36
|
+
The path to the file.
|
|
37
|
+
known_hash
|
|
38
|
+
The known hash. Optionally, prepend ``alg:`` to the hash to specify the
|
|
39
|
+
hashing algorithm. Default is SHA256.
|
|
40
|
+
|
|
41
|
+
Raises
|
|
42
|
+
------
|
|
43
|
+
ValueError
|
|
44
|
+
If the hash does not match.
|
|
45
|
+
FileNotFoundError
|
|
46
|
+
If the file does not exist.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
bool
|
|
51
|
+
True if the hash matches.
|
|
52
|
+
"""
|
|
53
|
+
fname = pathlib.Path(fname)
|
|
54
|
+
|
|
55
|
+
if not fname.exists():
|
|
56
|
+
raise FileNotFoundError(f"File {fname!s} does not exist. Cannot verify hash.")
|
|
57
|
+
|
|
58
|
+
algorithm = pooch.hashes.hash_algorithm(known_hash)
|
|
59
|
+
new_hash = pooch.hashes.file_hash(str(fname), alg=algorithm)
|
|
60
|
+
matches = new_hash.lower() == known_hash.split(":")[-1].lower()
|
|
61
|
+
if not matches:
|
|
62
|
+
raise ValueError(
|
|
63
|
+
f"{algorithm.upper()} hash of downloaded file ({fname!s}) does not match"
|
|
64
|
+
f" the known hash: expected {known_hash} but got {new_hash}. "
|
|
65
|
+
f"The file may have been corrupted or the known hash may be outdated. "
|
|
66
|
+
f"Delete the file and try again."
|
|
67
|
+
)
|
|
68
|
+
return matches
|
|
69
|
+
|
|
18
70
|
|
|
19
71
|
def fetch_all_files(
|
|
20
72
|
registry: pooch.Pooch,
|
|
21
73
|
name: str,
|
|
22
74
|
output_dir: pathlib.Path | None,
|
|
23
75
|
symlink: bool = False,
|
|
76
|
+
verify: bool = True,
|
|
24
77
|
) -> None:
|
|
25
78
|
"""
|
|
26
79
|
Fetch all files associated with a pooch registry and write them to an output directory.
|
|
@@ -45,12 +98,17 @@ def fetch_all_files(
|
|
|
45
98
|
symlink
|
|
46
99
|
If True, symlink all files to this directory.
|
|
47
100
|
Otherwise, perform a copy.
|
|
101
|
+
verify
|
|
102
|
+
If True, verify the checksums of the local files against the registry.
|
|
48
103
|
"""
|
|
49
104
|
if output_dir:
|
|
50
105
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
51
106
|
|
|
52
107
|
for key in track(registry.registry.keys(), description=f"Fetching {name} data"):
|
|
53
108
|
fetch_file = registry.fetch(key)
|
|
109
|
+
expected_hash = registry.registry[key]
|
|
110
|
+
if not isinstance(expected_hash, str) or not expected_hash: # pragma: no cover
|
|
111
|
+
raise ValueError(f"Expected a hash for {key} but got {expected_hash}")
|
|
54
112
|
|
|
55
113
|
if output_dir is None:
|
|
56
114
|
# Just warm the cache and move onto the next file
|
|
@@ -68,6 +126,8 @@ def fetch_all_files(
|
|
|
68
126
|
shutil.copy(fetch_file, linked_file)
|
|
69
127
|
else:
|
|
70
128
|
logger.info(f"File {linked_file} already exists. Skipping.")
|
|
129
|
+
if verify:
|
|
130
|
+
_verify_hash_matches(linked_file, expected_hash)
|
|
71
131
|
|
|
72
132
|
|
|
73
133
|
class DatasetRegistryManager:
|
|
@@ -5,7 +5,7 @@ Dataset management and filtering
|
|
|
5
5
|
import enum
|
|
6
6
|
import functools
|
|
7
7
|
import hashlib
|
|
8
|
-
from collections.abc import Iterable
|
|
8
|
+
from collections.abc import Collection, Iterable
|
|
9
9
|
from typing import Any, Self
|
|
10
10
|
|
|
11
11
|
import pandas as pd
|
|
@@ -48,19 +48,17 @@ class SourceDatasetType(enum.Enum):
|
|
|
48
48
|
return sorted(cls, key=lambda x: x.value)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def _clean_facets(raw_values: dict[str, str |
|
|
51
|
+
def _clean_facets(raw_values: dict[str, str | Collection[str]]) -> dict[str, tuple[str, ...]]:
|
|
52
52
|
"""
|
|
53
53
|
Clean the value of a facet filter to a tuple of strings
|
|
54
54
|
"""
|
|
55
|
-
result = {}
|
|
55
|
+
result: dict[str, tuple[str, ...]] = {}
|
|
56
56
|
|
|
57
57
|
for key, value in raw_values.items():
|
|
58
|
-
if isinstance(value,
|
|
59
|
-
result[key] = tuple(value)
|
|
60
|
-
elif isinstance(value, str):
|
|
58
|
+
if isinstance(value, str):
|
|
61
59
|
result[key] = (value,)
|
|
62
|
-
|
|
63
|
-
result[key] = value
|
|
60
|
+
else:
|
|
61
|
+
result[key] = tuple(value)
|
|
64
62
|
return result
|
|
65
63
|
|
|
66
64
|
|
|
@@ -2,13 +2,16 @@
|
|
|
2
2
|
Logging utilities
|
|
3
3
|
|
|
4
4
|
The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework.
|
|
5
|
+
The log level and format are configured via the REF configuration file.
|
|
5
6
|
"""
|
|
6
7
|
|
|
7
8
|
import contextlib
|
|
8
9
|
import inspect
|
|
9
10
|
import logging
|
|
11
|
+
import multiprocessing
|
|
10
12
|
import sys
|
|
11
13
|
from collections.abc import Generator
|
|
14
|
+
from pathlib import Path
|
|
12
15
|
from typing import Any
|
|
13
16
|
|
|
14
17
|
import pooch
|
|
@@ -24,6 +27,28 @@ Filename for the execution log.
|
|
|
24
27
|
This file is written via [climate_ref_core.logging.redirect_logs][].
|
|
25
28
|
"""
|
|
26
29
|
|
|
30
|
+
DEFAULT_LOG_FORMAT = (
|
|
31
|
+
"<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z}</green> | <level>{level: <8}</level> | "
|
|
32
|
+
"<cyan>{name}</cyan> - <level>{message}</level>"
|
|
33
|
+
)
|
|
34
|
+
"""
|
|
35
|
+
Default log format used by the REF
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
VERBOSE_LOG_FORMAT = (
|
|
39
|
+
"<green>{time:YYYY-MM-DD HH:mm:ss.SSS Z} e{elapsed}s</green> | "
|
|
40
|
+
"<level>{level: <8}</level> | "
|
|
41
|
+
"{process.name}:{process.id} | "
|
|
42
|
+
"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
|
|
43
|
+
"<level>{message}</level>"
|
|
44
|
+
)
|
|
45
|
+
"""
|
|
46
|
+
The verbose log format is used for debugging and development.
|
|
47
|
+
|
|
48
|
+
This is the format that is used when writing the log messages to file for later debugging.
|
|
49
|
+
It contains information about the process and function that the log message was generated in.
|
|
50
|
+
"""
|
|
51
|
+
|
|
27
52
|
|
|
28
53
|
class _InterceptHandler(logging.Handler):
|
|
29
54
|
def emit(self, record: logging.LogRecord) -> None:
|
|
@@ -43,6 +68,35 @@ class _InterceptHandler(logging.Handler):
|
|
|
43
68
|
logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
|
|
44
69
|
|
|
45
70
|
|
|
71
|
+
def initialise_logging(level: int | str, format: str, log_directory: str | Path) -> None: # noqa: A002 # pragma: no cover
|
|
72
|
+
"""
|
|
73
|
+
Initialise the logging for the REF
|
|
74
|
+
|
|
75
|
+
This sets up the default log handler and configures the REF logger.
|
|
76
|
+
"""
|
|
77
|
+
capture_logging()
|
|
78
|
+
log_directory = Path(log_directory)
|
|
79
|
+
process_name = multiprocessing.current_process().name
|
|
80
|
+
|
|
81
|
+
# Remove any existing handlers
|
|
82
|
+
logger.remove()
|
|
83
|
+
|
|
84
|
+
# Write out debug logs to a file
|
|
85
|
+
log_directory.mkdir(parents=True, exist_ok=True)
|
|
86
|
+
filename = f"climate-ref_{{time:YYYY-MM-DD_HH-mm}}_{process_name}.log"
|
|
87
|
+
logger.add(
|
|
88
|
+
sink=log_directory / filename,
|
|
89
|
+
retention=10,
|
|
90
|
+
level="DEBUG",
|
|
91
|
+
format=VERBOSE_LOG_FORMAT,
|
|
92
|
+
colorize=False,
|
|
93
|
+
)
|
|
94
|
+
logger.info("Starting REF logging")
|
|
95
|
+
logger.info(f"arguments: {sys.argv}")
|
|
96
|
+
|
|
97
|
+
add_log_handler(level=level, format=format, colorize=True)
|
|
98
|
+
|
|
99
|
+
|
|
46
100
|
def capture_logging() -> None:
|
|
47
101
|
"""
|
|
48
102
|
Capture logging from the standard library and redirect it to Loguru
|
|
@@ -56,6 +110,7 @@ def capture_logging() -> None:
|
|
|
56
110
|
logging.basicConfig(handlers=[_InterceptHandler()], level=0, force=True)
|
|
57
111
|
|
|
58
112
|
# Disable some overly verbose logs
|
|
113
|
+
logger.disable("alembic.runtime.migration")
|
|
59
114
|
logger.disable("matplotlib.colorbar")
|
|
60
115
|
logger.disable("matplotlib.ticker")
|
|
61
116
|
logger.disable("matplotlib.font_manager")
|
|
@@ -154,4 +209,4 @@ def redirect_logs(definition: ExecutionDefinition, log_level: str) -> Generator[
|
|
|
154
209
|
add_log_handler(**logger.default_handler_kwargs) # type: ignore[attr-defined]
|
|
155
210
|
|
|
156
211
|
|
|
157
|
-
__all__ = ["EXECUTION_LOG_FILENAME", "
|
|
212
|
+
__all__ = ["EXECUTION_LOG_FILENAME", "capture_logging", "initialise_logging", "redirect_logs"]
|
{climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/pycmec/cv_cmip7_aft.yaml
RENAMED
|
@@ -24,6 +24,11 @@ dimensions:
|
|
|
24
24
|
description: "Variable ID for the reference dataset (e.g., tas, pr, etc.)"
|
|
25
25
|
allow_extra_values: true
|
|
26
26
|
required: false
|
|
27
|
+
- name: grid_label
|
|
28
|
+
long_name: Grid Label
|
|
29
|
+
description: "The grid label of the output (e.g gn)"
|
|
30
|
+
allow_extra_values: true
|
|
31
|
+
required: false
|
|
27
32
|
- name: member_id
|
|
28
33
|
long_name: Member ID
|
|
29
34
|
description: "Unique identifier for each ensemble member, includes the variant label and sub-experiment if present"
|
|
@@ -48,7 +53,15 @@ dimensions:
|
|
|
48
53
|
- name: global
|
|
49
54
|
long_name: Global
|
|
50
55
|
description: "Global aggregate"
|
|
51
|
-
|
|
56
|
+
- name: NHEX
|
|
57
|
+
long_name: Northern Hemisphere Extra-tropics
|
|
58
|
+
description: "Northern Hemisphere Extra-tropics (30N-90N)"
|
|
59
|
+
- name: SHEX
|
|
60
|
+
long_name: Southern Hemisphere Extra-tropics
|
|
61
|
+
description: "Southern Hemisphere Extra-tropics (30S-90S)"
|
|
62
|
+
- name: Tropics
|
|
63
|
+
long_name: Tropics
|
|
64
|
+
description: "Tropics (30N-30S)"
|
|
52
65
|
- name: season
|
|
53
66
|
long_name: Season
|
|
54
67
|
description: "Parts of the year from which the metric values are calculated"
|
|
@@ -58,23 +71,57 @@ dimensions:
|
|
|
58
71
|
- name: ann
|
|
59
72
|
long_name: Annual
|
|
60
73
|
description: ""
|
|
61
|
-
units: dimensionless
|
|
62
74
|
- name: djf
|
|
63
75
|
long_name: Dec,Jan,Feb
|
|
64
76
|
description: "December, January, February"
|
|
65
|
-
units: dimensionless
|
|
66
77
|
- name: mam
|
|
67
78
|
long_name: Mar,Apr,May
|
|
68
79
|
description: "March, April, May"
|
|
69
|
-
units: dimensionless
|
|
70
80
|
- name: jja
|
|
71
81
|
long_name: Jun,Jul,Aug
|
|
72
82
|
description: "June, July, August"
|
|
73
|
-
units: dimensionless
|
|
74
83
|
- name: son
|
|
75
84
|
long_name: Sep,Oct,Nov
|
|
76
85
|
description: "September, October, November"
|
|
77
|
-
|
|
86
|
+
- name: mode
|
|
87
|
+
long_name: Mode of variability
|
|
88
|
+
description: "Different modes of variability that can be calculated"
|
|
89
|
+
required: false
|
|
90
|
+
allow_extra_values: false
|
|
91
|
+
values:
|
|
92
|
+
- name: NAM
|
|
93
|
+
long_name: Northern Annular Mode
|
|
94
|
+
description: Northern Annular Mode
|
|
95
|
+
- name: NAO
|
|
96
|
+
long_name: North Atlantic Oscillation
|
|
97
|
+
description: North Atlantic Oscillation
|
|
98
|
+
- name: PNA
|
|
99
|
+
long_name: Pacific–North America pattern
|
|
100
|
+
description: Pacific–North America pattern
|
|
101
|
+
- name: SAM
|
|
102
|
+
long_name: Southern Annular Mode
|
|
103
|
+
description: Southern Annular Mode
|
|
104
|
+
- name: PDO
|
|
105
|
+
long_name: Pacific decadal oscillation
|
|
106
|
+
description: Pacific decadal oscillation
|
|
107
|
+
- name: NPO
|
|
108
|
+
long_name: North Pacific Oscillation
|
|
109
|
+
description: North Pacific Oscillation
|
|
110
|
+
- name: NPGO
|
|
111
|
+
long_name: North Pacific Gyre Oscillation
|
|
112
|
+
description: North Pacific Gyre Oscillation
|
|
113
|
+
- name: method
|
|
114
|
+
long_name: EOF Method
|
|
115
|
+
description: "Method for calculating the EOFs in PMP's mode of variability diagnostic"
|
|
116
|
+
required: false
|
|
117
|
+
allow_extra_values: false
|
|
118
|
+
values:
|
|
119
|
+
- name: cbf
|
|
120
|
+
long_name: Common Basis Function
|
|
121
|
+
description: "A projection of the leading EOFs of the reference dataset onto the model data"
|
|
122
|
+
- name: eof1
|
|
123
|
+
long_name: EOF1
|
|
124
|
+
description: "The leading EOF of the reference dataset"
|
|
78
125
|
- name: statistic
|
|
79
126
|
long_name: Statistic
|
|
80
127
|
description: ""
|
|
@@ -20,7 +20,7 @@ from copy import deepcopy
|
|
|
20
20
|
from enum import Enum
|
|
21
21
|
from typing import Any, cast
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
import numpy as np
|
|
24
24
|
from pydantic import (
|
|
25
25
|
BaseModel,
|
|
26
26
|
ConfigDict,
|
|
@@ -190,7 +190,6 @@ class MetricResults(RootModel[Any]):
|
|
|
190
190
|
expected_keys = set(metdims[dim_name].keys())
|
|
191
191
|
if not (dict_keys.issubset(expected_keys)):
|
|
192
192
|
msg = f"Unknown dimension values: {dict_keys - expected_keys} for {dim_name}"
|
|
193
|
-
logger.error(msg)
|
|
194
193
|
if not ALLOW_EXTRA_KEYS: # pragma: no cover
|
|
195
194
|
raise ValueError(f"{msg}\nExpected keys: {expected_keys}")
|
|
196
195
|
else:
|
|
@@ -228,7 +227,7 @@ class StrNumDict(RootModel[Any]):
|
|
|
228
227
|
"""A class contains string key and numeric value"""
|
|
229
228
|
|
|
230
229
|
model_config = ConfigDict(strict=True)
|
|
231
|
-
root: dict[str, float | int]
|
|
230
|
+
root: dict[str, float | int | None]
|
|
232
231
|
|
|
233
232
|
|
|
234
233
|
def remove_dimensions(raw_metric_bundle: dict[str, Any], dimensions: str | list[str]) -> dict[str, Any]:
|
|
@@ -542,6 +541,13 @@ def _walk_results(
|
|
|
542
541
|
yield ScalarMetricValue(
|
|
543
542
|
dimensions=metadata, value=value, attributes=results.get(MetricCV.ATTRIBUTES.value)
|
|
544
543
|
)
|
|
544
|
+
elif value is None:
|
|
545
|
+
# Replace any None values with NaN
|
|
546
|
+
# This translates null values in JSON to Python NaN values
|
|
547
|
+
# Missing values are different from NaN values
|
|
548
|
+
yield ScalarMetricValue(
|
|
549
|
+
dimensions=metadata, value=np.nan, attributes=results.get(MetricCV.ATTRIBUTES.value)
|
|
550
|
+
)
|
|
545
551
|
else:
|
|
546
552
|
yield from _walk_results(dimensions[1:], value, {**metadata})
|
|
547
553
|
|
|
@@ -5,10 +5,13 @@ import pytest
|
|
|
5
5
|
|
|
6
6
|
from climate_ref_core.dataset_registry import (
|
|
7
7
|
DatasetRegistryManager,
|
|
8
|
+
_verify_hash_matches,
|
|
8
9
|
dataset_registry_manager,
|
|
9
10
|
fetch_all_files,
|
|
10
11
|
)
|
|
11
12
|
|
|
13
|
+
NUM_OBS4REF_FILES = 67
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
@pytest.fixture
|
|
14
17
|
def fake_registry_file():
|
|
@@ -99,28 +102,73 @@ class TestDatasetRegistry:
|
|
|
99
102
|
|
|
100
103
|
|
|
101
104
|
@pytest.mark.parametrize("symlink", [True, False])
|
|
102
|
-
|
|
105
|
+
@pytest.mark.parametrize("verify", [True, False])
|
|
106
|
+
def test_fetch_all_files(mocker, tmp_path, symlink, verify):
|
|
107
|
+
mock_verify = mocker.patch("climate_ref_core.dataset_registry._verify_hash_matches")
|
|
108
|
+
|
|
103
109
|
downloaded_file = tmp_path / "out.txt"
|
|
104
110
|
downloaded_file.write_text("foo")
|
|
105
111
|
|
|
106
112
|
registry = dataset_registry_manager["obs4ref"]
|
|
107
113
|
registry.fetch = mocker.MagicMock(return_value=downloaded_file)
|
|
108
114
|
|
|
109
|
-
fetch_all_files(registry, "obs4ref", tmp_path, symlink=symlink)
|
|
110
|
-
assert registry.fetch.call_count ==
|
|
115
|
+
fetch_all_files(registry, "obs4ref", tmp_path, symlink=symlink, verify=verify)
|
|
116
|
+
assert registry.fetch.call_count == NUM_OBS4REF_FILES
|
|
111
117
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
)
|
|
118
|
+
key = "obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc"
|
|
119
|
+
expected_file = tmp_path / key
|
|
115
120
|
|
|
116
121
|
assert expected_file.exists()
|
|
117
122
|
assert expected_file.is_symlink() == symlink
|
|
118
123
|
assert expected_file.read_text() == "foo"
|
|
119
124
|
|
|
125
|
+
if verify:
|
|
126
|
+
mock_verify.assert_any_call(expected_file, registry.registry[key])
|
|
127
|
+
else:
|
|
128
|
+
mock_verify.assert_not_called()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_verify_hash_matches(mocker, tmp_path):
|
|
132
|
+
expected_hash = "sha256:expectedhashvalue"
|
|
133
|
+
|
|
134
|
+
mock_hashes = mocker.patch("climate_ref_core.dataset_registry.pooch.hashes")
|
|
135
|
+
mock_hashes.hash_algorithm.return_value = "sha256"
|
|
136
|
+
mock_hashes.file_hash.return_value = "expectedhashvalue"
|
|
137
|
+
|
|
138
|
+
file_path = tmp_path / "file.txt"
|
|
139
|
+
file_path.touch()
|
|
140
|
+
|
|
141
|
+
_verify_hash_matches(file_path, expected_hash)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_verify_hash_missing_file(tmp_path):
|
|
145
|
+
expected_hash = "sha256:expectedhashvalue"
|
|
146
|
+
|
|
147
|
+
file_path = tmp_path / "file.txt"
|
|
148
|
+
|
|
149
|
+
with pytest.raises(FileNotFoundError, match="file.txt does not exist. Cannot verify hash"):
|
|
150
|
+
_verify_hash_matches(file_path, expected_hash)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_verify_hash_differs(mocker, tmp_path):
|
|
154
|
+
expected_hash = "sha256:expectedhashvalue"
|
|
155
|
+
|
|
156
|
+
mock_hashes = mocker.patch("climate_ref_core.dataset_registry.pooch.hashes")
|
|
157
|
+
mock_hashes.hash_algorithm.return_value = "sha256"
|
|
158
|
+
mock_hashes.file_hash.return_value = "opps"
|
|
159
|
+
|
|
160
|
+
file_path = tmp_path / "file.txt"
|
|
161
|
+
file_path.touch()
|
|
162
|
+
|
|
163
|
+
with pytest.raises(
|
|
164
|
+
ValueError, match=f"does not match the known hash. expected {expected_hash} but got opps."
|
|
165
|
+
):
|
|
166
|
+
_verify_hash_matches(file_path, expected_hash)
|
|
167
|
+
|
|
120
168
|
|
|
121
169
|
def test_fetch_all_files_no_output(mocker):
|
|
122
170
|
registry = dataset_registry_manager["obs4ref"]
|
|
123
171
|
registry.fetch = mocker.MagicMock()
|
|
124
172
|
|
|
125
173
|
fetch_all_files(registry, "obs4ref", None)
|
|
126
|
-
assert registry.fetch.call_count ==
|
|
174
|
+
assert registry.fetch.call_count == NUM_OBS4REF_FILES
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/metric_values/__init__.py
RENAMED
|
File without changes
|
{climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/src/climate_ref_core/metric_values/typing.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/cmec_testdata/cv_sample.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{climate_ref_core-0.5.4 → climate_ref_core-0.6.0}/tests/unit/pycmec/test_controlled_vocabulary.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|