PyPI - model-config-tests - Versions diffs - 0.0.1__py3-none-any.whl - Mend

model-config-tests 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

model_config_tests/__init__.py +0 -0
model_config_tests/__main__.py +18 -0
model_config_tests/conftest.py +110 -0
model_config_tests/exp_test_helper.py +187 -0
model_config_tests/models/__init__.py +4 -0
model_config_tests/models/accessom2.py +90 -0
model_config_tests/models/accessom3.py +90 -0
model_config_tests/models/model.py +52 -0
model_config_tests/test_access_om2_config.py +179 -0
model_config_tests/test_bit_reproducibility.py +131 -0
model_config_tests/test_config.py +225 -0
model_config_tests/util.py +35 -0
model_config_tests-0.0.1.dist-info/LICENSE +201 -0
model_config_tests-0.0.1.dist-info/METADATA +100 -0
model_config_tests-0.0.1.dist-info/RECORD +18 -0
model_config_tests-0.0.1.dist-info/WHEEL +5 -0
model_config_tests-0.0.1.dist-info/entry_points.txt +2 -0
model_config_tests-0.0.1.dist-info/top_level.txt +1 -0

model_config_tests/__init__.py ADDED Viewed

File without changes

model_config_tests/__main__.py ADDED Viewed

@@ -0,0 +1,18 @@
+import os
+import sys
+# Running pytests using --pyargs does not run pytest_addoption in conftest.py
+# Using workaround as described here:
+# https://stackoverflow.com/questions/41270604/using-command-line-parameters-with-pytest-pyargs
+# Directory containing this module; passed to pytest as the path to collect.
+HERE = os.path.dirname(__file__)
+def main():
+    """Run pytest on this package, forwarding the command line arguments.
+
+    The interpreter exits with the value returned by ``pytest.main``.
+    """
+    import pytest
+    pytest_args = [HERE, *sys.argv[1:]]
+    sys.exit(pytest.main(pytest_args))
+if __name__ == "__main__":
+    main()

model_config_tests/conftest.py ADDED Viewed

@@ -0,0 +1,110 @@
+# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+import os
+from pathlib import Path
+import pytest
+import yaml
+from ruamel.yaml import YAML
+@pytest.fixture(scope="session")
+def output_path(request):
+    """Return the directory that holds all test output.
+
+    This contains control and lab directories for each test and test output
+    files - e.g. CHECKSUMS. The value comes from the ``--output-path``
+    command line option. When the option is not given it defaults to
+    ``$TMPDIR/test-model-repro``, falling back to the system temporary
+    directory when ``TMPDIR`` is unset or empty.
+    """
+    # Function-scope import: only this fixture needs tempfile.
+    import tempfile
+    path = request.config.getoption("--output-path")
+    if path is None:
+        # Without the fallback, an unset TMPDIR produced the literal relative
+        # path "None/test-model-repro".
+        tmp_dir = os.environ.get("TMPDIR") or tempfile.gettempdir()
+        path = Path(tmp_dir) / "test-model-repro"
+    return Path(path)
+@pytest.fixture(scope="session")
+def control_path(request):
+    """Return the path of the model configuration directory to test.
+
+    Taken from the ``--control-path`` option; defaults to the current
+    working directory when the option is not supplied.
+    """
+    requested = request.config.getoption("--control-path")
+    return Path.cwd() if requested is None else Path(requested)
+@pytest.fixture(scope="session")
+def checksum_path(request, control_path):
+    """Return the path of the checksum file to compare against.
+
+    Taken from the ``--checksum-path`` option; defaults to
+    ``testing/checksum/historical-3hr-checksum.json`` inside the model
+    configuration directory when the option is not supplied.
+    """
+    requested = request.config.getoption("--checksum-path")
+    if requested is not None:
+        return Path(requested)
+    # Default to the checksum stored with the model configuration
+    default_file = control_path / "testing" / "checksum" / "historical-3hr-checksum.json"
+    return Path(default_file)
+@pytest.fixture(scope="session")
+def metadata(control_path: Path):
+    """Load ``metadata.yaml`` from the control directory and return its content."""
+    # ruamel.yaml is used because that is what Payu uses to read metadata
+    # files; it also errors out if there are duplicate keys in the metadata
+    yaml_reader = YAML()
+    return yaml_reader.load(control_path / "metadata.yaml")
+@pytest.fixture(scope="session")
+def config(control_path: Path):
+    """Load ``config.yaml`` from the control directory and return its content."""
+    with open(control_path / "config.yaml") as config_file:
+        return yaml.safe_load(config_file)
+@pytest.fixture(scope="session")
+def target_branch(request):
+    """Return the target branch, i.e. the branch the configuration will be
+    merged into.
+
+    This is used to infer configuration information when the configuration
+    branches follow a common naming scheme (e.g. ACCESS-OM2). The value is
+    read from the ``--target-branch`` option.
+    """
+    branch = request.config.getoption("--target-branch")
+    return branch
+# Set up command line options and default for directory paths
+def pytest_addoption(parser):
+    """Register the optional command line arguments read by the fixtures."""
+    # Option flag -> help text; registered in this order
+    option_help = {
+        "--output-path": "Specify the output directory path for test output",
+        "--control-path": "Specify the model configuration path to test",
+        "--checksum-path": "Specify the checksum file to compare against",
+        "--target-branch": "Specify the target branch name",
+    }
+    for flag, help_text in option_help.items():
+        parser.addoption(flag, action="store", help=help_text)
+def pytest_configure(config):
+    """Register the custom markers used by this test suite."""
+    marker_descriptions = (
+        "slow: mark tests as slow (deselect with '-m \"not slow\"')",
+        "checksum: mark tests to run as part of reproducibility CI tests",
+        "config: mark as configuration tests in quick QA CI checks",
+        "access_om2: mark as access-om2 specific tests in quick QA CI checks",
+    )
+    for description in marker_descriptions:
+        config.addinivalue_line("markers", description)

model_config_tests/exp_test_helper.py ADDED Viewed

@@ -0,0 +1,187 @@
+# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+import glob
+import os
+import re
+import shutil
+import subprocess as sp
+import sys
+from pathlib import Path
+import yaml
+from model_config_tests.models import index as model_index
+from model_config_tests.util import wait_for_qsub
+class ExpTestHelper:
+    """Set up and run a payu experiment inside a test laboratory.
+
+    Paths are derived from the control (configuration) directory and the
+    laboratory directory: ``archive_path`` and ``work_path`` are the
+    experiment's sub-directories of the lab, and ``output000`` /
+    ``output001`` are output directories inside the archive.
+    """
+    def __init__(self, control_path: Path, lab_path: Path):
+        """Record the experiment paths, load ``config.yaml`` and set the model."""
+        self.exp_name = control_path.name
+        self.control_path = control_path
+        self.lab_path = lab_path
+        self.config_path = control_path / "config.yaml"
+        self.archive_path = lab_path / "archive" / self.exp_name
+        self.work_path = lab_path / "work" / self.exp_name
+        self.output000 = self.archive_path / "output000"
+        self.output001 = self.archive_path / "output001"
+        with open(self.config_path) as f:
+            self.config = yaml.safe_load(f)
+        self.set_model()
+    def set_model(self):
+        """Set model based on payu config. Currently only setting top-level
+        model.
+
+        Raises KeyError if the configured model name is not in the model
+        index."""
+        self.model_name = self.config.get("model")
+        ModelType = model_index[self.model_name]
+        self.model = ModelType(self)
+    def extract_checksums(
+        self, output_directory: Path = None, schema_version: str = None
+    ):
+        """Use model subclass to extract checksums from output"""
+        return self.model.extract_checksums(output_directory, schema_version)
+    def has_run(self):
+        """
+        See whether this experiment has been run, as reported by the model's
+        ``output_exists``.
+        """
+        return self.model.output_exists()
+    def setup_for_test_run(self):
+        """
+        Various config.yaml settings need to be modified in order to run in the
+        test environment. The file is rewritten in place.
+        """
+        with open(self.config_path) as f:
+            doc = yaml.safe_load(f)
+        # Disable git runlog
+        doc["runlog"] = False
+        # Disable metadata and set override experiment name for work/archive
+        # directories
+        doc["metadata"] = {"enable": False}
+        doc["experiment"] = self.exp_name
+        # Set laboratory path
+        doc["laboratory"] = str(self.lab_path)
+        with open(self.config_path, "w") as f:
+            yaml.dump(doc, f)
+    def run(self):
+        """
+        Run the experiment using payu and check output.
+        Don't do any work if it has already run.
+
+        Returns ``(0, None, None, None)`` when output already exists,
+        otherwise the result of ``force_qsub_run``.
+        """
+        if self.has_run():
+            return 0, None, None, None
+        else:
+            return self.force_qsub_run()
+    def force_qsub_run(self):
+        """
+        Run using qsub.
+
+        Calls ``payu sweep`` then ``payu run`` from the control directory and
+        waits for the submitted job and its collate job.
+
+        Returns a tuple ``(status, stdout, stderr, output_files)`` where
+        status is 0 on success, 1 if a payu command failed, 2 if exactly one
+        stdout file for the job could not be found, and 3 if the collate job
+        id could not be found in the job's stdout.
+        """
+        # Change to experiment directory and run.
+        owd = Path.cwd()
+        try:
+            os.chdir(self.control_path)
+            sp.check_output(["payu", "sweep", "--lab", self.lab_path])
+            run_id = sp.check_output(["payu", "run", "--lab", self.lab_path])
+            # The job id is taken from the first line of payu's output
+            run_id = run_id.decode().splitlines()[0]
+        except sp.CalledProcessError:
+            print("Error: call to payu run failed.", file=sys.stderr)
+            return 1, None, None, None
+        finally:
+            # Always restore the original working directory
+            os.chdir(owd)
+        wait_for_qsub(run_id)
+        # Output files are named with the job id up to the first "."
+        run_id = run_id.split(".")[0]
+        output_files = []
+        # Read qsub stdout file
+        stdout_filename = glob.glob(str(self.control_path / f"*.o{run_id}"))
+        print(stdout_filename)
+        if len(stdout_filename) != 1:
+            # Covers both the missing-file and the ambiguous-match case
+            print(
+                "Error: expected exactly one stdout file, found "
+                f"{len(stdout_filename)}.",
+                file=sys.stderr,
+            )
+            return 2, None, None, None
+        stdout_filename = stdout_filename[0]
+        output_files.append(stdout_filename)
+        stdout = ""
+        with open(stdout_filename) as f:
+            stdout = f.read()
+        # Read qsub stderr file
+        stderr_filename = glob.glob(str(self.control_path / f"*.e{run_id}"))
+        stderr = ""
+        if len(stderr_filename) == 1:
+            stderr_filename = stderr_filename[0]
+            output_files.append(stderr_filename)
+            with open(stderr_filename) as f:
+                stderr = f.read()
+        # TODO: Early return if not collating
+        # Read the qsub id of the collate job from the stdout.
+        # Payu puts this here.
+        # The dot is escaped so only a literal "<digits>.gadi-pbs" matches.
+        m = re.search(r"(\d+\.gadi-pbs)\n", stdout)
+        if m is None:
+            print("Error: could not find qsub id of collate job.", file=sys.stderr)
+            return 3, stdout, stderr, output_files
+        # Wait for the collate to complete.
+        run_id = m.group(1)
+        wait_for_qsub(run_id)
+        # Return files created by qsub so caller can read or delete.
+        collate_files = self.control_path / f"*.[oe]{run_id}"
+        output_files += glob.glob(str(collate_files))
+        return 0, stdout, stderr, output_files
+    def setup_and_run(self):
+        """Apply the test-environment config changes, then run the experiment."""
+        self.setup_for_test_run()
+        return self.run()
+def setup_exp(control_path: Path, output_path: Path, exp_name: str):
+    """
+    Create an experiment by copying over the base config.
+
+    The control directory is copied to ``<output_path>/control/<name>`` and
+    an ExpTestHelper using the lab at ``<output_path>/lab`` is returned.
+    """
+    # Prefix the experiment name with the configuration directory name,
+    # unless that directory is "base-experiment"
+    if control_path.name == "base-experiment":
+        full_name = exp_name
+    else:
+        full_name = f"{control_path.name}-{exp_name}"
+    exp_control_path = output_path / "control" / full_name
+    # Start from a fresh copy of the base control directory
+    # (e.g. model configuration)
+    if exp_control_path.exists():
+        shutil.rmtree(exp_control_path)
+    shutil.copytree(control_path, exp_control_path, symlinks=True)
+    exp = ExpTestHelper(
+        control_path=exp_control_path, lab_path=output_path / "lab"
+    )
+    # Remove any pre-existing archive or work directories for the experiment
+    for stale_dir in (exp.archive_path, exp.work_path):
+        try:
+            shutil.rmtree(stale_dir)
+        except FileNotFoundError:
+            pass
+    return exp

model_config_tests/models/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from model_config_tests.models.accessom2 import AccessOm2
+from model_config_tests.models.accessom3 import AccessOm3
+# Lookup table from model name to the class implementing that model
+index = {
+    "access-om2": AccessOm2,
+    "access-om3": AccessOm3,
+}

model_config_tests/models/accessom2.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""Specific Access-OM2 Model setup and post-processing"""
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+import f90nml
+from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model
+class AccessOm2(Model):
+    """ACCESS-OM2 specific runtime configuration and checksum extraction."""
+    def __init__(self, experiment):
+        """Record the output and configuration file paths for the experiment."""
+        super().__init__(experiment)
+        self.output_file = self.experiment.output000 / "access-om2.out"
+        self.accessom2_config = experiment.control_path / "accessom2.nml"
+        self.ocean_config = experiment.control_path / "ocean" / "input.nml"
+    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
+        """Set config files to a short time period for experiment run.
+        Default is 3 hours.
+
+        Exactly two of ``years``, ``months`` and ``seconds`` must be zero,
+        otherwise NotImplementedError is raised. The restart period in
+        ``accessom2.nml`` is rewritten in place."""
+        # Validate the arguments first so that invalid input fails before
+        # the namelist is opened or parsed
+        if sum(x == 0 for x in (years, months, seconds)) != 2:
+            raise NotImplementedError(
+                "Cannot specify runtime in seconds and years and months"
+                + " at the same time. Two of which must be zero"
+            )
+        with open(self.accessom2_config) as f:
+            nml = f90nml.read(f)
+        nml["date_manager_nml"]["restart_period"] = [years, months, seconds]
+        nml.write(self.accessom2_config, force=True)
+    def output_exists(self) -> bool:
+        """Check for existing output file"""
+        return self.output_file.exists()
+    def extract_checksums(
+        self, output_directory: Path = None, schema_version: str = None
+    ) -> dict[str, Any]:
+        """Parse output file and create checksum using defined schema.
+
+        Reads ``access-om2.out`` from ``output_directory`` when given,
+        otherwise from the experiment's first output directory. Raises
+        NotImplementedError for an unsupported ``schema_version``."""
+        if output_directory:
+            output_filename = output_directory / "access-om2.out"
+        else:
+            output_filename = self.output_file
+        # Regex pattern for checksums in the `<model>.out` file
+        # Examples:
+        # [chksum] ht              -2390360641069121536
+        # [chksum] hu               6389284661071183872
+        # [chksum] htr               928360042410663049
+        # Compiled once, outside the per-line loop
+        pattern = re.compile(r"\[chksum\]\s+(.+)\s+(-?\d+)")
+        # checksums outputted in form:
+        # {
+        #   "ht": ["-2390360641069121536"],
+        #   "hu": ["6389284661071183872"],
+        #   "htr": ["928360042410663049"]
+        # }
+        # with potential for multiple checksums for one key.
+        output_checksums: dict[str, list[Any]] = defaultdict(list)
+        with open(output_filename) as f:
+            for line in f:
+                # Check for checksum pattern match at the start of the line
+                match = pattern.match(line)
+                if match:
+                    # Extract values
+                    field = match.group(1).strip()
+                    checksum = match.group(2).strip()
+                    output_checksums[field].append(checksum)
+        if schema_version is None:
+            schema_version = self.default_schema_version
+        if schema_version == SCHEMA_VERSION_1_0_0:
+            checksums = {
+                "schema_version": schema_version,
+                "output": dict(output_checksums),
+            }
+        else:
+            raise NotImplementedError(
+                f"Unsupported checksum schema version: {schema_version}"
+            )
+        return checksums

model_config_tests/models/accessom3.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""Specific Access-OM3 Model setup and post-processing"""
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+from payu.models.cesm_cmeps import Runconfig
+from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model
+class AccessOm3(Model):
+    """ACCESS-OM3 specific runtime configuration and checksum extraction."""
+    def __init__(self, experiment):
+        """Record the output and configuration file paths for the experiment."""
+        super().__init__(experiment)
+        self.output_file = self.experiment.output000 / "ocean.stats"
+        self.runconfig = experiment.control_path / "nuopc.runconfig"
+        self.ocean_config = experiment.control_path / "input.nml"
+    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
+        """Set config files to a short time period for experiment run.
+        Default is 3 hours.
+
+        With ``years`` and ``months`` both zero the runtime is given in
+        seconds; with ``seconds`` zero it is given in months
+        (``12 * years + months``). Any other combination raises
+        NotImplementedError."""
+        runconfig = Runconfig(self.runconfig)
+        if years == months == 0:
+            freq = "nseconds"
+            n = str(seconds)
+        elif seconds == 0:
+            freq = "nmonths"
+            n = str(12 * years + months)
+        else:
+            raise NotImplementedError(
+                "Cannot specify runtime in seconds and year/months at the same time"
+            )
+        # Restart and stop settings are given the same period
+        runconfig.set("CLOCK_attributes", "restart_n", n)
+        runconfig.set("CLOCK_attributes", "restart_option", freq)
+        runconfig.set("CLOCK_attributes", "stop_n", n)
+        runconfig.set("CLOCK_attributes", "stop_option", freq)
+        runconfig.write()
+    def output_exists(self) -> bool:
+        """Check for existing output file"""
+        return self.output_file.exists()
+    def extract_checksums(
+        self, output_directory: Path = None, schema_version: str = None
+    ) -> dict[str, Any]:
+        """Parse output file and create checksum using defined schema.
+
+        Reads ``ocean.stats`` from ``output_directory`` when given, otherwise
+        from the experiment's first output directory. An empty file yields an
+        empty ``output`` mapping. Raises NotImplementedError for an
+        unsupported ``schema_version``."""
+        if output_directory:
+            output_filename = output_directory / "ocean.stats"
+        else:
+            output_filename = self.output_file
+        # ocean.stats is used for regression testing in MOM6's own test suite
+        # See https://github.com/mom-ocean/MOM6/blob/2ab885eddfc47fc0c8c0bae46bc61531104428d5/.testing/Makefile#L495-L501
+        # Rows in ocean.stats look like:
+        #      0,  693135.000,     0, En 3.0745627134675957E-23, CFL  0.00000, ...
+        # where the first three columns are Step, Day, Truncs and the remaining
+        # columns include a label for what they are (e.g. En = Energy/Mass)
+        # Header info is only included for new runs so can't be relied on
+        output_checksums: dict[str, list[Any]] = defaultdict(list)
+        with open(output_filename) as f:
+            lines = f.readlines()
+        # Skip the two header lines if they exist (for new runs); the
+        # `lines and` guard avoids an IndexError on an empty file
+        istart = 2 if lines and "Step" in lines[0] else 0
+        for line in lines[istart:]:
+            for col in line.split(","):
+                # Only keep columns with labels (ie not Step, Day, Truncs)
+                parts = re.split(" +", col.strip())
+                if len(parts) > 1:
+                    output_checksums[parts[0]].append(parts[-1])
+        if schema_version is None:
+            schema_version = self.default_schema_version
+        if schema_version == SCHEMA_VERSION_1_0_0:
+            checksums = {
+                "schema_version": schema_version,
+                "output": dict(output_checksums),
+            }
+        else:
+            raise NotImplementedError(
+                f"Unsupported checksum schema version: {schema_version}"
+            )
+        return checksums

model_config_tests/models/model.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""Generic Model class"""
+from pathlib import Path
+# Checksum schema version identifier and the URL of its JSON schema
+SCHEMA_VERSION_1_0_0 = "1-0-0"
+SCHEMA_1_0_0_URL = (
+    "https://raw.githubusercontent.com/ACCESS-NRI/schema/"
+    "7666d95967de4dfd19b0d271f167fdcfd3f46962/"
+    "au.org.access-nri/model/reproducibility/checksums/1-0-0.json"
+)
+# Lookup from schema version to schema URL
+SCHEMA_VERSION_TO_URL = {SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL}
+# Schema version used when none is requested
+DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0
+class Model:
+    """Generic model interface; model-specific subclasses override the
+    methods that raise NotImplementedError here."""
+    def __init__(self, experiment):
+        """Store the experiment and the default checksum schema settings."""
+        self.experiment = experiment
+        self.default_schema_version = DEFAULT_SCHEMA_VERSION
+        self.schema_version_to_url = SCHEMA_VERSION_TO_URL
+    def extract_checksums(self, output_directory: Path, schema_version: str):
+        """Extract checksums from output directory"""
+        raise NotImplementedError
+    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
+        """Configure model runtime"""
+        raise NotImplementedError
+    def output_exists(self):
+        """Check for existing output files"""
+        raise NotImplementedError
+    def check_checksums_over_restarts(
+        self, long_run_checksum, short_run_checksum_0, short_run_checksum_1
+    ) -> bool:
+        """Compare checksums from a long run (e.g. 2 days) against
+        checksums from 2 short runs (e.g. 1 day).
+
+        Each argument is a mapping whose ``"output"`` entry maps a field name
+        to a list of checksums. Returns True when every checksum of the long
+        run appears under the same field in either short run; each mismatch
+        is printed. The input mappings are not modified."""
+        # Merge into fresh lists so the callers' checksum data is untouched
+        short_run_checksums = {
+            field: list(checksums)
+            for field, checksums in short_run_checksum_0["output"].items()
+        }
+        for field, checksums in short_run_checksum_1["output"].items():
+            short_run_checksums.setdefault(field, []).extend(checksums)
+        matching_checksums = True
+        for field, checksums in long_run_checksum["output"].items():
+            # A field missing from both short runs matches nothing
+            known_checksums = short_run_checksums.get(field, ())
+            for checksum in checksums:
+                if checksum not in known_checksums:
+                    print(f"Unequal checksum: {field}: {checksum}")
+                    matching_checksums = False
+        return matching_checksums