model-config-tests 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,18 @@
1
+ import os
2
+ import sys
3
+
4
+ # Running pytests using --pyargs does not run pytest_addoption in conftest.py
5
+ # Using workaround as described here:
6
+ # https://stackoverflow.com/questions/41270604/using-command-line-parameters-with-pytest-pyargs
7
+ HERE = os.path.dirname(__file__)
8
+
9
+
10
def main():
    """Run pytest against this package's directory and exit with its status.

    The package directory (``HERE``) is passed to pytest first, followed by
    any arguments given on the command line.
    """
    # pytest is imported only when the entry point is actually invoked
    import pytest

    pytest_args = [HERE, *sys.argv[1:]]
    sys.exit(pytest.main(pytest_args))
15
+
16
+
17
+ if __name__ == "__main__":
18
+ main()
@@ -0,0 +1,110 @@
1
+ # Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+ import yaml
9
+ from ruamel.yaml import YAML
10
+
11
+
12
@pytest.fixture(scope="session")
def output_path(request):
    """Return the directory that holds test output.

    This contains control and lab directories for each test and test output
    files - e.g. CHECKSUMS.

    The ``--output-path`` option is used when given. Otherwise the default is
    ``<tmp>/test-model-repro`` where ``<tmp>`` is ``$TMPDIR`` if it is set and
    non-empty, else the platform temporary directory.
    """
    path = request.config.getoption("--output-path")
    if path is None:
        # Function-scope import so this fixture does not rely on a new
        # module-level import.
        import tempfile

        # Without a fallback, an unset TMPDIR would be formatted as the
        # literal string "None" and produce "None/test-model-repro".
        tmp_dir = os.environ.get("TMPDIR") or tempfile.gettempdir()
        path = Path(tmp_dir) / "test-model-repro"
    return Path(path)
23
+
24
+
25
@pytest.fixture(scope="session")
def control_path(request):
    """Return the path of the model configuration directory to test.

    Uses ``--control-path`` when supplied, otherwise the current working
    directory.
    """
    option = request.config.getoption("--control-path")
    return Path.cwd() if option is None else Path(option)
33
+
34
+
35
@pytest.fixture(scope="session")
def checksum_path(request, control_path):
    """Return the path of the checksum file to compare against.

    Uses ``--checksum-path`` when supplied, otherwise the checksum file
    stored in the model configuration under
    ``testing/checksum/historical-3hr-checksum.json``.
    """
    option = request.config.getoption("--checksum-path")
    if option is not None:
        return Path(option)
    default = control_path / "testing" / "checksum" / "historical-3hr-checksum.json"
    return Path(default)
43
+
44
+
45
@pytest.fixture(scope="session")
def metadata(control_path: Path):
    """Load ``metadata.yaml`` from the control directory."""
    # ruamel.yaml is used as that is what Payu reads metadata files with;
    # it also errors out if there are duplicate keys in the metadata.
    yaml_loader = YAML()
    return yaml_loader.load(control_path / "metadata.yaml")
53
+
54
+
55
@pytest.fixture(scope="session")
def config(control_path: Path):
    """Load ``config.yaml`` from the control directory."""
    with open(control_path / "config.yaml") as config_file:
        return yaml.safe_load(config_file)
62
+
63
+
64
@pytest.fixture(scope="session")
def target_branch(request):
    """Return the target branch, i.e. the branch the configuration will be
    merged into. This is used to infer configuration information when the
    configuration branches follow a common naming scheme (e.g. ACCESS-OM2).
    """
    branch = request.config.getoption("--target-branch")
    return branch
70
+
71
+
72
+ # Set up command line options and default for directory paths
73
def pytest_addoption(parser):
    """Attach the optional command line arguments read by the fixtures."""
    option_help = {
        "--output-path": "Specify the output directory path for test output",
        "--control-path": "Specify the model configuration path to test",
        "--checksum-path": "Specify the checksum file to compare against",
        "--target-branch": "Specify the target branch name",
    }
    # Every option just stores its value; dict ordering keeps the
    # registration order unchanged.
    for flag, help_text in option_help.items():
        parser.addoption(flag, action="store", help=help_text)
96
+
97
+
98
def pytest_configure(config):
    """Register the custom pytest markers."""
    marker_lines = (
        "slow: mark tests as slow (deselect with '-m \"not slow\"')",
        "checksum: mark tests to run as part of reproducibility CI tests",
        "config: mark as configuration tests in quick QA CI checks",
        "access_om2: mark as access-om2 specific tests in quick QA CI checks",
    )
    for marker in marker_lines:
        config.addinivalue_line("markers", marker)
@@ -0,0 +1,187 @@
1
+ # Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import glob
5
+ import os
6
+ import re
7
+ import shutil
8
+ import subprocess as sp
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ import yaml
13
+
14
+ from model_config_tests.models import index as model_index
15
+ from model_config_tests.util import wait_for_qsub
16
+
17
+
18
class ExpTestHelper:
    """Set up, run and inspect a single payu experiment for testing."""

    def __init__(self, control_path: Path, lab_path: Path):
        """Record experiment paths, load ``config.yaml`` and select the model.

        Args:
            control_path: Experiment control directory; its name is used as
                the experiment name.
            lab_path: Laboratory directory under which the ``archive`` and
                ``work`` directories for the experiment are located.
        """
        self.exp_name = control_path.name
        self.control_path = control_path
        self.lab_path = lab_path
        self.config_path = control_path / "config.yaml"
        self.archive_path = lab_path / "archive" / self.exp_name
        self.work_path = lab_path / "work" / self.exp_name
        self.output000 = self.archive_path / "output000"
        self.output001 = self.archive_path / "output001"

        with open(self.config_path) as f:
            self.config = yaml.safe_load(f)

        self.set_model()

    def set_model(self):
        """Set model based on payu config. Currently only setting top-level
        model.

        Raises:
            KeyError: If the configured model name is not in the model index.
        """
        self.model_name = self.config.get("model")
        ModelType = model_index[self.model_name]
        self.model = ModelType(self)

    def extract_checksums(
        self, output_directory: Path = None, schema_version: str = None
    ):
        """Use model subclass to extract checksums from output."""
        return self.model.extract_checksums(output_directory, schema_version)

    def has_run(self):
        """See whether this experiment has been run."""
        return self.model.output_exists()

    def setup_for_test_run(self):
        """Rewrite ``config.yaml`` with the settings needed to run in the
        test environment.
        """
        with open(self.config_path) as f:
            doc = yaml.safe_load(f)

        # Disable git runlog
        doc["runlog"] = False

        # Disable metadata and set override experiment name for work/archive
        # directories
        doc["metadata"] = {"enable": False}
        doc["experiment"] = self.exp_name

        # Set laboratory path
        doc["laboratory"] = str(self.lab_path)

        with open(self.config_path, "w") as f:
            yaml.dump(doc, f)

    def run(self):
        """Run the experiment using payu and check output.

        Don't do any work if it has already run.

        Returns:
            ``(0, None, None, None)`` if output already exists, otherwise the
            result of :meth:`force_qsub_run`.
        """
        if self.has_run():
            return 0, None, None, None
        return self.force_qsub_run()

    @staticmethod
    def _job_number(run_id: str) -> str:
        """Return the part of a qsub job id before the first ``.``."""
        return run_id.split(".")[0]

    def force_qsub_run(self):
        """Run using qsub and wait for the run and collate jobs.

        Returns:
            A tuple ``(status, stdout, stderr, output_files)``. ``status`` is
            0 on success, 1 if a payu command failed, 2 if there was not
            exactly one stdout file for the run job, and 3 if the collate job
            id could not be found in the run job's stdout.
        """
        # Change to experiment directory and run.
        owd = Path.cwd()
        try:
            os.chdir(self.control_path)
            sp.check_output(["payu", "sweep", "--lab", self.lab_path])
            run_id = sp.check_output(["payu", "run", "--lab", self.lab_path])
            run_id = run_id.decode().splitlines()[0]
        except sp.CalledProcessError:
            print("Error: call to payu run failed.", file=sys.stderr)
            return 1, None, None, None
        finally:
            os.chdir(owd)

        wait_for_qsub(run_id)
        run_number = self._job_number(run_id)

        output_files = []
        # Read qsub stdout file
        stdout_matches = glob.glob(str(self.control_path / f"*.o{run_number}"))
        if len(stdout_matches) != 1:
            # Covers both the "none found" and the "several found" case
            print(
                "Error: expected exactly one stdout file, found "
                f"{len(stdout_matches)}: {stdout_matches}",
                file=sys.stderr,
            )
            return 2, None, None, None

        stdout_filename = stdout_matches[0]
        output_files.append(stdout_filename)
        with open(stdout_filename) as f:
            stdout = f.read()

        # Read qsub stderr file
        stderr_matches = glob.glob(str(self.control_path / f"*.e{run_number}"))
        stderr = ""
        if len(stderr_matches) == 1:
            stderr_filename = stderr_matches[0]
            output_files.append(stderr_filename)
            with open(stderr_filename) as f:
                stderr = f.read()

        # TODO: Early return if not collating

        # Read the qsub id of the collate job from the stdout.
        # Payu puts this here. The dot is escaped so that only a literal
        # "." separates the job number from the host suffix.
        m = re.search(r"(\d+\.gadi-pbs)\n", stdout)
        if m is None:
            print(
                "Error: could not find qsub id of collate job.", file=sys.stderr
            )
            return 3, stdout, stderr, output_files

        # Wait for the collate to complete.
        collate_id = m.group(1)
        wait_for_qsub(collate_id)

        # Return files created by qsub so caller can read or delete.
        # As for the run job above, the files are globbed by the job number
        # only, not by the full "<number>.gadi-pbs" id.
        collate_number = self._job_number(collate_id)
        collate_files = self.control_path / f"*.[oe]{collate_number}"
        output_files += glob.glob(str(collate_files))

        return 0, stdout, stderr, output_files

    def setup_and_run(self):
        """Apply the test configuration, then run the experiment."""
        self.setup_for_test_run()
        return self.run()
156
+
157
+
158
def setup_exp(control_path: Path, output_path: Path, exp_name: str):
    """Create an experiment by copying over the base configuration.

    The control directory is copied to ``<output_path>/control/<name>``,
    replacing any existing copy, and any existing archive or work directory
    for the experiment under ``<output_path>/lab`` is removed.

    Returns:
        The ``ExpTestHelper`` for the newly created experiment.
    """
    # Prefix the experiment name with the configuration directory name,
    # unless that directory is called "base-experiment"
    base_name = control_path.name
    full_name = exp_name if base_name == "base-experiment" else f"{base_name}-{exp_name}"

    # Copy over base control directory (e.g. model configuration)
    exp_control_path = output_path / "control" / full_name
    if exp_control_path.exists():
        shutil.rmtree(exp_control_path)
    shutil.copytree(control_path, exp_control_path, symlinks=True)

    exp = ExpTestHelper(
        control_path=exp_control_path, lab_path=output_path / "lab"
    )

    # Remove any pre-existing archive or work directories for the experiment
    for stale_dir in (exp.archive_path, exp.work_path):
        try:
            shutil.rmtree(stale_dir)
        except FileNotFoundError:
            pass

    return exp
@@ -0,0 +1,4 @@
1
+ from model_config_tests.models.accessom2 import AccessOm2
2
+ from model_config_tests.models.accessom3 import AccessOm3
3
+
4
# Maps a model name to the class that handles that model
index = {
    "access-om2": AccessOm2,
    "access-om3": AccessOm3,
}
@@ -0,0 +1,90 @@
1
+ """Specific Access-OM2 Model setup and post-processing"""
2
+
3
+ import re
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import f90nml
9
+
10
+ from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model
11
+
12
+
13
class AccessOm2(Model):
    """ACCESS-OM2 runtime configuration and checksum extraction."""

    def __init__(self, experiment):
        """Record the output file and config file paths for the experiment."""
        super().__init__(experiment)
        self.output_file = self.experiment.output000 / "access-om2.out"

        self.accessom2_config = experiment.control_path / "accessom2.nml"
        self.ocean_config = experiment.control_path / "ocean" / "input.nml"

    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
        """Set config files to a short time period for experiment run.
        Default is 3 hours.

        Raises:
            NotImplementedError: Unless exactly two of ``years``, ``months``
                and ``seconds`` are zero.
        """
        # Validate the arguments before touching the namelist file
        if sum(x == 0 for x in (years, months, seconds)) != 2:
            raise NotImplementedError(
                "Cannot specify runtime in seconds and years and months"
                + " at the same time. Two of which must be zero"
            )

        with open(self.accessom2_config) as f:
            nml = f90nml.read(f)

        nml["date_manager_nml"]["restart_period"] = [years, months, seconds]
        nml.write(self.accessom2_config, force=True)

    def output_exists(self) -> bool:
        """Check for existing output file"""
        return self.output_file.exists()

    def extract_checksums(
        self, output_directory: Path = None, schema_version: str = None
    ) -> dict[str, Any]:
        """Parse output file and create checksum using defined schema.

        Args:
            output_directory: Directory containing ``access-om2.out``. When
                not given, the experiment's ``output000`` file is used.
            schema_version: Checksum schema version. When None, the model's
                default schema version is used.

        Raises:
            NotImplementedError: If the schema version is not supported.
        """
        if output_directory:
            output_filename = output_directory / "access-om2.out"
        else:
            output_filename = self.output_file

        # Regex pattern for checksums in the `<model>.out` file
        # Examples:
        # [chksum] ht              -2390360641069121536
        # [chksum] hu               6389284661071183872
        # [chksum] htr               928360042410663049
        # Compiled once here rather than looked up for every line.
        pattern = re.compile(r"\[chksum\]\s+(.+)\s+(-?\d+)")

        # checksums outputted in form:
        # {
        #   "ht": ["-2390360641069121536"],
        #   "hu": ["6389284661071183872"],
        #   "htr": ["928360042410663049"]
        # }
        # with potential for multiple checksums for one key.
        output_checksums: dict[str, list[str]] = defaultdict(list)

        with open(output_filename) as f:
            for line in f:
                # Check for checksum pattern match
                match = pattern.match(line)
                if match:
                    # Extract values
                    field = match.group(1).strip()
                    checksum = match.group(2).strip()

                    output_checksums[field].append(checksum)

        if schema_version is None:
            schema_version = self.default_schema_version

        if schema_version == SCHEMA_VERSION_1_0_0:
            checksums = {
                "schema_version": schema_version,
                "output": dict(output_checksums),
            }
        else:
            raise NotImplementedError(
                f"Unsupported checksum schema version: {schema_version}"
            )

        return checksums
@@ -0,0 +1,90 @@
1
+ """Specific Access-OM3 Model setup and post-processing"""
2
+
3
+ import re
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from payu.models.cesm_cmeps import Runconfig
9
+
10
+ from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model
11
+
12
+
13
class AccessOm3(Model):
    """ACCESS-OM3 runtime configuration and checksum extraction."""

    def __init__(self, experiment):
        """Record the output file and config file paths for the experiment."""
        super().__init__(experiment)
        self.output_file = self.experiment.output000 / "ocean.stats"

        self.runconfig = experiment.control_path / "nuopc.runconfig"
        self.ocean_config = experiment.control_path / "input.nml"

    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
        """Set config files to a short time period for experiment run.
        Default is 3 hours.

        Raises:
            NotImplementedError: If ``seconds`` is non-zero together with a
                non-zero ``years`` or ``months``.
        """
        runconfig = Runconfig(self.runconfig)

        if years == months == 0:
            freq = "nseconds"
            n = str(seconds)
        elif seconds == 0:
            freq = "nmonths"
            n = str(12 * years + months)
        else:
            raise NotImplementedError(
                "Cannot specify runtime in seconds and year/months at the same time"
            )

        runconfig.set("CLOCK_attributes", "restart_n", n)
        runconfig.set("CLOCK_attributes", "restart_option", freq)
        runconfig.set("CLOCK_attributes", "stop_n", n)
        runconfig.set("CLOCK_attributes", "stop_option", freq)

        runconfig.write()

    def output_exists(self) -> bool:
        """Check for existing output file"""
        return self.output_file.exists()

    def extract_checksums(
        self, output_directory: Path = None, schema_version: str = None
    ) -> dict[str, Any]:
        """Parse output file and create checksum using defined schema.

        Args:
            output_directory: Directory containing ``ocean.stats``. When not
                given, the experiment's ``output000`` file is used.
            schema_version: Checksum schema version. When None, the model's
                default schema version is used.

        Raises:
            NotImplementedError: If the schema version is not supported.
        """
        if output_directory:
            output_filename = output_directory / "ocean.stats"
        else:
            output_filename = self.output_file

        # ocean.stats is used for regression testing in MOM6's own test suite
        # See https://github.com/mom-ocean/MOM6/blob/2ab885eddfc47fc0c8c0bae46bc61531104428d5/.testing/Makefile#L495-L501
        # Rows in ocean.stats look like:
        #      0,  693135.000,     0, En 3.0745627134675957E-23, CFL  0.00000, ...
        # where the first three columns are Step, Day, Truncs and the remaining
        # columns include a label for what they are (e.g. En = Energy/Mass)
        # Header info is only included for new runs so can't be relied on
        output_checksums: dict[str, list[str]] = defaultdict(list)

        with open(output_filename) as f:
            lines = f.readlines()

        # Skip header if it exists (for new runs). The `lines and` guard
        # avoids an IndexError when the file is empty.
        istart = 2 if lines and "Step" in lines[0] else 0
        for line in lines[istart:]:
            for col in line.split(","):
                # Only keep columns with labels (ie not Step, Day, Truncs)
                parts = re.split(" +", col.strip())
                if len(parts) > 1:
                    output_checksums[parts[0]].append(parts[-1])

        if schema_version is None:
            schema_version = self.default_schema_version

        if schema_version == SCHEMA_VERSION_1_0_0:
            checksums = {
                "schema_version": schema_version,
                "output": dict(output_checksums),
            }
        else:
            raise NotImplementedError(
                f"Unsupported checksum schema version: {schema_version}"
            )

        return checksums
@@ -0,0 +1,52 @@
1
+ """Generic Model class"""
2
+
3
+ from pathlib import Path
4
+
5
# Supported checksum schema version and the URL of its schema
SCHEMA_VERSION_1_0_0 = "1-0-0"
SCHEMA_1_0_0_URL = (
    "https://raw.githubusercontent.com/ACCESS-NRI/schema/"
    "7666d95967de4dfd19b0d271f167fdcfd3f46962/"
    "au.org.access-nri/model/reproducibility/checksums/1-0-0.json"
)
SCHEMA_VERSION_TO_URL = {SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL}
DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0
9
+
10
+
11
+ class Model:
12
+ def __init__(self, experiment):
13
+ self.experiment = experiment
14
+
15
+ self.default_schema_version = DEFAULT_SCHEMA_VERSION
16
+ self.schema_version_to_url = SCHEMA_VERSION_TO_URL
17
+
18
+ def extract_checksums(self, output_directory: Path, schema_version: str):
19
+ """Extract checksums from output directory"""
20
+ raise NotImplementedError
21
+
22
+ def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
23
+ """Configure model runtime"""
24
+ raise NotImplementedError
25
+
26
+ def output_exists(self):
27
+ """Check for existing output files"""
28
+ raise NotImplementedError
29
+
30
+ def check_checksums_over_restarts(
31
+ self, long_run_checksum, short_run_checksum_0, short_run_checksum_1
32
+ ) -> bool:
33
+ """Compare a checksums from a long run (e.g. 2 days) against
34
+ checksums from 2 short runs (e.g. 1 day)"""
35
+ short_run_checksums = short_run_checksum_0["output"]
36
+ for field, checksums in short_run_checksum_1["output"].items():
37
+ if field not in short_run_checksums:
38
+ short_run_checksums[field] = checksums
39
+ else:
40
+ short_run_checksums[field].extend(checksums)
41
+
42
+ matching_checksums = True
43
+ for field, checksums in long_run_checksum["output"].items():
44
+ for checksum in checksums:
45
+ if (
46
+ field not in short_run_checksums
47
+ or checksum not in short_run_checksums[field]
48
+ ):
49
+ print(f"Unequal checksum: {field}: {checksum}")
50
+ matching_checksums = False
51
+
52
+ return matching_checksums