esgf-qa 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,263 @@
1
+ import json
2
+ import os
3
+ import tempfile
4
+ from datetime import timedelta
5
+
6
+ import pytest
7
+
8
+ from esgf_qa import con_checks as cc
9
+ from esgf_qa.con_checks import (
10
+ compare_dicts,
11
+ compare_nested_dicts,
12
+ printtimedelta,
13
+ truncate_str,
14
+ )
15
+
16
+
17
def test_printtimedelta():
    """
    Test the printtimedelta function.
    """
    # Map each duration to the human-readable string we expect back.
    cases = {
        timedelta(seconds=1): "1.0 seconds",
        timedelta(seconds=120): "2.0 minutes",
        timedelta(seconds=3600): "1.0 hours",
        timedelta(days=1): "1.0 days",
        timedelta(days=365): "365.0 days",
    }
    for delta, expected in cases.items():
        assert printtimedelta(delta.total_seconds()) == expected
32
+
33
+
34
def test_truncate_str():
    """
    Test the truncate_str function.
    """
    # (input string, maximum length, expected truncated output)
    cases = [
        ("This is a long string", 10, "This is...string"),
        ("This is a short string", 25, "This is a short string"),
        (
            "This is a really long string that needs to be truncated",
            12,
            "This is...truncated",
        ),
        ("This is not really a short string", 0, "This is not really a short string"),
        ("This is a really short string", 16, "This is...string"),
        (
            "Someone should truncate this truncatable string!",
            -10,
            "Someone should truncate this truncatable string!",
        ),
        ("Someone should truncate this truncatable string!", 20, "Someone...string!"),
    ]
    for text, limit, expected in cases:
        assert truncate_str(text, limit) == expected
59
+
60
+
61
def test_compare_dicts():
    """
    Test the compare_dicts function.
    """
    # (dict1, dict2, keys to exclude, expected list of differing keys)
    cases = [
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 2, "c": 4}, set(), ["c"]),
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 2, "c": 3}, set(), []),
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 3, "c": 3}, {"b"}, []),
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 3, "c": 3}, set(), ["b"]),
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 2, "c": 3}, {"b", "c"}, []),
        ({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 2, "c": 3}, set(), []),
    ]
    for left, right, excluded, expected in cases:
        assert compare_dicts(left, right, excluded) == expected
107
+
108
+
109
def test_compare_nested_dicts():
    """
    Test the compare_nested_dicts function.
    """
    # (dict1, dict2, keys to exclude, expected mapping of outer key -> diffs)
    cases = [
        (
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
            set(),
            {"b": ["y"]},
        ),
        (
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
            {"b"},
            {"b": ["y"]},
        ),
        (
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
            {"y"},
            {},
        ),
        (
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            set(),
            {},
        ),
        (
            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
            {"a": {"x": 1, "y": 2}, "b": {"x": 2, "y": 4, "z": 5}},
            set(),
            {"b": ["x", "z"]},
        ),
    ]
    for left, right, excluded, expected in cases:
        assert compare_nested_dicts(left, right, excluded) == expected
149
+
150
+
151
@pytest.fixture
def temp_files():
    """Create temporary JSON files for testing consistency/continuity."""
    # Every generated file carries the same minimal payload, so build it once.
    payload = {
        "global_attributes": {"title": "fake", "history": "x"},
        "global_attributes_non_required": {"notes": "x"},
        "global_attributes_dtypes": {"title": "str"},
        "variable_attributes": {"var1": {"units": "m"}},
        "variable_attributes_dtypes": {"var1": {"units": "str"}},
        "dimensions": {"time": 10, "lat": 5, "lon": 5},
        "coordinates": {"lat": [0, 1, 2, 3, 4], "lon": [0, 1, 2, 3, 4]},
        "time_info": {
            "timen": 0,
            "boundn": 10,
            "time0": 0,
            "bound0": 10,
            "units": "days since 2000-01-01",
            "calendar": "gregorian",
            "frequency": "day",
        },
    }
    files = {}
    with tempfile.TemporaryDirectory() as tmpdir:
        # Two fake files per dataset.
        for ds in ("ds1", "ds2"):
            paths = []
            for idx in range(2):
                fpath = os.path.join(tmpdir, f"{ds}_file{idx}.json")
                with open(fpath, "w") as fh:
                    json.dump(payload, fh)
                paths.append(fpath)
            files[ds] = paths
        yield files  # dict of dataset -> list of file paths
183
+
184
+
185
@pytest.fixture
def files_to_check_dict(temp_files):
    """Create the files_to_check_dict required by con_checks."""
    tmpdir = tempfile.gettempdir()
    mapping = {}
    for ds, flist in temp_files.items():
        for fpath in flist:
            base = os.path.basename(fpath)
            mapping[fpath] = {
                "consistency_file": fpath,
                "ts": "20000101-20000102",
                "result_file": os.path.join(tmpdir, f"result_{base}.json"),
                "result_file_ds": os.path.join(tmpdir, f"result_ds_{ds}.json"),
            }
    return mapping
202
+
203
+
204
@pytest.fixture
def ds_map(temp_files):
    """Map dataset names to file paths.

    Returns a shallow copy of the ``temp_files`` mapping so a test that
    mutates the map cannot affect the underlying fixture value.
    """
    # dict(...) replaces the redundant identity dict comprehension
    # ``{ds: flist for ds, flist in temp_files.items()}``.
    return dict(temp_files)
208
+
209
+
210
class TestConChecks:
    """Integration tests for the dataset-level routines in ``con_checks``."""

    def test_consistency_checks(self, ds_map, files_to_check_dict):
        """consistency_checks returns a dict containing the required-attributes check."""
        results = cc.consistency_checks("ds1", ds_map, files_to_check_dict, {})
        assert isinstance(results, dict)
        assert "Required global attributes" in results

    def test_continuity_checks(self, ds_map, files_to_check_dict):
        """continuity_checks returns a dict containing the time-continuity check."""
        results = cc.continuity_checks("ds1", ds_map, files_to_check_dict, {})
        assert isinstance(results, dict)
        assert "Time continuity" in results

    def test_compatibility_checks(self, ds_map, files_to_check_dict):
        """compatibility_checks reports a message when opening the data fails."""
        results = cc.compatibility_checks("ds1", ds_map, files_to_check_dict, {})
        assert isinstance(results, dict)
        # The open_mfdataset will fail on minimal data, so we can check that msgs exist
        assert any("open_mfdataset" in key for key in results.keys())

    def test_dataset_coverage_checks(self, temp_files):
        """dataset_coverage_checks flags a dataset whose time series starts late.

        NOTE(fix): the original definition omitted ``self``, so pytest bound
        the test instance to the ``temp_files`` parameter and the fixture was
        never injected; the signature now requests the fixture correctly.
        """
        # Example: manually create a minimal dataset map with two datasets
        ds_map = {
            "ds1": ["file_ds1_1.json", "file_ds1_2.json"],
            "ds2": ["file_ds2_1.json", "file_ds2_2.json"],
        }

        # Corresponding files_to_check_dict for those files
        files_to_check_dict = {}
        for ds, flist in ds_map.items():
            ts_ranges = (
                ["20000101-20001231", "20010101-20011231"]
                if ds == "ds1"
                else ["20010101-20010630", "20010701-20011231"]
            )
            for idx, f in enumerate(flist):
                files_to_check_dict[f] = {
                    "consistency_file": f,
                    "ts": ts_ranges[idx],
                    "result_file": f"result_{f}.json",
                    "result_file_ds": f"result_ds_{ds}.json",
                }
        results = cc.dataset_coverage_checks(ds_map, files_to_check_dict, {})
        # There should be weight=1 messages for ds2 due to differing start year
        assert "Time coverage" in results["ds2"]
        assert any(
            "Time series starts at '2001'" in msg
            for msg in results["ds2"]["Time coverage"]["msgs"]
        )

    def test_inter_dataset_consistency_checks(self, ds_map, files_to_check_dict):
        """Inter-dataset comparison returns a results dict plus a reference mapping."""
        results, ref_ds = cc.inter_dataset_consistency_checks(
            ds_map, files_to_check_dict, {}
        )
        assert isinstance(results, dict)
        assert isinstance(ref_ds, dict)
        assert "general_reference" in ref_ds
tests/test_qaviewer.py ADDED
@@ -0,0 +1,147 @@
1
+ import asyncio
2
+
3
+ import pytest
4
+ from textual.widgets import Input
5
+
6
+ from esgf_qa.qaviewer import QCViewer, iter_nodes, transform_keys
7
+
8
+
9
+ # ------------------------
10
+ # Unit tests for helpers
11
+ # ------------------------
12
def test_transform_keys_basic():
    """transform_keys renames the raw result sections to display names."""
    raw = {
        "info": {"id": "DS1", "date": "2025-10-30"},
        "fail": {3: {"test1": ["file1.nc"]}},
        "pass": {3: {"test2": ["file2.nc"]}},
        "error": {"checker1": {"func": "some error"}},
    }
    transformed = transform_keys(raw)
    for section in ("Info", "Failed Checks", "Passed Checks", "Runtime Errors"):
        assert section in transformed
    assert transformed["Info"]["Dataset-ID"] == "DS1"
    assert transformed["Failed Checks"]["Required"]["test1"] == ["file1.nc"]
26
+
27
+
28
def test_iter_nodes_flat_tree():
    """iter_nodes yields the root node followed by each of its children."""

    class FakeNode:
        def __init__(self, children=None):
            # Avoid a shared mutable default; each node owns its list.
            self.children = [] if children is None else children

    root = FakeNode([FakeNode(), FakeNode()])
    assert len(list(iter_nodes(root))) == 3
37
+
38
+
39
+ # ------------------------
40
+ # Async tests for QCViewer
41
+ # ------------------------
42
@pytest.mark.asyncio
async def test_qcviewer_tree_population():
    """
    Tests that result.json is correctly converted into the tree widget structure.

    - Starts QCViewer app in test environment
    - Waits for the population of the tree
    - Asserts that the root node has children
    - Asserts that the "Info" node exists
    - Asserts that the Dataset-ID node exists
    """
    data = {"Info": {"Dataset-ID": "DS1"}, "Failed Checks": {}, "Passed Checks": {}}
    app = QCViewer(data)

    async with app.run_test() as _pilot:
        # Poll for up to ~2 seconds until the tree has been populated.
        attempts = 0
        while not app.qc_tree.root.children:
            attempts += 1
            if attempts > 20:
                raise RuntimeError("Tree did not populate children in time")
            await asyncio.sleep(0.1)

        root = app.qc_tree.root
        assert root.children
        # Locate the Info node among the root's children.
        info_node = next((c for c in root.children if str(c.label) == "Info"), None)
        assert info_node is not None
        # The Info node must expose the Dataset-ID leaf.
        ds_id_node = next(
            (c for c in info_node.children if str(c.label) == "Dataset-ID"), None
        )
        assert ds_id_node is not None
76
+
77
+
78
@pytest.mark.asyncio
async def test_search_functionality():
    """
    Tests using the functionality to search the tree.

    - Focuses on search input
    - Simulates submission of a search query
    - Asserts that ``matches`` is populated correctly
    - Checks the navigation with ``action_next_match``, ``action_prev_match``
    """
    data = {"Info": {"Dataset-ID": "DS1"}, "Failed Checks": {}, "Passed Checks": {}}
    app = QCViewer(data)

    async with app.run_test() as pilot:
        await pilot.pause()
        search_box = app.query_one("#search", Input)
        app.action_focus_search()

        # Simulate the user submitting a search query.
        app.on_input_submitted(Input.Submitted(search_box, "Dataset-ID"))

        # Exactly one node should match the query.
        assert len(app.matches) == 1
        assert str(app.matches[0].label) == "Dataset-ID"

        # With a single match, next/prev navigation must stay on index 0.
        app.action_next_match()
        assert app.match_index == 0
        app.action_prev_match()
        assert app.match_index == 0
110
+
111
+
112
@pytest.mark.asyncio
async def test_toggle_expand_node_behaviour():
    """
    Tests the node expansion/collapse logic.

    - Finds the Info node
    - Checks the state (collapsed initially)
    - Calls ``toggle_expand_node`` to expand and checks state
    - Calls ``toggle_expand_node`` to collapse and checks state
    """
    data = {"Info": {"Dataset-ID": "DS1", "Date": "2025-10-30"}}
    app = QCViewer(data)

    async with app.run_test() as _pilot:
        # Poll for up to ~2 seconds until the tree has been populated.
        attempts = 0
        while not app.qc_tree.root.children:
            attempts += 1
            if attempts > 20:
                raise RuntimeError("Tree did not populate children in time")
            await asyncio.sleep(0.1)

        info_node = next(
            (c for c in app.qc_tree.root.children if str(c.label) == "Info"), None
        )
        assert info_node is not None

        # The node starts collapsed; each toggle flips its state.
        assert not info_node.is_expanded
        app.toggle_expand_node(info_node)
        assert info_node.is_expanded
        app.toggle_expand_node(info_node)
        assert not info_node.is_expanded
@@ -0,0 +1,191 @@
1
+ import json
2
+ import os
3
+
4
+ import pytest
5
+
6
+ from esgf_qa.run_qa import (
7
+ process_dataset,
8
+ process_file,
9
+ run_compliance_checker,
10
+ )
11
+
12
+
13
@pytest.fixture
def tmp_env(tmp_path):
    """Fixture that sets up a temporary environment with paths and sample structures."""
    results = tmp_path / "results"
    results.mkdir()
    # Empty progress file the code under test can append to.
    progress = tmp_path / "progress.txt"
    progress.write_text("")
    return {"tmp": tmp_path, "results": results, "progress": progress}
21
+
22
+
23
@pytest.fixture
def dummy_nc_file(tmp_env):
    """Create a fake dataset file."""
    path = tmp_env["tmp"] / "dummy.nc"
    path.write_text("fake dataset content")
    return str(path)
29
+
30
+
31
@pytest.fixture
def fake_check_suite(monkeypatch):
    """Monkeypatch CheckSuite to avoid real compliance logic."""

    class StubCheck:
        """Minimal check-result object with the attributes run_qa reads."""

        def __init__(self, name):
            self.name = name
            self.weight = 1
            self.value = "PASS"
            self.msgs = []
            self.check_method = "check_method"
            self.children = []

    class StubCheckSuite:
        """Drop-in replacement for compliance-checker's CheckSuite."""

        def __init__(self, options=None):
            self.options = options or {}
            self.checkers = {}

        def load_all_available_checkers(self):
            pass

        def load_dataset(self, file_path):
            return f"dataset:{file_path}"

        def run_all(self, ds, checkers, include_checks=None, skip_checks=None):
            results = {}
            for checker in checkers:
                # One flat result list plus an empty error mapping per checker.
                results[checker] = ([StubCheck("time_bounds")], {})
            return results

    monkeypatch.setattr("esgf_qa.run_qa.CheckSuite", StubCheckSuite)
    return StubCheckSuite
66
+
67
+
68
class TestDummyQA:
    """Tests for run_compliance_checker, process_file, and process_dataset."""

    def test_run_compliance_checker_basic(self, fake_check_suite, dummy_nc_file):
        """run_compliance_checker returns a (results, errors) tuple per checker."""
        checkers = ["cf:latest"]
        results = run_compliance_checker(dummy_nc_file, checkers)
        assert isinstance(results, dict)
        assert "cf:latest" in results
        assert isinstance(results["cf:latest"], tuple)
        assert isinstance(results["cf:latest"][0], list)

    def test_process_file(self, fake_check_suite, tmp_env, dummy_nc_file):
        """When no previous results exist, should run checks and write output."""
        files_to_check_dict = {
            dummy_nc_file: {
                "result_file": str(tmp_env["results"] / "res.json"),
                "consistency_file": str(tmp_env["results"] / "cons.json"),
            }
        }
        processed_files = []
        checkers = ["cf:latest"]
        checker_options = {}

        file_path, result = process_file(
            dummy_nc_file,
            checkers,
            checker_options,
            files_to_check_dict,
            processed_files,
            str(tmp_env["progress"]),
        )

        # FIX: the returned file path was previously unpacked but never
        # verified — it must identify the file that was processed.
        assert file_path == dummy_nc_file

        # should write JSON to disk
        result_file = files_to_check_dict[dummy_nc_file]["result_file"]
        assert os.path.isfile(result_file)
        with open(result_file) as f:
            data = json.load(f)
        assert "cf" in data
        assert "errors" in data["cf"]

    def test_process_file_cached_result(self, fake_check_suite, tmp_env, dummy_nc_file):
        """Should read from disk if result already exists and no errors."""
        result_file = tmp_env["results"] / "res.json"
        consistency_file = tmp_env["results"] / "cons.json"
        result_file.write_text(json.dumps({"cf": {"errors": {}}}))
        consistency_file.write_text("dummy consistency file")

        files_to_check_dict = {
            dummy_nc_file: {
                "result_file": str(result_file),
                "consistency_file": str(consistency_file),
            }
        }
        processed_files = [dummy_nc_file]
        checkers = ["cf:latest"]
        checker_options = {}

        file_path, result = process_file(
            dummy_nc_file,
            checkers,
            checker_options,
            files_to_check_dict,
            processed_files,
            str(tmp_env["progress"]),
        )

        # FIX: also pin the returned file path on the cached code path.
        assert file_path == dummy_nc_file
        # Should reuse cached result, not rewrite
        assert result == {"cf": {"errors": {}}}

    def test_process_dataset(self, fake_check_suite, tmp_env, dummy_nc_file):
        """process_dataset should run checks for not yet checked dataset."""
        ds = "dataset1"
        ds_map = {ds: [dummy_nc_file]}
        result_file_ds = tmp_env["results"] / "res_ds.json"

        files_to_check_dict = {dummy_nc_file: {"result_file_ds": str(result_file_ds)}}

        processed_datasets = set()
        # An unknown checker exercises the error-reporting branch.
        checkers = ["unknown_checker:latest"]
        checker_options = {}

        ds_id, result = process_dataset(
            ds,
            ds_map,
            checkers,
            checker_options,
            files_to_check_dict,
            processed_datasets,
            str(tmp_env["progress"]),
        )

        # should write JSON file for dataset results
        assert ds_id == "dataset1"
        assert os.path.isfile(result_file_ds)
        with open(result_file_ds) as f:
            data = json.load(f)
        assert "unknown_checker" in data
        assert "errors" in data["unknown_checker"]
        assert "msg" in data["unknown_checker"]["errors"]["unknown_checker"]

    def test_process_dataset_cached(self, fake_check_suite, tmp_env, dummy_nc_file):
        """Should read dataset result if already processed and valid."""
        ds = "dataset2"
        ds_map = {ds: [dummy_nc_file]}
        result_file_ds = tmp_env["results"] / "res_ds2.json"
        result_file_ds.write_text(json.dumps({"cf": {"errors": {}}}))

        files_to_check_dict = {dummy_nc_file: {"result_file_ds": str(result_file_ds)}}
        processed_datasets = {ds}
        checkers = ["cf:latest"]
        checker_options = {}

        ds_id, result = process_dataset(
            ds,
            ds_map,
            checkers,
            checker_options,
            files_to_check_dict,
            processed_datasets,
            str(tmp_env["progress"]),
        )

        assert ds_id == ds
        assert result == {"cf": {"errors": {}}}