PyPI - esgf-qa - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

esgf-qa 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

esgf_qa/_constants.py +42 -1
esgf_qa/_version.py +2 -2
esgf_qa/cluster_results.py +466 -0
esgf_qa/con_checks.py +209 -11
esgf_qa/run_qa.py +247 -418
{esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/METADATA +42 -28
esgf_qa-0.4.0.dist-info/RECORD +19 -0
{esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/top_level.txt +1 -1
tests/test_cli.py +267 -0
tests/test_cluster_results.py +166 -0
tests/test_con_checks.py +263 -0
tests/test_qaviewer.py +147 -0
tests/test_run_dummy_qa.py +191 -0
tests/test_run_qa.py +181 -0
docs/esgf-qa_Logo.png +0 -0
esgf_qa-0.3.0.dist-info/RECORD +0 -13
{esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/WHEEL +0 -0
{esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/entry_points.txt +0 -0
{esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/licenses/LICENSE +0 -0

tests/test_con_checks.py ADDED Viewed

@@ -0,0 +1,263 @@
+import json
+import os
+import tempfile
+from datetime import timedelta
+import pytest
+from esgf_qa import con_checks as cc
+from esgf_qa.con_checks import (
+    compare_dicts,
+    compare_nested_dicts,
+    printtimedelta,
+    truncate_str,
+)
+def test_printtimedelta():
+    """
+    Test the printtimedelta function.
+    """
+    # Test cases for timedelta values
+    test_cases = [
+        (timedelta(seconds=1), "1.0 seconds"),
+        (timedelta(seconds=120), "2.0 minutes"),
+        (timedelta(seconds=3600), "1.0 hours"),
+        (timedelta(days=1), "1.0 days"),
+        (timedelta(days=365), "365.0 days"),
+    ]
+    for timedelta_value, expected_output in test_cases:
+        result = printtimedelta(timedelta_value.total_seconds())
+        assert result == expected_output
+def test_truncate_str():
+    """
+    Test the truncate_str function.
+    """
+    # Test cases for different string lengths and max_lengths
+    test_cases = [
+        ("This is a long string", 10, "This is...string"),
+        ("This is a short string", 25, "This is a short string"),
+        (
+            "This is a really long string that needs to be truncated",
+            12,
+            "This is...truncated",
+        ),
+        ("This is not really a short string", 0, "This is not really a short string"),
+        ("This is a really short string", 16, "This is...string"),
+        (
+            "Someone should truncate this truncatable string!",
+            -10,
+            "Someone should truncate this truncatable string!",
+        ),
+        ("Someone should truncate this truncatable string!", 20, "Someone...string!"),
+    ]
+    for s, max_length, expected_output in test_cases:
+        result = truncate_str(s, max_length)
+        assert result == expected_output
+def test_compare_dicts():
+    """
+    Test the compare_dicts function.
+    """
+    # Test cases for different dictionary values and exclude keys
+    test_cases = [
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 2, "c": 4},
+            set(),
+            ["c"],
+        ),
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 2, "c": 3},
+            set(),
+            [],
+        ),
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 3, "c": 3},
+            {"b"},
+            [],
+        ),
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 3, "c": 3},
+            set(),
+            ["b"],
+        ),
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 2, "c": 3},
+            {"b", "c"},
+            [],
+        ),
+        (
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 1, "b": 2, "c": 3},
+            set(),
+            [],
+        ),
+    ]
+    for dict1, dict2, exclude_keys, expected_output in test_cases:
+        result = compare_dicts(dict1, dict2, exclude_keys)
+        assert result == expected_output
+def test_compare_nested_dicts():
+    """
+    Test the compare_nested_dicts function.
+    """
+    # Test cases for different nested dictionary values and exclude keys
+    test_cases = [
+        (
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
+            set(),
+            {"b": ["y"]},
+        ),
+        (
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
+            {"b"},
+            {"b": ["y"]},
+        ),
+        (
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 5}},
+            {"y"},
+            {},
+        ),
+        (
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            set(),
+            {},
+        ),
+        (
+            {"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}},
+            {"a": {"x": 1, "y": 2}, "b": {"x": 2, "y": 4, "z": 5}},
+            set(),
+            {"b": ["x", "z"]},
+        ),
+    ]
+    for dict1, dict2, exclude_keys, expected_output in test_cases:
+        result = compare_nested_dicts(dict1, dict2, exclude_keys)
+        assert result == expected_output
+@pytest.fixture
+def temp_files():
+    """Create temporary JSON files for testing consistency/continuity."""
+    files = {}
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create 2 fake files per dataset
+        for ds in ["ds1", "ds2"]:
+            files[ds] = []
+            for i in range(2):
+                fpath = os.path.join(tmpdir, f"{ds}_file{i}.json")
+                data = {
+                    "global_attributes": {"title": "fake", "history": "x"},
+                    "global_attributes_non_required": {"notes": "x"},
+                    "global_attributes_dtypes": {"title": "str"},
+                    "variable_attributes": {"var1": {"units": "m"}},
+                    "variable_attributes_dtypes": {"var1": {"units": "str"}},
+                    "dimensions": {"time": 10, "lat": 5, "lon": 5},
+                    "coordinates": {"lat": [0, 1, 2, 3, 4], "lon": [0, 1, 2, 3, 4]},
+                    "time_info": {
+                        "timen": 0,
+                        "boundn": 10,
+                        "time0": 0,
+                        "bound0": 10,
+                        "units": "days since 2000-01-01",
+                        "calendar": "gregorian",
+                        "frequency": "day",
+                    },
+                }
+                with open(fpath, "w") as f:
+                    json.dump(data, f)
+                files[ds].append(fpath)
+        yield files  # return dict of dataset -> list of file paths
+@pytest.fixture
+def files_to_check_dict(temp_files):
+    """Create the files_to_check_dict required by con_checks."""
+    d = {}
+    for ds, flist in temp_files.items():
+        for f in flist:
+            d[f] = {
+                "consistency_file": f,
+                "ts": "20000101-20000102",
+                "result_file": os.path.join(
+                    tempfile.gettempdir(), f"result_{os.path.basename(f)}.json"
+                ),
+                "result_file_ds": os.path.join(
+                    tempfile.gettempdir(), f"result_ds_{ds}.json"
+                ),
+            }
+    return d
+@pytest.fixture
+def ds_map(temp_files):
+    """Map dataset names to file paths."""
+    return {ds: flist for ds, flist in temp_files.items()}
+class TestConChecks:
+    def test_consistency_checks(self, ds_map, files_to_check_dict):
+        results = cc.consistency_checks("ds1", ds_map, files_to_check_dict, {})
+        assert isinstance(results, dict)
+        assert "Required global attributes" in results
+    def test_continuity_checks(self, ds_map, files_to_check_dict):
+        results = cc.continuity_checks("ds1", ds_map, files_to_check_dict, {})
+        assert isinstance(results, dict)
+        assert "Time continuity" in results
+    def test_compatibility_checks(self, ds_map, files_to_check_dict):
+        results = cc.compatibility_checks("ds1", ds_map, files_to_check_dict, {})
+        assert isinstance(results, dict)
+        # The open_mfdataset will fail on minimal data, so we can check that msgs exist
+        assert any("open_mfdataset" in key for key in results.keys())
+    def test_dataset_coverage_checks(temp_files):
+        # Example: manually create a minimal dataset map with two datasets
+        ds_map = {
+            "ds1": ["file_ds1_1.json", "file_ds1_2.json"],
+            "ds2": ["file_ds2_1.json", "file_ds2_2.json"],
+        }
+        # Corresponding files_to_check_dict for those files
+        files_to_check_dict = {}
+        for ds, flist in ds_map.items():
+            ts_ranges = (
+                ["20000101-20001231", "20010101-20011231"]
+                if ds == "ds1"
+                else ["20010101-20010630", "20010701-20011231"]
+            )
+            for idx, f in enumerate(flist):
+                files_to_check_dict[f] = {
+                    "consistency_file": f,
+                    "ts": ts_ranges[idx],
+                    "result_file": f"result_{f}.json",
+                    "result_file_ds": f"result_ds_{ds}.json",
+                }
+        results = cc.dataset_coverage_checks(ds_map, files_to_check_dict, {})
+        # There should be weight=1 messages for ds2 due to differing start year
+        assert "Time coverage" in results["ds2"]
+        assert any(
+            "Time series starts at '2001'" in msg
+            for msg in results["ds2"]["Time coverage"]["msgs"]
+        )
+    def test_inter_dataset_consistency_checks(self, ds_map, files_to_check_dict):
+        results, ref_ds = cc.inter_dataset_consistency_checks(
+            ds_map, files_to_check_dict, {}
+        )
+        assert isinstance(results, dict)
+        assert isinstance(ref_ds, dict)
+        assert "general_reference" in ref_ds

tests/test_qaviewer.py ADDED Viewed

@@ -0,0 +1,147 @@
+import asyncio
+import pytest
+from textual.widgets import Input
+from esgf_qa.qaviewer import QCViewer, iter_nodes, transform_keys
+# ------------------------
+# Unit tests for helpers
+# ------------------------
+def test_transform_keys_basic():
+    data = {
+        "info": {"id": "DS1", "date": "2025-10-30"},
+        "fail": {3: {"test1": ["file1.nc"]}},
+        "pass": {3: {"test2": ["file2.nc"]}},
+        "error": {"checker1": {"func": "some error"}},
+    }
+    result = transform_keys(data)
+    assert "Info" in result
+    assert "Failed Checks" in result
+    assert "Passed Checks" in result
+    assert "Runtime Errors" in result
+    assert result["Info"]["Dataset-ID"] == "DS1"
+    assert result["Failed Checks"]["Required"]["test1"] == ["file1.nc"]
+def test_iter_nodes_flat_tree():
+    class Node:
+        def __init__(self):
+            self.children = []
+    root = Node()
+    root.children = [Node(), Node()]
+    nodes = list(iter_nodes(root))
+    assert len(nodes) == 3
+# ------------------------
+# Async tests for QCViewer
+# ------------------------
+@pytest.mark.asyncio
+async def test_qcviewer_tree_population():
+    """
+    Tests that result.json is correctly converted into the tree widget structure.
+    - Starts QCViewer app in test environment
+    - Waits for the population of the tree
+    - Asserts that the root node has children
+    - Asserts that the "info" node exists
+    - Asserts that Dataset-ID node exists
+    """
+    data = {"Info": {"Dataset-ID": "DS1"}, "Failed Checks": {}, "Passed Checks": {}}
+    app = QCViewer(data)
+    async with app.run_test() as _pilot:
+        # wait until tree root has children
+        for _ in range(20):  # try up to 2 seconds
+            if app.qc_tree.root.children:
+                break
+            await asyncio.sleep(0.1)
+        else:
+            raise RuntimeError("Tree did not populate children in time")
+        tree = app.qc_tree
+        root = tree.root
+        assert root.children
+        # Get the Info node
+        info_node = next((c for c in root.children if str(c.label) == "Info"), None)
+        assert info_node is not None
+        # Check Dataset-ID child
+        ds_id_node = next(
+            (c for c in info_node.children if str(c.label) == "Dataset-ID"), None
+        )
+        assert ds_id_node is not None
+@pytest.mark.asyncio
+async def test_search_functionality():
+    """
+    Tests using the functionality to search the tree.
+    - Focuses on search input
+    - Simulates submission of a search query
+    - Asserts that ``matches`` is populated correctly
+    - Checks the navigation with ``action_next_match``, ``action_prev_match``
+    """
+    data = {"Info": {"Dataset-ID": "DS1"}, "Failed Checks": {}, "Passed Checks": {}}
+    app = QCViewer(data)
+    async with app.run_test() as pilot:
+        await pilot.pause()
+        search_input = app.query_one("#search", Input)
+        app.action_focus_search()
+        # Simulate user submitting search text
+        app.on_input_submitted(Input.Submitted(search_input, "Dataset-ID"))
+        # There should be one match
+        assert len(app.matches) == 1
+        match_node = app.matches[0]
+        assert str(match_node.label) == "Dataset-ID"
+        # Test next/prev match wrapping (only one match)
+        _old_index = app.match_index
+        app.action_next_match()
+        assert app.match_index == 0
+        app.action_prev_match()
+        assert app.match_index == 0
+@pytest.mark.asyncio
+async def test_toggle_expand_node_behaviour():
+    """
+    Tests the node expansion/collapse logic.
+    - Finds the info node
+    - Checks the state (collapsed initially)
+    - Calls ``toggle_expand_node``to expand and checks state
+    - Calls ``toggle_expand_node`` to collapse and checks state
+    """
+    data = {"Info": {"Dataset-ID": "DS1", "Date": "2025-10-30"}}
+    app = QCViewer(data)
+    async with app.run_test() as _pilot:
+        # Wait until tree root has children (poll up to 2 seconds)
+        for _ in range(20):
+            if app.qc_tree.root.children:
+                break
+            await asyncio.sleep(0.1)
+        else:
+            raise RuntimeError("Tree did not populate children in time")
+        tree = app.qc_tree
+        info_node = next(
+            (c for c in tree.root.children if str(c.label) == "Info"), None
+        )
+        assert info_node is not None
+        # Initially collapsed
+        assert not info_node.is_expanded
+        # Expand node
+        app.toggle_expand_node(info_node)
+        assert info_node.is_expanded
+        # Collapse node
+        app.toggle_expand_node(info_node)
+        assert not info_node.is_expanded

tests/test_run_dummy_qa.py ADDED Viewed

@@ -0,0 +1,191 @@
+import json
+import os
+import pytest
+from esgf_qa.run_qa import (
+    process_dataset,
+    process_file,
+    run_compliance_checker,
+)
+@pytest.fixture
+def tmp_env(tmp_path):
+    """Fixture that sets up a temporary environment with paths and sample structures."""
+    result_dir = tmp_path / "results"
+    result_dir.mkdir()
+    progress_file = tmp_path / "progress.txt"
+    progress_file.write_text("")
+    return {"tmp": tmp_path, "results": result_dir, "progress": progress_file}
+@pytest.fixture
+def dummy_nc_file(tmp_env):
+    """Create a fake dataset file."""
+    file_path = tmp_env["tmp"] / "dummy.nc"
+    file_path.write_text("fake dataset content")
+    return str(file_path)
+@pytest.fixture
+def fake_check_suite(monkeypatch):
+    """Monkeypatch CheckSuite to avoid real compliance logic."""
+    class DummyCheck:
+        def __init__(self, name):
+            self.name = name
+            self.weight = 1
+            self.value = "PASS"
+            self.msgs = []
+            self.check_method = "check_method"
+            self.children = []
+    class DummyCheckSuite:
+        def __init__(self, options=None):
+            self.options = options or {}
+            self.checkers = {}
+        def load_all_available_checkers(self):
+            pass
+        def load_dataset(self, file_path):
+            return f"dataset:{file_path}"
+        def run_all(self, ds, checkers, include_checks=None, skip_checks=None):
+            return {
+                checker: (
+                    [DummyCheck("time_bounds")],  # flat list of results
+                    {},  # errors
+                )
+                for checker in checkers
+            }
+    monkeypatch.setattr("esgf_qa.run_qa.CheckSuite", DummyCheckSuite)
+    return DummyCheckSuite
+class TestDummyQA:
+    """Tests for run_compliance_checker, process_file, and process_dataset."""
+    def test_run_compliance_checker_basic(self, fake_check_suite, dummy_nc_file):
+        checkers = ["cf:latest"]
+        results = run_compliance_checker(dummy_nc_file, checkers)
+        assert isinstance(results, dict)
+        assert "cf:latest" in results
+        assert isinstance(results["cf:latest"], tuple)
+        assert isinstance(results["cf:latest"][0], list)
+    def test_process_file(self, fake_check_suite, tmp_env, dummy_nc_file):
+        """When no previous results exist, should run checks and write output."""
+        files_to_check_dict = {
+            dummy_nc_file: {
+                "result_file": str(tmp_env["results"] / "res.json"),
+                "consistency_file": str(tmp_env["results"] / "cons.json"),
+            }
+        }
+        processed_files = []
+        checkers = ["cf:latest"]
+        checker_options = {}
+        file_path, result = process_file(
+            dummy_nc_file,
+            checkers,
+            checker_options,
+            files_to_check_dict,
+            processed_files,
+            str(tmp_env["progress"]),
+        )
+        # should write JSON to disk
+        result_file = files_to_check_dict[dummy_nc_file]["result_file"]
+        assert os.path.isfile(result_file)
+        with open(result_file) as f:
+            data = json.load(f)
+        assert "cf" in data
+        assert "errors" in data["cf"]
+    def test_process_file_cached_result(self, fake_check_suite, tmp_env, dummy_nc_file):
+        """Should read from disk if result already exists and no errors."""
+        result_file = tmp_env["results"] / "res.json"
+        consistency_file = tmp_env["results"] / "cons.json"
+        result_file.write_text(json.dumps({"cf": {"errors": {}}}))
+        consistency_file.write_text("dummy consistency file")
+        files_to_check_dict = {
+            dummy_nc_file: {
+                "result_file": str(result_file),
+                "consistency_file": str(consistency_file),
+            }
+        }
+        processed_files = [dummy_nc_file]
+        checkers = ["cf:latest"]
+        checker_options = {}
+        file_path, result = process_file(
+            dummy_nc_file,
+            checkers,
+            checker_options,
+            files_to_check_dict,
+            processed_files,
+            str(tmp_env["progress"]),
+        )
+        # Should reuse cached result, not rewrite
+        assert result == {"cf": {"errors": {}}}
+    def test_process_dataset(self, fake_check_suite, tmp_env, dummy_nc_file):
+        """process_dataset should run checks for not yet checked dataset."""
+        ds = "dataset1"
+        ds_map = {ds: [dummy_nc_file]}
+        result_file_ds = tmp_env["results"] / "res_ds.json"
+        files_to_check_dict = {dummy_nc_file: {"result_file_ds": str(result_file_ds)}}
+        processed_datasets = set()
+        checkers = ["unknown_checker:latest"]
+        checker_options = {}
+        ds_id, result = process_dataset(
+            ds,
+            ds_map,
+            checkers,
+            checker_options,
+            files_to_check_dict,
+            processed_datasets,
+            str(tmp_env["progress"]),
+        )
+        # should write JSON file for dataset results
+        assert ds_id == "dataset1"
+        assert os.path.isfile(result_file_ds)
+        with open(result_file_ds) as f:
+            data = json.load(f)
+        assert "unknown_checker" in data
+        assert "errors" in data["unknown_checker"]
+        assert "msg" in data["unknown_checker"]["errors"]["unknown_checker"]
+    def test_process_dataset_cached(self, fake_check_suite, tmp_env, dummy_nc_file):
+        """Should read dataset result if already processed and valid."""
+        ds = "dataset2"
+        ds_map = {ds: [dummy_nc_file]}
+        result_file_ds = tmp_env["results"] / "res_ds2.json"
+        result_file_ds.write_text(json.dumps({"cf": {"errors": {}}}))
+        files_to_check_dict = {dummy_nc_file: {"result_file_ds": str(result_file_ds)}}
+        processed_datasets = {ds}
+        checkers = ["cf:latest"]
+        checker_options = {}
+        ds_id, result = process_dataset(
+            ds,
+            ds_map,
+            checkers,
+            checker_options,
+            files_to_check_dict,
+            processed_datasets,
+            str(tmp_env["progress"]),
+        )
+        assert ds_id == ds
+        assert result == {"cf": {"errors": {}}}

esgf-qa 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

esgf-qa 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl