PyPI - esgf-qa - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

esgf_qa/_constants.py +63 -1
esgf_qa/_version.py +2 -2
esgf_qa/cluster_results.py +467 -0
esgf_qa/con_checks.py +209 -11
esgf_qa/run_qa.py +356 -463
{esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/METADATA +47 -31
esgf_qa-0.5.0.dist-info/RECORD +19 -0
{esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/WHEEL +1 -1
{esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/top_level.txt +1 -1
tests/test_cli.py +271 -0
tests/test_cluster_results.py +166 -0
tests/test_con_checks.py +263 -0
tests/test_qaviewer.py +147 -0
tests/test_run_dummy_qa.py +191 -0
tests/test_run_qa.py +181 -0
docs/esgf-qa_Logo.png +0 -0
esgf_qa-0.3.0.dist-info/RECORD +0 -13
{esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/entry_points.txt +0 -0
{esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/licenses/LICENSE +0 -0

esgf_qa/con_checks.py CHANGED Viewed

@@ -32,7 +32,29 @@ def printtimedelta(d):
 def truncate_str(s, max_length=16):
-    if max_length <= 15 or len(s) <= max_length:
+    """
+    Truncate string if too long.
+    Parameters
+    ----------
+    s : str
+        String to truncate.
+    max_length : int, optional
+        Maximum length of string. Default is 16.
+    Returns
+    -------
+    str
+        Truncated string.
+    Examples
+    --------
+    >>> truncate_str("This is a long string", 10)
+    'This...string'
+    >>> truncate_str("This is a short string", 16)
+    'This is a short string'
+    """
+    if max_length <= 0 or max_length is None or len(s) <= max_length:
         return s
     # Select start and end of string
@@ -58,6 +80,23 @@ def truncate_str(s, max_length=16):
 def compare_dicts(dict1, dict2, exclude_keys=None):
+    """
+    Compare two dictionaries and return keys with differing values.
+    Parameters
+    ----------
+    dict1 : dict
+        First dictionary to compare.
+    dict2 : dict
+        Second dictionary to compare.
+    exclude_keys : list, optional
+        List of keys to exclude from comparison.
+    Returns
+    -------
+    list
+        List of keys with differing values.
+    """
     if exclude_keys is None:
         exclude_keys = set()
     else:
@@ -67,17 +106,36 @@ def compare_dicts(dict1, dict2, exclude_keys=None):
     all_keys = (set(dict1) | set(dict2)) - exclude_keys
     # Collect keys with differing values
-    differing_keys = [key for key in all_keys if dict1.get(key) != dict2.get(key)]
+    differing_keys = [
+        key for key in sorted(list(all_keys)) if dict1.get(key) != dict2.get(key)
+    ]
     return differing_keys
 def compare_nested_dicts(dict1, dict2, exclude_keys=None):
+    """
+    Compare two nested dictionaries and return keys with differing values.
+    Parameters
+    ----------
+    dict1 : dict
+        First dictionary to compare.
+    dict2 : dict
+        Second dictionary to compare.
+    exclude_keys : list, optional
+        List of keys to exclude from comparison.
+    Returns
+    -------
+    dict
+        Dictionary of keys with differing values.
+    """
     diffs = {}
     all_root_keys = set(dict1) | set(dict2)
-    for root_key in all_root_keys:
+    for root_key in sorted(list(all_root_keys)):
         subdict1 = dict1.get(root_key, {})
         subdict2 = dict2.get(root_key, {})
@@ -95,6 +153,32 @@ def compare_nested_dicts(dict1, dict2, exclude_keys=None):
 def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
+    """
+    Consistency checks.
+    Runs inter-file consistency checks on a dataset:
+        - Global attributes (values and data types)
+        - Variable attributes (values and data types)
+        - Coordinates (values)
+        - Dimensions (names and sizes)
+    Parameters
+    ----------
+    ds : str
+        Dataset to process.
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    checker_options : dict
+        Dictionary of checker options.
+    Returns
+    -------
+    dict
+        A dictionary containing the results of the consistency checks.
+    """
     results = defaultdict(level1_factory)
     filelist = sorted(ds_map[ds])
     consistency_files = OrderedDict(
@@ -251,6 +335,27 @@ def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
 def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
+    """
+    Checks inter-file time and time_bnds continuity for a dataset.
+    This check identifies gaps in time or time_bnds between files of a dataset.
+    Parameters
+    ----------
+    ds : str
+        Dataset to process.
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    checker_options : dict
+        Dictionary of checker options.
+    Returns
+    -------
+    dict
+        Dictionary of results.
+    """
     results = defaultdict(level1_factory)
     filelist = sorted(ds_map[ds])
     consistency_files = OrderedDict(
@@ -276,6 +381,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
                     calendar=data["time_info"]["calendar"],
                 )
                 if data["time_info"]["timen"]
+                and data["time_info"]["units"]
+                and data["time_info"]["calendar"]
                 else None
             )
             boundn = (
@@ -285,6 +392,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
                     calendar=data["time_info"]["calendar"],
                 )
                 if data["time_info"]["boundn"]
+                and data["time_info"]["units"]
+                and data["time_info"]["calendar"]
                 else None
             )
             if i == 1:
@@ -296,6 +405,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
                     calendar=data["time_info"]["calendar"],
                 )
                 if data["time_info"]["time0"]
+                and data["time_info"]["units"]
+                and data["time_info"]["calendar"]
                 else None
             )
             bound0 = (
@@ -305,6 +416,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
                     calendar=data["time_info"]["calendar"],
                 )
                 if data["time_info"]["bound0"]
+                and data["time_info"]["units"]
+                and data["time_info"]["calendar"]
                 else None
             )
             freq = data["time_info"]["frequency"]
@@ -337,23 +450,55 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
 def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
+    """
+    Compatibility checks for a dataset.
+    Checks for:
+        - xarray open_mfdataset (compat='override', join='outer')
+        - xarray open_mfdataset (compat='no_conflicts', join='exact')
+    Parameters
+    ----------
+    ds : str
+        Dataset to process.
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    checker_options : dict
+        Dictionary of checker options.
+    Returns
+    -------
+    dict
+        Dictionary of results.
+    """
     results = defaultdict(level1_factory)
     filelist = sorted(ds_map[ds])
     # open_mfdataset - override
-    test = "xarray open_mfdataset - override"
+    test = "xarray open_mfdataset (compat='override', join='outer')"
     results[test]["weight"] = 3
     try:
-        with xr.open_mfdataset(filelist, coords="minimal", compat="override") as ds:
+        with xr.open_mfdataset(
+            filelist, coords="minimal", compat="override", data_vars="all", join="outer"
+        ) as ds:
             pass
     except Exception as e:
         results[test]["msgs"][str(e)].extend(filelist)
     # open_mfdataset - no_conflicts
-    test = "xarray open_mfdataset - no_conflicts"
+    test = "xarray open_mfdataset (compat='no_conflicts', join='exact')"
     results[test]["weight"] = 3
     try:
-        with xr.open_mfdataset(filelist, coords="minimal", compat="no_conflicts") as ds:
+        with xr.open_mfdataset(
+            filelist,
+            coords="minimal",
+            compat="no_conflicts",
+            data_vars="all",
+            join="exact",
+        ) as ds:
             pass
     except Exception as e:
         results[test]["msgs"][str(e)].extend(filelist)
@@ -362,6 +507,25 @@ def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
 def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
+    """
+    Checks consistency of dataset time coverage.
+    Variables that differ in their time coverage are reported.
+    Parameters
+    ----------
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    checker_options : dict
+        Dictionary of checker options.
+    Returns
+    -------
+    dict
+        Dictionary of results.
+    """
     results = defaultdict(level0_factory)
     test = "Time coverage"
@@ -420,17 +584,27 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
     # Compare coverage
     if len(coverage_start.keys()) > 1:
-        scov = min(coverage_start.values())
-        ecov = max(coverage_end.values())
+        try:
+            scov = min(coverage_start.values())
+        except ValueError:
+            scov = None
+        try:
+            ecov = max(coverage_end.values())
+        except ValueError:
+            ecov = None
         # Get all ds where coverage_start differs
         for ds in coverage_start.keys():
             fl = sorted(ds_map[ds])
-            if coverage_start[ds] != scov:
+            if scov is None:
+                pass
+            elif coverage_start[ds] != scov:
                 results[ds][test]["weight"] = 1
                 results[ds][test]["msgs"][
                     f"Time series starts at '{coverage_start[ds]}' while other time series start at '{scov}'"
                 ] = [fl[0]]
-            if ds in coverage_end and coverage_end[ds] != ecov:
+            if ecov is None:
+                pass
+            elif ds in coverage_end and coverage_end[ds] != ecov:
                 results[ds][test]["weight"] = 1
                 results[ds][test]["msgs"][
                     f"Time series ends at '{coverage_end[ds]}' while other time series end at '{ecov}'"
@@ -440,6 +614,30 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
 def inter_dataset_consistency_checks(ds_map, files_to_check_dict, checker_options):
+    """
+    Inter-dataset consistency checks.
+    Will group datasets by realm and grid for certain checks.
+    Runs inter-dataset consistency checks:
+        - Required and non-required global attributes (values and data types)
+        - Coordinates (values)
+        - Dimensions (names and sizes)
+    Parameters
+    ----------
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    checker_options : dict
+        Dictionary of checker options.
+    Returns
+    -------
+    dict
+        Dictionary of results.
+    """
     results = defaultdict(level0_factory)
     filedict = {}
     consistency_data = {}

esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl