esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
esgf_qa/con_checks.py CHANGED
@@ -32,7 +32,29 @@ def printtimedelta(d):
32
32
 
33
33
 
34
34
  def truncate_str(s, max_length=16):
35
- if max_length <= 15 or len(s) <= max_length:
35
+ """
36
+ Truncate string if too long.
37
+
38
+ Parameters
39
+ ----------
40
+ s : str
41
+ String to truncate.
42
+ max_length : int, optional
43
+ Maximum length of string. Default is 16.
44
+
45
+ Returns
46
+ -------
47
+ str
48
+ Truncated string.
49
+
50
+ Examples
51
+ --------
52
+ >>> truncate_str("This is a long string", 10)
53
+ 'This...string'
54
+ >>> truncate_str("This is a short string", 16)
55
+ 'This is a short string'
56
+ """
57
+ if max_length <= 0 or max_length is None or len(s) <= max_length:
36
58
  return s
37
59
 
38
60
  # Select start and end of string
@@ -58,6 +80,23 @@ def truncate_str(s, max_length=16):
58
80
 
59
81
 
60
82
  def compare_dicts(dict1, dict2, exclude_keys=None):
83
+ """
84
+ Compare two dictionaries and return keys with differing values.
85
+
86
+ Parameters
87
+ ----------
88
+ dict1 : dict
89
+ First dictionary to compare.
90
+ dict2 : dict
91
+ Second dictionary to compare.
92
+ exclude_keys : list, optional
93
+ List of keys to exclude from comparison.
94
+
95
+ Returns
96
+ -------
97
+ list
98
+ List of keys with differing values.
99
+ """
61
100
  if exclude_keys is None:
62
101
  exclude_keys = set()
63
102
  else:
@@ -67,17 +106,36 @@ def compare_dicts(dict1, dict2, exclude_keys=None):
67
106
  all_keys = (set(dict1) | set(dict2)) - exclude_keys
68
107
 
69
108
  # Collect keys with differing values
70
- differing_keys = [key for key in all_keys if dict1.get(key) != dict2.get(key)]
109
+ differing_keys = [
110
+ key for key in sorted(list(all_keys)) if dict1.get(key) != dict2.get(key)
111
+ ]
71
112
 
72
113
  return differing_keys
73
114
 
74
115
 
75
116
  def compare_nested_dicts(dict1, dict2, exclude_keys=None):
117
+ """
118
+ Compare two nested dictionaries and return keys with differing values.
119
+
120
+ Parameters
121
+ ----------
122
+ dict1 : dict
123
+ First dictionary to compare.
124
+ dict2 : dict
125
+ Second dictionary to compare.
126
+ exclude_keys : list, optional
127
+ List of keys to exclude from comparison.
128
+
129
+ Returns
130
+ -------
131
+ dict
132
+ Dictionary of keys with differing values.
133
+ """
76
134
  diffs = {}
77
135
 
78
136
  all_root_keys = set(dict1) | set(dict2)
79
137
 
80
- for root_key in all_root_keys:
138
+ for root_key in sorted(list(all_root_keys)):
81
139
  subdict1 = dict1.get(root_key, {})
82
140
  subdict2 = dict2.get(root_key, {})
83
141
 
@@ -95,6 +153,32 @@ def compare_nested_dicts(dict1, dict2, exclude_keys=None):
95
153
 
96
154
 
97
155
  def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
156
+ """
157
+ Consistency checks.
158
+
159
+ Runs inter-file consistency checks on a dataset:
160
+
161
+ - Global attributes (values and data types)
162
+ - Variable attributes (values and data types)
163
+ - Coordinates (values)
164
+ - Dimensions (names and sizes)
165
+
166
+ Parameters
167
+ ----------
168
+ ds : str
169
+ Dataset to process.
170
+ ds_map : dict
171
+ Dictionary mapping dataset IDs to file paths.
172
+ files_to_check_dict : dict
173
+ A special dictionary mapping files to check to datasets.
174
+ checker_options : dict
175
+ Dictionary of checker options.
176
+
177
+ Returns
178
+ -------
179
+ dict
180
+ A dictionary containing the results of the consistency checks.
181
+ """
98
182
  results = defaultdict(level1_factory)
99
183
  filelist = sorted(ds_map[ds])
100
184
  consistency_files = OrderedDict(
@@ -251,6 +335,27 @@ def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
251
335
 
252
336
 
253
337
  def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
338
+ """
339
+ Checks inter-file time and time_bnds continuity for a dataset.
340
+
341
+ This check identifies gaps in time or time_bnds between files of a dataset.
342
+
343
+ Parameters
344
+ ----------
345
+ ds : str
346
+ Dataset to process.
347
+ ds_map : dict
348
+ Dictionary mapping dataset IDs to file paths.
349
+ files_to_check_dict : dict
350
+ A special dictionary mapping files to check to datasets.
351
+ checker_options : dict
352
+ Dictionary of checker options.
353
+
354
+ Returns
355
+ -------
356
+ dict
357
+ Dictionary of results.
358
+ """
254
359
  results = defaultdict(level1_factory)
255
360
  filelist = sorted(ds_map[ds])
256
361
  consistency_files = OrderedDict(
@@ -276,6 +381,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
276
381
  calendar=data["time_info"]["calendar"],
277
382
  )
278
383
  if data["time_info"]["timen"]
384
+ and data["time_info"]["units"]
385
+ and data["time_info"]["calendar"]
279
386
  else None
280
387
  )
281
388
  boundn = (
@@ -285,6 +392,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
285
392
  calendar=data["time_info"]["calendar"],
286
393
  )
287
394
  if data["time_info"]["boundn"]
395
+ and data["time_info"]["units"]
396
+ and data["time_info"]["calendar"]
288
397
  else None
289
398
  )
290
399
  if i == 1:
@@ -296,6 +405,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
296
405
  calendar=data["time_info"]["calendar"],
297
406
  )
298
407
  if data["time_info"]["time0"]
408
+ and data["time_info"]["units"]
409
+ and data["time_info"]["calendar"]
299
410
  else None
300
411
  )
301
412
  bound0 = (
@@ -305,6 +416,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
305
416
  calendar=data["time_info"]["calendar"],
306
417
  )
307
418
  if data["time_info"]["bound0"]
419
+ and data["time_info"]["units"]
420
+ and data["time_info"]["calendar"]
308
421
  else None
309
422
  )
310
423
  freq = data["time_info"]["frequency"]
@@ -337,23 +450,55 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
337
450
 
338
451
 
339
452
  def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
453
+ """
454
+ Compatibility checks for a dataset.
455
+
456
+ Checks for:
457
+
458
+ - xarray open_mfdataset (compat='override', join='outer')
459
+ - xarray open_mfdataset (compat='no_conflicts', join='exact')
460
+
461
+ Parameters
462
+ ----------
463
+ ds : str
464
+ Dataset to process.
465
+ ds_map : dict
466
+ Dictionary mapping dataset IDs to file paths.
467
+ files_to_check_dict : dict
468
+ A special dictionary mapping files to check to datasets.
469
+ checker_options : dict
470
+ Dictionary of checker options.
471
+
472
+ Returns
473
+ -------
474
+ dict
475
+ Dictionary of results.
476
+ """
340
477
  results = defaultdict(level1_factory)
341
478
  filelist = sorted(ds_map[ds])
342
479
 
343
480
  # open_mfdataset - override
344
- test = "xarray open_mfdataset - override"
481
+ test = "xarray open_mfdataset (compat='override', join='outer')"
345
482
  results[test]["weight"] = 3
346
483
  try:
347
- with xr.open_mfdataset(filelist, coords="minimal", compat="override") as ds:
484
+ with xr.open_mfdataset(
485
+ filelist, coords="minimal", compat="override", data_vars="all", join="outer"
486
+ ) as ds:
348
487
  pass
349
488
  except Exception as e:
350
489
  results[test]["msgs"][str(e)].extend(filelist)
351
490
 
352
491
  # open_mfdataset - no_conflicts
353
- test = "xarray open_mfdataset - no_conflicts"
492
+ test = "xarray open_mfdataset (compat='no_conflicts', join='exact')"
354
493
  results[test]["weight"] = 3
355
494
  try:
356
- with xr.open_mfdataset(filelist, coords="minimal", compat="no_conflicts") as ds:
495
+ with xr.open_mfdataset(
496
+ filelist,
497
+ coords="minimal",
498
+ compat="no_conflicts",
499
+ data_vars="all",
500
+ join="exact",
501
+ ) as ds:
357
502
  pass
358
503
  except Exception as e:
359
504
  results[test]["msgs"][str(e)].extend(filelist)
@@ -362,6 +507,25 @@ def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
362
507
 
363
508
 
364
509
  def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
510
+ """
511
+ Checks consistency of dataset time coverage.
512
+
513
+ Variables that differ in their time coverage are reported.
514
+
515
+ Parameters
516
+ ----------
517
+ ds_map : dict
518
+ Dictionary mapping dataset IDs to file paths.
519
+ files_to_check_dict : dict
520
+ A special dictionary mapping files to check to datasets.
521
+ checker_options : dict
522
+ Dictionary of checker options.
523
+
524
+ Returns
525
+ -------
526
+ dict
527
+ Dictionary of results.
528
+ """
365
529
  results = defaultdict(level0_factory)
366
530
  test = "Time coverage"
367
531
 
@@ -420,17 +584,27 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
420
584
 
421
585
  # Compare coverage
422
586
  if len(coverage_start.keys()) > 1:
423
- scov = min(coverage_start.values())
424
- ecov = max(coverage_end.values())
587
+ try:
588
+ scov = min(coverage_start.values())
589
+ except ValueError:
590
+ scov = None
591
+ try:
592
+ ecov = max(coverage_end.values())
593
+ except ValueError:
594
+ ecov = None
425
595
  # Get all ds where coverage_start differs
426
596
  for ds in coverage_start.keys():
427
597
  fl = sorted(ds_map[ds])
428
- if coverage_start[ds] != scov:
598
+ if scov is None:
599
+ pass
600
+ elif coverage_start[ds] != scov:
429
601
  results[ds][test]["weight"] = 1
430
602
  results[ds][test]["msgs"][
431
603
  f"Time series starts at '{coverage_start[ds]}' while other time series start at '{scov}'"
432
604
  ] = [fl[0]]
433
- if ds in coverage_end and coverage_end[ds] != ecov:
605
+ if ecov is None:
606
+ pass
607
+ elif ds in coverage_end and coverage_end[ds] != ecov:
434
608
  results[ds][test]["weight"] = 1
435
609
  results[ds][test]["msgs"][
436
610
  f"Time series ends at '{coverage_end[ds]}' while other time series end at '{ecov}'"
@@ -440,6 +614,30 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
440
614
 
441
615
 
442
616
  def inter_dataset_consistency_checks(ds_map, files_to_check_dict, checker_options):
617
+ """
618
+ Inter-dataset consistency checks.
619
+
620
+ Will group datasets by realm and grid for certain checks.
621
+ Runs inter-dataset consistency checks:
622
+
623
+ - Required and non-required global attributes (values and data types)
624
+ - Coordinates (values)
625
+ - Dimensions (names and sizes)
626
+
627
+ Parameters
628
+ ----------
629
+ ds_map : dict
630
+ Dictionary mapping dataset IDs to file paths.
631
+ files_to_check_dict : dict
632
+ A special dictionary mapping files to check to datasets.
633
+ checker_options : dict
634
+ Dictionary of checker options.
635
+
636
+ Returns
637
+ -------
638
+ dict
639
+ Dictionary of results.
640
+ """
443
641
  results = defaultdict(level0_factory)
444
642
  filedict = {}
445
643
  consistency_data = {}