cloudnetpy-qc 1.24.3__tar.gz → 1.25.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {cloudnetpy_qc-1.24.3/cloudnetpy_qc.egg-info → cloudnetpy_qc-1.25.1}/PKG-INFO +2 -1
  2. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/README.md +1 -0
  3. cloudnetpy_qc-1.25.1/cloudnetpy_qc/coverage.py +66 -0
  4. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/quality.py +66 -58
  5. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/utils.py +48 -0
  6. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/version.py +2 -2
  7. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1/cloudnetpy_qc.egg-info}/PKG-INFO +2 -1
  8. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc.egg-info/SOURCES.txt +1 -0
  9. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/pyproject.toml +1 -1
  10. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/tests/test_qc.py +1 -0
  11. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/LICENSE +0 -0
  12. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/MANIFEST.in +0 -0
  13. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/__init__.py +0 -0
  14. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/data/area-type-table.xml +0 -0
  15. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/data/cf-standard-name-table.xml +0 -0
  16. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/data/data_quality_config.ini +0 -0
  17. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/data/standardized-region-list.xml +0 -0
  18. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/py.typed +0 -0
  19. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/variables.py +0 -0
  20. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc.egg-info/dependency_links.txt +0 -0
  21. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc.egg-info/requires.txt +0 -0
  22. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc.egg-info/top_level.txt +0 -0
  23. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/setup.cfg +0 -0
  24. {cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/tests/test_utils.py +0 -0
{cloudnetpy_qc-1.24.3/cloudnetpy_qc.egg-info → cloudnetpy_qc-1.25.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloudnetpy_qc
-Version: 1.24.3
+Version: 1.25.1
 Summary: Quality control routines for CloudnetPy products
 Author-email: Finnish Meteorological Institute <actris-cloudnet@fmi.fi>
 License: MIT License
@@ -80,6 +80,7 @@ print(json_object)
 - `timestamp`: UTC timestamp of the test
 - `qcVersion`: `cloudnetpy-qc` version
 - `tests`: `Test[]`
+- `data_coverage`: float

 ### `Test`

{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/README.md

@@ -27,6 +27,7 @@ print(json_object)
 - `timestamp`: UTC timestamp of the test
 - `qcVersion`: `cloudnetpy-qc` version
 - `tests`: `Test[]`
+- `data_coverage`: float

 ### `Test`

cloudnetpy_qc-1.25.1/cloudnetpy_qc/coverage.py

@@ -0,0 +1,66 @@
+import datetime
+
+import netCDF4
+import numpy as np
+
+from cloudnetpy_qc.variables import Product
+
+RESOLUTIONS = {
+    Product.DISDROMETER: datetime.timedelta(minutes=1),
+    Product.L3_CF: datetime.timedelta(hours=1),
+    Product.L3_IWC: datetime.timedelta(hours=1),
+    Product.L3_LWC: datetime.timedelta(hours=1),
+    Product.MWR: datetime.timedelta(minutes=5),
+    Product.MWR_MULTI: datetime.timedelta(minutes=30),
+    Product.MWR_SINGLE: datetime.timedelta(minutes=5),
+    Product.WEATHER_STATION: datetime.timedelta(minutes=10),
+    Product.RAIN_GAUGE: datetime.timedelta(minutes=1),
+    Product.DOPPLER_LIDAR_WIND: datetime.timedelta(hours=1.5),
+}
+DEFAULT_RESOLUTION = datetime.timedelta(seconds=30)
+
+
+def data_coverage(
+    nc: netCDF4.Dataset,
+) -> tuple[float, datetime.timedelta, datetime.timedelta] | None:
+    time = np.array(nc["time"][:])
+    time_unit = datetime.timedelta(hours=1)
+    try:
+        n_time = len(time)
+    except (TypeError, ValueError):
+        return None
+    if n_time < 2:
+        return None
+    if nc.cloudnet_file_type == "model":
+        expected_res = _model_resolution(nc)
+    else:
+        product = Product(nc.cloudnet_file_type)
+        expected_res = RESOLUTIONS.get(product, DEFAULT_RESOLUTION)
+    duration = get_duration(nc)
+    bins = max(1, duration // expected_res)
+    hist, _ = np.histogram(time, bins=bins, range=(0, duration / time_unit))
+    coverage = np.count_nonzero(hist > 0) / len(hist)
+    actual_res = np.median(np.diff(time)) * time_unit
+    return coverage, expected_res, actual_res
+
+
+def _model_resolution(nc: netCDF4.Dataset) -> datetime.timedelta:
+    source = nc.source.lower()
+    if "gdas" in source or "ecmwf open" in source:
+        return datetime.timedelta(hours=3)
+    return datetime.timedelta(hours=1)
+
+
+def get_duration(nc: netCDF4.Dataset) -> datetime.timedelta:
+    now = datetime.datetime.now(tz=datetime.timezone.utc)
+    if now.date() == _get_date(nc):
+        midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
+        duration = now - midnight
+    else:
+        duration = datetime.timedelta(days=1)
+    return duration
+
+
+def _get_date(nc: netCDF4.Dataset) -> datetime.date:
+    date_in_file = [int(getattr(nc, x)) for x in ("year", "month", "day")]
+    return datetime.date(*date_in_file)
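To make the new metric concrete, here is a small standalone sketch of the same binning logic: timestamps in decimal hours are histogrammed at the product's expected resolution, and coverage is the fraction of non-empty bins. The values below are invented for illustration (a 5-minute product with a six-hour gap); the sketch does not use the package itself.

```python
import datetime

import numpy as np

# Invented example: one day of 5-minute data with a gap from 06:00 to 12:00.
expected_res = datetime.timedelta(minutes=5)
duration = datetime.timedelta(days=1)
time_unit = datetime.timedelta(hours=1)

# Decimal hours at the centre of each 5-minute interval, skipping 06:00-12:00.
steps = np.concatenate([np.arange(0, 6 * 12), np.arange(12 * 12, 24 * 12)])
time = (steps + 0.5) / 12

# Same binning as data_coverage() above: one bin per expected-resolution step.
bins = max(1, duration // expected_res)  # 288 five-minute bins
hist, _ = np.histogram(time, bins=bins, range=(0, duration / time_unit))
coverage = np.count_nonzero(hist > 0) / len(hist)
print(coverage)  # 0.75, i.e. 25% of the day missing -> an info message in TestDataCoverage
```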
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/quality.py

@@ -14,9 +14,12 @@ from typing import NamedTuple, TypedDict
 import netCDF4
 import numpy as np
 import scipy.stats
+from cftime import num2pydate
 from numpy import ma
 from requests import RequestException

+from cloudnetpy_qc.coverage import data_coverage, get_duration
+
 from . import utils
 from .variables import LEVELS, VARIABLES, Product
 from .version import __version__
@@ -53,6 +56,7 @@ class FileReport(NamedTuple):
     timestamp: datetime.datetime
     qc_version: str
     tests: list[TestReport]
+    data_coverage: float | None

     def to_dict(self) -> dict:
         return {
@@ -72,8 +76,9 @@ class FileReport(NamedTuple):


 class SiteMeta(TypedDict):
-    latitude: float | None
-    longitude: float | None
+    time: np.ndarray | None
+    latitude: float | np.ndarray | None
+    longitude: float | np.ndarray | None
     altitude: float | None


@@ -84,6 +89,7 @@ def run_tests(
     ignore_tests: list[str] | None = None,
 ) -> FileReport:
     filename = Path(filename)
+    coverage = None
     if isinstance(product, str):
         product = Product(product)
     with netCDF4.Dataset(filename) as nc:
@@ -110,11 +116,15 @@ def run_tests(
                 test_instance._add_error(
                     f"Failed to run test: {err} ({type(err).__name__})"
                 )
+                logging.exception("Failed to run test:")
             test_reports.append(test_instance.report)
+            if test_instance.coverage is not None:
+                coverage = test_instance.coverage
     return FileReport(
         timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
         qc_version=__version__,
         tests=test_reports,
+        data_coverage=coverage,
     )


@@ -124,6 +134,7 @@ class Test:
     name: str
     description: str
     products: Iterable[Product] = Product.all()
+    coverage: float | None = None

     def __init__(
         self, nc: netCDF4.Dataset, filename: Path, product: Product, site_meta: SiteMeta
@@ -190,19 +201,6 @@ class Test:
         )
         self._add_warning(msg)

-    def _get_date(self):
-        date_in_file = [int(getattr(self.nc, x)) for x in ("year", "month", "day")]
-        return datetime.date(*date_in_file)
-
-    def _get_duration(self) -> datetime.timedelta:
-        now = datetime.datetime.now(tz=datetime.timezone.utc)
-        if now.date() == self._get_date():
-            midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
-            duration = now - midnight
-        else:
-            duration = datetime.timedelta(days=1)
-        return duration
-

 # --------------------#
 # ------ Infos ------ #
@@ -287,45 +285,12 @@ class TestDataCoverage(Test):
     name = "Data coverage"
     description = "Test that file contains enough data."

-    RESOLUTIONS = {
-        Product.DISDROMETER: datetime.timedelta(minutes=1),
-        Product.L3_CF: datetime.timedelta(hours=1),
-        Product.L3_IWC: datetime.timedelta(hours=1),
-        Product.L3_LWC: datetime.timedelta(hours=1),
-        Product.MWR: datetime.timedelta(minutes=5),
-        Product.MWR_MULTI: datetime.timedelta(minutes=30),
-        Product.MWR_SINGLE: datetime.timedelta(minutes=5),
-        Product.WEATHER_STATION: datetime.timedelta(minutes=10),
-        Product.RAIN_GAUGE: datetime.timedelta(minutes=1),
-        Product.DOPPLER_LIDAR_WIND: datetime.timedelta(hours=1.5),
-    }
-    DEFAULT_RESOLUTION = datetime.timedelta(seconds=30)
-
-    def _model_resolution(self):
-        source = self.nc.source.lower()
-        if "gdas" in source or "ecmwf open" in source:
-            return datetime.timedelta(hours=3)
-        return datetime.timedelta(hours=1)
-
     def run(self):
-        time = np.array(self.nc["time"][:])
-        time_unit = datetime.timedelta(hours=1)
-        try:
-            n_time = len(time)
-        except (TypeError, ValueError):
-            return
-        if n_time < 2:
+        coverage, expected_res, actual_res = data_coverage(self.nc)
+        if coverage is None:
             return
-        if self.nc.cloudnet_file_type == "model":
-            expected_res = self._model_resolution()
-        else:
-            expected_res = self.RESOLUTIONS.get(self.product, self.DEFAULT_RESOLUTION)
-        duration = self._get_duration()
-        bins = max(1, duration // expected_res)
-        hist, _bin_edges = np.histogram(
-            time, bins=bins, range=(0, duration / time_unit)
-        )
-        missing = np.count_nonzero(hist == 0) / len(hist) * 100
+        self.coverage = coverage
+        missing = (1 - coverage) * 100
         if missing > 20:
             message = f"{round(missing)}% of day's data is missing."
             if missing > 60:
@@ -333,7 +298,6 @@ class TestDataCoverage(Test):
         else:
             self._add_info(message)

-        actual_res = np.median(np.diff(time)) * time_unit
         if actual_res > expected_res * 1.05:
             self._add_warning(
                 f"Expected a measurement with interval at least {expected_res},"
@@ -789,7 +753,7 @@ class TestModelData(Test):
         if n_time < 2:
             return

-        duration = self._get_duration()
+        duration = get_duration(self.nc)
         should_be_data_until = duration / time_unit

         for key in ("temperature", "pressure", "q"):
@@ -838,14 +802,27 @@ class TestCoordinates(Test):
                 self._add_error(f"Variable '{key}' is missing")

         if "latitude" in self.nc.variables and "longitude" in self.nc.variables:
-            site_lat = self.site_meta["latitude"]
-            site_lon = self.site_meta["longitude"]
+            site_lat = np.atleast_1d(self.site_meta["latitude"])
+            site_lon = np.atleast_1d(self.site_meta["longitude"])
             file_lat = np.atleast_1d(self.nc["latitude"][:])
             file_lon = np.atleast_1d(self.nc["longitude"][:])
             file_lon[file_lon > 180] -= 360
-            dist = utils.haversine(site_lat, site_lon, file_lat, file_lon)
+
+            if self.site_meta.get("time") and file_lat.size > 1 and file_lon.size > 1:
+                site_time = self._read_site_time()
+                file_time = self._read_file_time()
+                idx = utils.find_closest(file_time, site_time)
+                file_lat = file_lat[idx]
+                file_lon = file_lon[idx]
+            else:
+                file_lat, file_lon = utils.average_coordinate(file_lat, file_lon)
+                site_lat, site_lon = utils.average_coordinate(site_lat, site_lon)
+
+            dist = np.atleast_1d(
+                utils.haversine(site_lat, site_lon, file_lat, file_lon)
+            )
             i = np.argmax(dist)
-            max_dist = 100 if self.nc.cloudnet_file_type == "model" else 10
+            max_dist = self._calc_max_dist(site_lat, site_lon)
             if dist[i] > max_dist:
                 self._add_error(
                     f"Variables 'latitude' and 'longitude' do not match "
@@ -867,6 +844,37 @@ class TestCoordinates(Test):
                     f"but received {round(file_alt[i])}\u00a0m"
                 )

+    def _read_site_time(self):
+        for dt in self.site_meta["time"]:
+            if (
+                not isinstance(dt, datetime.datetime)
+                or dt.tzinfo is None
+                or dt.tzinfo.utcoffset(dt) is None
+            ):
+                raise ValueError("Naive datetimes are not supported")
+        naive_dt = [
+            dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)
+            for dt in self.site_meta["time"]
+        ]
+        return np.array(naive_dt, dtype="datetime64[s]")
+
+    def _read_file_time(self):
+        naive_dt = num2pydate(
+            self.nc["time"][:], self.nc["time"].units, self.nc["time"].calendar
+        )
+        return np.array(naive_dt, dtype="datetime64[s]")
+
+    def _calc_max_dist(self, latitude, longitude):
+        if self.nc.cloudnet_file_type == "model":
+            angle = 1  # Model resolution should be at least 1 degrees.
+            half_angle = angle / 2
+            min_lat = np.maximum(-90, latitude - half_angle)
+            max_lat = np.minimum(90, latitude + half_angle)
+            min_lon = np.maximum(-180, longitude - half_angle)
+            max_lon = np.minimum(180, longitude + half_angle)
+            return utils.haversine(min_lat, min_lon, max_lat, max_lon)
+        return 10
+

 # ------------------------------#
 # ------ Error / Warning ------ #
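For model files, the coordinate test above no longer uses a fixed 100 km threshold: `_calc_max_dist` derives the allowed mismatch from the diagonal of a 1° box around the site, so it shrinks with latitude as meridians converge. A rough standalone check of that bound, assuming cloudnetpy-qc 1.25.1 is installed and that `utils.haversine` returns kilometres; the example coordinates are invented.

```python
import numpy as np

from cloudnetpy_qc.utils import haversine


def model_max_dist(latitude: float, longitude: float) -> float:
    # Mirrors TestCoordinates._calc_max_dist for model files: the diagonal of a
    # 1-degree box centred on the site, clipped to valid coordinate ranges.
    half_angle = 0.5
    min_lat = np.maximum(-90, latitude - half_angle)
    max_lat = np.minimum(90, latitude + half_angle)
    min_lon = np.maximum(-180, longitude - half_angle)
    max_lon = np.minimum(180, longitude + half_angle)
    return haversine(min_lat, min_lon, max_lat, max_lon)


print(model_max_dist(0.0, 17.5))   # ~157 km at the equator (assuming kilometres)
print(model_max_dist(80.0, 17.5))  # ~113 km at high latitude
```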
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/utils.py

@@ -141,3 +141,51 @@ def haversine(

     a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
     return 2 * r * np.arcsin(np.sqrt(a))
+
+
+def find_closest(x: npt.NDArray, x_new: npt.NDArray) -> npt.NDArray[np.intp]:
+    """Find the closest values between two arrays.
+
+    Args:
+        x: Sorted array.
+        x_new: Sorted array.
+
+    Returns:
+        Indices into `x` which correspond to the closest values in `x_new`.
+
+    Example:
+        >>> x = np.array([0.9, 1.2, 2.0, 2.1])
+        >>> x_new = np.array([1, 2])
+        >>> find_closest(x, x_new)
+        array([0, 2])
+    """
+    idx = np.searchsorted(x, x_new)
+    idx_left = np.clip(idx - 1, 0, len(x) - 1)
+    idx_right = np.clip(idx, 0, len(x) - 1)
+    diff_left = np.abs(x_new - x[idx_left])
+    diff_right = np.abs(x_new - x[idx_right])
+    return np.where(diff_left < diff_right, idx_left, idx_right)
+
+
+def average_coordinate(
+    latitude: npt.NDArray, longitude: npt.NDArray
+) -> tuple[float, float]:
+    """Calculate average position from given coordinates.
+
+    Args:
+        latitude: Array of latitudes.
+        longitude: Array of longitudes.
+
+    Returns:
+        Tuple of average latitude and longitude.
+    """
+    if latitude.size == longitude.size == 1:
+        return latitude[0], longitude[0]
+    latitude = np.radians(latitude)
+    longitude = np.radians(longitude)
+    x = np.mean(np.cos(latitude) * np.cos(longitude))
+    y = np.mean(np.cos(latitude) * np.sin(longitude))
+    z = np.mean(np.sin(latitude))
+    avg_lat = np.degrees(np.atan2(z, np.sqrt(x * x + y * y)))
+    avg_lon = np.degrees(np.atan2(y, x))
+    return avg_lat, avg_lon
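A short usage sketch for the two new helpers (assumes cloudnetpy-qc 1.25.1 is installed; the sample values are invented):

```python
import numpy as np

from cloudnetpy_qc.utils import average_coordinate, find_closest

# Pick the file samples closest in time to two site timestamps (both arrays sorted).
file_time = np.array(
    ["2024-05-01T00:00", "2024-05-01T06:00", "2024-05-01T12:00"], dtype="datetime64[s]"
)
site_time = np.array(["2024-05-01T05:00", "2024-05-01T13:00"], dtype="datetime64[s]")
print(find_closest(file_time, site_time))  # [1 2]

# Mean position on the sphere: longitudes straddling the antimeridian average to
# roughly 180 degrees rather than the 0 degrees a naive arithmetic mean would give.
lat, lon = average_coordinate(np.array([10.0, 10.0]), np.array([179.0, -179.0]))
print(round(float(lat), 2), round(float(lon), 2))
```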
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc/version.py

@@ -1,6 +1,6 @@
 """Cloudnetpy-QC version."""

 MAJOR = 1
-MINOR = 24
-PATCH = 3
+MINOR = 25
+PATCH = 1
 __version__ = f"{MAJOR}.{MINOR}.{PATCH}"
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1/cloudnetpy_qc.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloudnetpy_qc
-Version: 1.24.3
+Version: 1.25.1
 Summary: Quality control routines for CloudnetPy products
 Author-email: Finnish Meteorological Institute <actris-cloudnet@fmi.fi>
 License: MIT License
@@ -80,6 +80,7 @@ print(json_object)
 - `timestamp`: UTC timestamp of the test
 - `qcVersion`: `cloudnetpy-qc` version
 - `tests`: `Test[]`
+- `data_coverage`: float

 ### `Test`

{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/cloudnetpy_qc.egg-info/SOURCES.txt

@@ -3,6 +3,7 @@ MANIFEST.in
 README.md
 pyproject.toml
 cloudnetpy_qc/__init__.py
+cloudnetpy_qc/coverage.py
 cloudnetpy_qc/py.typed
 cloudnetpy_qc/quality.py
 cloudnetpy_qc/utils.py
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/pyproject.toml

@@ -30,7 +30,7 @@ Repository = "https://github.com/actris-cloudnet/cloudnetpy-qc"
 Changelog = "https://github.com/actris-cloudnet/cloudnetpy-qc/blob/main/CHANGELOG.md"

 [[tool.mypy.overrides]]
-module = ["cfchecker.*", "scipy.*"]
+module = ["cfchecker.*", "cftime.*", "scipy.*"]
 ignore_missing_imports = true

 [tool.release-version]
{cloudnetpy_qc-1.24.3 → cloudnetpy_qc-1.25.1}/tests/test_qc.py

@@ -93,6 +93,7 @@ class Check:
     def __init__(self, filename: str, file_type: str | None = None):
         # Norunda
         site_meta: quality.SiteMeta = {
+            "time": None,
             "latitude": 60.086,
             "longitude": 17.479,
             "altitude": 46.0,