cloudnetpy 1.49.9__py3-none-any.whl → 1.87.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. cloudnetpy/categorize/__init__.py +1 -2
  2. cloudnetpy/categorize/atmos_utils.py +297 -67
  3. cloudnetpy/categorize/attenuation.py +31 -0
  4. cloudnetpy/categorize/attenuations/__init__.py +37 -0
  5. cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
  6. cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
  7. cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
  8. cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
  9. cloudnetpy/categorize/categorize.py +332 -156
  10. cloudnetpy/categorize/classify.py +127 -125
  11. cloudnetpy/categorize/containers.py +107 -76
  12. cloudnetpy/categorize/disdrometer.py +40 -0
  13. cloudnetpy/categorize/droplet.py +23 -21
  14. cloudnetpy/categorize/falling.py +53 -24
  15. cloudnetpy/categorize/freezing.py +25 -12
  16. cloudnetpy/categorize/insects.py +35 -23
  17. cloudnetpy/categorize/itu.py +243 -0
  18. cloudnetpy/categorize/lidar.py +36 -41
  19. cloudnetpy/categorize/melting.py +34 -26
  20. cloudnetpy/categorize/model.py +84 -37
  21. cloudnetpy/categorize/mwr.py +18 -14
  22. cloudnetpy/categorize/radar.py +215 -102
  23. cloudnetpy/cli.py +578 -0
  24. cloudnetpy/cloudnetarray.py +43 -89
  25. cloudnetpy/concat_lib.py +218 -78
  26. cloudnetpy/constants.py +28 -10
  27. cloudnetpy/datasource.py +61 -86
  28. cloudnetpy/exceptions.py +49 -20
  29. cloudnetpy/instruments/__init__.py +5 -0
  30. cloudnetpy/instruments/basta.py +29 -12
  31. cloudnetpy/instruments/bowtie.py +135 -0
  32. cloudnetpy/instruments/ceilo.py +138 -115
  33. cloudnetpy/instruments/ceilometer.py +164 -80
  34. cloudnetpy/instruments/cl61d.py +21 -5
  35. cloudnetpy/instruments/cloudnet_instrument.py +74 -36
  36. cloudnetpy/instruments/copernicus.py +108 -30
  37. cloudnetpy/instruments/da10.py +54 -0
  38. cloudnetpy/instruments/disdrometer/common.py +126 -223
  39. cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
  40. cloudnetpy/instruments/disdrometer/thies.py +254 -87
  41. cloudnetpy/instruments/fd12p.py +201 -0
  42. cloudnetpy/instruments/galileo.py +65 -23
  43. cloudnetpy/instruments/hatpro.py +123 -49
  44. cloudnetpy/instruments/instruments.py +113 -1
  45. cloudnetpy/instruments/lufft.py +39 -17
  46. cloudnetpy/instruments/mira.py +268 -61
  47. cloudnetpy/instruments/mrr.py +187 -0
  48. cloudnetpy/instruments/nc_lidar.py +19 -8
  49. cloudnetpy/instruments/nc_radar.py +109 -55
  50. cloudnetpy/instruments/pollyxt.py +135 -51
  51. cloudnetpy/instruments/radiometrics.py +313 -59
  52. cloudnetpy/instruments/rain_e_h3.py +171 -0
  53. cloudnetpy/instruments/rpg.py +321 -189
  54. cloudnetpy/instruments/rpg_reader.py +74 -40
  55. cloudnetpy/instruments/toa5.py +49 -0
  56. cloudnetpy/instruments/vaisala.py +95 -343
  57. cloudnetpy/instruments/weather_station.py +774 -105
  58. cloudnetpy/metadata.py +90 -19
  59. cloudnetpy/model_evaluation/file_handler.py +55 -52
  60. cloudnetpy/model_evaluation/metadata.py +46 -20
  61. cloudnetpy/model_evaluation/model_metadata.py +1 -1
  62. cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
  63. cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
  64. cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
  65. cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
  66. cloudnetpy/model_evaluation/products/model_products.py +43 -35
  67. cloudnetpy/model_evaluation/products/observation_products.py +41 -35
  68. cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
  69. cloudnetpy/model_evaluation/products/tools.py +29 -20
  70. cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
  71. cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
  72. cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
  73. cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
  74. cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
  75. cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
  76. cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
  77. cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
  78. cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
  79. cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
  80. cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
  81. cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
  82. cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
  83. cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
  84. cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
  85. cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
  86. cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
  87. cloudnetpy/model_evaluation/utils.py +2 -1
  88. cloudnetpy/output.py +170 -111
  89. cloudnetpy/plotting/__init__.py +2 -1
  90. cloudnetpy/plotting/plot_meta.py +562 -822
  91. cloudnetpy/plotting/plotting.py +1142 -704
  92. cloudnetpy/products/__init__.py +1 -0
  93. cloudnetpy/products/classification.py +370 -88
  94. cloudnetpy/products/der.py +85 -55
  95. cloudnetpy/products/drizzle.py +77 -34
  96. cloudnetpy/products/drizzle_error.py +15 -11
  97. cloudnetpy/products/drizzle_tools.py +79 -59
  98. cloudnetpy/products/epsilon.py +211 -0
  99. cloudnetpy/products/ier.py +27 -50
  100. cloudnetpy/products/iwc.py +55 -48
  101. cloudnetpy/products/lwc.py +96 -70
  102. cloudnetpy/products/mwr_tools.py +186 -0
  103. cloudnetpy/products/product_tools.py +170 -128
  104. cloudnetpy/utils.py +455 -240
  105. cloudnetpy/version.py +2 -2
  106. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
  107. cloudnetpy-1.87.3.dist-info/RECORD +127 -0
  108. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
  109. cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
  110. docs/source/conf.py +2 -2
  111. cloudnetpy/categorize/atmos.py +0 -361
  112. cloudnetpy/products/mwr_multi.py +0 -68
  113. cloudnetpy/products/mwr_single.py +0 -75
  114. cloudnetpy-1.49.9.dist-info/RECORD +0 -112
  115. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
  116. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
cloudnetpy/utils.py CHANGED
@@ -1,31 +1,35 @@
1
- """ This module contains general helper functions. """
1
+ """This module contains general helper functions."""
2
+
2
3
  import datetime
3
4
  import logging
4
5
  import os
5
6
  import re
7
+ import textwrap
6
8
  import uuid
7
9
  import warnings
8
- from datetime import timezone
9
- from typing import Iterator
10
+ from collections.abc import Callable, Iterator
11
+ from os import PathLike
12
+ from typing import Literal, TypeVar
10
13
 
11
14
  import netCDF4
12
15
  import numpy as np
13
- import requests
16
+ import numpy.typing as npt
14
17
  from numpy import ma
15
18
  from scipy import ndimage, stats
16
- from scipy.interpolate import RectBivariateSpline, RegularGridInterpolator, griddata
17
-
19
+ from scipy import ndimage as ndi
20
+ from scipy.interpolate import (
21
+ RectBivariateSpline,
22
+ RegularGridInterpolator,
23
+ griddata,
24
+ interp1d,
25
+ )
26
+
27
+ from cloudnetpy.cloudnetarray import CloudnetArray
28
+ from cloudnetpy.constants import SEC_IN_DAY, SEC_IN_HOUR, SEC_IN_MINUTE
18
29
  from cloudnetpy.exceptions import ValidTimeStampError
19
30
 
20
- Epoch = tuple[int, int, int]
21
- Date = tuple[str, str, str]
22
-
23
- SECONDS_PER_MINUTE = 60
24
- SECONDS_PER_HOUR = 3600
25
- SECONDS_PER_DAY = 86400
26
-
27
31
 
28
- def seconds2hours(time_in_seconds: np.ndarray) -> np.ndarray:
32
+ def seconds2hours(time_in_seconds: npt.NDArray) -> npt.NDArray:
29
33
  """Converts seconds since some epoch to fraction hour.
30
34
 
31
35
  Args:
@@ -38,32 +42,19 @@ def seconds2hours(time_in_seconds: np.ndarray) -> np.ndarray:
38
42
  Excludes leap seconds.
39
43
 
40
44
  """
41
- seconds_since_midnight = np.mod(time_in_seconds, SECONDS_PER_DAY)
42
- fraction_hour = seconds_since_midnight / SECONDS_PER_HOUR
45
+ seconds_since_midnight = np.mod(time_in_seconds, SEC_IN_DAY)
46
+ fraction_hour = seconds_since_midnight / SEC_IN_HOUR
43
47
  if fraction_hour[-1] == 0:
44
48
  fraction_hour[-1] = 24
45
49
  return fraction_hour
46
50
 
47
51
 
48
- def seconds2time(time_in_seconds: float) -> list:
49
- """Converts seconds since some epoch to time of day.
50
-
51
- Args:
52
- time_in_seconds: seconds since some epoch.
53
-
54
- Returns:
55
- list: [hours, minutes, seconds] formatted as '05' etc.
56
-
57
- """
58
- seconds_since_midnight = np.mod(time_in_seconds, SECONDS_PER_DAY)
59
- hours = seconds_since_midnight // SECONDS_PER_HOUR
60
- minutes = seconds_since_midnight % SECONDS_PER_HOUR // SECONDS_PER_MINUTE
61
- seconds = seconds_since_midnight % SECONDS_PER_MINUTE
62
- time = [hours, minutes, seconds]
63
- return [str(t).zfill(2) for t in time]
64
-
65
-
66
- def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
52
+ def seconds2date(
53
+ time_in_seconds: float,
54
+ epoch: datetime.datetime = datetime.datetime(
55
+ 2001, 1, 1, tzinfo=datetime.timezone.utc
56
+ ),
57
+ ) -> datetime.datetime:
67
58
  """Converts seconds since some epoch to datetime (UTC).
68
59
 
69
60
  Args:
@@ -71,31 +62,23 @@ def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
71
62
  epoch: Epoch, default is (2001, 1, 1) (UTC).
72
63
 
73
64
  Returns:
74
- [year, month, day, hours, minutes, seconds] formatted as '05' etc (UTC).
65
+ Datetime
75
66
 
76
67
  """
77
- epoch_in_seconds = datetime.datetime.timestamp(
78
- datetime.datetime(*epoch, tzinfo=timezone.utc)
79
- )
80
- timestamp = time_in_seconds + epoch_in_seconds
81
- return (
82
- datetime.datetime.utcfromtimestamp(timestamp)
83
- .strftime("%Y %m %d %H %M %S")
84
- .split()
85
- )
68
+ return epoch + datetime.timedelta(seconds=float(time_in_seconds))
86
69
 
87
70
 
88
- def datetime2decimal_hours(data: np.ndarray | list) -> np.ndarray:
89
- """Converts array of datetime to decimal_hours"""
71
+ def datetime2decimal_hours(data: npt.NDArray | list) -> npt.NDArray:
72
+ """Converts array of datetime to decimal_hours."""
90
73
  output = []
91
74
  for timestamp in data:
92
75
  t = timestamp.time()
93
- decimal_hours = t.hour + t.minute / 60 + t.second / 3600
76
+ decimal_hours = t.hour + t.minute / SEC_IN_MINUTE + t.second / SEC_IN_HOUR
94
77
  output.append(decimal_hours)
95
78
  return np.array(output)
96
79
 
97
80
 
98
- def time_grid(time_step: int = 30) -> np.ndarray:
81
+ def time_grid(time_step: int = 30) -> npt.NDArray:
99
82
  """Returns decimal hour array between 0 and 24.
100
83
 
101
84
  Computes fraction hour time vector 0-24 with user-given
@@ -112,12 +95,13 @@ def time_grid(time_step: int = 30) -> np.ndarray:
112
95
 
113
96
  """
114
97
  if time_step < 1:
115
- raise ValueError("Time resolution should be >= 1 seconds")
116
- half_step = time_step / SECONDS_PER_HOUR / 2
98
+ msg = "Time resolution should be >= 1 seconds"
99
+ raise ValueError(msg)
100
+ half_step = time_step / SEC_IN_HOUR / 2
117
101
  return np.arange(half_step, 24 + half_step, half_step * 2)
118
102
 
119
103
 
120
- def binvec(x: np.ndarray | list) -> np.ndarray:
104
+ def binvec(x: npt.NDArray | list) -> npt.NDArray:
121
105
  """Converts 1-D center points to bins with even spacing.
122
106
 
123
107
  Args:
@@ -136,60 +120,55 @@ def binvec(x: np.ndarray | list) -> np.ndarray:
136
120
  return np.linspace(edge1, edge2, len(x) + 1)
137
121
 
138
122
 
123
+ REBIN_STAT = Literal["mean", "std", "max"]
124
+ REBIN_STAT_FN: dict[REBIN_STAT, Callable] = {
125
+ "mean": ma.mean,
126
+ "std": ma.std,
127
+ "max": ma.max,
128
+ }
129
+
130
+
139
131
  def rebin_2d(
140
- x_in: np.ndarray,
141
- array: ma.MaskedArray,
142
- x_new: np.ndarray,
143
- statistic: str = "mean",
132
+ x_in: npt.NDArray,
133
+ array: npt.NDArray,
134
+ x_new: npt.NDArray,
135
+ statistic: REBIN_STAT = "mean",
144
136
  n_min: int = 1,
145
- ) -> tuple[ma.MaskedArray, list]:
146
- """Rebins 2-D data in one dimension.
137
+ *,
138
+ keepdim: bool = False,
139
+ mask_zeros: bool = False,
140
+ ) -> tuple[ma.MaskedArray, npt.NDArray]:
141
+ edges = binvec(x_new)
142
+ binn = np.digitize(x_in, edges) - 1
143
+ n_bins = len(x_new)
144
+ counts = np.bincount(binn[binn >= 0], minlength=n_bins)
147
145
 
148
- Args:
149
- x_in: 1-D array with shape (n,).
150
- array: 2-D input data with shape (n, m).
151
- x_new: 1-D target vector (center points) with shape (N,).
152
- statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
153
- Default is 'mean'.
154
- n_min: Minimum number of points to have good statistics in a bin. Default is 1.
146
+ stat_fn = REBIN_STAT_FN[statistic]
155
147
 
156
- Returns:
157
- tuple: Rebinned data with shape (N, m) and indices of bins without enough data.
148
+ shape = array.shape if keepdim else (n_bins, array.shape[1])
149
+ result: ma.MaskedArray = ma.masked_array(np.ones(shape, dtype="float32"), mask=True)
158
150
 
159
- Notes:
160
- 0-values are masked in the returned array.
151
+ for bin_ind in range(n_bins):
152
+ if counts[bin_ind] < n_min:
153
+ continue
154
+ mask = binn == bin_ind
155
+ block = array[mask, :]
156
+ x_ind = mask if keepdim else bin_ind
157
+ result[x_ind, :] = stat_fn(block, axis=0)
161
158
 
162
- """
163
- edges = binvec(x_new)
164
- result = np.zeros((len(x_new), array.shape[1]))
165
- array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
166
- for ind, values in enumerate(array_screened.T):
167
- mask = ~values.mask
168
- if ma.any(values[mask]):
169
- result[:, ind], _, _ = stats.binned_statistic(
170
- x_in[mask], values[mask], statistic=statistic, bins=edges
171
- )
172
- result[~np.isfinite(result)] = 0
173
- masked_result = ma.masked_equal(result, 0)
159
+ empty_bins = np.where(counts < n_min)[0]
174
160
 
175
- # Fill bins with not enough profiles
176
- empty_indices = []
177
- for ind in range(len(edges) - 1):
178
- is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
179
- if len(is_data) < n_min:
180
- masked_result[ind, :] = ma.masked
181
- empty_indices.append(ind)
182
- if len(empty_indices) > 0:
183
- logging.debug(f"No radar data in {len(empty_indices)} bins")
161
+ if mask_zeros:
162
+ result[result == 0] = ma.masked
184
163
 
185
- return masked_result, empty_indices
164
+ return result, empty_bins
186
165
 
187
166
 
188
167
  def rebin_1d(
189
- x_in: np.ndarray,
190
- array: np.ndarray | ma.MaskedArray,
191
- x_new: np.ndarray,
192
- statistic: str = "mean",
168
+ x_in: npt.NDArray,
169
+ array: npt.NDArray | ma.MaskedArray,
170
+ x_new: npt.NDArray,
171
+ statistic: REBIN_STAT = "mean",
193
172
  ) -> ma.MaskedArray:
194
173
  """Rebins 1D array.
195
174
 
@@ -201,22 +180,24 @@ def rebin_1d(
201
180
  Default is 'mean'.
202
181
 
203
182
  Returns:
204
- Rebinned data with shape (N,).
183
+ Re-binned data with shape (N,).
205
184
 
206
185
  """
207
186
  edges = binvec(x_new)
208
- result = np.zeros(len(x_new))
187
+ result = ma.zeros(len(x_new))
209
188
  array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
210
- mask = ~array_screened.mask # pylint: disable=E1101
189
+ mask = ~array_screened.mask
211
190
  if ma.any(array_screened[mask]):
212
191
  result, _, _ = stats.binned_statistic(
213
- x_in[mask], array_screened[mask], statistic=statistic, bins=edges
192
+ x_in[mask],
193
+ array_screened[mask],
194
+ statistic=statistic,
195
+ bins=edges,
214
196
  )
215
- result[~np.isfinite(result)] = 0
216
- return ma.masked_equal(result, 0)
197
+ return ma.masked_invalid(result, copy=True)
217
198
 
218
199
 
219
- def filter_isolated_pixels(array: np.ndarray) -> np.ndarray:
200
+ def filter_isolated_pixels(array: npt.NDArray) -> npt.NDArray:
220
201
  """From a 2D boolean array, remove completely isolated single cells.
221
202
 
222
203
  Args:
@@ -236,7 +217,7 @@ def filter_isolated_pixels(array: np.ndarray) -> np.ndarray:
236
217
  return _filter(array, structure)
237
218
 
238
219
 
239
- def filter_x_pixels(array: np.ndarray) -> np.ndarray:
220
+ def filter_x_pixels(array: npt.NDArray) -> npt.NDArray:
240
221
  """From a 2D boolean array, remove cells isolated in x-direction.
241
222
 
242
223
  Args:
@@ -259,7 +240,7 @@ def filter_x_pixels(array: np.ndarray) -> np.ndarray:
259
240
  return _filter(array, structure)
260
241
 
261
242
 
262
- def _filter(array: np.ndarray, structure: np.ndarray) -> np.ndarray:
243
+ def _filter(array: npt.NDArray, structure: npt.NDArray) -> npt.NDArray:
263
244
  filtered_array = np.copy(array)
264
245
  id_regions, num_ids = ndimage.label(filtered_array, structure=structure)
265
246
  id_sizes = np.array(ndimage.sum(array, id_regions, range(num_ids + 1))).astype(int)
@@ -268,8 +249,19 @@ def _filter(array: np.ndarray, structure: np.ndarray) -> np.ndarray:
268
249
  return filtered_array
269
250
 
270
251
 
271
- def isbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
272
- """Tests if nth bit (0,1,2..) is set.
252
+ def remove_small_objects(
253
+ mask: npt.NDArray, max_size: int, connectivity: int
254
+ ) -> npt.NDArray:
255
+ """Removes small connected components from boolean mask."""
256
+ structure = ndi.generate_binary_structure(mask.ndim, connectivity)
257
+ labels, num = ndi.label(mask, structure=structure)
258
+ sizes = ndi.sum(mask, labels, index=np.arange(1, num + 1))
259
+ keep_labels = np.where(sizes > max_size)[0] + 1
260
+ return np.isin(labels, keep_labels)
261
+
262
+
263
+ def isbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
264
+ """Tests if nth bit (0,1,2,...) is set.
273
265
 
274
266
  Args:
275
267
  array: Integer array.
@@ -287,17 +279,18 @@ def isbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
287
279
  >>> isbit(4, 2)
288
280
  True
289
281
 
290
- See also:
282
+ See Also:
291
283
  utils.setbit()
292
284
 
293
285
  """
294
286
  if nth_bit < 0:
295
- raise ValueError("Negative bit number")
287
+ msg = "Negative bit number"
288
+ raise ValueError(msg)
296
289
  mask = 1 << nth_bit
297
290
  return array & mask > 0
298
291
 
299
292
 
300
- def setbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
293
+ def setbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
301
294
  """Sets nth bit (0, 1, 2, ...) on number.
302
295
 
303
296
  Args:
@@ -316,24 +309,25 @@ def setbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
316
309
  >>> setbit(0, 2)
317
310
  4
318
311
 
319
- See also:
312
+ See Also:
320
313
  utils.isbit()
321
314
 
322
315
  """
323
316
  if nth_bit < 0:
324
- raise ValueError("Negative bit number")
317
+ msg = "Negative bit number"
318
+ raise ValueError(msg)
325
319
  mask = 1 << nth_bit
326
320
  array |= mask
327
321
  return array
328
322
 
329
323
 
330
324
  def interpolate_2d(
331
- x: np.ndarray,
332
- y: np.ndarray,
333
- z: np.ndarray,
334
- x_new: np.ndarray,
335
- y_new: np.ndarray,
336
- ) -> np.ndarray:
325
+ x: npt.NDArray,
326
+ y: npt.NDArray,
327
+ z: npt.NDArray,
328
+ x_new: npt.NDArray,
329
+ y_new: npt.NDArray,
330
+ ) -> npt.NDArray:
337
331
  """Linear interpolation of gridded 2d data.
338
332
 
339
333
  Args:
@@ -355,11 +349,11 @@ def interpolate_2d(
355
349
 
356
350
 
357
351
  def interpolate_2d_mask(
358
- x: np.ndarray,
359
- y: np.ndarray,
352
+ x: npt.NDArray,
353
+ y: npt.NDArray,
360
354
  z: ma.MaskedArray,
361
- x_new: np.ndarray,
362
- y_new: np.ndarray,
355
+ x_new: npt.NDArray,
356
+ y_new: npt.NDArray,
363
357
  ) -> ma.MaskedArray:
364
358
  """2D linear interpolation preserving the mask.
365
359
 
@@ -378,32 +372,34 @@ def interpolate_2d_mask(
378
372
  interpolation. Input data may contain nan-values.
379
373
 
380
374
  """
381
- z = ma.array(ma.masked_invalid(z, copy=True)) # ma.array() to avoid pylint nag
375
+ z = ma.array(ma.masked_invalid(z, copy=True))
382
376
  # Interpolate ignoring masked values:
383
- valid_points = np.logical_not(z.mask) # ~z.mask causes pylint nag
377
+ valid_points = np.logical_not(z.mask)
384
378
  xx, yy = np.meshgrid(y, x)
385
379
  x_valid = xx[valid_points]
386
380
  y_valid = yy[valid_points]
387
381
  z_valid = z[valid_points]
388
382
  xx_new, yy_new = np.meshgrid(y_new, x_new)
389
383
  data = griddata(
390
- (x_valid, y_valid), z_valid.ravel(), (xx_new, yy_new), method="linear"
384
+ (x_valid, y_valid),
385
+ z_valid.ravel(),
386
+ (xx_new, yy_new),
387
+ method="linear",
391
388
  )
392
389
  # Preserve mask:
393
- mask_fun = RectBivariateSpline(x, y, z.mask[:], kx=1, ky=1)
390
+ mask_fun = RectBivariateSpline(x, y, ma.getmaskarray(z), kx=1, ky=1)
394
391
  mask = mask_fun(x_new, y_new)
395
392
  mask[mask < 0.5] = 0
396
393
  masked_array = ma.array(data, mask=mask.astype(bool))
397
- masked_array = ma.masked_invalid(masked_array)
398
- return masked_array
394
+ return ma.masked_invalid(masked_array)
399
395
 
400
396
 
401
397
  def interpolate_2d_nearest(
402
- x: np.ndarray,
403
- y: np.ndarray,
404
- z: np.ndarray,
405
- x_new: np.ndarray,
406
- y_new: np.ndarray,
398
+ x: npt.NDArray,
399
+ y: npt.NDArray,
400
+ z: ma.MaskedArray,
401
+ x_new: npt.NDArray,
402
+ y_new: npt.NDArray,
407
403
  ) -> ma.MaskedArray:
408
404
  """2D nearest neighbor interpolation preserving mask.
409
405
 
@@ -421,25 +417,111 @@ def interpolate_2d_nearest(
421
417
  Points outside the original range will be interpolated but masked.
422
418
 
423
419
  """
424
- data = ma.copy(z)
420
+ data = ma.filled(z, np.nan)
425
421
  fun = RegularGridInterpolator(
426
422
  (x, y),
427
423
  data,
428
424
  method="nearest",
429
425
  bounds_error=False,
430
- fill_value=ma.masked,
431
426
  )
432
427
  xx, yy = np.meshgrid(x_new, y_new)
433
- return fun((xx, yy)).T
428
+ zz = fun((xx, yy)).T
429
+ return ma.masked_where(np.isnan(zz), zz)
430
+
431
+
432
+ def interpolate_2D_along_y(
433
+ y: npt.NDArray,
434
+ z: npt.NDArray | ma.MaskedArray,
435
+ y_new: npt.NDArray,
436
+ ) -> ma.MaskedArray:
437
+ """Fast 1D nearest-neighbor interpolation along y for each x.
434
438
 
439
+ Args:
440
+ y: 1D numpy array of y-coordinates (length M).
441
+ z: 2D array of shape (N, M).
442
+ y_new: 1D numpy array of new y-coordinates.
443
+
444
+ Returns:
445
+ Masked 2D masked array interpolated along y.
435
446
 
436
- def calc_relative_error(reference: np.ndarray, array: np.ndarray) -> np.ndarray:
447
+ Notes:
448
+ Only interpolates along y. Points outside range are masked.
449
+ """
450
+ idx = np.searchsorted(y, y_new, side="left")
451
+ idx = np.clip(idx, 0, len(y) - 1)
452
+ left = np.maximum(idx - 1, 0)
453
+ choose_right = (idx == 0) | (
454
+ (idx < len(y)) & (np.abs(y[idx] - y_new) < np.abs(y_new - y[left]))
455
+ )
456
+ idx[~choose_right] = left[~choose_right]
457
+ z_interp = ma.array(z[:, idx])
458
+ z_mask = ma.getmaskarray(z_interp)
459
+ mask = (y_new < y.min()) | (y_new > y.max())
460
+ z_mask[:, mask] = True
461
+ return ma.MaskedArray(z_interp, mask=z_mask)
462
+
463
+
464
+ def interpolate_1d(
465
+ time: npt.NDArray,
466
+ y: ma.MaskedArray,
467
+ time_new: npt.NDArray,
468
+ max_time: float,
469
+ method: str = "linear",
470
+ ) -> ma.MaskedArray:
471
+ """1D linear interpolation preserving the mask.
472
+
473
+ Args:
474
+ time: 1D array in fraction hour.
475
+ y: 1D array, data values.
476
+ time_new: 1D array, new time coordinates.
477
+ max_time: Maximum allowed gap in minutes. Values outside this gap will
478
+ be masked.
479
+ method: Interpolation method, 'linear' (default) or 'nearest'.
480
+ """
481
+ if np.max(time) > 24 or np.min(time) < 0:
482
+ msg = "Time vector must be in fraction hours between 0 and 24"
483
+ raise ValueError(msg)
484
+ if ma.is_masked(y):
485
+ if y.mask.all():
486
+ return ma.masked_all(time_new.shape)
487
+ time = time[~y.mask]
488
+ y = y[~y.mask]
489
+ fun = interp1d(time, y, kind=method, fill_value=(y[0], y[-1]), bounds_error=False)
490
+ interpolated = ma.array(fun(time_new))
491
+ bad_idx = get_gap_ind(time, time_new, max_time / 60)
492
+
493
+ if len(bad_idx) > 0:
494
+ msg = f"Unable to interpolate for {len(bad_idx)} time steps"
495
+ logging.warning(msg)
496
+ interpolated[bad_idx] = ma.masked
497
+
498
+ return interpolated
499
+
500
+
501
+ def get_gap_ind(
502
+ grid: npt.NDArray, new_grid: npt.NDArray, threshold: float
503
+ ) -> list[int]:
504
+ """Finds indices in new_grid that are too far from grid."""
505
+ if grid.size == 0:
506
+ return list(range(len(new_grid)))
507
+ idxs = np.searchsorted(grid, new_grid)
508
+ left_dist = np.where(idxs > 0, np.abs(new_grid - grid[idxs - 1]), np.inf)
509
+ right_dist = np.where(
510
+ idxs < len(grid),
511
+ np.abs(new_grid - grid[np.clip(idxs, 0, len(grid) - 1)]),
512
+ np.inf,
513
+ )
514
+ nearest = np.minimum(left_dist, right_dist)
515
+ return np.where(nearest > threshold)[0].tolist()
516
+
517
+
518
+ def calc_relative_error(reference: npt.NDArray, array: npt.NDArray) -> npt.NDArray:
437
519
  """Calculates relative error (%)."""
438
520
  return ((array - reference) / reference) * 100
439
521
 
440
522
 
441
- def db2lin(array: float | np.ndarray, scale: int = 10) -> np.ndarray:
442
- """dB to linear conversion."""
523
+ def db2lin(array: float | npt.NDArray, scale: int = 10) -> npt.NDArray:
524
+ """DB to linear conversion."""
443
525
  data = array / scale
444
526
  with warnings.catch_warnings():
445
527
  warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -448,19 +530,19 @@ def db2lin(array: float | np.ndarray, scale: int = 10) -> np.ndarray:
448
530
  return np.power(10, data)
449
531
 
450
532
 
451
- def lin2db(array: np.ndarray, scale: int = 10) -> np.ndarray:
533
+ def lin2db(array: npt.NDArray, scale: int = 10) -> npt.NDArray:
452
534
  """Linear to dB conversion."""
453
535
  if ma.isMaskedArray(array):
454
536
  return scale * ma.log10(array)
455
537
  return scale * np.log10(array)
456
538
 
457
539
 
458
- def mdiff(array: np.ndarray) -> float:
540
+ def mdiff(array: npt.NDArray) -> float:
459
541
  """Returns median difference of 1-D array."""
460
542
  return float(ma.median(ma.diff(array)))
461
543
 
462
544
 
463
- def l2norm(*args) -> ma.MaskedArray:
545
+ def l2norm(*args: npt.NDArray | float) -> ma.MaskedArray:
464
546
  """Returns l2 norm.
465
547
 
466
548
  Args:
@@ -470,20 +552,23 @@ def l2norm(*args) -> ma.MaskedArray:
470
552
  The l2 norm.
471
553
 
472
554
  """
473
- ss = 0
555
+ arg_cpy: float | npt.NDArray
556
+ ss: float | npt.NDArray = 0
474
557
  for arg in args:
475
558
  if isinstance(arg, ma.MaskedArray):
476
559
  # Raise only non-masked values, not sure if this is needed...
477
- arg = ma.copy(arg)
478
- arg[~arg.mask] = arg[~arg.mask] ** 2
560
+ arg_cpy = ma.copy(arg)
561
+ arg_cpy[~arg.mask] = arg_cpy[~arg.mask] ** 2
479
562
  else:
480
- arg = arg**2
481
- ss = ss + arg
563
+ arg_cpy = arg**2
564
+ ss = ss + arg_cpy
482
565
  return ma.sqrt(ss)
483
566
 
484
567
 
485
568
  def l2norm_weighted(
486
- values: tuple, overall_scale: float, term_weights: tuple
569
+ values: tuple,
570
+ overall_scale: float,
571
+ term_weights: tuple,
487
572
  ) -> ma.MaskedArray:
488
573
  """Calculates scaled and weighted Euclidean distance.
489
574
 
@@ -503,12 +588,12 @@ def l2norm_weighted(
503
588
  TODO: Use masked arrays instead of tuples.
504
589
 
505
590
  """
506
- generic_values = ma.array(values, dtype=object)
591
+ generic_values: ma.MaskedArray = ma.array(values, dtype=object)
507
592
  weighted_values = ma.multiply(generic_values, term_weights)
508
593
  return overall_scale * l2norm(*weighted_values)
509
594
 
510
595
 
511
- def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
596
+ def cumsumr(array: npt.NDArray, axis: int = 0) -> npt.NDArray:
512
597
  """Finds cumulative sum that resets on 0.
513
598
 
514
599
  Args:
@@ -525,12 +610,10 @@ def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
525
610
 
526
611
  """
527
612
  cums = array.cumsum(axis=axis)
528
- return cums - np.maximum.accumulate(
529
- cums * (array == 0), axis=axis
530
- ) # pylint: disable=E1101
613
+ return cums - np.maximum.accumulate(cums * (array == 0), axis=axis)
531
614
 
532
615
 
533
- def ffill(array: np.ndarray, value: int = 0) -> np.ndarray:
616
+ def ffill(array: npt.NDArray, value: int = 0) -> npt.NDArray:
534
617
  """Forward fills an array.
535
618
 
536
619
  Args:
@@ -552,15 +635,19 @@ def ffill(array: np.ndarray, value: int = 0) -> np.ndarray:
552
635
  ndims = len(array.shape)
553
636
  ran = np.arange(array.shape[ndims - 1])
554
637
  idx = np.where((array != value), ran, 0)
555
- idx = np.maximum.accumulate(idx, axis=ndims - 1) # pylint: disable=E1101
638
+ idx = np.maximum.accumulate(idx, axis=ndims - 1)
556
639
  if ndims == 2:
557
640
  return array[np.arange(idx.shape[0])[:, None], idx]
558
641
  return array[idx]
559
642
 
560
643
 
561
644
  def init(
562
- n_vars: int, shape: tuple, dtype: type = float, masked: bool = True
563
- ) -> Iterator[np.ndarray | ma.MaskedArray]:
645
+ n_vars: int,
646
+ shape: tuple,
647
+ dtype: type = float,
648
+ *,
649
+ masked: bool = True,
650
+ ) -> Iterator[npt.NDArray | ma.MaskedArray]:
564
651
  """Initializes several numpy arrays.
565
652
 
566
653
  Args:
@@ -590,14 +677,14 @@ def init(
590
677
  yield np.zeros(shape, dtype=dtype)
591
678
 
592
679
 
593
- def n_elements(array: np.ndarray, dist: float, var: str | None = None) -> int:
680
+ def n_elements(array: npt.NDArray, dist: float, var: str | None = None) -> int:
594
681
  """Returns the number of elements that cover certain distance.
595
682
 
596
683
  Args:
597
684
  array: Input array with arbitrary units or time in fraction hour. *x* should
598
685
  be evenly spaced or at least close to.
599
686
  dist: Distance to be covered. If x is fraction time, *dist* is in minutes.
600
- Otherwise *x* and *dist* should have the same units.
687
+ Otherwise, *x* and *dist* should have the same units.
601
688
  var: If 'time', input is fraction hour and distance in minutes, else inputs
602
689
  have the same units. Default is None (same units).
603
690
 
@@ -625,11 +712,11 @@ def n_elements(array: np.ndarray, dist: float, var: str | None = None) -> int:
625
712
  """
626
713
  n = dist / mdiff(array)
627
714
  if var == "time":
628
- n = n / 60
715
+ n = n / SEC_IN_MINUTE
629
716
  return int(np.round(n))
630
717
 
631
718
 
632
def isscalar(array: npt.NDArray | float | list | netCDF4.Variable) -> bool:
    """Tests if input is scalar.

    By "scalar" we mean that array has a single value.
    """
    wrapped: ma.MaskedArray = ma.array(array)
    if not hasattr(wrapped, "__len__"):
        return True
    return wrapped.shape == () or len(wrapped) == 1
652
737
 
653
738
 
654
739
def get_time() -> str:
    """Returns current UTC-time."""
    now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
    return now_utc.strftime("%Y-%m-%d %H:%M:%S") + " +00:00"
657
744
 
658
745
 
659
746
def date_range(
    start_date: datetime.date,
    end_date: datetime.date,
) -> Iterator[datetime.date]:
    """Returns range between two dates (datetimes)."""
    n_days = int((end_date - start_date).days)
    one_day = datetime.timedelta(days=1)
    current = start_date
    for _ in range(n_days):
        yield current
        current += one_day
665
753
 
666
754
 
667
- def get_uuid() -> str:
668
- """Returns unique identifier."""
669
- return str(uuid.uuid4())
755
+ def get_uuid(input_uuid: str | uuid.UUID | None) -> uuid.UUID:
756
+ """Parse or generate unique identifier."""
757
+ if input_uuid is None:
758
+ return uuid.uuid4()
759
+ if isinstance(input_uuid, str):
760
+ return uuid.UUID(input_uuid)
761
+ return input_uuid
670
762
 
671
763
 
672
def get_wl_band(radar_frequency: float) -> Literal["X", "Ka", "W"]:
    """Returns IEEE radar band corresponding to radar frequency.

    Args:
        radar_frequency: Radar frequency (GHz).

    Returns:
        IEEE radar band as string.

    Raises:
        ValueError: If the frequency falls outside the known bands.

    """
    bands: tuple[tuple[float, float, Literal["X", "Ka", "W"]], ...] = (
        (8, 12, "X"),
        (27, 40, "Ka"),
        (75, 110, "W"),
    )
    for lower, upper, band in bands:
        if lower < radar_frequency < upper:
            return band
    msg = f"Unknown band: {radar_frequency} GHz"
    raise ValueError(msg)
688
782
 
689
783
 
690
def transpose(data: npt.NDArray) -> npt.NDArray:
    """Transposes numpy array of (n, ) to (n, 1)."""
    is_valid_shape = data.ndim == 1 and len(data) > 1
    if not is_valid_shape:
        msg = "Invalid input array shape"
        raise ValueError(msg)
    return data.reshape(-1, 1)
695
790
 
696
791
 
@@ -713,8 +808,12 @@ def del_dict_keys(data: dict, keys: tuple | list) -> dict:
713
808
 
714
809
 
715
810
  def array_to_probability(
716
- array: np.ndarray, loc: float, scale: float, invert: bool = False
717
- ) -> np.ndarray:
811
+ array: npt.NDArray,
812
+ loc: float,
813
+ scale: float,
814
+ *,
815
+ invert: bool = False,
816
+ ) -> npt.NDArray:
718
817
  """Converts continuous variable into 0-1 probability.
719
818
 
720
819
  Args:
@@ -740,7 +839,7 @@ def array_to_probability(
740
839
  return prob
741
840
 
742
841
 
743
- def range_to_height(range_los: np.ndarray, tilt_angle: float) -> np.ndarray:
842
+ def range_to_height(range_los: npt.NDArray, tilt_angle: float) -> npt.NDArray:
744
843
  """Converts distances from a tilted instrument to height above the ground.
745
844
 
746
845
  Args:
@@ -759,27 +858,21 @@ def range_to_height(range_los: np.ndarray, tilt_angle: float) -> np.ndarray:
759
858
 
760
859
def is_empty_line(line: str) -> bool:
    """Tests if a line (of a text file) is empty."""
    return line == "\n" or line == "\r\n"
765
862
 
766
863
 
767
864
def is_timestamp(timestamp: str) -> bool:
    """Tests if the input string is formatted as -yyyy-mm-dd hh:mm:ss."""
    # The `re` module caches compiled patterns, so matching directly is fine.
    pattern = r"-\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
    return re.match(pattern, timestamp) is not None
773
868
 
774
869
 
775
- def get_sorted_filenames(file_path: str, extension: str) -> list:
870
+ def get_sorted_filenames(file_path: str | PathLike, extension: str) -> list[str]:
776
871
  """Returns full paths of files with some extension, sorted by filename."""
777
872
  extension = extension.lower()
778
873
  all_files = os.listdir(file_path)
779
874
  files = [
780
- "/".join((file_path, file))
781
- for file in all_files
782
- if file.lower().endswith(extension)
875
+ f"{file_path}/{file}" for file in all_files if file.lower().endswith(extension)
783
876
  ]
784
877
  files.sort()
785
878
  return files
@@ -793,23 +886,16 @@ def str_to_numeric(value: str) -> int | float:
793
886
  return float(value)
794
887
 
795
888
 
796
- def fetch_cloudnet_model_types() -> list:
797
- """Finds different model types."""
798
- url = "https://cloudnet.fmi.fi/api/models"
799
- data = requests.get(url=url, timeout=60).json()
800
- models = [model["id"] for model in data]
801
- model_types = [model.split("-")[0] for model in models]
802
- return list(set(model_types))
803
-
804
-
805
- def get_epoch(units: str) -> Epoch:
889
+ def get_epoch(units: str) -> datetime.datetime:
806
890
  """Finds epoch from units string."""
807
- fallback = (2001, 1, 1)
891
+ fallback = datetime.datetime(2001, 1, 1, tzinfo=datetime.timezone.utc)
808
892
  try:
809
893
  date = units.split()[2]
810
894
  except IndexError:
811
895
  return fallback
812
896
  date = date.replace(",", "")
897
+ if "T" in date:
898
+ date = date[: date.index("T")]
813
899
  try:
814
900
  date_components = [int(x) for x in date.split("-")]
815
901
  except ValueError:
@@ -818,13 +904,15 @@ def get_epoch(units: str) -> Epoch:
818
904
  except ValueError:
819
905
  return fallback
820
906
  year, month, day = date_components
821
- current_year = datetime.datetime.today().year
907
+ current_year = datetime.datetime.now(tz=datetime.timezone.utc).year
822
908
  if (1900 < year <= current_year) and (0 < month < 13) and (0 < day < 32):
823
- return year, month, day
909
+ return datetime.datetime(year, month, day, tzinfo=datetime.timezone.utc)
824
910
  return fallback
825
911
 
826
912
 
827
- def screen_by_time(data_in: dict, epoch: Epoch, expected_date: str) -> dict:
913
+ def screen_by_time(
914
+ data_in: dict, epoch: datetime.datetime, expected_date: datetime.date
915
+ ) -> dict:
828
916
  """Screen data by time.
829
917
 
830
918
  Args:
@@ -861,7 +949,9 @@ def screen_by_time(data_in: dict, epoch: Epoch, expected_date: str) -> dict:
861
949
  return data
862
950
 
863
951
 
864
- def find_valid_time_indices(time: np.ndarray, epoch: Epoch, expected_date: str) -> list:
952
+ def find_valid_time_indices(
953
+ time: npt.NDArray, epoch: datetime.datetime, expected_date: datetime.date
954
+ ) -> list[int]:
865
955
  """Finds valid time array indices for the given date.
866
956
 
867
957
  Args:
@@ -884,15 +974,15 @@ def find_valid_time_indices(time: np.ndarray, epoch: Epoch, expected_date: str)
884
974
  ind_sorted = np.argsort(time)
885
975
  ind_valid: list[int] = []
886
976
  for ind in ind_sorted:
887
- date_str = "-".join(seconds2date(time[ind], epoch=epoch)[:3])
888
- if date_str == expected_date and time[ind] not in time[ind_valid]:
977
+ date = seconds2date(time[ind], epoch=epoch).date()
978
+ if date == expected_date and time[ind] not in time[ind_valid]:
889
979
  ind_valid.append(ind)
890
980
  if not ind_valid:
891
981
  raise ValidTimeStampError
892
982
  return ind_valid
893
983
 
894
984
 
895
- def append_data(data_in: dict, key: str, array: np.ndarray) -> dict:
985
+ def append_data(data_in: dict, key: str, array: npt.NDArray) -> dict:
896
986
  """Appends data to a dictionary field (creates the field if not yet present).
897
987
 
898
988
  Args:
@@ -909,7 +999,7 @@ def append_data(data_in: dict, key: str, array: np.ndarray) -> dict:
909
999
  return data
910
1000
 
911
1001
 
912
- def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
1002
+ def edges2mid(data: npt.NDArray, reference: Literal["upper", "lower"]) -> npt.NDArray:
913
1003
  """Shifts values half bin towards up or down.
914
1004
 
915
1005
  Args:
@@ -920,8 +1010,6 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
920
1010
  Shifted values.
921
1011
 
922
1012
  """
923
- if reference not in ("lower", "upper"):
924
- raise ValueError
925
1013
  gaps = (data[1:] - data[0:-1]) / 2
926
1014
  if reference == "lower":
927
1015
  gaps = np.append(gaps, gaps[-1])
@@ -930,29 +1018,156 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
930
1018
  return data - gaps
931
1019
 
932
1020
 
933
- def get_file_type(filename: str) -> str:
934
- """Returns cloudnet file type from new and legacy files."""
935
- with netCDF4.Dataset(filename) as nc:
936
- if hasattr(nc, "cloudnet_file_type"):
937
- file_type = nc.cloudnet_file_type
938
- return file_type
939
- product = filename.split("_")[-1][:-3]
940
- if product in ("categorize", "classification", "drizzle"):
941
- return product
942
- if product[:3] in ("lwc", "iwc"):
943
- return product[:3]
944
- raise ValueError("Unknown file type")
945
-
946
-
947
- def get_files_with_common_range(files: list) -> list:
948
- """Returns files with the same (most common) number of range gates."""
949
- n_range = []
950
- for file in files:
1021
def get_files_with_variables(filenames: list, variables: list[str]) -> list:
    """Returns files where all variables exist."""
    valid_files = []
    for file in filenames:
        with netCDF4.Dataset(file) as nc:
            has_all_variables = all(
                variable in nc.variables for variable in variables
            )
        if has_all_variables:
            valid_files.append(file)
    return valid_files
1032
+
1033
+
1034
+ def is_all_masked(array: npt.NDArray) -> bool:
1035
+ """Tests if all values are masked."""
1036
+ return bool(ma.isMaskedArray(array) and hasattr(array, "mask") and array.mask.all())
1037
+
1038
+
1039
+ def find_masked_profiles_indices(array: ma.MaskedArray) -> list:
1040
+ """Finds indices of masked profiles in a 2-D array."""
1041
+ non_masked_counts = np.ma.count(array, axis=1)
1042
+ masked_profiles_indices = np.where(non_masked_counts == 0)[0]
1043
+ return list(masked_profiles_indices)
1044
+
1045
+
1046
+ T = TypeVar("T", int, str)
1047
+
1048
+
1049
+ def _format_definition(kind: str, definitions: dict[T, str]) -> str:
1050
+ lines = [""]
1051
+ for key, value in definitions.items():
1052
+ prefix = f"{kind} {key}: "
1053
+ indent = " " * len(prefix)
1054
+ text = " ".join(value.split())
1055
+ wrapped = textwrap.wrap(prefix + text, subsequent_indent=indent)
1056
+ lines.extend(wrapped)
1057
+ return "\n".join(lines)
1058
+
1059
+
1060
+ def status_field_definition(definitions: dict[T, str]) -> str:
1061
+ return _format_definition("Value", definitions)
1062
+
1063
+
1064
+ def bit_field_definition(definitions: dict[T, str]) -> str:
1065
+ return _format_definition("Bit", definitions)
1066
+
1067
+
1068
+ def path_lengths_from_ground(height_agl: npt.NDArray) -> npt.NDArray:
1069
+ return np.diff(height_agl, prepend=0)
1070
+
1071
+
1072
+ def add_site_geolocation(
1073
+ data: dict,
1074
+ *,
1075
+ gps: bool,
1076
+ site_meta: dict | None = None,
1077
+ dataset: netCDF4.Dataset | None = None,
1078
+ ) -> None:
1079
+ tmp_data = {}
1080
+ tmp_source = {}
1081
+ value: npt.NDArray | float | None
1082
+
1083
+ for key in ("latitude", "longitude", "altitude"):
1084
+ value = None
1085
+ source = None
1086
+ # Prefer accurate GPS coordinates. Don't trust altitude because its less
1087
+ # accurate and at least in Lindenberg BASTA there are large jumps.
1088
+ if gps and key != "altitude":
1089
+ values = None
1090
+ if isinstance(dataset, netCDF4.Dataset) and key in dataset.variables:
1091
+ values = dataset[key][:]
1092
+ elif key in data:
1093
+ values = data[key].data
1094
+ if (
1095
+ values is not None
1096
+ and not np.all(ma.getmaskarray(values))
1097
+ and np.any(values != 0)
1098
+ ):
1099
+ value = ma.masked_where(values == 0, values)
1100
+ source = "GPS"
1101
+ # User-supplied site coordinate.
1102
+ if value is None and site_meta is not None and key in site_meta:
1103
+ value = np.array(float(site_meta[key]))
1104
+ source = "site coordinates"
1105
+ # From source data (CHM15k, CL61, MRR-PRO, Copernicus, Galileo...).
1106
+ # Assume value is manually set, so cannot trust it.
1107
+ if (
1108
+ value is None
1109
+ and isinstance(dataset, netCDF4.Dataset)
1110
+ and key in dataset.variables
1111
+ and not np.all(ma.getmaskarray(dataset[key][:]))
1112
+ ):
1113
+ value = dataset[key][:]
1114
+ source = "raw file"
1115
+ # From source global attributes (MIRA).
1116
+ # Seems to be manually set, so cannot trust it.
1117
+ if (
1118
+ value is None
1119
+ and isinstance(dataset, netCDF4.Dataset)
1120
+ and hasattr(dataset, key.capitalize())
1121
+ ):
1122
+ value = _parse_global_attribute_numeral(dataset, key.capitalize())
1123
+ source = "raw file"
1124
+ if value is not None:
1125
+ tmp_data[key] = value
1126
+ tmp_source[key] = source
1127
+
1128
+ if "latitude" in tmp_data and "longitude" in tmp_data:
1129
+ lat = np.atleast_1d(tmp_data["latitude"])
1130
+ lon = np.atleast_1d(tmp_data["longitude"])
1131
+ lon[lon > 180] - 360
1132
+ if _are_stationary(lat, lon):
1133
+ tmp_data["latitude"] = float(ma.mean(lat))
1134
+ tmp_data["longitude"] = float(ma.mean(lon))
1135
+ else:
1136
+ tmp_data["latitude"] = lat
1137
+ tmp_data["longitude"] = lon
1138
+
1139
+ if "altitude" in tmp_data:
1140
+ alt = np.atleast_1d(tmp_data["altitude"])
1141
+ if ma.max(alt) - ma.min(alt) < 100:
1142
+ tmp_data["altitude"] = float(ma.mean(alt))
1143
+
1144
+ for key in ("latitude", "longitude", "altitude"):
1145
+ if key in tmp_data:
1146
+ data[key] = CloudnetArray(
1147
+ tmp_data[key],
1148
+ key,
1149
+ source=tmp_source[key],
1150
+ dimensions=None if isinstance(tmp_data[key], float) else ("time",),
1151
+ )
1152
+
1153
+
1154
+ def _parse_global_attribute_numeral(dataset: netCDF4.Dataset, key: str) -> float | None:
1155
+ new_str = ""
1156
+ attr = getattr(dataset, key)
1157
+ if attr == "Unknown":
1158
+ return None
1159
+ for char in attr:
1160
+ if char.isdigit() or char == ".":
1161
+ new_str += char
1162
+ return float(new_str)
1163
+
1164
+
1165
+ def _are_stationary(latitude: npt.NDArray, longitude: npt.NDArray) -> bool:
1166
+ min_lat, max_lat = np.min(latitude), np.max(latitude)
1167
+ min_lon, max_lon = np.min(longitude), np.max(longitude)
1168
+ lat_threshold = 0.01 # deg, around 1 km
1169
+ avg_lat = (min_lat + max_lat) / 2
1170
+ lon_threshold = lat_threshold / np.cos(np.radians(avg_lat))
1171
+ lat_diff = max_lat - min_lat
1172
+ lon_diff = max_lon - min_lon
1173
+ return lat_diff <= lat_threshold and lon_diff <= lon_threshold