cloudnetpy 1.49.9__py3-none-any.whl → 1.87.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloudnetpy/categorize/__init__.py +1 -2
- cloudnetpy/categorize/atmos_utils.py +297 -67
- cloudnetpy/categorize/attenuation.py +31 -0
- cloudnetpy/categorize/attenuations/__init__.py +37 -0
- cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
- cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
- cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
- cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
- cloudnetpy/categorize/categorize.py +332 -156
- cloudnetpy/categorize/classify.py +127 -125
- cloudnetpy/categorize/containers.py +107 -76
- cloudnetpy/categorize/disdrometer.py +40 -0
- cloudnetpy/categorize/droplet.py +23 -21
- cloudnetpy/categorize/falling.py +53 -24
- cloudnetpy/categorize/freezing.py +25 -12
- cloudnetpy/categorize/insects.py +35 -23
- cloudnetpy/categorize/itu.py +243 -0
- cloudnetpy/categorize/lidar.py +36 -41
- cloudnetpy/categorize/melting.py +34 -26
- cloudnetpy/categorize/model.py +84 -37
- cloudnetpy/categorize/mwr.py +18 -14
- cloudnetpy/categorize/radar.py +215 -102
- cloudnetpy/cli.py +578 -0
- cloudnetpy/cloudnetarray.py +43 -89
- cloudnetpy/concat_lib.py +218 -78
- cloudnetpy/constants.py +28 -10
- cloudnetpy/datasource.py +61 -86
- cloudnetpy/exceptions.py +49 -20
- cloudnetpy/instruments/__init__.py +5 -0
- cloudnetpy/instruments/basta.py +29 -12
- cloudnetpy/instruments/bowtie.py +135 -0
- cloudnetpy/instruments/ceilo.py +138 -115
- cloudnetpy/instruments/ceilometer.py +164 -80
- cloudnetpy/instruments/cl61d.py +21 -5
- cloudnetpy/instruments/cloudnet_instrument.py +74 -36
- cloudnetpy/instruments/copernicus.py +108 -30
- cloudnetpy/instruments/da10.py +54 -0
- cloudnetpy/instruments/disdrometer/common.py +126 -223
- cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
- cloudnetpy/instruments/disdrometer/thies.py +254 -87
- cloudnetpy/instruments/fd12p.py +201 -0
- cloudnetpy/instruments/galileo.py +65 -23
- cloudnetpy/instruments/hatpro.py +123 -49
- cloudnetpy/instruments/instruments.py +113 -1
- cloudnetpy/instruments/lufft.py +39 -17
- cloudnetpy/instruments/mira.py +268 -61
- cloudnetpy/instruments/mrr.py +187 -0
- cloudnetpy/instruments/nc_lidar.py +19 -8
- cloudnetpy/instruments/nc_radar.py +109 -55
- cloudnetpy/instruments/pollyxt.py +135 -51
- cloudnetpy/instruments/radiometrics.py +313 -59
- cloudnetpy/instruments/rain_e_h3.py +171 -0
- cloudnetpy/instruments/rpg.py +321 -189
- cloudnetpy/instruments/rpg_reader.py +74 -40
- cloudnetpy/instruments/toa5.py +49 -0
- cloudnetpy/instruments/vaisala.py +95 -343
- cloudnetpy/instruments/weather_station.py +774 -105
- cloudnetpy/metadata.py +90 -19
- cloudnetpy/model_evaluation/file_handler.py +55 -52
- cloudnetpy/model_evaluation/metadata.py +46 -20
- cloudnetpy/model_evaluation/model_metadata.py +1 -1
- cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
- cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
- cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
- cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
- cloudnetpy/model_evaluation/products/model_products.py +43 -35
- cloudnetpy/model_evaluation/products/observation_products.py +41 -35
- cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
- cloudnetpy/model_evaluation/products/tools.py +29 -20
- cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
- cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
- cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
- cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
- cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
- cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
- cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
- cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
- cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
- cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
- cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
- cloudnetpy/model_evaluation/utils.py +2 -1
- cloudnetpy/output.py +170 -111
- cloudnetpy/plotting/__init__.py +2 -1
- cloudnetpy/plotting/plot_meta.py +562 -822
- cloudnetpy/plotting/plotting.py +1142 -704
- cloudnetpy/products/__init__.py +1 -0
- cloudnetpy/products/classification.py +370 -88
- cloudnetpy/products/der.py +85 -55
- cloudnetpy/products/drizzle.py +77 -34
- cloudnetpy/products/drizzle_error.py +15 -11
- cloudnetpy/products/drizzle_tools.py +79 -59
- cloudnetpy/products/epsilon.py +211 -0
- cloudnetpy/products/ier.py +27 -50
- cloudnetpy/products/iwc.py +55 -48
- cloudnetpy/products/lwc.py +96 -70
- cloudnetpy/products/mwr_tools.py +186 -0
- cloudnetpy/products/product_tools.py +170 -128
- cloudnetpy/utils.py +455 -240
- cloudnetpy/version.py +2 -2
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
- cloudnetpy-1.87.3.dist-info/RECORD +127 -0
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
- cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
- docs/source/conf.py +2 -2
- cloudnetpy/categorize/atmos.py +0 -361
- cloudnetpy/products/mwr_multi.py +0 -68
- cloudnetpy/products/mwr_single.py +0 -75
- cloudnetpy-1.49.9.dist-info/RECORD +0 -112
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
cloudnetpy/utils.py
CHANGED
|
@@ -1,31 +1,35 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""This module contains general helper functions."""
|
|
2
|
+
|
|
2
3
|
import datetime
|
|
3
4
|
import logging
|
|
4
5
|
import os
|
|
5
6
|
import re
|
|
7
|
+
import textwrap
|
|
6
8
|
import uuid
|
|
7
9
|
import warnings
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
+
from collections.abc import Callable, Iterator
|
|
11
|
+
from os import PathLike
|
|
12
|
+
from typing import Literal, TypeVar
|
|
10
13
|
|
|
11
14
|
import netCDF4
|
|
12
15
|
import numpy as np
|
|
13
|
-
import
|
|
16
|
+
import numpy.typing as npt
|
|
14
17
|
from numpy import ma
|
|
15
18
|
from scipy import ndimage, stats
|
|
16
|
-
from scipy
|
|
17
|
-
|
|
19
|
+
from scipy import ndimage as ndi
|
|
20
|
+
from scipy.interpolate import (
|
|
21
|
+
RectBivariateSpline,
|
|
22
|
+
RegularGridInterpolator,
|
|
23
|
+
griddata,
|
|
24
|
+
interp1d,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from cloudnetpy.cloudnetarray import CloudnetArray
|
|
28
|
+
from cloudnetpy.constants import SEC_IN_DAY, SEC_IN_HOUR, SEC_IN_MINUTE
|
|
18
29
|
from cloudnetpy.exceptions import ValidTimeStampError
|
|
19
30
|
|
|
20
|
-
Epoch = tuple[int, int, int]
|
|
21
|
-
Date = tuple[str, str, str]
|
|
22
|
-
|
|
23
|
-
SECONDS_PER_MINUTE = 60
|
|
24
|
-
SECONDS_PER_HOUR = 3600
|
|
25
|
-
SECONDS_PER_DAY = 86400
|
|
26
|
-
|
|
27
31
|
|
|
28
|
-
def seconds2hours(time_in_seconds:
|
|
32
|
+
def seconds2hours(time_in_seconds: npt.NDArray) -> npt.NDArray:
|
|
29
33
|
"""Converts seconds since some epoch to fraction hour.
|
|
30
34
|
|
|
31
35
|
Args:
|
|
@@ -38,32 +42,19 @@ def seconds2hours(time_in_seconds: np.ndarray) -> np.ndarray:
|
|
|
38
42
|
Excludes leap seconds.
|
|
39
43
|
|
|
40
44
|
"""
|
|
41
|
-
seconds_since_midnight = np.mod(time_in_seconds,
|
|
42
|
-
fraction_hour = seconds_since_midnight /
|
|
45
|
+
seconds_since_midnight = np.mod(time_in_seconds, SEC_IN_DAY)
|
|
46
|
+
fraction_hour = seconds_since_midnight / SEC_IN_HOUR
|
|
43
47
|
if fraction_hour[-1] == 0:
|
|
44
48
|
fraction_hour[-1] = 24
|
|
45
49
|
return fraction_hour
|
|
46
50
|
|
|
47
51
|
|
|
48
|
-
def
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Returns:
|
|
55
|
-
list: [hours, minutes, seconds] formatted as '05' etc.
|
|
56
|
-
|
|
57
|
-
"""
|
|
58
|
-
seconds_since_midnight = np.mod(time_in_seconds, SECONDS_PER_DAY)
|
|
59
|
-
hours = seconds_since_midnight // SECONDS_PER_HOUR
|
|
60
|
-
minutes = seconds_since_midnight % SECONDS_PER_HOUR // SECONDS_PER_MINUTE
|
|
61
|
-
seconds = seconds_since_midnight % SECONDS_PER_MINUTE
|
|
62
|
-
time = [hours, minutes, seconds]
|
|
63
|
-
return [str(t).zfill(2) for t in time]
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
|
|
52
|
+
def seconds2date(
|
|
53
|
+
time_in_seconds: float,
|
|
54
|
+
epoch: datetime.datetime = datetime.datetime(
|
|
55
|
+
2001, 1, 1, tzinfo=datetime.timezone.utc
|
|
56
|
+
),
|
|
57
|
+
) -> datetime.datetime:
|
|
67
58
|
"""Converts seconds since some epoch to datetime (UTC).
|
|
68
59
|
|
|
69
60
|
Args:
|
|
@@ -71,31 +62,23 @@ def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
|
|
|
71
62
|
epoch: Epoch, default is (2001, 1, 1) (UTC).
|
|
72
63
|
|
|
73
64
|
Returns:
|
|
74
|
-
|
|
65
|
+
Datetime
|
|
75
66
|
|
|
76
67
|
"""
|
|
77
|
-
|
|
78
|
-
datetime.datetime(*epoch, tzinfo=timezone.utc)
|
|
79
|
-
)
|
|
80
|
-
timestamp = time_in_seconds + epoch_in_seconds
|
|
81
|
-
return (
|
|
82
|
-
datetime.datetime.utcfromtimestamp(timestamp)
|
|
83
|
-
.strftime("%Y %m %d %H %M %S")
|
|
84
|
-
.split()
|
|
85
|
-
)
|
|
68
|
+
return epoch + datetime.timedelta(seconds=float(time_in_seconds))
|
|
86
69
|
|
|
87
70
|
|
|
88
|
-
def datetime2decimal_hours(data:
|
|
89
|
-
"""Converts array of datetime to decimal_hours"""
|
|
71
|
+
def datetime2decimal_hours(data: npt.NDArray | list) -> npt.NDArray:
|
|
72
|
+
"""Converts array of datetime to decimal_hours."""
|
|
90
73
|
output = []
|
|
91
74
|
for timestamp in data:
|
|
92
75
|
t = timestamp.time()
|
|
93
|
-
decimal_hours = t.hour + t.minute /
|
|
76
|
+
decimal_hours = t.hour + t.minute / SEC_IN_MINUTE + t.second / SEC_IN_HOUR
|
|
94
77
|
output.append(decimal_hours)
|
|
95
78
|
return np.array(output)
|
|
96
79
|
|
|
97
80
|
|
|
98
|
-
def time_grid(time_step: int = 30) ->
|
|
81
|
+
def time_grid(time_step: int = 30) -> npt.NDArray:
|
|
99
82
|
"""Returns decimal hour array between 0 and 24.
|
|
100
83
|
|
|
101
84
|
Computes fraction hour time vector 0-24 with user-given
|
|
@@ -112,12 +95,13 @@ def time_grid(time_step: int = 30) -> np.ndarray:
|
|
|
112
95
|
|
|
113
96
|
"""
|
|
114
97
|
if time_step < 1:
|
|
115
|
-
|
|
116
|
-
|
|
98
|
+
msg = "Time resolution should be >= 1 seconds"
|
|
99
|
+
raise ValueError(msg)
|
|
100
|
+
half_step = time_step / SEC_IN_HOUR / 2
|
|
117
101
|
return np.arange(half_step, 24 + half_step, half_step * 2)
|
|
118
102
|
|
|
119
103
|
|
|
120
|
-
def binvec(x:
|
|
104
|
+
def binvec(x: npt.NDArray | list) -> npt.NDArray:
|
|
121
105
|
"""Converts 1-D center points to bins with even spacing.
|
|
122
106
|
|
|
123
107
|
Args:
|
|
@@ -136,60 +120,55 @@ def binvec(x: np.ndarray | list) -> np.ndarray:
|
|
|
136
120
|
return np.linspace(edge1, edge2, len(x) + 1)
|
|
137
121
|
|
|
138
122
|
|
|
123
|
+
REBIN_STAT = Literal["mean", "std", "max"]
|
|
124
|
+
REBIN_STAT_FN: dict[REBIN_STAT, Callable] = {
|
|
125
|
+
"mean": ma.mean,
|
|
126
|
+
"std": ma.std,
|
|
127
|
+
"max": ma.max,
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
|
|
139
131
|
def rebin_2d(
|
|
140
|
-
x_in:
|
|
141
|
-
array:
|
|
142
|
-
x_new:
|
|
143
|
-
statistic:
|
|
132
|
+
x_in: npt.NDArray,
|
|
133
|
+
array: npt.NDArray,
|
|
134
|
+
x_new: npt.NDArray,
|
|
135
|
+
statistic: REBIN_STAT = "mean",
|
|
144
136
|
n_min: int = 1,
|
|
145
|
-
|
|
146
|
-
|
|
137
|
+
*,
|
|
138
|
+
keepdim: bool = False,
|
|
139
|
+
mask_zeros: bool = False,
|
|
140
|
+
) -> tuple[ma.MaskedArray, npt.NDArray]:
|
|
141
|
+
edges = binvec(x_new)
|
|
142
|
+
binn = np.digitize(x_in, edges) - 1
|
|
143
|
+
n_bins = len(x_new)
|
|
144
|
+
counts = np.bincount(binn[binn >= 0], minlength=n_bins)
|
|
147
145
|
|
|
148
|
-
|
|
149
|
-
x_in: 1-D array with shape (n,).
|
|
150
|
-
array: 2-D input data with shape (n, m).
|
|
151
|
-
x_new: 1-D target vector (center points) with shape (N,).
|
|
152
|
-
statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
|
|
153
|
-
Default is 'mean'.
|
|
154
|
-
n_min: Minimum number of points to have good statistics in a bin. Default is 1.
|
|
146
|
+
stat_fn = REBIN_STAT_FN[statistic]
|
|
155
147
|
|
|
156
|
-
|
|
157
|
-
|
|
148
|
+
shape = array.shape if keepdim else (n_bins, array.shape[1])
|
|
149
|
+
result: ma.MaskedArray = ma.masked_array(np.ones(shape, dtype="float32"), mask=True)
|
|
158
150
|
|
|
159
|
-
|
|
160
|
-
|
|
151
|
+
for bin_ind in range(n_bins):
|
|
152
|
+
if counts[bin_ind] < n_min:
|
|
153
|
+
continue
|
|
154
|
+
mask = binn == bin_ind
|
|
155
|
+
block = array[mask, :]
|
|
156
|
+
x_ind = mask if keepdim else bin_ind
|
|
157
|
+
result[x_ind, :] = stat_fn(block, axis=0)
|
|
161
158
|
|
|
162
|
-
|
|
163
|
-
edges = binvec(x_new)
|
|
164
|
-
result = np.zeros((len(x_new), array.shape[1]))
|
|
165
|
-
array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
|
|
166
|
-
for ind, values in enumerate(array_screened.T):
|
|
167
|
-
mask = ~values.mask
|
|
168
|
-
if ma.any(values[mask]):
|
|
169
|
-
result[:, ind], _, _ = stats.binned_statistic(
|
|
170
|
-
x_in[mask], values[mask], statistic=statistic, bins=edges
|
|
171
|
-
)
|
|
172
|
-
result[~np.isfinite(result)] = 0
|
|
173
|
-
masked_result = ma.masked_equal(result, 0)
|
|
159
|
+
empty_bins = np.where(counts < n_min)[0]
|
|
174
160
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
for ind in range(len(edges) - 1):
|
|
178
|
-
is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
|
|
179
|
-
if len(is_data) < n_min:
|
|
180
|
-
masked_result[ind, :] = ma.masked
|
|
181
|
-
empty_indices.append(ind)
|
|
182
|
-
if len(empty_indices) > 0:
|
|
183
|
-
logging.debug(f"No radar data in {len(empty_indices)} bins")
|
|
161
|
+
if mask_zeros:
|
|
162
|
+
result[result == 0] = ma.masked
|
|
184
163
|
|
|
185
|
-
return
|
|
164
|
+
return result, empty_bins
|
|
186
165
|
|
|
187
166
|
|
|
188
167
|
def rebin_1d(
|
|
189
|
-
x_in:
|
|
190
|
-
array:
|
|
191
|
-
x_new:
|
|
192
|
-
statistic:
|
|
168
|
+
x_in: npt.NDArray,
|
|
169
|
+
array: npt.NDArray | ma.MaskedArray,
|
|
170
|
+
x_new: npt.NDArray,
|
|
171
|
+
statistic: REBIN_STAT = "mean",
|
|
193
172
|
) -> ma.MaskedArray:
|
|
194
173
|
"""Rebins 1D array.
|
|
195
174
|
|
|
@@ -201,22 +180,24 @@ def rebin_1d(
|
|
|
201
180
|
Default is 'mean'.
|
|
202
181
|
|
|
203
182
|
Returns:
|
|
204
|
-
|
|
183
|
+
Re-binned data with shape (N,).
|
|
205
184
|
|
|
206
185
|
"""
|
|
207
186
|
edges = binvec(x_new)
|
|
208
|
-
result =
|
|
187
|
+
result = ma.zeros(len(x_new))
|
|
209
188
|
array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
|
|
210
|
-
mask = ~array_screened.mask
|
|
189
|
+
mask = ~array_screened.mask
|
|
211
190
|
if ma.any(array_screened[mask]):
|
|
212
191
|
result, _, _ = stats.binned_statistic(
|
|
213
|
-
x_in[mask],
|
|
192
|
+
x_in[mask],
|
|
193
|
+
array_screened[mask],
|
|
194
|
+
statistic=statistic,
|
|
195
|
+
bins=edges,
|
|
214
196
|
)
|
|
215
|
-
|
|
216
|
-
return ma.masked_equal(result, 0)
|
|
197
|
+
return ma.masked_invalid(result, copy=True)
|
|
217
198
|
|
|
218
199
|
|
|
219
|
-
def filter_isolated_pixels(array:
|
|
200
|
+
def filter_isolated_pixels(array: npt.NDArray) -> npt.NDArray:
|
|
220
201
|
"""From a 2D boolean array, remove completely isolated single cells.
|
|
221
202
|
|
|
222
203
|
Args:
|
|
@@ -236,7 +217,7 @@ def filter_isolated_pixels(array: np.ndarray) -> np.ndarray:
|
|
|
236
217
|
return _filter(array, structure)
|
|
237
218
|
|
|
238
219
|
|
|
239
|
-
def filter_x_pixels(array:
|
|
220
|
+
def filter_x_pixels(array: npt.NDArray) -> npt.NDArray:
|
|
240
221
|
"""From a 2D boolean array, remove cells isolated in x-direction.
|
|
241
222
|
|
|
242
223
|
Args:
|
|
@@ -259,7 +240,7 @@ def filter_x_pixels(array: np.ndarray) -> np.ndarray:
|
|
|
259
240
|
return _filter(array, structure)
|
|
260
241
|
|
|
261
242
|
|
|
262
|
-
def _filter(array:
|
|
243
|
+
def _filter(array: npt.NDArray, structure: npt.NDArray) -> npt.NDArray:
|
|
263
244
|
filtered_array = np.copy(array)
|
|
264
245
|
id_regions, num_ids = ndimage.label(filtered_array, structure=structure)
|
|
265
246
|
id_sizes = np.array(ndimage.sum(array, id_regions, range(num_ids + 1))).astype(int)
|
|
@@ -268,8 +249,19 @@ def _filter(array: np.ndarray, structure: np.ndarray) -> np.ndarray:
|
|
|
268
249
|
return filtered_array
|
|
269
250
|
|
|
270
251
|
|
|
271
|
-
def
|
|
272
|
-
|
|
252
|
+
def remove_small_objects(
|
|
253
|
+
mask: npt.NDArray, max_size: int, connectivity: int
|
|
254
|
+
) -> npt.NDArray:
|
|
255
|
+
"""Removes small connected components from boolean mask."""
|
|
256
|
+
structure = ndi.generate_binary_structure(mask.ndim, connectivity)
|
|
257
|
+
labels, num = ndi.label(mask, structure=structure)
|
|
258
|
+
sizes = ndi.sum(mask, labels, index=np.arange(1, num + 1))
|
|
259
|
+
keep_labels = np.where(sizes > max_size)[0] + 1
|
|
260
|
+
return np.isin(labels, keep_labels)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def isbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
|
|
264
|
+
"""Tests if nth bit (0,1,2,...) is set.
|
|
273
265
|
|
|
274
266
|
Args:
|
|
275
267
|
array: Integer array.
|
|
@@ -287,17 +279,18 @@ def isbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
|
|
|
287
279
|
>>> isbit(4, 2)
|
|
288
280
|
True
|
|
289
281
|
|
|
290
|
-
See
|
|
282
|
+
See Also:
|
|
291
283
|
utils.setbit()
|
|
292
284
|
|
|
293
285
|
"""
|
|
294
286
|
if nth_bit < 0:
|
|
295
|
-
|
|
287
|
+
msg = "Negative bit number"
|
|
288
|
+
raise ValueError(msg)
|
|
296
289
|
mask = 1 << nth_bit
|
|
297
290
|
return array & mask > 0
|
|
298
291
|
|
|
299
292
|
|
|
300
|
-
def setbit(array:
|
|
293
|
+
def setbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
|
|
301
294
|
"""Sets nth bit (0, 1, 2, ...) on number.
|
|
302
295
|
|
|
303
296
|
Args:
|
|
@@ -316,24 +309,25 @@ def setbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
|
|
|
316
309
|
>>> setbit(0, 2)
|
|
317
310
|
4
|
|
318
311
|
|
|
319
|
-
See
|
|
312
|
+
See Also:
|
|
320
313
|
utils.isbit()
|
|
321
314
|
|
|
322
315
|
"""
|
|
323
316
|
if nth_bit < 0:
|
|
324
|
-
|
|
317
|
+
msg = "Negative bit number"
|
|
318
|
+
raise ValueError(msg)
|
|
325
319
|
mask = 1 << nth_bit
|
|
326
320
|
array |= mask
|
|
327
321
|
return array
|
|
328
322
|
|
|
329
323
|
|
|
330
324
|
def interpolate_2d(
|
|
331
|
-
x:
|
|
332
|
-
y:
|
|
333
|
-
z:
|
|
334
|
-
x_new:
|
|
335
|
-
y_new:
|
|
336
|
-
) ->
|
|
325
|
+
x: npt.NDArray,
|
|
326
|
+
y: npt.NDArray,
|
|
327
|
+
z: npt.NDArray,
|
|
328
|
+
x_new: npt.NDArray,
|
|
329
|
+
y_new: npt.NDArray,
|
|
330
|
+
) -> npt.NDArray:
|
|
337
331
|
"""Linear interpolation of gridded 2d data.
|
|
338
332
|
|
|
339
333
|
Args:
|
|
@@ -355,11 +349,11 @@ def interpolate_2d(
|
|
|
355
349
|
|
|
356
350
|
|
|
357
351
|
def interpolate_2d_mask(
|
|
358
|
-
x:
|
|
359
|
-
y:
|
|
352
|
+
x: npt.NDArray,
|
|
353
|
+
y: npt.NDArray,
|
|
360
354
|
z: ma.MaskedArray,
|
|
361
|
-
x_new:
|
|
362
|
-
y_new:
|
|
355
|
+
x_new: npt.NDArray,
|
|
356
|
+
y_new: npt.NDArray,
|
|
363
357
|
) -> ma.MaskedArray:
|
|
364
358
|
"""2D linear interpolation preserving the mask.
|
|
365
359
|
|
|
@@ -378,32 +372,34 @@ def interpolate_2d_mask(
|
|
|
378
372
|
interpolation. Input data may contain nan-values.
|
|
379
373
|
|
|
380
374
|
"""
|
|
381
|
-
z = ma.array(ma.masked_invalid(z, copy=True))
|
|
375
|
+
z = ma.array(ma.masked_invalid(z, copy=True))
|
|
382
376
|
# Interpolate ignoring masked values:
|
|
383
|
-
valid_points = np.logical_not(z.mask)
|
|
377
|
+
valid_points = np.logical_not(z.mask)
|
|
384
378
|
xx, yy = np.meshgrid(y, x)
|
|
385
379
|
x_valid = xx[valid_points]
|
|
386
380
|
y_valid = yy[valid_points]
|
|
387
381
|
z_valid = z[valid_points]
|
|
388
382
|
xx_new, yy_new = np.meshgrid(y_new, x_new)
|
|
389
383
|
data = griddata(
|
|
390
|
-
(x_valid, y_valid),
|
|
384
|
+
(x_valid, y_valid),
|
|
385
|
+
z_valid.ravel(),
|
|
386
|
+
(xx_new, yy_new),
|
|
387
|
+
method="linear",
|
|
391
388
|
)
|
|
392
389
|
# Preserve mask:
|
|
393
|
-
mask_fun = RectBivariateSpline(x, y, z
|
|
390
|
+
mask_fun = RectBivariateSpline(x, y, ma.getmaskarray(z), kx=1, ky=1)
|
|
394
391
|
mask = mask_fun(x_new, y_new)
|
|
395
392
|
mask[mask < 0.5] = 0
|
|
396
393
|
masked_array = ma.array(data, mask=mask.astype(bool))
|
|
397
|
-
|
|
398
|
-
return masked_array
|
|
394
|
+
return ma.masked_invalid(masked_array)
|
|
399
395
|
|
|
400
396
|
|
|
401
397
|
def interpolate_2d_nearest(
|
|
402
|
-
x:
|
|
403
|
-
y:
|
|
404
|
-
z:
|
|
405
|
-
x_new:
|
|
406
|
-
y_new:
|
|
398
|
+
x: npt.NDArray,
|
|
399
|
+
y: npt.NDArray,
|
|
400
|
+
z: ma.MaskedArray,
|
|
401
|
+
x_new: npt.NDArray,
|
|
402
|
+
y_new: npt.NDArray,
|
|
407
403
|
) -> ma.MaskedArray:
|
|
408
404
|
"""2D nearest neighbor interpolation preserving mask.
|
|
409
405
|
|
|
@@ -421,25 +417,111 @@ def interpolate_2d_nearest(
|
|
|
421
417
|
Points outside the original range will be interpolated but masked.
|
|
422
418
|
|
|
423
419
|
"""
|
|
424
|
-
data = ma.
|
|
420
|
+
data = ma.filled(z, np.nan)
|
|
425
421
|
fun = RegularGridInterpolator(
|
|
426
422
|
(x, y),
|
|
427
423
|
data,
|
|
428
424
|
method="nearest",
|
|
429
425
|
bounds_error=False,
|
|
430
|
-
fill_value=ma.masked,
|
|
431
426
|
)
|
|
432
427
|
xx, yy = np.meshgrid(x_new, y_new)
|
|
433
|
-
|
|
428
|
+
zz = fun((xx, yy)).T
|
|
429
|
+
return ma.masked_where(np.isnan(zz), zz)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def interpolate_2D_along_y(
|
|
433
|
+
y: npt.NDArray,
|
|
434
|
+
z: npt.NDArray | ma.MaskedArray,
|
|
435
|
+
y_new: npt.NDArray,
|
|
436
|
+
) -> ma.MaskedArray:
|
|
437
|
+
"""Fast 1D nearest-neighbor interpolation along y for each x.
|
|
434
438
|
|
|
439
|
+
Args:
|
|
440
|
+
y: 1D numpy array of y-coordinates (length M).
|
|
441
|
+
z: 2D array of shape (N, M).
|
|
442
|
+
y_new: 1D numpy array of new y-coordinates.
|
|
443
|
+
|
|
444
|
+
Returns:
|
|
445
|
+
Masked 2D masked array interpolated along y.
|
|
435
446
|
|
|
436
|
-
|
|
447
|
+
Notes:
|
|
448
|
+
Only interpolates along y. Points outside range are masked.
|
|
449
|
+
"""
|
|
450
|
+
idx = np.searchsorted(y, y_new, side="left")
|
|
451
|
+
idx = np.clip(idx, 0, len(y) - 1)
|
|
452
|
+
left = np.maximum(idx - 1, 0)
|
|
453
|
+
choose_right = (idx == 0) | (
|
|
454
|
+
(idx < len(y)) & (np.abs(y[idx] - y_new) < np.abs(y_new - y[left]))
|
|
455
|
+
)
|
|
456
|
+
idx[~choose_right] = left[~choose_right]
|
|
457
|
+
z_interp = ma.array(z[:, idx])
|
|
458
|
+
z_mask = ma.getmaskarray(z_interp)
|
|
459
|
+
mask = (y_new < y.min()) | (y_new > y.max())
|
|
460
|
+
z_mask[:, mask] = True
|
|
461
|
+
return ma.MaskedArray(z_interp, mask=z_mask)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def interpolate_1d(
|
|
465
|
+
time: npt.NDArray,
|
|
466
|
+
y: ma.MaskedArray,
|
|
467
|
+
time_new: npt.NDArray,
|
|
468
|
+
max_time: float,
|
|
469
|
+
method: str = "linear",
|
|
470
|
+
) -> ma.MaskedArray:
|
|
471
|
+
"""1D linear interpolation preserving the mask.
|
|
472
|
+
|
|
473
|
+
Args:
|
|
474
|
+
time: 1D array in fraction hour.
|
|
475
|
+
y: 1D array, data values.
|
|
476
|
+
time_new: 1D array, new time coordinates.
|
|
477
|
+
max_time: Maximum allowed gap in minutes. Values outside this gap will
|
|
478
|
+
be masked.
|
|
479
|
+
method: Interpolation method, 'linear' (default) or 'nearest'.
|
|
480
|
+
"""
|
|
481
|
+
if np.max(time) > 24 or np.min(time) < 0:
|
|
482
|
+
msg = "Time vector must be in fraction hours between 0 and 24"
|
|
483
|
+
raise ValueError(msg)
|
|
484
|
+
if ma.is_masked(y):
|
|
485
|
+
if y.mask.all():
|
|
486
|
+
return ma.masked_all(time_new.shape)
|
|
487
|
+
time = time[~y.mask]
|
|
488
|
+
y = y[~y.mask]
|
|
489
|
+
fun = interp1d(time, y, kind=method, fill_value=(y[0], y[-1]), bounds_error=False)
|
|
490
|
+
interpolated = ma.array(fun(time_new))
|
|
491
|
+
bad_idx = get_gap_ind(time, time_new, max_time / 60)
|
|
492
|
+
|
|
493
|
+
if len(bad_idx) > 0:
|
|
494
|
+
msg = f"Unable to interpolate for {len(bad_idx)} time steps"
|
|
495
|
+
logging.warning(msg)
|
|
496
|
+
interpolated[bad_idx] = ma.masked
|
|
497
|
+
|
|
498
|
+
return interpolated
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def get_gap_ind(
|
|
502
|
+
grid: npt.NDArray, new_grid: npt.NDArray, threshold: float
|
|
503
|
+
) -> list[int]:
|
|
504
|
+
"""Finds indices in new_grid that are too far from grid."""
|
|
505
|
+
if grid.size == 0:
|
|
506
|
+
return list(range(len(new_grid)))
|
|
507
|
+
idxs = np.searchsorted(grid, new_grid)
|
|
508
|
+
left_dist = np.where(idxs > 0, np.abs(new_grid - grid[idxs - 1]), np.inf)
|
|
509
|
+
right_dist = np.where(
|
|
510
|
+
idxs < len(grid),
|
|
511
|
+
np.abs(new_grid - grid[np.clip(idxs, 0, len(grid) - 1)]),
|
|
512
|
+
np.inf,
|
|
513
|
+
)
|
|
514
|
+
nearest = np.minimum(left_dist, right_dist)
|
|
515
|
+
return np.where(nearest > threshold)[0].tolist()
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def calc_relative_error(reference: npt.NDArray, array: npt.NDArray) -> npt.NDArray:
|
|
437
519
|
"""Calculates relative error (%)."""
|
|
438
520
|
return ((array - reference) / reference) * 100
|
|
439
521
|
|
|
440
522
|
|
|
441
|
-
def db2lin(array: float |
|
|
442
|
-
"""
|
|
523
|
+
def db2lin(array: float | npt.NDArray, scale: int = 10) -> npt.NDArray:
|
|
524
|
+
"""DB to linear conversion."""
|
|
443
525
|
data = array / scale
|
|
444
526
|
with warnings.catch_warnings():
|
|
445
527
|
warnings.simplefilter("ignore", category=RuntimeWarning)
|
|
@@ -448,19 +530,19 @@ def db2lin(array: float | np.ndarray, scale: int = 10) -> np.ndarray:
|
|
|
448
530
|
return np.power(10, data)
|
|
449
531
|
|
|
450
532
|
|
|
451
|
-
def lin2db(array:
|
|
533
|
+
def lin2db(array: npt.NDArray, scale: int = 10) -> npt.NDArray:
|
|
452
534
|
"""Linear to dB conversion."""
|
|
453
535
|
if ma.isMaskedArray(array):
|
|
454
536
|
return scale * ma.log10(array)
|
|
455
537
|
return scale * np.log10(array)
|
|
456
538
|
|
|
457
539
|
|
|
458
|
-
def mdiff(array:
|
|
540
|
+
def mdiff(array: npt.NDArray) -> float:
|
|
459
541
|
"""Returns median difference of 1-D array."""
|
|
460
542
|
return float(ma.median(ma.diff(array)))
|
|
461
543
|
|
|
462
544
|
|
|
463
|
-
def l2norm(*args) -> ma.MaskedArray:
|
|
545
|
+
def l2norm(*args: npt.NDArray | float) -> ma.MaskedArray:
|
|
464
546
|
"""Returns l2 norm.
|
|
465
547
|
|
|
466
548
|
Args:
|
|
@@ -470,20 +552,23 @@ def l2norm(*args) -> ma.MaskedArray:
|
|
|
470
552
|
The l2 norm.
|
|
471
553
|
|
|
472
554
|
"""
|
|
473
|
-
|
|
555
|
+
arg_cpy: float | npt.NDArray
|
|
556
|
+
ss: float | npt.NDArray = 0
|
|
474
557
|
for arg in args:
|
|
475
558
|
if isinstance(arg, ma.MaskedArray):
|
|
476
559
|
# Raise only non-masked values, not sure if this is needed...
|
|
477
|
-
|
|
478
|
-
|
|
560
|
+
arg_cpy = ma.copy(arg)
|
|
561
|
+
arg_cpy[~arg.mask] = arg_cpy[~arg.mask] ** 2
|
|
479
562
|
else:
|
|
480
|
-
|
|
481
|
-
ss = ss +
|
|
563
|
+
arg_cpy = arg**2
|
|
564
|
+
ss = ss + arg_cpy
|
|
482
565
|
return ma.sqrt(ss)
|
|
483
566
|
|
|
484
567
|
|
|
485
568
|
def l2norm_weighted(
|
|
486
|
-
values: tuple,
|
|
569
|
+
values: tuple,
|
|
570
|
+
overall_scale: float,
|
|
571
|
+
term_weights: tuple,
|
|
487
572
|
) -> ma.MaskedArray:
|
|
488
573
|
"""Calculates scaled and weighted Euclidean distance.
|
|
489
574
|
|
|
@@ -503,12 +588,12 @@ def l2norm_weighted(
|
|
|
503
588
|
TODO: Use masked arrays instead of tuples.
|
|
504
589
|
|
|
505
590
|
"""
|
|
506
|
-
generic_values = ma.array(values, dtype=object)
|
|
591
|
+
generic_values: ma.MaskedArray = ma.array(values, dtype=object)
|
|
507
592
|
weighted_values = ma.multiply(generic_values, term_weights)
|
|
508
593
|
return overall_scale * l2norm(*weighted_values)
|
|
509
594
|
|
|
510
595
|
|
|
511
|
-
def cumsumr(array:
|
|
596
|
+
def cumsumr(array: npt.NDArray, axis: int = 0) -> npt.NDArray:
|
|
512
597
|
"""Finds cumulative sum that resets on 0.
|
|
513
598
|
|
|
514
599
|
Args:
|
|
@@ -525,12 +610,10 @@ def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
|
|
|
525
610
|
|
|
526
611
|
"""
|
|
527
612
|
cums = array.cumsum(axis=axis)
|
|
528
|
-
return cums - np.maximum.accumulate(
|
|
529
|
-
cums * (array == 0), axis=axis
|
|
530
|
-
) # pylint: disable=E1101
|
|
613
|
+
return cums - np.maximum.accumulate(cums * (array == 0), axis=axis)
|
|
531
614
|
|
|
532
615
|
|
|
533
|
-
def ffill(array:
|
|
616
|
+
def ffill(array: npt.NDArray, value: int = 0) -> npt.NDArray:
|
|
534
617
|
"""Forward fills an array.
|
|
535
618
|
|
|
536
619
|
Args:
|
|
@@ -552,15 +635,19 @@ def ffill(array: np.ndarray, value: int = 0) -> np.ndarray:
|
|
|
552
635
|
ndims = len(array.shape)
|
|
553
636
|
ran = np.arange(array.shape[ndims - 1])
|
|
554
637
|
idx = np.where((array != value), ran, 0)
|
|
555
|
-
idx = np.maximum.accumulate(idx, axis=ndims - 1)
|
|
638
|
+
idx = np.maximum.accumulate(idx, axis=ndims - 1)
|
|
556
639
|
if ndims == 2:
|
|
557
640
|
return array[np.arange(idx.shape[0])[:, None], idx]
|
|
558
641
|
return array[idx]
|
|
559
642
|
|
|
560
643
|
|
|
561
644
|
def init(
|
|
562
|
-
n_vars: int,
|
|
563
|
-
|
|
645
|
+
n_vars: int,
|
|
646
|
+
shape: tuple,
|
|
647
|
+
dtype: type = float,
|
|
648
|
+
*,
|
|
649
|
+
masked: bool = True,
|
|
650
|
+
) -> Iterator[npt.NDArray | ma.MaskedArray]:
|
|
564
651
|
"""Initializes several numpy arrays.
|
|
565
652
|
|
|
566
653
|
Args:
|
|
@@ -590,14 +677,14 @@ def init(
|
|
|
590
677
|
yield np.zeros(shape, dtype=dtype)
|
|
591
678
|
|
|
592
679
|
|
|
593
|
-
def n_elements(array:
|
|
680
|
+
def n_elements(array: npt.NDArray, dist: float, var: str | None = None) -> int:
|
|
594
681
|
"""Returns the number of elements that cover certain distance.
|
|
595
682
|
|
|
596
683
|
Args:
|
|
597
684
|
array: Input array with arbitrary units or time in fraction hour. *x* should
|
|
598
685
|
be evenly spaced or at least close to.
|
|
599
686
|
dist: Distance to be covered. If x is fraction time, *dist* is in minutes.
|
|
600
|
-
Otherwise *x* and *dist* should have the same units.
|
|
687
|
+
Otherwise, *x* and *dist* should have the same units.
|
|
601
688
|
var: If 'time', input is fraction hour and distance in minutes, else inputs
|
|
602
689
|
have the same units. Default is None (same units).
|
|
603
690
|
|
|
@@ -625,11 +712,11 @@ def n_elements(array: np.ndarray, dist: float, var: str | None = None) -> int:
|
|
|
625
712
|
"""
|
|
626
713
|
n = dist / mdiff(array)
|
|
627
714
|
if var == "time":
|
|
628
|
-
n = n /
|
|
715
|
+
n = n / SEC_IN_MINUTE
|
|
629
716
|
return int(np.round(n))
|
|
630
717
|
|
|
631
718
|
|
|
632
|
-
def isscalar(array) -> bool:
|
|
719
|
+
def isscalar(array: npt.NDArray | float | list | netCDF4.Variable) -> bool:
|
|
633
720
|
"""Tests if input is scalar.
|
|
634
721
|
|
|
635
722
|
By "scalar" we mean that array has a single value.
|
|
@@ -645,52 +732,60 @@ def isscalar(array) -> bool:
|
|
|
645
732
|
True
|
|
646
733
|
|
|
647
734
|
"""
|
|
648
|
-
arr = ma.array(array)
|
|
649
|
-
|
|
650
|
-
return True
|
|
651
|
-
return False
|
|
735
|
+
arr: ma.MaskedArray = ma.array(array)
|
|
736
|
+
return not hasattr(arr, "__len__") or arr.shape == () or len(arr) == 1
|
|
652
737
|
|
|
653
738
|
|
|
654
739
|
def get_time() -> str:
|
|
655
740
|
"""Returns current UTC-time."""
|
|
656
|
-
|
|
741
|
+
t_zone = datetime.timezone.utc
|
|
742
|
+
form = "%Y-%m-%d %H:%M:%S"
|
|
743
|
+
return f"{datetime.datetime.now(tz=t_zone).strftime(form)} +00:00"
|
|
657
744
|
|
|
658
745
|
|
|
659
746
|
def date_range(
|
|
660
|
-
start_date: datetime.date,
|
|
747
|
+
start_date: datetime.date,
|
|
748
|
+
end_date: datetime.date,
|
|
661
749
|
) -> Iterator[datetime.date]:
|
|
662
750
|
"""Returns range between two dates (datetimes)."""
|
|
663
751
|
for n in range(int((end_date - start_date).days)):
|
|
664
752
|
yield start_date + datetime.timedelta(n)
|
|
665
753
|
|
|
666
754
|
|
|
667
|
-
def get_uuid() ->
|
|
668
|
-
"""
|
|
669
|
-
|
|
755
|
+
def get_uuid(input_uuid: str | uuid.UUID | None) -> uuid.UUID:
|
|
756
|
+
"""Parse or generate unique identifier."""
|
|
757
|
+
if input_uuid is None:
|
|
758
|
+
return uuid.uuid4()
|
|
759
|
+
if isinstance(input_uuid, str):
|
|
760
|
+
return uuid.UUID(input_uuid)
|
|
761
|
+
return input_uuid
|
|
670
762
|
|
|
671
763
|
|
|
672
|
-
def get_wl_band(radar_frequency: float) ->
|
|
673
|
-
"""Returns
|
|
764
|
+
def get_wl_band(radar_frequency: float) -> Literal["X", "Ka", "W"]:
|
|
765
|
+
"""Returns IEEE radar band corresponding to radar frequency.
|
|
674
766
|
|
|
675
767
|
Args:
|
|
676
768
|
radar_frequency: Radar frequency (GHz).
|
|
677
769
|
|
|
678
770
|
Returns:
|
|
679
|
-
|
|
771
|
+
IEEE radar band as string.
|
|
680
772
|
|
|
681
773
|
"""
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
774
|
+
if 8 < radar_frequency < 12:
|
|
775
|
+
return "X"
|
|
776
|
+
if 27 < radar_frequency < 40:
|
|
777
|
+
return "Ka"
|
|
778
|
+
if 75 < radar_frequency < 110:
|
|
779
|
+
return "W"
|
|
780
|
+
msg = f"Unknown band: {radar_frequency} GHz"
|
|
781
|
+
raise ValueError(msg)
|
|
688
782
|
|
|
689
783
|
|
|
690
|
-
def transpose(data:
|
|
784
|
+
def transpose(data: npt.NDArray) -> npt.NDArray:
|
|
691
785
|
"""Transposes numpy array of (n, ) to (n, 1)."""
|
|
692
786
|
if data.ndim != 1 or len(data) <= 1:
|
|
693
|
-
|
|
787
|
+
msg = "Invalid input array shape"
|
|
788
|
+
raise ValueError(msg)
|
|
694
789
|
return data[:, np.newaxis]
|
|
695
790
|
|
|
696
791
|
|
|
@@ -713,8 +808,12 @@ def del_dict_keys(data: dict, keys: tuple | list) -> dict:
|
|
|
713
808
|
|
|
714
809
|
|
|
715
810
|
def array_to_probability(
|
|
716
|
-
array:
|
|
717
|
-
|
|
811
|
+
array: npt.NDArray,
|
|
812
|
+
loc: float,
|
|
813
|
+
scale: float,
|
|
814
|
+
*,
|
|
815
|
+
invert: bool = False,
|
|
816
|
+
) -> npt.NDArray:
|
|
718
817
|
"""Converts continuous variable into 0-1 probability.
|
|
719
818
|
|
|
720
819
|
Args:
|
|
@@ -740,7 +839,7 @@ def array_to_probability(
|
|
|
740
839
|
return prob
|
|
741
840
|
|
|
742
841
|
|
|
743
|
-
def range_to_height(range_los:
|
|
842
|
+
def range_to_height(range_los: npt.NDArray, tilt_angle: float) -> npt.NDArray:
|
|
744
843
|
"""Converts distances from a tilted instrument to height above the ground.
|
|
745
844
|
|
|
746
845
|
Args:
|
|
@@ -759,27 +858,21 @@ def range_to_height(range_los: np.ndarray, tilt_angle: float) -> np.ndarray:
|
|
|
759
858
|
|
|
760
859
|
def is_empty_line(line: str) -> bool:
|
|
761
860
|
"""Tests if a line (of a text file) is empty."""
|
|
762
|
-
|
|
763
|
-
return True
|
|
764
|
-
return False
|
|
861
|
+
return line in ("\n", "\r\n")
|
|
765
862
|
|
|
766
863
|
|
|
767
864
|
def is_timestamp(timestamp: str) -> bool:
|
|
768
|
-
"""Tests if the input string is formatted as -yyyy-mm-dd hh:mm:ss"""
|
|
865
|
+
"""Tests if the input string is formatted as -yyyy-mm-dd hh:mm:ss."""
|
|
769
866
|
reg_exp = re.compile(r"-\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
|
|
770
|
-
|
|
771
|
-
return True
|
|
772
|
-
return False
|
|
867
|
+
return reg_exp.match(timestamp) is not None
|
|
773
868
|
|
|
774
869
|
|
|
775
|
-
def get_sorted_filenames(file_path: str, extension: str) -> list:
|
|
870
|
+
def get_sorted_filenames(file_path: str | PathLike, extension: str) -> list[str]:
|
|
776
871
|
"""Returns full paths of files with some extension, sorted by filename."""
|
|
777
872
|
extension = extension.lower()
|
|
778
873
|
all_files = os.listdir(file_path)
|
|
779
874
|
files = [
|
|
780
|
-
"/".
|
|
781
|
-
for file in all_files
|
|
782
|
-
if file.lower().endswith(extension)
|
|
875
|
+
f"{file_path}/{file}" for file in all_files if file.lower().endswith(extension)
|
|
783
876
|
]
|
|
784
877
|
files.sort()
|
|
785
878
|
return files
|
|
@@ -793,23 +886,16 @@ def str_to_numeric(value: str) -> int | float:
|
|
|
793
886
|
return float(value)
|
|
794
887
|
|
|
795
888
|
|
|
796
|
-
def
|
|
797
|
-
"""Finds different model types."""
|
|
798
|
-
url = "https://cloudnet.fmi.fi/api/models"
|
|
799
|
-
data = requests.get(url=url, timeout=60).json()
|
|
800
|
-
models = [model["id"] for model in data]
|
|
801
|
-
model_types = [model.split("-")[0] for model in models]
|
|
802
|
-
return list(set(model_types))
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
def get_epoch(units: str) -> Epoch:
|
|
889
|
+
def get_epoch(units: str) -> datetime.datetime:
|
|
806
890
|
"""Finds epoch from units string."""
|
|
807
|
-
fallback = (2001, 1, 1)
|
|
891
|
+
fallback = datetime.datetime(2001, 1, 1, tzinfo=datetime.timezone.utc)
|
|
808
892
|
try:
|
|
809
893
|
date = units.split()[2]
|
|
810
894
|
except IndexError:
|
|
811
895
|
return fallback
|
|
812
896
|
date = date.replace(",", "")
|
|
897
|
+
if "T" in date:
|
|
898
|
+
date = date[: date.index("T")]
|
|
813
899
|
try:
|
|
814
900
|
date_components = [int(x) for x in date.split("-")]
|
|
815
901
|
except ValueError:
|
|
@@ -818,13 +904,15 @@ def get_epoch(units: str) -> Epoch:
|
|
|
818
904
|
except ValueError:
|
|
819
905
|
return fallback
|
|
820
906
|
year, month, day = date_components
|
|
821
|
-
current_year = datetime.datetime.
|
|
907
|
+
current_year = datetime.datetime.now(tz=datetime.timezone.utc).year
|
|
822
908
|
if (1900 < year <= current_year) and (0 < month < 13) and (0 < day < 32):
|
|
823
|
-
return year, month, day
|
|
909
|
+
return datetime.datetime(year, month, day, tzinfo=datetime.timezone.utc)
|
|
824
910
|
return fallback
|
|
825
911
|
|
|
826
912
|
|
|
827
|
-
def screen_by_time(
|
|
913
|
+
def screen_by_time(
|
|
914
|
+
data_in: dict, epoch: datetime.datetime, expected_date: datetime.date
|
|
915
|
+
) -> dict:
|
|
828
916
|
"""Screen data by time.
|
|
829
917
|
|
|
830
918
|
Args:
|
|
@@ -861,7 +949,9 @@ def screen_by_time(data_in: dict, epoch: Epoch, expected_date: str) -> dict:
|
|
|
861
949
|
return data
|
|
862
950
|
|
|
863
951
|
|
|
864
|
-
def find_valid_time_indices(
|
|
952
|
+
def find_valid_time_indices(
|
|
953
|
+
time: npt.NDArray, epoch: datetime.datetime, expected_date: datetime.date
|
|
954
|
+
) -> list[int]:
|
|
865
955
|
"""Finds valid time array indices for the given date.
|
|
866
956
|
|
|
867
957
|
Args:
|
|
@@ -884,15 +974,15 @@ def find_valid_time_indices(time: np.ndarray, epoch: Epoch, expected_date: str)
|
|
|
884
974
|
ind_sorted = np.argsort(time)
|
|
885
975
|
ind_valid: list[int] = []
|
|
886
976
|
for ind in ind_sorted:
|
|
887
|
-
|
|
888
|
-
if
|
|
977
|
+
date = seconds2date(time[ind], epoch=epoch).date()
|
|
978
|
+
if date == expected_date and time[ind] not in time[ind_valid]:
|
|
889
979
|
ind_valid.append(ind)
|
|
890
980
|
if not ind_valid:
|
|
891
981
|
raise ValidTimeStampError
|
|
892
982
|
return ind_valid
|
|
893
983
|
|
|
894
984
|
|
|
895
|
-
def append_data(data_in: dict, key: str, array:
|
|
985
|
+
def append_data(data_in: dict, key: str, array: npt.NDArray) -> dict:
|
|
896
986
|
"""Appends data to a dictionary field (creates the field if not yet present).
|
|
897
987
|
|
|
898
988
|
Args:
|
|
@@ -909,7 +999,7 @@ def append_data(data_in: dict, key: str, array: np.ndarray) -> dict:
|
|
|
909
999
|
return data
|
|
910
1000
|
|
|
911
1001
|
|
|
912
|
-
def edges2mid(data:
|
|
1002
|
+
def edges2mid(data: npt.NDArray, reference: Literal["upper", "lower"]) -> npt.NDArray:
|
|
913
1003
|
"""Shifts values half bin towards up or down.
|
|
914
1004
|
|
|
915
1005
|
Args:
|
|
@@ -920,8 +1010,6 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
|
|
|
920
1010
|
Shifted values.
|
|
921
1011
|
|
|
922
1012
|
"""
|
|
923
|
-
if reference not in ("lower", "upper"):
|
|
924
|
-
raise ValueError
|
|
925
1013
|
gaps = (data[1:] - data[0:-1]) / 2
|
|
926
1014
|
if reference == "lower":
|
|
927
1015
|
gaps = np.append(gaps, gaps[-1])
|
|
@@ -930,29 +1018,156 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
|
|
|
930
1018
|
return data - gaps
|
|
931
1019
|
|
|
932
1020
|
|
|
933
|
-
def
|
|
934
|
-
"""Returns
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
file_type = nc.cloudnet_file_type
|
|
938
|
-
return file_type
|
|
939
|
-
product = filename.split("_")[-1][:-3]
|
|
940
|
-
if product in ("categorize", "classification", "drizzle"):
|
|
941
|
-
return product
|
|
942
|
-
if product[:3] in ("lwc", "iwc"):
|
|
943
|
-
return product[:3]
|
|
944
|
-
raise ValueError("Unknown file type")
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
def get_files_with_common_range(files: list) -> list:
|
|
948
|
-
"""Returns files with the same (most common) number of range gates."""
|
|
949
|
-
n_range = []
|
|
950
|
-
for file in files:
|
|
1021
|
+
def get_files_with_variables(filenames: list, variables: list[str]) -> list:
|
|
1022
|
+
"""Returns files where all variables exist."""
|
|
1023
|
+
valid_files = []
|
|
1024
|
+
for file in filenames:
|
|
951
1025
|
with netCDF4.Dataset(file) as nc:
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
1026
|
+
for variable in variables:
|
|
1027
|
+
if variable not in nc.variables:
|
|
1028
|
+
break
|
|
1029
|
+
else:
|
|
1030
|
+
valid_files.append(file)
|
|
1031
|
+
return valid_files
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
def is_all_masked(array: npt.NDArray) -> bool:
|
|
1035
|
+
"""Tests if all values are masked."""
|
|
1036
|
+
return bool(ma.isMaskedArray(array) and hasattr(array, "mask") and array.mask.all())
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def find_masked_profiles_indices(array: ma.MaskedArray) -> list:
|
|
1040
|
+
"""Finds indices of masked profiles in a 2-D array."""
|
|
1041
|
+
non_masked_counts = np.ma.count(array, axis=1)
|
|
1042
|
+
masked_profiles_indices = np.where(non_masked_counts == 0)[0]
|
|
1043
|
+
return list(masked_profiles_indices)
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
T = TypeVar("T", int, str)
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
def _format_definition(kind: str, definitions: dict[T, str]) -> str:
|
|
1050
|
+
lines = [""]
|
|
1051
|
+
for key, value in definitions.items():
|
|
1052
|
+
prefix = f"{kind} {key}: "
|
|
1053
|
+
indent = " " * len(prefix)
|
|
1054
|
+
text = " ".join(value.split())
|
|
1055
|
+
wrapped = textwrap.wrap(prefix + text, subsequent_indent=indent)
|
|
1056
|
+
lines.extend(wrapped)
|
|
1057
|
+
return "\n".join(lines)
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
def status_field_definition(definitions: dict[T, str]) -> str:
|
|
1061
|
+
return _format_definition("Value", definitions)
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
def bit_field_definition(definitions: dict[T, str]) -> str:
|
|
1065
|
+
return _format_definition("Bit", definitions)
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def path_lengths_from_ground(height_agl: npt.NDArray) -> npt.NDArray:
|
|
1069
|
+
return np.diff(height_agl, prepend=0)
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
def add_site_geolocation(
|
|
1073
|
+
data: dict,
|
|
1074
|
+
*,
|
|
1075
|
+
gps: bool,
|
|
1076
|
+
site_meta: dict | None = None,
|
|
1077
|
+
dataset: netCDF4.Dataset | None = None,
|
|
1078
|
+
) -> None:
|
|
1079
|
+
tmp_data = {}
|
|
1080
|
+
tmp_source = {}
|
|
1081
|
+
value: npt.NDArray | float | None
|
|
1082
|
+
|
|
1083
|
+
for key in ("latitude", "longitude", "altitude"):
|
|
1084
|
+
value = None
|
|
1085
|
+
source = None
|
|
1086
|
+
# Prefer accurate GPS coordinates. Don't trust altitude because its less
|
|
1087
|
+
# accurate and at least in Lindenberg BASTA there are large jumps.
|
|
1088
|
+
if gps and key != "altitude":
|
|
1089
|
+
values = None
|
|
1090
|
+
if isinstance(dataset, netCDF4.Dataset) and key in dataset.variables:
|
|
1091
|
+
values = dataset[key][:]
|
|
1092
|
+
elif key in data:
|
|
1093
|
+
values = data[key].data
|
|
1094
|
+
if (
|
|
1095
|
+
values is not None
|
|
1096
|
+
and not np.all(ma.getmaskarray(values))
|
|
1097
|
+
and np.any(values != 0)
|
|
1098
|
+
):
|
|
1099
|
+
value = ma.masked_where(values == 0, values)
|
|
1100
|
+
source = "GPS"
|
|
1101
|
+
# User-supplied site coordinate.
|
|
1102
|
+
if value is None and site_meta is not None and key in site_meta:
|
|
1103
|
+
value = np.array(float(site_meta[key]))
|
|
1104
|
+
source = "site coordinates"
|
|
1105
|
+
# From source data (CHM15k, CL61, MRR-PRO, Copernicus, Galileo...).
|
|
1106
|
+
# Assume value is manually set, so cannot trust it.
|
|
1107
|
+
if (
|
|
1108
|
+
value is None
|
|
1109
|
+
and isinstance(dataset, netCDF4.Dataset)
|
|
1110
|
+
and key in dataset.variables
|
|
1111
|
+
and not np.all(ma.getmaskarray(dataset[key][:]))
|
|
1112
|
+
):
|
|
1113
|
+
value = dataset[key][:]
|
|
1114
|
+
source = "raw file"
|
|
1115
|
+
# From source global attributes (MIRA).
|
|
1116
|
+
# Seems to be manually set, so cannot trust it.
|
|
1117
|
+
if (
|
|
1118
|
+
value is None
|
|
1119
|
+
and isinstance(dataset, netCDF4.Dataset)
|
|
1120
|
+
and hasattr(dataset, key.capitalize())
|
|
1121
|
+
):
|
|
1122
|
+
value = _parse_global_attribute_numeral(dataset, key.capitalize())
|
|
1123
|
+
source = "raw file"
|
|
1124
|
+
if value is not None:
|
|
1125
|
+
tmp_data[key] = value
|
|
1126
|
+
tmp_source[key] = source
|
|
1127
|
+
|
|
1128
|
+
if "latitude" in tmp_data and "longitude" in tmp_data:
|
|
1129
|
+
lat = np.atleast_1d(tmp_data["latitude"])
|
|
1130
|
+
lon = np.atleast_1d(tmp_data["longitude"])
|
|
1131
|
+
lon[lon > 180] - 360
|
|
1132
|
+
if _are_stationary(lat, lon):
|
|
1133
|
+
tmp_data["latitude"] = float(ma.mean(lat))
|
|
1134
|
+
tmp_data["longitude"] = float(ma.mean(lon))
|
|
1135
|
+
else:
|
|
1136
|
+
tmp_data["latitude"] = lat
|
|
1137
|
+
tmp_data["longitude"] = lon
|
|
1138
|
+
|
|
1139
|
+
if "altitude" in tmp_data:
|
|
1140
|
+
alt = np.atleast_1d(tmp_data["altitude"])
|
|
1141
|
+
if ma.max(alt) - ma.min(alt) < 100:
|
|
1142
|
+
tmp_data["altitude"] = float(ma.mean(alt))
|
|
1143
|
+
|
|
1144
|
+
for key in ("latitude", "longitude", "altitude"):
|
|
1145
|
+
if key in tmp_data:
|
|
1146
|
+
data[key] = CloudnetArray(
|
|
1147
|
+
tmp_data[key],
|
|
1148
|
+
key,
|
|
1149
|
+
source=tmp_source[key],
|
|
1150
|
+
dimensions=None if isinstance(tmp_data[key], float) else ("time",),
|
|
1151
|
+
)
|
|
1152
|
+
|
|
1153
|
+
|
|
1154
|
+
def _parse_global_attribute_numeral(dataset: netCDF4.Dataset, key: str) -> float | None:
|
|
1155
|
+
new_str = ""
|
|
1156
|
+
attr = getattr(dataset, key)
|
|
1157
|
+
if attr == "Unknown":
|
|
1158
|
+
return None
|
|
1159
|
+
for char in attr:
|
|
1160
|
+
if char.isdigit() or char == ".":
|
|
1161
|
+
new_str += char
|
|
1162
|
+
return float(new_str)
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
def _are_stationary(latitude: npt.NDArray, longitude: npt.NDArray) -> bool:
|
|
1166
|
+
min_lat, max_lat = np.min(latitude), np.max(latitude)
|
|
1167
|
+
min_lon, max_lon = np.min(longitude), np.max(longitude)
|
|
1168
|
+
lat_threshold = 0.01 # deg, around 1 km
|
|
1169
|
+
avg_lat = (min_lat + max_lat) / 2
|
|
1170
|
+
lon_threshold = lat_threshold / np.cos(np.radians(avg_lat))
|
|
1171
|
+
lat_diff = max_lat - min_lat
|
|
1172
|
+
lon_diff = max_lon - min_lon
|
|
1173
|
+
return lat_diff <= lat_threshold and lon_diff <= lon_threshold
|