cloudnetpy-1.49.9-py3-none-any.whl → cloudnetpy-1.87.3-py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- cloudnetpy/categorize/__init__.py +1 -2
- cloudnetpy/categorize/atmos_utils.py +297 -67
- cloudnetpy/categorize/attenuation.py +31 -0
- cloudnetpy/categorize/attenuations/__init__.py +37 -0
- cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
- cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
- cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
- cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
- cloudnetpy/categorize/categorize.py +332 -156
- cloudnetpy/categorize/classify.py +127 -125
- cloudnetpy/categorize/containers.py +107 -76
- cloudnetpy/categorize/disdrometer.py +40 -0
- cloudnetpy/categorize/droplet.py +23 -21
- cloudnetpy/categorize/falling.py +53 -24
- cloudnetpy/categorize/freezing.py +25 -12
- cloudnetpy/categorize/insects.py +35 -23
- cloudnetpy/categorize/itu.py +243 -0
- cloudnetpy/categorize/lidar.py +36 -41
- cloudnetpy/categorize/melting.py +34 -26
- cloudnetpy/categorize/model.py +84 -37
- cloudnetpy/categorize/mwr.py +18 -14
- cloudnetpy/categorize/radar.py +215 -102
- cloudnetpy/cli.py +578 -0
- cloudnetpy/cloudnetarray.py +43 -89
- cloudnetpy/concat_lib.py +218 -78
- cloudnetpy/constants.py +28 -10
- cloudnetpy/datasource.py +61 -86
- cloudnetpy/exceptions.py +49 -20
- cloudnetpy/instruments/__init__.py +5 -0
- cloudnetpy/instruments/basta.py +29 -12
- cloudnetpy/instruments/bowtie.py +135 -0
- cloudnetpy/instruments/ceilo.py +138 -115
- cloudnetpy/instruments/ceilometer.py +164 -80
- cloudnetpy/instruments/cl61d.py +21 -5
- cloudnetpy/instruments/cloudnet_instrument.py +74 -36
- cloudnetpy/instruments/copernicus.py +108 -30
- cloudnetpy/instruments/da10.py +54 -0
- cloudnetpy/instruments/disdrometer/common.py +126 -223
- cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
- cloudnetpy/instruments/disdrometer/thies.py +254 -87
- cloudnetpy/instruments/fd12p.py +201 -0
- cloudnetpy/instruments/galileo.py +65 -23
- cloudnetpy/instruments/hatpro.py +123 -49
- cloudnetpy/instruments/instruments.py +113 -1
- cloudnetpy/instruments/lufft.py +39 -17
- cloudnetpy/instruments/mira.py +268 -61
- cloudnetpy/instruments/mrr.py +187 -0
- cloudnetpy/instruments/nc_lidar.py +19 -8
- cloudnetpy/instruments/nc_radar.py +109 -55
- cloudnetpy/instruments/pollyxt.py +135 -51
- cloudnetpy/instruments/radiometrics.py +313 -59
- cloudnetpy/instruments/rain_e_h3.py +171 -0
- cloudnetpy/instruments/rpg.py +321 -189
- cloudnetpy/instruments/rpg_reader.py +74 -40
- cloudnetpy/instruments/toa5.py +49 -0
- cloudnetpy/instruments/vaisala.py +95 -343
- cloudnetpy/instruments/weather_station.py +774 -105
- cloudnetpy/metadata.py +90 -19
- cloudnetpy/model_evaluation/file_handler.py +55 -52
- cloudnetpy/model_evaluation/metadata.py +46 -20
- cloudnetpy/model_evaluation/model_metadata.py +1 -1
- cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
- cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
- cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
- cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
- cloudnetpy/model_evaluation/products/model_products.py +43 -35
- cloudnetpy/model_evaluation/products/observation_products.py +41 -35
- cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
- cloudnetpy/model_evaluation/products/tools.py +29 -20
- cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
- cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
- cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
- cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
- cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
- cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
- cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
- cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
- cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
- cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
- cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
- cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
- cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
- cloudnetpy/model_evaluation/utils.py +2 -1
- cloudnetpy/output.py +170 -111
- cloudnetpy/plotting/__init__.py +2 -1
- cloudnetpy/plotting/plot_meta.py +562 -822
- cloudnetpy/plotting/plotting.py +1142 -704
- cloudnetpy/products/__init__.py +1 -0
- cloudnetpy/products/classification.py +370 -88
- cloudnetpy/products/der.py +85 -55
- cloudnetpy/products/drizzle.py +77 -34
- cloudnetpy/products/drizzle_error.py +15 -11
- cloudnetpy/products/drizzle_tools.py +79 -59
- cloudnetpy/products/epsilon.py +211 -0
- cloudnetpy/products/ier.py +27 -50
- cloudnetpy/products/iwc.py +55 -48
- cloudnetpy/products/lwc.py +96 -70
- cloudnetpy/products/mwr_tools.py +186 -0
- cloudnetpy/products/product_tools.py +170 -128
- cloudnetpy/utils.py +455 -240
- cloudnetpy/version.py +2 -2
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
- cloudnetpy-1.87.3.dist-info/RECORD +127 -0
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
- cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
- docs/source/conf.py +2 -2
- cloudnetpy/categorize/atmos.py +0 -361
- cloudnetpy/products/mwr_multi.py +0 -68
- cloudnetpy/products/mwr_single.py +0 -75
- cloudnetpy-1.49.9.dist-info/RECORD +0 -112
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
- {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
|
@@ -1,34 +1,42 @@
|
|
|
1
|
+
import csv
|
|
1
2
|
import datetime
|
|
2
3
|
import logging
|
|
3
|
-
|
|
4
|
+
import re
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from collections.abc import Callable, Iterable, Iterator, Sequence
|
|
4
7
|
from itertools import islice
|
|
5
|
-
from
|
|
6
|
-
from typing import Any
|
|
8
|
+
from os import PathLike
|
|
9
|
+
from typing import Any
|
|
10
|
+
from uuid import UUID
|
|
7
11
|
|
|
8
12
|
import numpy as np
|
|
13
|
+
import numpy.typing as npt
|
|
14
|
+
from numpy import ma
|
|
9
15
|
|
|
10
16
|
from cloudnetpy import output
|
|
11
17
|
from cloudnetpy.cloudnetarray import CloudnetArray
|
|
18
|
+
from cloudnetpy.constants import MM_TO_M, SEC_IN_HOUR
|
|
12
19
|
from cloudnetpy.exceptions import DisdrometerDataError
|
|
13
20
|
from cloudnetpy.instruments import instruments
|
|
14
|
-
from cloudnetpy.
|
|
21
|
+
from cloudnetpy.utils import get_uuid
|
|
15
22
|
|
|
16
23
|
from .common import ATTRIBUTES, Disdrometer
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
def parsivel2nc(
|
|
20
|
-
disdrometer_file:
|
|
21
|
-
output_file: str,
|
|
27
|
+
disdrometer_file: str | PathLike | Iterable[str | PathLike],
|
|
28
|
+
output_file: str | PathLike,
|
|
22
29
|
site_meta: dict,
|
|
23
|
-
uuid: str | None = None,
|
|
30
|
+
uuid: str | UUID | None = None,
|
|
24
31
|
date: str | datetime.date | None = None,
|
|
25
32
|
telegram: Sequence[int | None] | None = None,
|
|
26
|
-
|
|
33
|
+
timestamps: Sequence[datetime.datetime] | None = None,
|
|
34
|
+
) -> UUID:
|
|
27
35
|
"""Converts OTT Parsivel-2 disdrometer data into Cloudnet Level 1b netCDF
|
|
28
36
|
file.
|
|
29
37
|
|
|
30
38
|
Args:
|
|
31
|
-
disdrometer_file: Filename of disdrometer
|
|
39
|
+
disdrometer_file: Filename of disdrometer file or list of filenames.
|
|
32
40
|
output_file: Output filename.
|
|
33
41
|
site_meta: Dictionary containing information about the site. Required key
|
|
34
42
|
is `name`.
|
|
@@ -38,6 +46,7 @@ def parsivel2nc(
|
|
|
38
46
|
the instrument's operating instructions. Unknown values are indicated
|
|
39
47
|
with None. Telegram is required if the input file doesn't contain a
|
|
40
48
|
header.
|
|
49
|
+
timestamps: Specify list of timestamps if they are missing in the input file.
|
|
41
50
|
|
|
42
51
|
Returns:
|
|
43
52
|
UUID of the generated file.
|
|
@@ -55,28 +64,36 @@ def parsivel2nc(
|
|
|
55
64
|
"""
|
|
56
65
|
if isinstance(date, str):
|
|
57
66
|
date = datetime.date.fromisoformat(date)
|
|
58
|
-
|
|
67
|
+
uuid = get_uuid(uuid)
|
|
68
|
+
if isinstance(disdrometer_file, str | PathLike):
|
|
69
|
+
disdrometer_file = [disdrometer_file]
|
|
70
|
+
disdrometer = Parsivel(disdrometer_file, site_meta, telegram, date, timestamps)
|
|
59
71
|
disdrometer.sort_timestamps()
|
|
60
72
|
disdrometer.remove_duplicate_timestamps()
|
|
73
|
+
disdrometer.mask_invalid_values()
|
|
74
|
+
if len(disdrometer.data["time"].data) < 2:
|
|
75
|
+
msg = "Too few data points"
|
|
76
|
+
raise DisdrometerDataError(msg)
|
|
61
77
|
disdrometer.convert_units()
|
|
62
78
|
disdrometer.add_meta()
|
|
63
79
|
attributes = output.add_time_attribute(ATTRIBUTES, disdrometer.date)
|
|
64
80
|
output.update_attributes(disdrometer.data, attributes)
|
|
65
|
-
|
|
81
|
+
output.save_level1b(disdrometer, output_file, uuid)
|
|
66
82
|
return uuid
|
|
67
83
|
|
|
68
84
|
|
|
69
|
-
class Parsivel(
|
|
85
|
+
class Parsivel(Disdrometer):
|
|
70
86
|
def __init__(
|
|
71
87
|
self,
|
|
72
|
-
|
|
88
|
+
filenames: Iterable[str | PathLike],
|
|
73
89
|
site_meta: dict,
|
|
74
90
|
telegram: Sequence[int | None] | None = None,
|
|
75
91
|
expected_date: datetime.date | None = None,
|
|
76
|
-
|
|
92
|
+
timestamps: Sequence[datetime.datetime] | None = None,
|
|
93
|
+
) -> None:
|
|
77
94
|
super().__init__()
|
|
78
95
|
self.site_meta = site_meta
|
|
79
|
-
self.raw_data = _read_parsivel(
|
|
96
|
+
self.raw_data = _read_parsivel(filenames, telegram, timestamps)
|
|
80
97
|
self._screen_time(expected_date)
|
|
81
98
|
self.n_velocity = 32
|
|
82
99
|
self.n_diameter = 32
|
|
@@ -87,89 +104,74 @@ class Parsivel(CloudnetInstrument):
|
|
|
87
104
|
self._create_velocity_vectors()
|
|
88
105
|
self._create_diameter_vectors()
|
|
89
106
|
|
|
90
|
-
def _screen_time(self, expected_date: datetime.date | None = None):
|
|
107
|
+
def _screen_time(self, expected_date: datetime.date | None = None) -> None:
|
|
91
108
|
if expected_date is None:
|
|
92
109
|
self.date = self.raw_data["time"][0].astype(object).date()
|
|
93
110
|
return
|
|
94
111
|
self.date = expected_date
|
|
95
112
|
valid_mask = self.raw_data["time"].astype("datetime64[D]") == self.date
|
|
96
113
|
if np.count_nonzero(valid_mask) == 0:
|
|
97
|
-
|
|
114
|
+
msg = f"No data found on {expected_date}"
|
|
115
|
+
raise DisdrometerDataError(msg)
|
|
98
116
|
for key in self.raw_data:
|
|
99
117
|
self.raw_data[key] = self.raw_data[key][valid_mask]
|
|
100
118
|
|
|
101
|
-
def _append_data(self):
|
|
119
|
+
def _append_data(self) -> None:
|
|
102
120
|
for key, values in self.raw_data.items():
|
|
103
121
|
if key.startswith("_"):
|
|
104
122
|
continue
|
|
123
|
+
name = key
|
|
124
|
+
values_out = values
|
|
105
125
|
match key:
|
|
106
126
|
case "spectrum":
|
|
107
|
-
|
|
127
|
+
name = "data_raw"
|
|
108
128
|
dimensions = ["time", "diameter", "velocity"]
|
|
109
129
|
case "number_concentration" | "fall_velocity":
|
|
110
130
|
dimensions = ["time", "diameter"]
|
|
111
131
|
case "time":
|
|
112
132
|
dimensions = []
|
|
113
133
|
base = values[0].astype("datetime64[D]")
|
|
114
|
-
|
|
134
|
+
values_out = (values - base) / np.timedelta64(1, "h")
|
|
115
135
|
case _:
|
|
116
136
|
dimensions = ["time"]
|
|
117
|
-
self.data[
|
|
137
|
+
self.data[name] = CloudnetArray(values_out, name, dimensions=dimensions)
|
|
118
138
|
if "_sensor_id" in self.raw_data:
|
|
119
139
|
first_id = self.raw_data["_sensor_id"][0]
|
|
120
140
|
for sensor_id in self.raw_data["_sensor_id"]:
|
|
121
141
|
if sensor_id != first_id:
|
|
122
|
-
|
|
142
|
+
msg = "Multiple sensor IDs are not supported"
|
|
143
|
+
raise DisdrometerDataError(msg)
|
|
123
144
|
self.serial_number = first_id
|
|
124
145
|
|
|
125
|
-
def _create_velocity_vectors(self):
|
|
146
|
+
def _create_velocity_vectors(self) -> None:
|
|
126
147
|
n_values = [10, 5, 5, 5, 5, 2]
|
|
127
148
|
spreads = [0.1, 0.2, 0.4, 0.8, 1.6, 3.2]
|
|
128
|
-
|
|
149
|
+
self.store_vectors(n_values, spreads, "velocity")
|
|
129
150
|
|
|
130
|
-
def _create_diameter_vectors(self):
|
|
151
|
+
def _create_diameter_vectors(self) -> None:
|
|
131
152
|
n_values = [10, 5, 5, 5, 5, 2]
|
|
132
153
|
spreads = [0.125, 0.25, 0.5, 1, 2, 3]
|
|
133
|
-
|
|
154
|
+
self.store_vectors(n_values, spreads, "diameter")
|
|
155
|
+
|
|
156
|
+
def mask_invalid_values(self) -> None:
|
|
157
|
+
if variable := self.data.get("number_concentration"):
|
|
158
|
+
variable.data = ma.masked_where(variable.data == -9.999, variable.data)
|
|
159
|
+
if variable := self.data.get("fall_velocity"):
|
|
160
|
+
variable.data = ma.masked_where(variable.data == 0, variable.data)
|
|
134
161
|
|
|
135
|
-
def convert_units(self):
|
|
136
|
-
|
|
137
|
-
mmh_to_ms = 3600 * mm_to_m
|
|
162
|
+
def convert_units(self) -> None:
|
|
163
|
+
mmh_to_ms = SEC_IN_HOUR / MM_TO_M
|
|
138
164
|
c_to_k = 273.15
|
|
139
165
|
self._convert_data(("rainfall_rate",), mmh_to_ms)
|
|
140
166
|
self._convert_data(("snowfall_rate",), mmh_to_ms)
|
|
141
|
-
self._convert_data(("diameter", "diameter_spread", "diameter_bnds"),
|
|
167
|
+
self._convert_data(("diameter", "diameter_spread", "diameter_bnds"), 1e3)
|
|
142
168
|
self._convert_data(("V_sensor_supply",), 10)
|
|
143
169
|
self._convert_data(("T_sensor",), c_to_k, method="add")
|
|
170
|
+
if variable := self.data.get("number_concentration"):
|
|
171
|
+
variable.data = np.power(10, variable.data).round().astype(np.uint32)
|
|
144
172
|
|
|
145
|
-
def add_meta(self):
|
|
146
|
-
valid_keys = ("latitude", "longitude", "altitude")
|
|
147
|
-
for key, value in self.site_meta.items():
|
|
148
|
-
key = key.lower()
|
|
149
|
-
if key in valid_keys:
|
|
150
|
-
self.data[key] = CloudnetArray(float(value), key)
|
|
151
173
|
|
|
152
|
-
|
|
153
|
-
self,
|
|
154
|
-
keys: tuple[str, ...],
|
|
155
|
-
value: float,
|
|
156
|
-
method: Literal["divide", "add"] = "divide",
|
|
157
|
-
):
|
|
158
|
-
for key in keys:
|
|
159
|
-
if key not in self.data:
|
|
160
|
-
continue
|
|
161
|
-
variable = self.data[key]
|
|
162
|
-
if method == "divide":
|
|
163
|
-
variable.data = variable.data.astype("f4") / value
|
|
164
|
-
variable.data_type = "f4"
|
|
165
|
-
elif method == "add":
|
|
166
|
-
variable.data = variable.data.astype("f4") + value
|
|
167
|
-
variable.data_type = "f4"
|
|
168
|
-
else:
|
|
169
|
-
raise ValueError
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
HEADERS = {
|
|
174
|
+
CSV_HEADERS = {
|
|
173
175
|
"Date": "_date",
|
|
174
176
|
"Time": "_time",
|
|
175
177
|
"Intensity of precipitation (mm/h)": "rainfall_rate",
|
|
@@ -190,6 +192,86 @@ HEADERS = {
|
|
|
190
192
|
"Spectrum": "spectrum",
|
|
191
193
|
}
|
|
192
194
|
|
|
195
|
+
TOA5_HEADERS = {
|
|
196
|
+
"RECORD": "_record",
|
|
197
|
+
"TIMESTAMP": "_datetime",
|
|
198
|
+
"datetime_utc": "_datetime",
|
|
199
|
+
"rainIntensity": "rainfall_rate",
|
|
200
|
+
"rain_intensity": "rainfall_rate",
|
|
201
|
+
"rain rate [mm/h]": "rainfall_rate",
|
|
202
|
+
"snowIntensity": "snowfall_rate",
|
|
203
|
+
"snow_intensity": "snowfall_rate",
|
|
204
|
+
"accPrec": "_rain_accum",
|
|
205
|
+
"precipitation": "_rain_accum",
|
|
206
|
+
"rain accum [mm]": "_rain_accum",
|
|
207
|
+
"weatherCodeWaWa": "synop_WaWa",
|
|
208
|
+
"wawa": "synop_WaWa",
|
|
209
|
+
"weather_code_wawa": "synop_WaWa",
|
|
210
|
+
"radarReflectivity": "radar_reflectivity",
|
|
211
|
+
"radar_reflectivity": "radar_reflectivity",
|
|
212
|
+
"Z [dBz]": "radar_reflectivity",
|
|
213
|
+
"morVisibility": "visibility",
|
|
214
|
+
"mor_visibility": "visibility",
|
|
215
|
+
"MOR visibility [m]": "visibility",
|
|
216
|
+
"kineticEnergy": "kinetic_energy",
|
|
217
|
+
"kinetic_energy": "kinetic_energy",
|
|
218
|
+
"signalAmplitude": "sig_laser",
|
|
219
|
+
"signal_amplitude": "sig_laser",
|
|
220
|
+
"Signal amplitude": "sig_laser",
|
|
221
|
+
"sensorTemperature": "T_sensor",
|
|
222
|
+
"sensor_temperature": "T_sensor",
|
|
223
|
+
"Temperature sensor [°C]": "T_sensor",
|
|
224
|
+
"pbcTemperature": "_T_pcb",
|
|
225
|
+
"pbc_temperature": "_T_pcb",
|
|
226
|
+
"rightTemperature": "_T_right",
|
|
227
|
+
"right_temperature": "_T_right",
|
|
228
|
+
"leftTemperature": "_T_left",
|
|
229
|
+
"left_temperature": "_T_left",
|
|
230
|
+
"heatingCurrent": "I_heating",
|
|
231
|
+
"heating_current": "I_heating",
|
|
232
|
+
"sensorVoltage": "V_power_supply",
|
|
233
|
+
"sensor_voltage": "V_power_supply",
|
|
234
|
+
"Power supply voltage in the sensor [V]": "V_power_supply",
|
|
235
|
+
"sensorStatus": "state_sensor",
|
|
236
|
+
"sensor_status": "state_sensor",
|
|
237
|
+
"Sensor status": "state_sensor",
|
|
238
|
+
"errorCode": "error_code",
|
|
239
|
+
"error_code": "error_code",
|
|
240
|
+
"Error code": "error_code",
|
|
241
|
+
"numberParticles": "n_particles",
|
|
242
|
+
"number_particles": "n_particles",
|
|
243
|
+
"Number of detected particles": "n_particles",
|
|
244
|
+
"N": "number_concentration",
|
|
245
|
+
"V": "fall_velocity",
|
|
246
|
+
"spectrum": "spectrum",
|
|
247
|
+
"Current heating system [A]": "I_heating",
|
|
248
|
+
"sample interval [s]": "interval",
|
|
249
|
+
"Serial number": "_sensor_id",
|
|
250
|
+
"IOP firmware version": "_iop_firmware_version",
|
|
251
|
+
"Station name": "_station_name",
|
|
252
|
+
"Rain amount absolute [mm]": "_rain_amount_absolute",
|
|
253
|
+
# Kenttärova
|
|
254
|
+
"wawa [ww]": "synop_WW",
|
|
255
|
+
"wawa [METAR]": "_metar_speci",
|
|
256
|
+
"wawa [NWS]": "_nws",
|
|
257
|
+
"DSP firmware version": "_dsp_firmware_version",
|
|
258
|
+
"Start of measurement [DD.MM.YY_HH:MM:SS]": "_datetime_skip",
|
|
259
|
+
"Sensor time [HH:MM:SS]": "_time_skip",
|
|
260
|
+
"Sensor date [DD.MM.YY]": "_date_skip",
|
|
261
|
+
"Station number": "_station_number",
|
|
262
|
+
"Temperature PCB [°C]": "_T_pcb",
|
|
263
|
+
"Temperature right sensor head [°C]": "_T_right",
|
|
264
|
+
"Temperature left sensor head [°C]": "_T_left",
|
|
265
|
+
"Rain intensity 16 bit low [mm/h]": "_rainfall_rate_16_bit_low",
|
|
266
|
+
"Rain intensity 16 bit high [mm/h]": "_rainfall_rate_16_bit_high",
|
|
267
|
+
"Rain accumulated 16 bit [mm]": "_rain_accum_16_bit",
|
|
268
|
+
"Reflectivity 16 bit [dBZ]": "_radar_reflectivity_16_bit",
|
|
269
|
+
"Kinetic energy [J m-2 h-1)]": "kinetic_energy",
|
|
270
|
+
"Snow depth intensity (vol equiv.) [mm/h]": "snowfall_rate",
|
|
271
|
+
"Number of particles": "n_particles",
|
|
272
|
+
"Particle list (empty, see particle file)": "_particles",
|
|
273
|
+
}
|
|
274
|
+
|
|
193
275
|
TELEGRAM = {
|
|
194
276
|
1: "rainfall_rate",
|
|
195
277
|
2: "_rain_accum",
|
|
@@ -249,9 +331,11 @@ def _parse_date(tokens: Iterator[str]) -> datetime.date:
|
|
|
249
331
|
elif "." in token:
|
|
250
332
|
day, month, year = token.split(".")
|
|
251
333
|
else:
|
|
252
|
-
|
|
334
|
+
msg = f"Unsupported date: '{input}'"
|
|
335
|
+
raise ValueError(msg)
|
|
253
336
|
if len(year) != 4:
|
|
254
|
-
|
|
337
|
+
msg = f"Unsupported date: '{input}'"
|
|
338
|
+
raise ValueError(msg)
|
|
255
339
|
return datetime.date(int(year), int(month), int(day))
|
|
256
340
|
|
|
257
341
|
|
|
@@ -269,14 +353,21 @@ def _parse_datetime(tokens: Iterator[str]) -> datetime.datetime:
|
|
|
269
353
|
hour = int(token[8:10])
|
|
270
354
|
minute = int(token[10:12])
|
|
271
355
|
second = int(token[12:14])
|
|
272
|
-
return datetime.datetime(
|
|
356
|
+
return datetime.datetime(
|
|
357
|
+
year,
|
|
358
|
+
month,
|
|
359
|
+
day,
|
|
360
|
+
hour,
|
|
361
|
+
minute,
|
|
362
|
+
second,
|
|
363
|
+
)
|
|
273
364
|
|
|
274
365
|
|
|
275
|
-
def _parse_vector(tokens: Iterator[str]) ->
|
|
366
|
+
def _parse_vector(tokens: Iterator[str]) -> npt.NDArray:
|
|
276
367
|
return np.array([_parse_float(tokens) for _i in range(32)])
|
|
277
368
|
|
|
278
369
|
|
|
279
|
-
def _parse_spectrum(tokens: Iterator[str]) ->
|
|
370
|
+
def _parse_spectrum(tokens: Iterator[str]) -> npt.NDArray:
|
|
280
371
|
first = next(tokens)
|
|
281
372
|
if first == "<SPECTRUM>ZERO</SPECTRUM>":
|
|
282
373
|
return np.zeros((32, 32), dtype="i2")
|
|
@@ -284,17 +375,24 @@ def _parse_spectrum(tokens: Iterator[str]) -> np.ndarray:
|
|
|
284
375
|
raw = [first.removeprefix("<SPECTRUM>")]
|
|
285
376
|
raw.extend(islice(tokens, 1023))
|
|
286
377
|
if next(tokens) != "</SPECTRUM>":
|
|
287
|
-
|
|
378
|
+
msg = "Invalid spectrum format"
|
|
379
|
+
raise ValueError(msg)
|
|
288
380
|
values = [int(x) if x != "" else 0 for x in raw]
|
|
381
|
+
elif "/" in first:
|
|
382
|
+
values = [int(x) for x in first.removesuffix("/R").split("/")]
|
|
289
383
|
else:
|
|
290
384
|
values = [int(first)]
|
|
291
385
|
values.extend(int(x) for x in islice(tokens, 1023))
|
|
292
386
|
if len(values) != 1024:
|
|
293
|
-
|
|
387
|
+
msg = f"Invalid spectrum length: {len(values)}"
|
|
388
|
+
raise ValueError(msg)
|
|
294
389
|
return np.array(values, dtype="i2").reshape((32, 32))
|
|
295
390
|
|
|
296
391
|
|
|
297
|
-
|
|
392
|
+
ParserType = Callable[[Iterator[str]], Any]
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
PARSERS: dict[str, ParserType] = {
|
|
298
396
|
"I_heating": _parse_float,
|
|
299
397
|
"T_sensor": _parse_int,
|
|
300
398
|
"_T_pcb": _parse_int,
|
|
@@ -323,9 +421,19 @@ PARSERS: dict[str, Callable[[Iterator[str]], Any]] = {
|
|
|
323
421
|
"visibility": _parse_int,
|
|
324
422
|
}
|
|
325
423
|
|
|
424
|
+
EMPTY_VALUES: dict[ParserType, Any] = {
|
|
425
|
+
_parse_int: 0,
|
|
426
|
+
_parse_float: 0.0,
|
|
427
|
+
_parse_date: datetime.date(2000, 1, 1),
|
|
428
|
+
_parse_time: datetime.time(12, 0, 0),
|
|
429
|
+
_parse_datetime: datetime.datetime(2000, 1, 1),
|
|
430
|
+
_parse_vector: np.zeros(32, dtype=float),
|
|
431
|
+
_parse_spectrum: np.zeros((32, 32), dtype="i2"),
|
|
432
|
+
}
|
|
433
|
+
|
|
326
434
|
|
|
327
435
|
def _parse_headers(line: str) -> list[str]:
|
|
328
|
-
return [
|
|
436
|
+
return [CSV_HEADERS[header.strip()] for header in line.split(";")]
|
|
329
437
|
|
|
330
438
|
|
|
331
439
|
def _parse_telegram(telegram: Sequence[int | None]) -> list[str]:
|
|
@@ -342,43 +450,294 @@ def _read_rows(headers: list[str], rows: list[str]) -> dict[str, list]:
|
|
|
342
450
|
if row == "":
|
|
343
451
|
continue
|
|
344
452
|
try:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
unread_tokens = list(tokens)
|
|
348
|
-
if unread_tokens:
|
|
349
|
-
raise ValueError("More values than expected")
|
|
350
|
-
for header, value in zip(headers, parsed):
|
|
453
|
+
parsed = _parse_row(row, headers)
|
|
454
|
+
for header, value in zip(headers, parsed, strict=True):
|
|
351
455
|
result[header].append(value)
|
|
352
456
|
except (ValueError, StopIteration):
|
|
353
457
|
invalid_rows += 1
|
|
354
458
|
continue
|
|
355
459
|
if invalid_rows == len(rows):
|
|
356
|
-
|
|
460
|
+
msg = "No valid data in file"
|
|
461
|
+
raise DisdrometerDataError(msg)
|
|
357
462
|
if invalid_rows > 0:
|
|
358
|
-
logging.info(
|
|
463
|
+
logging.info("Skipped %s invalid rows", invalid_rows)
|
|
359
464
|
return result
|
|
360
465
|
|
|
361
466
|
|
|
362
|
-
def
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
467
|
+
def _parse_row(row_in: str, headers: list[str]) -> list:
|
|
468
|
+
tokens = iter(row_in.removesuffix(";").split(";"))
|
|
469
|
+
parsed = [PARSERS.get(header, next)(tokens) for header in headers]
|
|
470
|
+
if unread_tokens := list(tokens):
|
|
471
|
+
msg = f"Unused tokens: {unread_tokens}"
|
|
472
|
+
raise ValueError(msg)
|
|
473
|
+
return parsed
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _read_toa5(filename: str | PathLike) -> dict[str, list]:
|
|
477
|
+
"""Read ASCII data from Campbell Scientific datalogger such as CR1000.
|
|
478
|
+
|
|
479
|
+
References:
|
|
480
|
+
CR1000 Measurement and Control System.
|
|
481
|
+
https://s.campbellsci.com/documents/us/manuals/cr1000.pdf
|
|
482
|
+
"""
|
|
483
|
+
with open(filename, errors="ignore") as file:
|
|
484
|
+
reader = csv.reader(file)
|
|
485
|
+
_origin_line = next(reader)
|
|
486
|
+
header_line = next(reader)
|
|
487
|
+
headers = [
|
|
488
|
+
TOA5_HEADERS.get(re.sub(r"\(.*", "", field)) for field in header_line
|
|
489
|
+
]
|
|
490
|
+
if unknown_headers := [
|
|
491
|
+
header_line[i] for i in range(len(header_line)) if headers[i] is None
|
|
492
|
+
]:
|
|
493
|
+
msg = "Unknown headers: " + ", ".join(unknown_headers)
|
|
494
|
+
logging.warning(msg)
|
|
495
|
+
_units_line = next(reader)
|
|
496
|
+
_process_line = next(reader)
|
|
497
|
+
data: dict[str, list] = {header: [] for header in headers if header is not None}
|
|
498
|
+
n_rows = 0
|
|
499
|
+
n_invalid_rows = 0
|
|
500
|
+
for data_line in reader:
|
|
501
|
+
n_rows += 1
|
|
502
|
+
scalars: dict[str, datetime.datetime | int | float | str] = {}
|
|
503
|
+
arrays: dict[str, list] = {
|
|
504
|
+
"number_concentration": [],
|
|
505
|
+
"fall_velocity": [],
|
|
506
|
+
"spectrum": [],
|
|
507
|
+
}
|
|
508
|
+
try:
|
|
509
|
+
for header, value in zip(headers, data_line, strict=True):
|
|
510
|
+
if header is None:
|
|
511
|
+
continue
|
|
512
|
+
if header == "_datetime":
|
|
513
|
+
scalars[header] = datetime.datetime.strptime(
|
|
514
|
+
value,
|
|
515
|
+
"%Y-%m-%d %H:%M:%S",
|
|
516
|
+
)
|
|
517
|
+
elif header in ("number_concentration", "fall_velocity"):
|
|
518
|
+
arrays[header].append(float(value))
|
|
519
|
+
elif header == "spectrum":
|
|
520
|
+
arrays[header].append(int(value))
|
|
521
|
+
elif PARSERS.get(header) == _parse_int:
|
|
522
|
+
scalars[header] = int(value)
|
|
523
|
+
elif PARSERS.get(header) == _parse_float:
|
|
524
|
+
scalars[header] = float(value)
|
|
525
|
+
else:
|
|
526
|
+
scalars[header] = value
|
|
527
|
+
except ValueError:
|
|
528
|
+
n_invalid_rows += 1
|
|
529
|
+
continue
|
|
530
|
+
for header, scalar in scalars.items():
|
|
531
|
+
data[header].append(scalar)
|
|
532
|
+
if "spectrum" in headers:
|
|
533
|
+
data["spectrum"].append(
|
|
534
|
+
np.array(arrays["spectrum"], dtype="i2").reshape((32, 32)),
|
|
535
|
+
)
|
|
536
|
+
if "number_concentration" in headers:
|
|
537
|
+
data["number_concentration"].append(arrays["number_concentration"])
|
|
538
|
+
if "fall_velocity" in headers:
|
|
539
|
+
data["fall_velocity"].append(arrays["fall_velocity"])
|
|
540
|
+
if n_invalid_rows == n_rows:
|
|
541
|
+
msg = "No valid data in file"
|
|
542
|
+
raise DisdrometerDataError(msg)
|
|
543
|
+
if n_invalid_rows > 0:
|
|
544
|
+
logging.info("Skipped %s invalid rows", n_invalid_rows)
|
|
545
|
+
return data
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _read_pyatmoslogger_file(filename: str | PathLike) -> dict[str, list]:
|
|
549
|
+
"""Read CSV file from pyAtmosLogger.
|
|
550
|
+
|
|
551
|
+
References:
|
|
552
|
+
https://pypi.org/project/pyAtmosLogger/
|
|
553
|
+
"""
|
|
554
|
+
with open(filename, errors="ignore") as file:
|
|
555
|
+
lines = file.readlines()
|
|
556
|
+
header_line = lines[0].strip().strip(";").split(";")
|
|
557
|
+
headers = [
|
|
558
|
+
TOA5_HEADERS.get(
|
|
559
|
+
re.sub(
|
|
560
|
+
r"N[0-9][0-9]",
|
|
561
|
+
"N",
|
|
562
|
+
re.sub(r"v[0-9][0-9]", "V", re.sub(r"M\_.*", "spectrum", field)),
|
|
563
|
+
),
|
|
564
|
+
)
|
|
565
|
+
for field in header_line
|
|
381
566
|
]
|
|
382
|
-
|
|
567
|
+
if unknown_headers := [
|
|
568
|
+
header_line[i] for i in range(len(header_line)) if headers[i] is None
|
|
569
|
+
]:
|
|
570
|
+
msg = "Unknown headers: " + ", ".join(unknown_headers)
|
|
571
|
+
logging.warning(msg)
|
|
572
|
+
|
|
573
|
+
data: dict[str, list] = {header: [] for header in headers if header is not None}
|
|
574
|
+
n_rows = 0
|
|
575
|
+
n_invalid_rows = 0
|
|
576
|
+
for data_line in lines[1:]:
|
|
577
|
+
data_line_splat = data_line.strip().strip(";").split(";")
|
|
578
|
+
n_rows += 1
|
|
579
|
+
scalars: dict[str, datetime.datetime | int | float | str] = {}
|
|
580
|
+
arrays: dict[str, list] = {
|
|
581
|
+
"number_concentration": [],
|
|
582
|
+
"fall_velocity": [],
|
|
583
|
+
"spectrum": [],
|
|
584
|
+
}
|
|
585
|
+
try:
|
|
586
|
+
for header, value in zip(headers, data_line_splat, strict=True):
|
|
587
|
+
if header is None:
|
|
588
|
+
continue
|
|
589
|
+
if header == "_datetime":
|
|
590
|
+
scalars[header] = datetime.datetime.strptime(
|
|
591
|
+
value,
|
|
592
|
+
"%Y-%m-%d %H:%M:%S",
|
|
593
|
+
)
|
|
594
|
+
elif header in ("number_concentration", "fall_velocity"):
|
|
595
|
+
arrays[header].append(float(value))
|
|
596
|
+
elif header == "spectrum":
|
|
597
|
+
arrays[header].append(int(value))
|
|
598
|
+
elif PARSERS.get(header) == _parse_int:
|
|
599
|
+
scalars[header] = int(value)
|
|
600
|
+
elif PARSERS.get(header) == _parse_float:
|
|
601
|
+
scalars[header] = float(value)
|
|
602
|
+
else:
|
|
603
|
+
scalars[header] = value
|
|
604
|
+
except ValueError:
|
|
605
|
+
n_invalid_rows += 1
|
|
606
|
+
continue
|
|
607
|
+
for header, scalar in scalars.items():
|
|
608
|
+
data[header].append(scalar)
|
|
609
|
+
if "spectrum" in headers:
|
|
610
|
+
data["spectrum"].append(
|
|
611
|
+
np.array(arrays["spectrum"], dtype="i2").reshape((32, 32)),
|
|
612
|
+
)
|
|
613
|
+
if "number_concentration" in headers:
|
|
614
|
+
data["number_concentration"].append(arrays["number_concentration"])
|
|
615
|
+
if "fall_velocity" in headers:
|
|
616
|
+
data["fall_velocity"].append(arrays["fall_velocity"])
|
|
617
|
+
if n_invalid_rows == n_rows:
|
|
618
|
+
msg = "No valid data in file"
|
|
619
|
+
raise DisdrometerDataError(msg)
|
|
620
|
+
if n_invalid_rows > 0:
|
|
621
|
+
logging.info("Skipped %s invalid rows", n_invalid_rows)
|
|
622
|
+
return data
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def _read_typ_op4a(lines: list[str]) -> dict[str, Any]:
|
|
626
|
+
"""Read output of "CS/PA" command. The output starts with line "TYP OP4A"
|
|
627
|
+
followed by one line per measured variable in format: <number>:<value>.
|
|
628
|
+
Output ends with characters: <ETX><CR><LF><NUL>. Lines are separated by
|
|
629
|
+
<CR><LF>.
|
|
630
|
+
"""
|
|
631
|
+
data = {}
|
|
632
|
+
for line in lines:
|
|
633
|
+
if ":" not in line:
|
|
634
|
+
continue
|
|
635
|
+
key, value = line.strip().split(":", maxsplit=1)
|
|
636
|
+
# Skip datetime and 16-bit values.
|
|
637
|
+
if key in ("19", "30", "31", "32", "33"):
|
|
638
|
+
continue
|
|
639
|
+
varname = TELEGRAM.get(int(key))
|
|
640
|
+
if varname is None:
|
|
641
|
+
continue
|
|
642
|
+
parser = PARSERS.get(varname, next)
|
|
643
|
+
tokens = value.split(";")
|
|
644
|
+
data[varname] = parser(iter(tokens))
|
|
645
|
+
return data
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def _read_fmi(content: str) -> dict[str, list]:
|
|
649
|
+
r"""Read format used by Finnish Meteorological Institute and University of
|
|
650
|
+
Helsinki.
|
|
651
|
+
|
|
652
|
+
Format consists of sequence of the following:
|
|
653
|
+
- "[YYYY-MM-DD HH:MM:SS\n"
|
|
654
|
+
- output of "CS/PA" command without non-printable characters at the end
|
|
655
|
+
- "]\n"
|
|
656
|
+
"""
|
|
657
|
+
output: dict[str, list] = {"_datetime": []}
|
|
658
|
+
for m in re.finditer(
|
|
659
|
+
r"\[(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) "
|
|
660
|
+
r"(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)"
|
|
661
|
+
r"(?P<output>[^\]]*)\]",
|
|
662
|
+
content,
|
|
663
|
+
):
|
|
664
|
+
try:
|
|
665
|
+
record = _read_typ_op4a(m["output"].splitlines())
|
|
666
|
+
except ValueError:
|
|
667
|
+
continue
|
|
668
|
+
|
|
669
|
+
for key, value in record.items():
|
|
670
|
+
if key not in output:
|
|
671
|
+
output[key] = [None] * len(output["_datetime"])
|
|
672
|
+
output[key].append(value)
|
|
673
|
+
for key in output:
|
|
674
|
+
if key not in record and key != "_datetime":
|
|
675
|
+
output[key].append(None)
|
|
676
|
+
|
|
677
|
+
output["_datetime"].append(
|
|
678
|
+
datetime.datetime(
|
|
679
|
+
int(m["year"]),
|
|
680
|
+
int(m["month"]),
|
|
681
|
+
int(m["day"]),
|
|
682
|
+
int(m["hour"]),
|
|
683
|
+
int(m["minute"]),
|
|
684
|
+
int(m["second"]),
|
|
685
|
+
)
|
|
686
|
+
)
|
|
687
|
+
return output
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def _read_parsivel(
|
|
691
|
+
filenames: Iterable[str | PathLike],
|
|
692
|
+
telegram: Sequence[int | None] | None = None,
|
|
693
|
+
timestamps: Sequence[datetime.datetime] | None = None,
|
|
694
|
+
) -> dict[str, npt.NDArray]:
|
|
695
|
+
combined_data = defaultdict(list)
|
|
696
|
+
for filename in filenames:
|
|
697
|
+
with open(filename, encoding="latin1", errors="ignore") as file:
|
|
698
|
+
content = file.read()
|
|
699
|
+
lines = content.splitlines()
|
|
700
|
+
if not lines:
|
|
701
|
+
msg = f"File '{filename}' is empty"
|
|
702
|
+
raise DisdrometerDataError(msg)
|
|
703
|
+
if "TOA5" in lines[0]:
|
|
704
|
+
data = _read_toa5(filename)
|
|
705
|
+
elif "N00" in lines[0]:
|
|
706
|
+
data = _read_pyatmoslogger_file(filename)
|
|
707
|
+
elif "TYP OP4A" in lines[0]:
|
|
708
|
+
data = _read_typ_op4a(lines)
|
|
709
|
+
data = {key: [value] for key, value in data.items()}
|
|
710
|
+
elif "Date" in lines[0]:
|
|
711
|
+
headers = _parse_headers(lines[0])
|
|
712
|
+
data = _read_rows(headers, lines[1:])
|
|
713
|
+
elif "[" in lines[0]:
|
|
714
|
+
data = _read_fmi(content)
|
|
715
|
+
elif telegram is not None:
|
|
716
|
+
headers = _parse_telegram(telegram)
|
|
717
|
+
data = _read_rows(headers, lines)
|
|
718
|
+
else:
|
|
719
|
+
msg = "telegram must be specified for files without header"
|
|
720
|
+
raise ValueError(msg)
|
|
721
|
+
if "_datetime" not in data and timestamps is None:
|
|
722
|
+
data["_datetime"] = [
|
|
723
|
+
datetime.datetime.combine(date, time)
|
|
724
|
+
for date, time in zip(data["_date"], data["_time"], strict=True)
|
|
725
|
+
]
|
|
726
|
+
for key, values in data.items():
|
|
727
|
+
combined_data[key].extend(values)
|
|
728
|
+
if timestamps is not None:
|
|
729
|
+
combined_data["_datetime"] = list(timestamps)
|
|
730
|
+
result: dict = {}
|
|
731
|
+
for key, value in combined_data.items():
|
|
732
|
+
array = np.array(
|
|
733
|
+
[
|
|
734
|
+
x
|
|
735
|
+
if x is not None
|
|
736
|
+
else (EMPTY_VALUES[PARSERS[key]] if key in PARSERS else "")
|
|
737
|
+
for x in value
|
|
738
|
+
]
|
|
739
|
+
)
|
|
740
|
+
mask = [np.full(array.shape[1:], x is None) for x in value]
|
|
741
|
+
result[key] = ma.array(array, mask=mask)
|
|
383
742
|
result["time"] = result["_datetime"].astype("datetime64[s]")
|
|
384
743
|
return result
|