cloudnetpy 1.49.9-py3-none-any.whl → 1.87.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (116)
  1. cloudnetpy/categorize/__init__.py +1 -2
  2. cloudnetpy/categorize/atmos_utils.py +297 -67
  3. cloudnetpy/categorize/attenuation.py +31 -0
  4. cloudnetpy/categorize/attenuations/__init__.py +37 -0
  5. cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
  6. cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
  7. cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
  8. cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
  9. cloudnetpy/categorize/categorize.py +332 -156
  10. cloudnetpy/categorize/classify.py +127 -125
  11. cloudnetpy/categorize/containers.py +107 -76
  12. cloudnetpy/categorize/disdrometer.py +40 -0
  13. cloudnetpy/categorize/droplet.py +23 -21
  14. cloudnetpy/categorize/falling.py +53 -24
  15. cloudnetpy/categorize/freezing.py +25 -12
  16. cloudnetpy/categorize/insects.py +35 -23
  17. cloudnetpy/categorize/itu.py +243 -0
  18. cloudnetpy/categorize/lidar.py +36 -41
  19. cloudnetpy/categorize/melting.py +34 -26
  20. cloudnetpy/categorize/model.py +84 -37
  21. cloudnetpy/categorize/mwr.py +18 -14
  22. cloudnetpy/categorize/radar.py +215 -102
  23. cloudnetpy/cli.py +578 -0
  24. cloudnetpy/cloudnetarray.py +43 -89
  25. cloudnetpy/concat_lib.py +218 -78
  26. cloudnetpy/constants.py +28 -10
  27. cloudnetpy/datasource.py +61 -86
  28. cloudnetpy/exceptions.py +49 -20
  29. cloudnetpy/instruments/__init__.py +5 -0
  30. cloudnetpy/instruments/basta.py +29 -12
  31. cloudnetpy/instruments/bowtie.py +135 -0
  32. cloudnetpy/instruments/ceilo.py +138 -115
  33. cloudnetpy/instruments/ceilometer.py +164 -80
  34. cloudnetpy/instruments/cl61d.py +21 -5
  35. cloudnetpy/instruments/cloudnet_instrument.py +74 -36
  36. cloudnetpy/instruments/copernicus.py +108 -30
  37. cloudnetpy/instruments/da10.py +54 -0
  38. cloudnetpy/instruments/disdrometer/common.py +126 -223
  39. cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
  40. cloudnetpy/instruments/disdrometer/thies.py +254 -87
  41. cloudnetpy/instruments/fd12p.py +201 -0
  42. cloudnetpy/instruments/galileo.py +65 -23
  43. cloudnetpy/instruments/hatpro.py +123 -49
  44. cloudnetpy/instruments/instruments.py +113 -1
  45. cloudnetpy/instruments/lufft.py +39 -17
  46. cloudnetpy/instruments/mira.py +268 -61
  47. cloudnetpy/instruments/mrr.py +187 -0
  48. cloudnetpy/instruments/nc_lidar.py +19 -8
  49. cloudnetpy/instruments/nc_radar.py +109 -55
  50. cloudnetpy/instruments/pollyxt.py +135 -51
  51. cloudnetpy/instruments/radiometrics.py +313 -59
  52. cloudnetpy/instruments/rain_e_h3.py +171 -0
  53. cloudnetpy/instruments/rpg.py +321 -189
  54. cloudnetpy/instruments/rpg_reader.py +74 -40
  55. cloudnetpy/instruments/toa5.py +49 -0
  56. cloudnetpy/instruments/vaisala.py +95 -343
  57. cloudnetpy/instruments/weather_station.py +774 -105
  58. cloudnetpy/metadata.py +90 -19
  59. cloudnetpy/model_evaluation/file_handler.py +55 -52
  60. cloudnetpy/model_evaluation/metadata.py +46 -20
  61. cloudnetpy/model_evaluation/model_metadata.py +1 -1
  62. cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
  63. cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
  64. cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
  65. cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
  66. cloudnetpy/model_evaluation/products/model_products.py +43 -35
  67. cloudnetpy/model_evaluation/products/observation_products.py +41 -35
  68. cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
  69. cloudnetpy/model_evaluation/products/tools.py +29 -20
  70. cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
  71. cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
  72. cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
  73. cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
  74. cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
  75. cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
  76. cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
  77. cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
  78. cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
  79. cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
  80. cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
  81. cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
  82. cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
  83. cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
  84. cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
  85. cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
  86. cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
  87. cloudnetpy/model_evaluation/utils.py +2 -1
  88. cloudnetpy/output.py +170 -111
  89. cloudnetpy/plotting/__init__.py +2 -1
  90. cloudnetpy/plotting/plot_meta.py +562 -822
  91. cloudnetpy/plotting/plotting.py +1142 -704
  92. cloudnetpy/products/__init__.py +1 -0
  93. cloudnetpy/products/classification.py +370 -88
  94. cloudnetpy/products/der.py +85 -55
  95. cloudnetpy/products/drizzle.py +77 -34
  96. cloudnetpy/products/drizzle_error.py +15 -11
  97. cloudnetpy/products/drizzle_tools.py +79 -59
  98. cloudnetpy/products/epsilon.py +211 -0
  99. cloudnetpy/products/ier.py +27 -50
  100. cloudnetpy/products/iwc.py +55 -48
  101. cloudnetpy/products/lwc.py +96 -70
  102. cloudnetpy/products/mwr_tools.py +186 -0
  103. cloudnetpy/products/product_tools.py +170 -128
  104. cloudnetpy/utils.py +455 -240
  105. cloudnetpy/version.py +2 -2
  106. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
  107. cloudnetpy-1.87.3.dist-info/RECORD +127 -0
  108. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
  109. cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
  110. docs/source/conf.py +2 -2
  111. cloudnetpy/categorize/atmos.py +0 -361
  112. cloudnetpy/products/mwr_multi.py +0 -68
  113. cloudnetpy/products/mwr_single.py +0 -75
  114. cloudnetpy-1.49.9.dist-info/RECORD +0 -112
  115. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
  116. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
@@ -1,34 +1,42 @@
+import csv
 import datetime
 import logging
-from collections.abc import Callable, Iterator, Sequence
+import re
+from collections import defaultdict
+from collections.abc import Callable, Iterable, Iterator, Sequence
 from itertools import islice
-from pathlib import Path
-from typing import Any, Literal
+from os import PathLike
+from typing import Any
+from uuid import UUID
 
 import numpy as np
+import numpy.typing as npt
+from numpy import ma
 
 from cloudnetpy import output
 from cloudnetpy.cloudnetarray import CloudnetArray
+from cloudnetpy.constants import MM_TO_M, SEC_IN_HOUR
 from cloudnetpy.exceptions import DisdrometerDataError
 from cloudnetpy.instruments import instruments
-from cloudnetpy.instruments.cloudnet_instrument import CloudnetInstrument
+from cloudnetpy.utils import get_uuid
 
 from .common import ATTRIBUTES, Disdrometer
 
 
 def parsivel2nc(
-    disdrometer_file: Path | str | bytes,
-    output_file: str,
+    disdrometer_file: str | PathLike | Iterable[str | PathLike],
+    output_file: str | PathLike,
     site_meta: dict,
-    uuid: str | None = None,
+    uuid: str | UUID | None = None,
     date: str | datetime.date | None = None,
     telegram: Sequence[int | None] | None = None,
-) -> str:
+    timestamps: Sequence[datetime.datetime] | None = None,
+) -> UUID:
     """Converts OTT Parsivel-2 disdrometer data into Cloudnet Level 1b netCDF
     file.
 
     Args:
-        disdrometer_file: Filename of disdrometer .log file.
+        disdrometer_file: Filename of disdrometer file or list of filenames.
         output_file: Output filename.
         site_meta: Dictionary containing information about the site. Required key
            is `name`.
@@ -38,6 +46,7 @@ def parsivel2nc(
             the instrument's operating instructions. Unknown values are indicated
             with None. Telegram is required if the input file doesn't contain a
             header.
+        timestamps: Specify list of timestamps if they are missing in the input file.
 
     Returns:
         UUID of the generated file.
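The reworked entry point now accepts one or several input files plus an optional list of timestamps, and returns a UUID object instead of a string. A minimal usage sketch, assuming the usual top-level re-export from cloudnetpy.instruments (the file names and site values below are made up):

    from cloudnetpy.instruments import parsivel2nc

    site_meta = {"name": "My site", "latitude": 61.8, "longitude": 24.3, "altitude": 180}
    uuid = parsivel2nc(
        ["parsivel-day1.log", "parsivel-day2.log"],  # a list of files is now accepted
        "parsivel.nc",
        site_meta,
        date="2024-06-01",
    )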
@@ -55,28 +64,36 @@ def parsivel2nc(
     """
     if isinstance(date, str):
         date = datetime.date.fromisoformat(date)
-    disdrometer = Parsivel(disdrometer_file, site_meta, telegram, date)
+    uuid = get_uuid(uuid)
+    if isinstance(disdrometer_file, str | PathLike):
+        disdrometer_file = [disdrometer_file]
+    disdrometer = Parsivel(disdrometer_file, site_meta, telegram, date, timestamps)
     disdrometer.sort_timestamps()
     disdrometer.remove_duplicate_timestamps()
+    disdrometer.mask_invalid_values()
+    if len(disdrometer.data["time"].data) < 2:
+        msg = "Too few data points"
+        raise DisdrometerDataError(msg)
     disdrometer.convert_units()
     disdrometer.add_meta()
     attributes = output.add_time_attribute(ATTRIBUTES, disdrometer.date)
     output.update_attributes(disdrometer.data, attributes)
-    uuid = output.save_level1b(disdrometer, output_file, uuid)
+    output.save_level1b(disdrometer, output_file, uuid)
     return uuid
 
 
-class Parsivel(CloudnetInstrument):
+class Parsivel(Disdrometer):
     def __init__(
         self,
-        filename: Path | str | bytes,
+        filenames: Iterable[str | PathLike],
         site_meta: dict,
         telegram: Sequence[int | None] | None = None,
         expected_date: datetime.date | None = None,
-    ):
+        timestamps: Sequence[datetime.datetime] | None = None,
+    ) -> None:
         super().__init__()
         self.site_meta = site_meta
-        self.raw_data = _read_parsivel(filename, telegram)
+        self.raw_data = _read_parsivel(filenames, telegram, timestamps)
         self._screen_time(expected_date)
         self.n_velocity = 32
         self.n_diameter = 32
@@ -87,89 +104,74 @@ class Parsivel(CloudnetInstrument):
         self._create_velocity_vectors()
         self._create_diameter_vectors()
 
-    def _screen_time(self, expected_date: datetime.date | None = None):
+    def _screen_time(self, expected_date: datetime.date | None = None) -> None:
         if expected_date is None:
             self.date = self.raw_data["time"][0].astype(object).date()
             return
         self.date = expected_date
         valid_mask = self.raw_data["time"].astype("datetime64[D]") == self.date
         if np.count_nonzero(valid_mask) == 0:
-            raise DisdrometerDataError(f"No data found on {expected_date}")
+            msg = f"No data found on {expected_date}"
+            raise DisdrometerDataError(msg)
         for key in self.raw_data:
             self.raw_data[key] = self.raw_data[key][valid_mask]
 
-    def _append_data(self):
+    def _append_data(self) -> None:
         for key, values in self.raw_data.items():
             if key.startswith("_"):
                 continue
+            name = key
+            values_out = values
             match key:
                 case "spectrum":
-                    key = "data_raw"
+                    name = "data_raw"
                     dimensions = ["time", "diameter", "velocity"]
                 case "number_concentration" | "fall_velocity":
                     dimensions = ["time", "diameter"]
                 case "time":
                     dimensions = []
                     base = values[0].astype("datetime64[D]")
-                    values = (values - base) / np.timedelta64(1, "h")
+                    values_out = (values - base) / np.timedelta64(1, "h")
                 case _:
                     dimensions = ["time"]
-            self.data[key] = CloudnetArray(values, key, dimensions=dimensions)
+            self.data[name] = CloudnetArray(values_out, name, dimensions=dimensions)
         if "_sensor_id" in self.raw_data:
             first_id = self.raw_data["_sensor_id"][0]
             for sensor_id in self.raw_data["_sensor_id"]:
                 if sensor_id != first_id:
-                    raise DisdrometerDataError("Multiple sensor IDs are not supported")
+                    msg = "Multiple sensor IDs are not supported"
+                    raise DisdrometerDataError(msg)
             self.serial_number = first_id
 
-    def _create_velocity_vectors(self):
+    def _create_velocity_vectors(self) -> None:
         n_values = [10, 5, 5, 5, 5, 2]
         spreads = [0.1, 0.2, 0.4, 0.8, 1.6, 3.2]
-        Disdrometer.store_vectors(self.data, n_values, spreads, "velocity")
+        self.store_vectors(n_values, spreads, "velocity")
 
-    def _create_diameter_vectors(self):
+    def _create_diameter_vectors(self) -> None:
         n_values = [10, 5, 5, 5, 5, 2]
         spreads = [0.125, 0.25, 0.5, 1, 2, 3]
-        Disdrometer.store_vectors(self.data, n_values, spreads, "diameter")
+        self.store_vectors(n_values, spreads, "diameter")
+
+    def mask_invalid_values(self) -> None:
+        if variable := self.data.get("number_concentration"):
+            variable.data = ma.masked_where(variable.data == -9.999, variable.data)
+        if variable := self.data.get("fall_velocity"):
+            variable.data = ma.masked_where(variable.data == 0, variable.data)
 
-    def convert_units(self):
-        mm_to_m = 1e3
-        mmh_to_ms = 3600 * mm_to_m
+    def convert_units(self) -> None:
+        mmh_to_ms = SEC_IN_HOUR / MM_TO_M
         c_to_k = 273.15
         self._convert_data(("rainfall_rate",), mmh_to_ms)
         self._convert_data(("snowfall_rate",), mmh_to_ms)
-        self._convert_data(("diameter", "diameter_spread", "diameter_bnds"), mm_to_m)
+        self._convert_data(("diameter", "diameter_spread", "diameter_bnds"), 1e3)
         self._convert_data(("V_sensor_supply",), 10)
         self._convert_data(("T_sensor",), c_to_k, method="add")
+        if variable := self.data.get("number_concentration"):
+            variable.data = np.power(10, variable.data).round().astype(np.uint32)
 
-    def add_meta(self):
-        valid_keys = ("latitude", "longitude", "altitude")
-        for key, value in self.site_meta.items():
-            key = key.lower()
-            if key in valid_keys:
-                self.data[key] = CloudnetArray(float(value), key)
 
-    def _convert_data(
-        self,
-        keys: tuple[str, ...],
-        value: float,
-        method: Literal["divide", "add"] = "divide",
-    ):
-        for key in keys:
-            if key not in self.data:
-                continue
-            variable = self.data[key]
-            if method == "divide":
-                variable.data = variable.data.astype("f4") / value
-                variable.data_type = "f4"
-            elif method == "add":
-                variable.data = variable.data.astype("f4") + value
-                variable.data_type = "f4"
-            else:
-                raise ValueError
-
-
-HEADERS = {
+CSV_HEADERS = {
     "Date": "_date",
     "Time": "_time",
     "Intensity of precipitation (mm/h)": "rainfall_rate",
@@ -190,6 +192,86 @@ HEADERS = {
     "Spectrum": "spectrum",
 }
 
+TOA5_HEADERS = {
+    "RECORD": "_record",
+    "TIMESTAMP": "_datetime",
+    "datetime_utc": "_datetime",
+    "rainIntensity": "rainfall_rate",
+    "rain_intensity": "rainfall_rate",
+    "rain rate [mm/h]": "rainfall_rate",
+    "snowIntensity": "snowfall_rate",
+    "snow_intensity": "snowfall_rate",
+    "accPrec": "_rain_accum",
+    "precipitation": "_rain_accum",
+    "rain accum [mm]": "_rain_accum",
+    "weatherCodeWaWa": "synop_WaWa",
+    "wawa": "synop_WaWa",
+    "weather_code_wawa": "synop_WaWa",
+    "radarReflectivity": "radar_reflectivity",
+    "radar_reflectivity": "radar_reflectivity",
+    "Z [dBz]": "radar_reflectivity",
+    "morVisibility": "visibility",
+    "mor_visibility": "visibility",
+    "MOR visibility [m]": "visibility",
+    "kineticEnergy": "kinetic_energy",
+    "kinetic_energy": "kinetic_energy",
+    "signalAmplitude": "sig_laser",
+    "signal_amplitude": "sig_laser",
+    "Signal amplitude": "sig_laser",
+    "sensorTemperature": "T_sensor",
+    "sensor_temperature": "T_sensor",
+    "Temperature sensor [°C]": "T_sensor",
+    "pbcTemperature": "_T_pcb",
+    "pbc_temperature": "_T_pcb",
+    "rightTemperature": "_T_right",
+    "right_temperature": "_T_right",
+    "leftTemperature": "_T_left",
+    "left_temperature": "_T_left",
+    "heatingCurrent": "I_heating",
+    "heating_current": "I_heating",
+    "sensorVoltage": "V_power_supply",
+    "sensor_voltage": "V_power_supply",
+    "Power supply voltage in the sensor [V]": "V_power_supply",
+    "sensorStatus": "state_sensor",
+    "sensor_status": "state_sensor",
+    "Sensor status": "state_sensor",
+    "errorCode": "error_code",
+    "error_code": "error_code",
+    "Error code": "error_code",
+    "numberParticles": "n_particles",
+    "number_particles": "n_particles",
+    "Number of detected particles": "n_particles",
+    "N": "number_concentration",
+    "V": "fall_velocity",
+    "spectrum": "spectrum",
+    "Current heating system [A]": "I_heating",
+    "sample interval [s]": "interval",
+    "Serial number": "_sensor_id",
+    "IOP firmware version": "_iop_firmware_version",
+    "Station name": "_station_name",
+    "Rain amount absolute [mm]": "_rain_amount_absolute",
+    # Kenttärova
+    "wawa [ww]": "synop_WW",
+    "wawa [METAR]": "_metar_speci",
+    "wawa [NWS]": "_nws",
+    "DSP firmware version": "_dsp_firmware_version",
+    "Start of measurement [DD.MM.YY_HH:MM:SS]": "_datetime_skip",
+    "Sensor time [HH:MM:SS]": "_time_skip",
+    "Sensor date [DD.MM.YY]": "_date_skip",
+    "Station number": "_station_number",
+    "Temperature PCB [°C]": "_T_pcb",
+    "Temperature right sensor head [°C]": "_T_right",
+    "Temperature left sensor head [°C]": "_T_left",
+    "Rain intensity 16 bit low [mm/h]": "_rainfall_rate_16_bit_low",
+    "Rain intensity 16 bit high [mm/h]": "_rainfall_rate_16_bit_high",
+    "Rain accumulated 16 bit [mm]": "_rain_accum_16_bit",
+    "Reflectivity 16 bit [dBZ]": "_radar_reflectivity_16_bit",
+    "Kinetic energy [J m-2 h-1)]": "kinetic_energy",
+    "Snow depth intensity (vol equiv.) [mm/h]": "snowfall_rate",
+    "Number of particles": "n_particles",
+    "Particle list (empty, see particle file)": "_particles",
+}
+
 TELEGRAM = {
     1: "rainfall_rate",
     2: "_rain_accum",
@@ -249,9 +331,11 @@ def _parse_date(tokens: Iterator[str]) -> datetime.date:
     elif "." in token:
         day, month, year = token.split(".")
     else:
-        raise ValueError(f"Unsupported date: '{input}'")
+        msg = f"Unsupported date: '{input}'"
+        raise ValueError(msg)
     if len(year) != 4:
-        raise ValueError(f"Unsupported date: '{input}'")
+        msg = f"Unsupported date: '{input}'"
+        raise ValueError(msg)
     return datetime.date(int(year), int(month), int(day))
 
 
@@ -269,14 +353,21 @@ def _parse_datetime(tokens: Iterator[str]) -> datetime.datetime:
     hour = int(token[8:10])
     minute = int(token[10:12])
     second = int(token[12:14])
-    return datetime.datetime(year, month, day, hour, minute, second)
+    return datetime.datetime(
+        year,
+        month,
+        day,
+        hour,
+        minute,
+        second,
+    )
 
 
-def _parse_vector(tokens: Iterator[str]) -> np.ndarray:
+def _parse_vector(tokens: Iterator[str]) -> npt.NDArray:
     return np.array([_parse_float(tokens) for _i in range(32)])
 
 
-def _parse_spectrum(tokens: Iterator[str]) -> np.ndarray:
+def _parse_spectrum(tokens: Iterator[str]) -> npt.NDArray:
     first = next(tokens)
     if first == "<SPECTRUM>ZERO</SPECTRUM>":
         return np.zeros((32, 32), dtype="i2")
@@ -284,17 +375,24 @@ def _parse_spectrum(tokens: Iterator[str]) -> np.ndarray:
         raw = [first.removeprefix("<SPECTRUM>")]
         raw.extend(islice(tokens, 1023))
         if next(tokens) != "</SPECTRUM>":
-            raise ValueError("Invalid spectrum format")
+            msg = "Invalid spectrum format"
+            raise ValueError(msg)
         values = [int(x) if x != "" else 0 for x in raw]
+    elif "/" in first:
+        values = [int(x) for x in first.removesuffix("/R").split("/")]
     else:
         values = [int(first)]
         values.extend(int(x) for x in islice(tokens, 1023))
     if len(values) != 1024:
-        raise ValueError("Invalid length")
+        msg = f"Invalid spectrum length: {len(values)}"
+        raise ValueError(msg)
     return np.array(values, dtype="i2").reshape((32, 32))
 
 
-PARSERS: dict[str, Callable[[Iterator[str]], Any]] = {
+ParserType = Callable[[Iterator[str]], Any]
+
+
+PARSERS: dict[str, ParserType] = {
     "I_heating": _parse_float,
     "T_sensor": _parse_int,
     "_T_pcb": _parse_int,
@@ -323,9 +421,19 @@ PARSERS: dict[str, Callable[[Iterator[str]], Any]] = {
     "visibility": _parse_int,
 }
 
+EMPTY_VALUES: dict[ParserType, Any] = {
+    _parse_int: 0,
+    _parse_float: 0.0,
+    _parse_date: datetime.date(2000, 1, 1),
+    _parse_time: datetime.time(12, 0, 0),
+    _parse_datetime: datetime.datetime(2000, 1, 1),
+    _parse_vector: np.zeros(32, dtype=float),
+    _parse_spectrum: np.zeros((32, 32), dtype="i2"),
+}
+
 
 def _parse_headers(line: str) -> list[str]:
-    return [HEADERS[header.strip()] for header in line.split(";")]
+    return [CSV_HEADERS[header.strip()] for header in line.split(";")]
 
 
 def _parse_telegram(telegram: Sequence[int | None]) -> list[str]:
@@ -342,43 +450,294 @@ def _read_rows(headers: list[str], rows: list[str]) -> dict[str, list]:
         if row == "":
             continue
         try:
-            tokens = iter(row.removesuffix(";").split(";"))
-            parsed = [PARSERS.get(header, next)(tokens) for header in headers]
-            unread_tokens = list(tokens)
-            if unread_tokens:
-                raise ValueError("More values than expected")
-            for header, value in zip(headers, parsed):
+            parsed = _parse_row(row, headers)
+            for header, value in zip(headers, parsed, strict=True):
                 result[header].append(value)
         except (ValueError, StopIteration):
             invalid_rows += 1
             continue
     if invalid_rows == len(rows):
-        raise DisdrometerDataError("No valid data in file")
+        msg = "No valid data in file"
+        raise DisdrometerDataError(msg)
     if invalid_rows > 0:
-        logging.info(f"Skipped {invalid_rows} invalid rows")
+        logging.info("Skipped %s invalid rows", invalid_rows)
     return result
 
 
-def _read_parsivel(
-    filename: Path | str | bytes, telegram: Sequence[int | None] | None = None
-) -> dict[str, np.ndarray]:
-    with open(filename, encoding="latin1", errors="ignore") as file:
-        lines = file.read().splitlines()
-    if not lines:
-        raise DisdrometerDataError("File is empty")
-    if "Date" in lines[0]:
-        headers = _parse_headers(lines[0])
-        data = _read_rows(headers, lines[1:])
-    elif telegram is not None:
-        headers = _parse_telegram(telegram)
-        data = _read_rows(headers, lines)
-    else:
-        raise ValueError("telegram must be specified for files without header")
-    if "_datetime" not in data:
-        data["_datetime"] = [
-            datetime.datetime.combine(date, time)
-            for date, time in zip(data["_date"], data["_time"])
+def _parse_row(row_in: str, headers: list[str]) -> list:
+    tokens = iter(row_in.removesuffix(";").split(";"))
+    parsed = [PARSERS.get(header, next)(tokens) for header in headers]
+    if unread_tokens := list(tokens):
+        msg = f"Unused tokens: {unread_tokens}"
+        raise ValueError(msg)
+    return parsed
+
+
+def _read_toa5(filename: str | PathLike) -> dict[str, list]:
+    """Read ASCII data from Campbell Scientific datalogger such as CR1000.
+
+    References:
+        CR1000 Measurement and Control System.
+        https://s.campbellsci.com/documents/us/manuals/cr1000.pdf
+    """
+    with open(filename, errors="ignore") as file:
+        reader = csv.reader(file)
+        _origin_line = next(reader)
+        header_line = next(reader)
+        headers = [
+            TOA5_HEADERS.get(re.sub(r"\(.*", "", field)) for field in header_line
+        ]
+        if unknown_headers := [
+            header_line[i] for i in range(len(header_line)) if headers[i] is None
+        ]:
+            msg = "Unknown headers: " + ", ".join(unknown_headers)
+            logging.warning(msg)
+        _units_line = next(reader)
+        _process_line = next(reader)
+        data: dict[str, list] = {header: [] for header in headers if header is not None}
+        n_rows = 0
+        n_invalid_rows = 0
+        for data_line in reader:
+            n_rows += 1
+            scalars: dict[str, datetime.datetime | int | float | str] = {}
+            arrays: dict[str, list] = {
+                "number_concentration": [],
+                "fall_velocity": [],
+                "spectrum": [],
+            }
+            try:
+                for header, value in zip(headers, data_line, strict=True):
+                    if header is None:
+                        continue
+                    if header == "_datetime":
+                        scalars[header] = datetime.datetime.strptime(
+                            value,
+                            "%Y-%m-%d %H:%M:%S",
+                        )
+                    elif header in ("number_concentration", "fall_velocity"):
+                        arrays[header].append(float(value))
+                    elif header == "spectrum":
+                        arrays[header].append(int(value))
+                    elif PARSERS.get(header) == _parse_int:
+                        scalars[header] = int(value)
+                    elif PARSERS.get(header) == _parse_float:
+                        scalars[header] = float(value)
+                    else:
+                        scalars[header] = value
+            except ValueError:
+                n_invalid_rows += 1
+                continue
+            for header, scalar in scalars.items():
+                data[header].append(scalar)
+            if "spectrum" in headers:
+                data["spectrum"].append(
+                    np.array(arrays["spectrum"], dtype="i2").reshape((32, 32)),
+                )
+            if "number_concentration" in headers:
+                data["number_concentration"].append(arrays["number_concentration"])
+            if "fall_velocity" in headers:
+                data["fall_velocity"].append(arrays["fall_velocity"])
+        if n_invalid_rows == n_rows:
+            msg = "No valid data in file"
+            raise DisdrometerDataError(msg)
+        if n_invalid_rows > 0:
+            logging.info("Skipped %s invalid rows", n_invalid_rows)
+        return data
+
+
+def _read_pyatmoslogger_file(filename: str | PathLike) -> dict[str, list]:
+    """Read CSV file from pyAtmosLogger.
+
+    References:
+        https://pypi.org/project/pyAtmosLogger/
+    """
+    with open(filename, errors="ignore") as file:
+        lines = file.readlines()
+        header_line = lines[0].strip().strip(";").split(";")
+        headers = [
+            TOA5_HEADERS.get(
+                re.sub(
+                    r"N[0-9][0-9]",
+                    "N",
+                    re.sub(r"v[0-9][0-9]", "V", re.sub(r"M\_.*", "spectrum", field)),
+                ),
+            )
+            for field in header_line
         ]
-    result = {key: np.array(value) for key, value in data.items()}
+        if unknown_headers := [
+            header_line[i] for i in range(len(header_line)) if headers[i] is None
+        ]:
+            msg = "Unknown headers: " + ", ".join(unknown_headers)
+            logging.warning(msg)
+
+        data: dict[str, list] = {header: [] for header in headers if header is not None}
+        n_rows = 0
+        n_invalid_rows = 0
+        for data_line in lines[1:]:
+            data_line_splat = data_line.strip().strip(";").split(";")
+            n_rows += 1
+            scalars: dict[str, datetime.datetime | int | float | str] = {}
+            arrays: dict[str, list] = {
+                "number_concentration": [],
+                "fall_velocity": [],
+                "spectrum": [],
+            }
+            try:
+                for header, value in zip(headers, data_line_splat, strict=True):
+                    if header is None:
+                        continue
+                    if header == "_datetime":
+                        scalars[header] = datetime.datetime.strptime(
+                            value,
+                            "%Y-%m-%d %H:%M:%S",
+                        )
+                    elif header in ("number_concentration", "fall_velocity"):
+                        arrays[header].append(float(value))
+                    elif header == "spectrum":
+                        arrays[header].append(int(value))
+                    elif PARSERS.get(header) == _parse_int:
+                        scalars[header] = int(value)
+                    elif PARSERS.get(header) == _parse_float:
+                        scalars[header] = float(value)
+                    else:
+                        scalars[header] = value
+            except ValueError:
+                n_invalid_rows += 1
+                continue
+            for header, scalar in scalars.items():
+                data[header].append(scalar)
+            if "spectrum" in headers:
+                data["spectrum"].append(
+                    np.array(arrays["spectrum"], dtype="i2").reshape((32, 32)),
+                )
+            if "number_concentration" in headers:
+                data["number_concentration"].append(arrays["number_concentration"])
+            if "fall_velocity" in headers:
+                data["fall_velocity"].append(arrays["fall_velocity"])
+        if n_invalid_rows == n_rows:
+            msg = "No valid data in file"
+            raise DisdrometerDataError(msg)
+        if n_invalid_rows > 0:
+            logging.info("Skipped %s invalid rows", n_invalid_rows)
+        return data
+
+
+def _read_typ_op4a(lines: list[str]) -> dict[str, Any]:
+    """Read output of "CS/PA" command. The output starts with line "TYP OP4A"
+    followed by one line per measured variable in format: <number>:<value>.
+    Output ends with characters: <ETX><CR><LF><NUL>. Lines are separated by
+    <CR><LF>.
+    """
+    data = {}
+    for line in lines:
+        if ":" not in line:
+            continue
+        key, value = line.strip().split(":", maxsplit=1)
+        # Skip datetime and 16-bit values.
+        if key in ("19", "30", "31", "32", "33"):
+            continue
+        varname = TELEGRAM.get(int(key))
+        if varname is None:
+            continue
+        parser = PARSERS.get(varname, next)
+        tokens = value.split(";")
+        data[varname] = parser(iter(tokens))
+    return data
+
+
+def _read_fmi(content: str) -> dict[str, list]:
+    r"""Read format used by Finnish Meteorological Institute and University of
+    Helsinki.
+
+    Format consists of sequence of the following:
+    - "[YYYY-MM-DD HH:MM:SS\n"
+    - output of "CS/PA" command without non-printable characters at the end
+    - "]\n"
+    """
+    output: dict[str, list] = {"_datetime": []}
+    for m in re.finditer(
+        r"\[(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) "
+        r"(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)"
+        r"(?P<output>[^\]]*)\]",
+        content,
+    ):
+        try:
+            record = _read_typ_op4a(m["output"].splitlines())
+        except ValueError:
+            continue
+
+        for key, value in record.items():
+            if key not in output:
+                output[key] = [None] * len(output["_datetime"])
+            output[key].append(value)
+        for key in output:
+            if key not in record and key != "_datetime":
+                output[key].append(None)
+
+        output["_datetime"].append(
+            datetime.datetime(
+                int(m["year"]),
+                int(m["month"]),
+                int(m["day"]),
+                int(m["hour"]),
+                int(m["minute"]),
+                int(m["second"]),
+            )
+        )
+    return output
+
+
+def _read_parsivel(
+    filenames: Iterable[str | PathLike],
+    telegram: Sequence[int | None] | None = None,
+    timestamps: Sequence[datetime.datetime] | None = None,
+) -> dict[str, npt.NDArray]:
+    combined_data = defaultdict(list)
+    for filename in filenames:
+        with open(filename, encoding="latin1", errors="ignore") as file:
+            content = file.read()
+            lines = content.splitlines()
+        if not lines:
+            msg = f"File '{filename}' is empty"
+            raise DisdrometerDataError(msg)
+        if "TOA5" in lines[0]:
+            data = _read_toa5(filename)
+        elif "N00" in lines[0]:
+            data = _read_pyatmoslogger_file(filename)
+        elif "TYP OP4A" in lines[0]:
+            data = _read_typ_op4a(lines)
+            data = {key: [value] for key, value in data.items()}
+        elif "Date" in lines[0]:
+            headers = _parse_headers(lines[0])
+            data = _read_rows(headers, lines[1:])
+        elif "[" in lines[0]:
+            data = _read_fmi(content)
+        elif telegram is not None:
+            headers = _parse_telegram(telegram)
+            data = _read_rows(headers, lines)
+        else:
+            msg = "telegram must be specified for files without header"
+            raise ValueError(msg)
+        if "_datetime" not in data and timestamps is None:
+            data["_datetime"] = [
+                datetime.datetime.combine(date, time)
+                for date, time in zip(data["_date"], data["_time"], strict=True)
+            ]
+        for key, values in data.items():
+            combined_data[key].extend(values)
+    if timestamps is not None:
+        combined_data["_datetime"] = list(timestamps)
+    result: dict = {}
+    for key, value in combined_data.items():
+        array = np.array(
+            [
+                x
+                if x is not None
+                else (EMPTY_VALUES[PARSERS[key]] if key in PARSERS else "")
+                for x in value
+            ]
+        )
+        mask = [np.full(array.shape[1:], x is None) for x in value]
+        result[key] = ma.array(array, mask=mask)
     result["time"] = result["_datetime"].astype("datetime64[s]")
     return result
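Worth noting in the rewritten _read_parsivel: records merged from several files (or parsed from the FMI format) may lack some fields, so each missing entry is replaced by a type-appropriate placeholder from EMPTY_VALUES and masked. A reduced sketch of that mechanism:

    import numpy as np
    from numpy import ma

    values = [1.5, None, 2.0]  # a scalar field missing from the second record
    empty = 0.0  # what EMPTY_VALUES supplies for a float-parsed field
    array = np.array([x if x is not None else empty for x in values])
    mask = [np.full(array.shape[1:], x is None) for x in values]
    print(ma.array(array, mask=mask))  # [1.5 -- 2.0]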