cloudnetpy 1.49.9__py3-none-any.whl → 1.87.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (116)
  1. cloudnetpy/categorize/__init__.py +1 -2
  2. cloudnetpy/categorize/atmos_utils.py +297 -67
  3. cloudnetpy/categorize/attenuation.py +31 -0
  4. cloudnetpy/categorize/attenuations/__init__.py +37 -0
  5. cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
  6. cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
  7. cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
  8. cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
  9. cloudnetpy/categorize/categorize.py +332 -156
  10. cloudnetpy/categorize/classify.py +127 -125
  11. cloudnetpy/categorize/containers.py +107 -76
  12. cloudnetpy/categorize/disdrometer.py +40 -0
  13. cloudnetpy/categorize/droplet.py +23 -21
  14. cloudnetpy/categorize/falling.py +53 -24
  15. cloudnetpy/categorize/freezing.py +25 -12
  16. cloudnetpy/categorize/insects.py +35 -23
  17. cloudnetpy/categorize/itu.py +243 -0
  18. cloudnetpy/categorize/lidar.py +36 -41
  19. cloudnetpy/categorize/melting.py +34 -26
  20. cloudnetpy/categorize/model.py +84 -37
  21. cloudnetpy/categorize/mwr.py +18 -14
  22. cloudnetpy/categorize/radar.py +215 -102
  23. cloudnetpy/cli.py +578 -0
  24. cloudnetpy/cloudnetarray.py +43 -89
  25. cloudnetpy/concat_lib.py +218 -78
  26. cloudnetpy/constants.py +28 -10
  27. cloudnetpy/datasource.py +61 -86
  28. cloudnetpy/exceptions.py +49 -20
  29. cloudnetpy/instruments/__init__.py +5 -0
  30. cloudnetpy/instruments/basta.py +29 -12
  31. cloudnetpy/instruments/bowtie.py +135 -0
  32. cloudnetpy/instruments/ceilo.py +138 -115
  33. cloudnetpy/instruments/ceilometer.py +164 -80
  34. cloudnetpy/instruments/cl61d.py +21 -5
  35. cloudnetpy/instruments/cloudnet_instrument.py +74 -36
  36. cloudnetpy/instruments/copernicus.py +108 -30
  37. cloudnetpy/instruments/da10.py +54 -0
  38. cloudnetpy/instruments/disdrometer/common.py +126 -223
  39. cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
  40. cloudnetpy/instruments/disdrometer/thies.py +254 -87
  41. cloudnetpy/instruments/fd12p.py +201 -0
  42. cloudnetpy/instruments/galileo.py +65 -23
  43. cloudnetpy/instruments/hatpro.py +123 -49
  44. cloudnetpy/instruments/instruments.py +113 -1
  45. cloudnetpy/instruments/lufft.py +39 -17
  46. cloudnetpy/instruments/mira.py +268 -61
  47. cloudnetpy/instruments/mrr.py +187 -0
  48. cloudnetpy/instruments/nc_lidar.py +19 -8
  49. cloudnetpy/instruments/nc_radar.py +109 -55
  50. cloudnetpy/instruments/pollyxt.py +135 -51
  51. cloudnetpy/instruments/radiometrics.py +313 -59
  52. cloudnetpy/instruments/rain_e_h3.py +171 -0
  53. cloudnetpy/instruments/rpg.py +321 -189
  54. cloudnetpy/instruments/rpg_reader.py +74 -40
  55. cloudnetpy/instruments/toa5.py +49 -0
  56. cloudnetpy/instruments/vaisala.py +95 -343
  57. cloudnetpy/instruments/weather_station.py +774 -105
  58. cloudnetpy/metadata.py +90 -19
  59. cloudnetpy/model_evaluation/file_handler.py +55 -52
  60. cloudnetpy/model_evaluation/metadata.py +46 -20
  61. cloudnetpy/model_evaluation/model_metadata.py +1 -1
  62. cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
  63. cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
  64. cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
  65. cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
  66. cloudnetpy/model_evaluation/products/model_products.py +43 -35
  67. cloudnetpy/model_evaluation/products/observation_products.py +41 -35
  68. cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
  69. cloudnetpy/model_evaluation/products/tools.py +29 -20
  70. cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
  71. cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
  72. cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
  73. cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
  74. cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
  75. cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
  76. cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
  77. cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
  78. cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
  79. cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
  80. cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
  81. cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
  82. cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
  83. cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
  84. cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
  85. cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
  86. cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
  87. cloudnetpy/model_evaluation/utils.py +2 -1
  88. cloudnetpy/output.py +170 -111
  89. cloudnetpy/plotting/__init__.py +2 -1
  90. cloudnetpy/plotting/plot_meta.py +562 -822
  91. cloudnetpy/plotting/plotting.py +1142 -704
  92. cloudnetpy/products/__init__.py +1 -0
  93. cloudnetpy/products/classification.py +370 -88
  94. cloudnetpy/products/der.py +85 -55
  95. cloudnetpy/products/drizzle.py +77 -34
  96. cloudnetpy/products/drizzle_error.py +15 -11
  97. cloudnetpy/products/drizzle_tools.py +79 -59
  98. cloudnetpy/products/epsilon.py +211 -0
  99. cloudnetpy/products/ier.py +27 -50
  100. cloudnetpy/products/iwc.py +55 -48
  101. cloudnetpy/products/lwc.py +96 -70
  102. cloudnetpy/products/mwr_tools.py +186 -0
  103. cloudnetpy/products/product_tools.py +170 -128
  104. cloudnetpy/utils.py +455 -240
  105. cloudnetpy/version.py +2 -2
  106. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
  107. cloudnetpy-1.87.3.dist-info/RECORD +127 -0
  108. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
  109. cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
  110. docs/source/conf.py +2 -2
  111. cloudnetpy/categorize/atmos.py +0 -361
  112. cloudnetpy/products/mwr_multi.py +0 -68
  113. cloudnetpy/products/mwr_single.py +0 -75
  114. cloudnetpy-1.49.9.dist-info/RECORD +0 -112
  115. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
  116. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,36 @@
+import csv
 import datetime
+import logging
+import math
+import re
+from collections import defaultdict
+from collections.abc import Iterable, Sequence
+from os import PathLike
+from uuid import UUID

+import numpy as np
 from numpy import ma

 from cloudnetpy import output
 from cloudnetpy.categorize import atmos_utils
 from cloudnetpy.cloudnetarray import CloudnetArray
-from cloudnetpy.exceptions import ValidTimeStampError, WeatherStationDataError
+from cloudnetpy.constants import HPA_TO_PA, MM_H_TO_M_S, SEC_IN_HOUR
+from cloudnetpy.exceptions import ValidTimeStampError
 from cloudnetpy.instruments import instruments
-from cloudnetpy.instruments.cloudnet_instrument import CloudnetInstrument
+from cloudnetpy.instruments.cloudnet_instrument import CSVFile
+from cloudnetpy.instruments.toa5 import read_toa5
 from cloudnetpy.metadata import MetaData
-from cloudnetpy.utils import datetime2decimal_hours
+from cloudnetpy.utils import datetime2decimal_hours, get_uuid


 def ws2nc(
-    weather_station_file: str,
-    output_file: str,
+    weather_station_file: str | PathLike | Sequence[str | PathLike],
+    output_file: str | PathLike,
     site_meta: dict,
-    uuid: str | None = None,
-    date: str | None = None,
-) -> str:
-    """Converts weather-station data into Cloudnet Level 1b netCDF file.
+    uuid: str | UUID | None = None,
+    date: str | datetime.date | None = None,
+) -> UUID:
+    """Converts weather station data into Cloudnet Level 1b netCDF file.

     Args:
         weather_station_file: Filename of weather-station ASCII file.
@@ -33,77 +44,601 @@ def ws2nc(
         UUID of the generated file.

     Raises:
-        WeatherStationDataError : Unable to read the file.
         ValidTimeStampError: No valid timestamps found.
     """
-
-    try:
-        ws = WS(weather_station_file, site_meta)
-        if date is not None:
-            ws.screen_timestamps(date)
-        ws.convert_time()
-        ws.add_date()
-        ws.add_site_geolocation()
-        ws.add_data()
-        ws.convert_units()
-        attributes = output.add_time_attribute(ATTRIBUTES, ws.date)
-        output.update_attributes(ws.data, attributes)
-    except ValueError as err:
-        raise WeatherStationDataError from err
-    uuid = output.save_level1b(ws, output_file, uuid)
+    if isinstance(weather_station_file, str | PathLike):
+        weather_station_file = [weather_station_file]
+    if isinstance(date, str):
+        date = datetime.date.fromisoformat(date)
+    uuid = get_uuid(uuid)
+    ws: WS
+    if site_meta["name"] == "Palaiseau":
+        ws = PalaiseauWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Bucharest":
+        ws = BucharestWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Granada":
+        ws = GranadaWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Kenttärova":
+        ws = KenttarovaWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Hyytiälä":
+        ws = HyytialaWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Galați":
+        ws = GalatiWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Jülich":
+        ws = JuelichWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Lampedusa":
+        ws = LampedusaWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Limassol":
+        ws = LimassolWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "L'Aquila":
+        ws = LAquilaWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Maïdo Observatory":
+        ws = MaidoWS(weather_station_file, site_meta)
+    elif site_meta["name"] == "Cluj-Napoca":
+        ws = ClujWS(weather_station_file, site_meta)
+    else:
+        msg = "Unsupported site"
+        raise ValueError(msg)
+    if date is not None:
+        ws.screen_timestamps(date)
+    ws.convert_time()
+    ws.add_date()
+    ws.add_site_geolocation()
+    ws.add_data()
+    ws.remove_duplicate_timestamps()
+    ws.convert_temperature_and_humidity()
+    ws.convert_pressure()
+    ws.convert_rainfall_rate()
+    ws.convert_rainfall_amount()
+    ws.normalize_cumulative_amount("rainfall_amount")
+    ws.calculate_rainfall_amount()
+    ws.wrap_wind_direction()
+    attributes = output.add_time_attribute(ATTRIBUTES, ws.date)
+    output.update_attributes(ws.data, attributes)
+    output.save_level1b(ws, output_file, uuid)
     return uuid


-class WS(CloudnetInstrument):
-    def __init__(self, filename: str, site_meta: dict):
-        super().__init__()
-        self.filename = filename
-        self.site_meta = site_meta
-        self.date: list[str] = []
+class WS(CSVFile):
+    def __init__(self, site_meta: dict) -> None:
+        super().__init__(site_meta)
         self.instrument = instruments.GENERIC_WEATHER_STATION
+
+    date: datetime.date
+
+    def calculate_rainfall_amount(self) -> None:
+        if "rainfall_amount" in self.data or "rainfall_rate" not in self.data:
+            return
+        time = self.data["time"].data
+        if len(time) == 1:
+            rainfall_amount = np.array([0])
+        else:
+            resolution = np.median(np.diff(time)) * SEC_IN_HOUR
+            rainfall_amount = ma.cumsum(self.data["rainfall_rate"].data * resolution)
+        self.data["rainfall_amount"] = CloudnetArray(rainfall_amount, "rainfall_amount")
+
+    def screen_timestamps(self, date: datetime.date) -> None:
+        dates = np.array([d.date() for d in self._data["time"]])
+        valid_mask = dates == date
+        if not valid_mask.any():
+            raise ValidTimeStampError
+        for key in self._data:
+            self._data[key] = self._data[key][valid_mask]
+
+    @staticmethod
+    def format_data(data: dict) -> dict:
+        for key, value in data.items():
+            new_value = np.array(value)
+            if key != "time":
+                new_value = ma.masked_where(np.isnan(new_value), new_value)
+            data[key] = new_value
+        return data
+
+    def convert_temperature_and_humidity(self) -> None:
+        temperature_kelvins = atmos_utils.c2k(self.data["air_temperature"][:])
+        self.data["air_temperature"].data = temperature_kelvins
+        self.data["relative_humidity"].data = self.data["relative_humidity"][:] / 100
+
+    def convert_rainfall_rate(self) -> None:
+        if "rainfall_rate" not in self.data:
+            return
+        rainfall_rate = self.data["rainfall_rate"][:]
+        self.data["rainfall_rate"].data = rainfall_rate / 60 / 1000  # mm/min -> m/s
+
+    def convert_pressure(self) -> None:
+        if "air_pressure" not in self.data:
+            return
+        self.data["air_pressure"].data = self.data["air_pressure"][:] * HPA_TO_PA
+
+    def convert_time(self) -> None:
+        pass
+
+    def convert_rainfall_amount(self) -> None:
+        pass
+
+    def wrap_wind_direction(self) -> None:
+        if "wind_direction" not in self.data:
+            return
+        # Wrap values little outside of [0, 360), keep original values
+        # otherwise.
+        threshold = 2
+        values = self.data["wind_direction"].data
+        values[(values > -threshold) & (values < 0)] += 360
+        values[(values >= 360) & (values < 360 + threshold)] -= 360
+
+
+class PalaiseauWS(WS):
+    expected_header_identifiers: tuple[str, ...] = (
+        "DateTime(yyyy-mm-ddThh:mm:ssZ)",
+        "Windspeed(m/s)",
+        "Winddirection(deg",
+        "Airtemperature",
+        "Relativehumidity(%)",
+        "Pressure(hPa)",
+        "Precipitationrate(mm/min)",
+        "precipitation",
+    )
+    keys: tuple[str, ...] = (
+        "wind_speed",
+        "wind_direction",
+        "air_temperature",
+        "relative_humidity",
+        "air_pressure",
+        "rainfall_rate",
+        "rainfall_amount",
+    )
+
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filenames = filenames
         self._data = self._read_data()

-    def _read_data(self):
+    def _read_data(self) -> dict:
         timestamps, values, header = [], [], []
-        with open(self.filename, encoding="latin-1") as f:
-            data = f.readlines()
-        for row in data:
-            splat = row.split()
-            try:
-                timestamp = datetime.datetime.strptime(splat[0], "%Y-%m-%dT%H:%M:%SZ")
-                temp: list[str | float] = list(splat)
-                temp[1:] = [float(x) for x in temp[1:]]
-                values.append(temp)
-                timestamps.append(timestamp)
-            except ValueError:
-                header.append("".join(splat))
-
-        # Simple validation for now:
-        expected_identifiers = [
-            "DateTime(yyyy-mm-ddThh:mm:ssZ)",
-            "Windspeed(m/s)",
-            "Winddirection(degres)",
-            "Airtemperature(°C)",
-            "Relativehumidity(%)",
-            "Pressure(hPa)",
-            "Precipitationrate(mm/min)",
-            "24-hrcumulatedprecipitationsince00UT(mm)",
-        ]
+        for filename in self.filenames:
+            with open(filename, encoding="latin-1") as f:
+                data = f.readlines()
+            for row in data:
+                if not (columns := row.split()):
+                    continue
+                if re.match(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", columns[0]):
+                    if len(columns) != len(self.keys) + 1:
+                        msg = (
+                            f"Skipping row '{row.strip()}' due to unexpected "
+                            "number of values"
+                        )
+                        logging.warning(msg)
+                        continue
+                    timestamp = datetime.datetime.strptime(
+                        columns[0], "%Y-%m-%dT%H:%M:%SZ"
+                    ).replace(tzinfo=datetime.timezone.utc)
+                    values.append([timestamp] + [float(x) for x in columns[1:]])
+                    timestamps.append(timestamp)
+                else:
+                    header_row = "".join(columns)
+                    if header_row not in header:
+                        header.append(header_row)
+
+        self._validate_header(header)
+        return {"time": timestamps, "values": values}
+
+    def convert_time(self) -> None:
+        decimal_hours = datetime2decimal_hours(self._data["time"])
+        self.data["time"] = CloudnetArray(decimal_hours, "time")
+
+    def screen_timestamps(self, date: datetime.date) -> None:
+        dates = [d.date() for d in self._data["time"]]
+        valid_ind = [ind for ind, d in enumerate(dates) if d == date]
+        if not valid_ind:
+            raise ValidTimeStampError
+        for key in self._data:
+            self._data[key] = [
+                x for ind, x in enumerate(self._data[key]) if ind in valid_ind
+            ]
+
+    def add_data(self) -> None:
+        for ind, key in enumerate(self.keys):
+            if key.startswith("_"):
+                continue
+            array = [row[ind + 1] for row in self._data["values"]]
+            array_masked = ma.masked_invalid(array)
+            self.data[key] = CloudnetArray(array_masked, key)
+
+    def convert_rainfall_amount(self) -> None:
+        self.data["rainfall_amount"].data = (
+            self.data["rainfall_amount"][:] / 1000
+        )  # mm -> m
+
+    def _validate_header(self, header: list[str]) -> None:
         column_titles = [row for row in header if "Col." in row]
         error_msg = "Unexpected weather station file format"
-        if len(column_titles) != len(expected_identifiers):
+        if len(column_titles) != len(self.expected_header_identifiers):
             raise ValueError(error_msg)
-        for title, identifier in zip(column_titles, expected_identifiers):
+        for title, identifier in zip(
+            column_titles, self.expected_header_identifiers, strict=True
+        ):
             if identifier not in title:
                 raise ValueError(error_msg)
-        return {"timestamps": timestamps, "values": values}

-    def convert_time(self):
-        decimal_hours = datetime2decimal_hours(self._data["timestamps"])
+
+class MaidoWS(PalaiseauWS):
+    expected_header_identifiers = (
+        "DateTimeyyyy-mm-ddThh:mm:ssZ",
+        "Winddirection-average",
+        "Windspeed-maximumvalue(m/s)",
+        "Windspeed-average(m/s)",
+        "Pressure-average(hPa)",
+        "Relativehumidity-maximumvalue(%)",
+        "Relativehumidity-average(%)",
+        "Airtemperature-minimumvalue",
+        "Airtemperature-average",
+    )
+
+    keys = (
+        "wind_direction",
+        "_wind_speed_max",
+        "wind_speed",
+        "air_pressure",
+        "_relative_humidity_max",
+        "relative_humidity",
+        "_air_temperature_min",
+        "air_temperature",
+    )
+
+    def convert_rainfall_amount(self) -> None:
+        pass
+
+
+class BucharestWS(PalaiseauWS):
+    def convert_rainfall_rate(self) -> None:
+        rainfall_rate = self.data["rainfall_rate"][:]
+        self.data["rainfall_rate"].data = rainfall_rate * MM_H_TO_M_S
+
+
+class GranadaWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        if len(filenames) != 1:
+            raise ValueError
+        super().__init__(site_meta)
+        self.filename = filenames[0]
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        keymap = {
+            "TIMESTAMP": "time",
+            "air_t_Avg": "air_temperature",
+            "rh_Avg": "relative_humidity",
+            "pressure_Avg": "air_pressure",
+            "wind_speed_avg": "wind_speed",
+            "wind_dir_avg": "wind_direction",
+            "rain_Tot": "rainfall_rate",
+        }
+        expected_units = {
+            "air_t_Avg": "degC",
+            "rh_Avg": "%",
+            "pressure_Avg": "hPa",
+            "wind_speed_avg": "m/s",
+            "wind_dir_avg": "Deg",
+            "rain_Tot": "mm",
+        }
+        units, _process, rows = read_toa5(self.filename)
+        for key in units:
+            if key in expected_units and expected_units[key] != units[key]:
+                msg = (
+                    f"Expected {key} to have units {expected_units[key]},"
+                    f" got {units[key]} instead"
+                )
+                raise ValueError(msg)
+
+        data: dict[str, list] = {keymap[key]: [] for key in units if key in keymap}
+        for row in rows:
+            for key, value in row.items():
+                if key not in keymap:
+                    continue
+                parsed = value
+                if keymap[key] != "time":
+                    try:
+                        parsed = float(value)
+                    except ValueError:
+                        parsed = math.nan
+                data[keymap[key]].append(parsed)
+        return self.format_data(data)
+
+
+class KenttarovaWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filenames = filenames
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        merged: dict = {}
+        for filename in self.filenames:
+            with open(filename, newline="") as f:
+                reader = csv.DictReader(f)
+                raw_data: dict = {key: [] for key in reader.fieldnames}  # type: ignore[union-attr]
+                for row in reader:
+                    for key, value in row.items():
+                        parsed_value: float | datetime.datetime
+                        if key == "Read time (UTC+2)":
+                            try:
+                                parsed_value = datetime.datetime.strptime(
+                                    value, "%Y-%m-%d %H:%M:%S"
+                                ) - datetime.timedelta(hours=2)
+                            except ValueError:
+                                break  # Should be first column, so skip whole row.
+                        else:
+                            try:
+                                parsed_value = float(value)
+                            except ValueError:
+                                parsed_value = math.nan
+                        raw_data[key].append(parsed_value)
+            data = {
+                "time": raw_data["Read time (UTC+2)"],
+                "air_temperature": raw_data["Temp 2m (C)"],
+                "relative_humidity": raw_data["Humidity 2m (%)"],
+                "air_pressure": raw_data["Pressure (hPa)"],
+                "wind_speed": raw_data["Wind speed (m/s)"],
+                "wind_direction": raw_data["Wind dir (deg)"],
+                "rainfall_rate": raw_data["Precipitation (?)"],
+            }
+            if merged:
+                merged = {key: [*merged[key], *data[key]] for key in merged}
+            else:
+                merged = data
+        return self.format_data(merged)
+
+    def convert_rainfall_rate(self) -> None:
+        # Rainfall rate is 10-minute averaged in mm h-1
+        rainfall_rate = self.data["rainfall_rate"][:]
+        self.data["rainfall_rate"].data = rainfall_rate * MM_H_TO_M_S / 10
+
+    def convert_pressure(self) -> None:
+        # Magic number 10 to convert to realistic Pa
+        self.data["air_pressure"].data = self.data["air_pressure"][:] * 10
+
+
+class HyytialaWS(WS):
+    """Hyytiälä rain-gauge variables: a = Pluvio400 and b = Pluvio200.
+    E.g.
+    - AaRNRT/mm = amount of non-real-time rain total (Pluvio400) [mm]
+    - BbRT/mm = Bucket content in real-time (Pluvio200) [mm].
+    """
+
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filename = filenames[0]
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        with open(self.filename, newline="") as f:
+            # Skip first two lines
+            for _ in range(2):
+                next(f)
+            # Read header
+            header_line = f.readline().strip()
+            fields = header_line[1:].strip().split()
+            reader = csv.DictReader(
+                f, delimiter=" ", skipinitialspace=True, fieldnames=fields
+            )
+            if reader.fieldnames is None:
+                raise ValueError
+            raw_data: dict = {key: [] for key in reader.fieldnames}
+            raw_data["time"] = []
+            # Read data
+            for row in reader:
+                for key, value in row.items():
+                    if key:
+                        parsed_value: float | datetime.datetime
+                        if key == "y":
+                            current_time = datetime.datetime(
+                                int(value),
+                                int(row["m"]),
+                                int(row["d"]),
+                                int(row["minute"]) // 60,
+                                int(row["minute"]) % 60,
+                            )
+                            raw_data["time"].append(current_time)
+                        else:
+                            try:
+                                parsed_value = float(value)
+                            except (TypeError, ValueError):
+                                parsed_value = math.nan
+                            if parsed_value in (-99.99, -99.9):
+                                parsed_value = math.nan
+                            raw_data[key].append(parsed_value)
+
+        data = {
+            "time": raw_data["time"],
+            "air_temperature": raw_data["Ta/dsC"],
+            "relative_humidity": raw_data["RH/pcnt"],
+            "air_pressure": raw_data["Pa/kPa"],
+            "wind_speed": raw_data["WS/(m/s)"],
+            "wind_direction": raw_data["WD/ds"],
+            "rainfall_rate": raw_data["AaNRT/mm"],
+        }
+        return self.format_data(data)
+
+    def convert_pressure(self) -> None:
+        self.data["air_pressure"].data = (
+            self.data["air_pressure"][:] * 1000
+        )  # kPa to Pa
+
+
+class GalatiWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filename = filenames[0]
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        with open(self.filename, newline="") as f:
+            reader = csv.DictReader(f)
+            raw_data: dict = {key: [] for key in reader.fieldnames}  # type: ignore[union-attr]
+            for row in reader:
+                for key, value in row.items():
+                    parsed_value: float | datetime.datetime
+                    if key == "TimeStamp":
+                        parsed_value = datetime.datetime.strptime(
+                            value, "%Y-%m-%d %H:%M:%S.%f"
+                        )
+                    else:
+                        try:
+                            parsed_value = float(value)
+                        except ValueError:
+                            parsed_value = math.nan
+                    raw_data[key].append(parsed_value)
+
+        def read_value(keys: Iterable[str]) -> list:
+            for key in keys:
+                if key in raw_data:
+                    return raw_data[key]
+            raise KeyError("Didn't find any keys: " + ", ".join(keys))
+
+        data = {
+            "time": read_value(["TimeStamp"]),
+            "air_temperature": read_value(["Temperature", "Temperatura"]),
+            "relative_humidity": read_value(["RH", "Umiditate_relativa"]),
+            "air_pressure": read_value(
+                ["Atmospheric_pressure", "Presiune_atmosferica"]
+            ),
+            "rainfall_rate": read_value(
+                ["Precipitations", "Precipitatii", "Precipitatii_Tot"]
+            ),
+            "wind_speed": read_value(["Wind_speed", "Viteza_vant"]),
+            "wind_direction": read_value(["Wind_direction", "Directie_vant"]),
+            "visibility": read_value(["Visibility", "Vizibilitate"]),
+        }
+        return self.format_data(data)
+
+    def add_data(self) -> None:
+        # Skip wind measurements where range was limited to 0-180 degrees
+        if self.date < datetime.date(2024, 10, 29):
+            del self._data["wind_speed"]
+            del self._data["wind_direction"]
+        self._data["visibility"] = self._data["visibility"].astype(np.int32)
+        return super().add_data()
+
+    def convert_pressure(self) -> None:
+        mmHg2Pa = 133.322
+        self.data["air_pressure"].data = self.data["air_pressure"][:] * mmHg2Pa
+
+
+class JuelichWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filename = filenames[0]
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        keymap = {
+            "TIMESTAMP": "time",
+            "AirTC_Avg": "air_temperature",
+            "RH": "relative_humidity",
+            "BV_BP_Avg": "air_pressure",
+            "WS_ms_S_WVT": "wind_speed",
+            "WindDir_D1_WVT": "wind_direction",
+        }
+        expected_units = {
+            "AirTC_Avg": "Deg C",
+            "RH": "%",
+            "BV_BP_Avg": "hPa",
+            "WS_ms_S_WVT": "meters/Second",
+            "WindDir_D1_WVT": "Deg",
+        }
+        units, _process, rows = read_toa5(self.filename)
+        for key in units:
+            if key in expected_units and expected_units[key] != units[key]:
+                msg = (
+                    f"Expected {key} to have units {expected_units[key]},"
+                    f" got {units[key]} instead"
+                )
+                raise ValueError(msg)
+
+        data: dict[str, list] = {keymap[key]: [] for key in units if key in keymap}
+        for row in rows:
+            for key, value in row.items():
+                if key not in keymap:
+                    continue
+                parsed = value
+                if keymap[key] != "time":
+                    parsed = float(value)
+                data[keymap[key]].append(parsed)
+
+        return self.format_data(data)
+
+
+class LampedusaWS(WS):
+    """Read Lampedusa weather station data in ICOS format."""
+
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filename = filenames[0]
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        with open(self.filename, newline="") as f:
+            fields = [
+                "time",
+                "str1",
+                "str2",
+                "T",
+                "RH",
+                "Td",
+                "P",
+                "WSi",
+                "WDi",
+                "WS10m",
+                "WD10m",
+                "rain1m",
+                "rain2h",
+                "empty",
+            ]
+            reader = csv.DictReader(f, fieldnames=fields)
+            raw_data: dict = {key: [] for key in fields}
+            for row in reader:
+                for key, value in row.items():
+                    fixed_value = value.strip("\0")
+                    parsed_value: float | datetime.datetime
+                    if key == "time":
+                        parsed_value = datetime.datetime.strptime(
+                            fixed_value, "%y%m%d %H%M%S"
+                        )
+                    else:
+                        try:
+                            parsed_value = float(fixed_value)
+                        except ValueError:
+                            parsed_value = math.nan
+                    raw_data[key].append(parsed_value)
+
+        data = {
+            "time": raw_data["time"],
+            "air_temperature": raw_data["T"],
+            "relative_humidity": raw_data["RH"],
+            "air_pressure": raw_data["P"],
+            "wind_speed": raw_data["WSi"],
+            "wind_direction": raw_data["WDi"],
+            "rainfall_rate": raw_data["rain1m"],
+        }
+        return self.format_data(data)
+
+
+class LimassolWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filenames = filenames
+        self._data = defaultdict(list)
+        for filename in filenames:
+            for key, values in _parse_sirta(filename).items():
+                self._data[key].extend(values)
+        self._data["time"] = self._data.pop("Date Time (yyyy-mm-ddThh:mm:ss)")
+
+    def convert_time(self) -> None:
+        decimal_hours = datetime2decimal_hours(self._data["time"])
         self.data["time"] = CloudnetArray(decimal_hours, "time")

-    def screen_timestamps(self, date: str):
-        dates = [str(d.date()) for d in self._data["timestamps"]]
+    def screen_timestamps(self, date: datetime.date) -> None:
+        dates = [d.date() for d in self._data["time"]]
         valid_ind = [ind for ind, d in enumerate(dates) if d == date]
         if not valid_ind:
             raise ValidTimeStampError
@@ -112,54 +647,188 @@ class WS(CloudnetInstrument):
                 x for ind, x in enumerate(self._data[key]) if ind in valid_ind
             ]

-    def add_date(self):
-        first_date = self._data["timestamps"][0].date()
-        self.date = [
-            str(first_date.year),
-            str(first_date.month).zfill(2),
-            str(first_date.day).zfill(2),
-        ]
-
-    def add_data(self):
-        keys = (
-            "wind_speed",
-            "wind_direction",
-            "air_temperature",
-            "relative_humidity",
-            "air_pressure",
-            "rainfall_rate",
-            "rainfall_amount",
+    def add_data(self) -> None:
+        self.data["air_temperature"] = CloudnetArray(
+            np.array(self._data["Air temperature (°C)"]), "air_temperature"
         )
-        for ind, key in enumerate(keys):
-            array = [row[ind + 1] for row in self._data["values"]]
-            array_masked = ma.masked_invalid(array)
-            self.data[key] = CloudnetArray(array_masked, key)
+        self.data["relative_humidity"] = CloudnetArray(
+            np.array(self._data["Relative humidity (%)"]), "relative_humidity"
+        )
+        self.data["rainfall_rate"] = CloudnetArray(
+            np.array(self._data["Total precipitation (mm)"]), "rainfall_rate"
+        )
+        # Wind speed and direction are available since 2025-02-13:
+        if (
+            "Wind speed at 10m (m/s)" in self._data
+            and "Wind direction at 10m (degrees)" in self._data
+        ):
+            self.data["wind_speed"] = CloudnetArray(
+                np.array(self._data["Wind speed at 10m (m/s)"]), "wind_speed"
+            )
+            self.data["wind_direction"] = CloudnetArray(
+                np.array(self._data["Wind direction at 10m (degrees)"]),
+                "wind_direction",
+            )
+        else:
+            self.data["wind_speed"] = CloudnetArray(
+                np.array(self._data["Wind speed (m/s)"]), "wind_speed"
+            )

-    def convert_units(self):
-        temperature_kelvins = atmos_utils.c2k(self.data["air_temperature"][:])
-        self.data["air_temperature"].data = temperature_kelvins
-        self.data["relative_humidity"].data = self.data["relative_humidity"][:] / 100
-        self.data["air_pressure"].data = self.data["air_pressure"][:] * 100  # hPa -> Pa
+    def convert_rainfall_rate(self) -> None:
         rainfall_rate = self.data["rainfall_rate"][:]
-        self.data["rainfall_rate"].data = rainfall_rate / 60 / 1000  # mm/min -> m/s
-        self.data["rainfall_amount"].data = self.data["rainfall_amount"][:] / 1000
+        self.data["rainfall_rate"].data = (
+            rainfall_rate / (10 * 60) / 1000
+        )  # mm/(10 min) -> m/s
+
+
+def _parse_sirta(filename: str | PathLike) -> dict:
+    """Parse SIRTA-style weather station file."""
+    with open(filename, "rb") as f:
+        raw_content = f.read()
+    try:
+        content = raw_content.decode("utf-8")
+    except UnicodeDecodeError:
+        content = raw_content.decode("latin-1")
+    lines = [line.strip() for line in content.splitlines()]
+    columns: list[str] = []
+    output: dict = {}
+    for line in lines:
+        m = re.fullmatch(r"#\s*Col.\s*(\d+)\s*:\s*(.*)", line)
+        if m is None:
+            continue
+        if m[1] != str(len(columns) + 1):
+            msg = f"Expected column {m[1]}, found {len(columns) + 1}"
+            raise ValueError(msg)
+        columns.append(m[2])
+        output[m[2]] = []
+    for line in lines:
+        if not line or line.startswith("#"):
+            continue
+        values = line.split()
+        if len(columns) != len(values):
+            continue
+        for column, value in zip(columns, values, strict=False):
+            parsed: float | datetime.datetime
+            if column == "Date Time (yyyy-mm-ddThh:mm:ss)":
+                parsed = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").replace(
+                    tzinfo=datetime.timezone.utc
+                )
+            elif column == "Date Time (yyyy-mm-ddThh:mm:ssZ)":
+                parsed = datetime.datetime.strptime(
+                    value, "%Y-%m-%dT%H:%M:%SZ"
+                ).replace(tzinfo=datetime.timezone.utc)
+            else:
+                parsed = float(value)
+            output[column].append(parsed)
+    return output
+
+
+class LAquilaWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filenames = filenames
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        data: dict[str, list] = {
+            key: []
+            for key in [
+                "time",
+                "air_temperature",
+                "air_pressure",
+                "relative_humidity",
+                "rainfall_rate",
+                "wind_speed",
+                "wind_direction",
+            ]
+        }
+        for filename in self.filenames:
+            with open(filename) as f:
+                for row in f:
+                    if row.startswith("#"):
+                        continue
+                    columns = row.split(",")
+                    if len(columns) != 7:
+                        continue
+                    timestamp = datetime.datetime.strptime(
+                        columns[0], "%Y-%m-%dT%H:%M:%SZ"
+                    ).replace(tzinfo=datetime.timezone.utc)
+                    data["time"].append(timestamp)
+                    data["air_temperature"].append(self._parse_value(columns[1]))
+                    data["air_pressure"].append(self._parse_value(columns[2]))
+                    data["relative_humidity"].append(self._parse_value(columns[3]))
+                    data["rainfall_rate"].append(self._parse_value(columns[4]))
+                    data["wind_speed"].append(self._parse_value(columns[5]))
+                    data["wind_direction"].append(self._parse_value(columns[6]))
+        output = self.format_data(data)
+        _, time_ind = np.unique(output["time"], return_index=True)
+        for key in output:
+            output[key] = output[key][time_ind]
+        return output
+
+    def _parse_value(self, value: str) -> float:
+        value = value.strip()
+        return float(value) if value else math.nan
+
+
+class ClujWS(WS):
+    def __init__(self, filenames: Sequence[str | PathLike], site_meta: dict) -> None:
+        super().__init__(site_meta)
+        self.filenames = filenames
+        self._data = self._read_data()
+
+    def _read_data(self) -> dict:
+        with open(self.filenames[0]) as f:
+            rows = f.readlines()
+        headers = rows[0].strip().split("\t")
+        raw_data: dict[str, list[str]] = {header: [] for header in headers}
+        for row in rows[1:]:
+            columns = row.strip().split("\t")
+            for key, value in zip(headers, columns, strict=True):
+                raw_data[key].append(value)
+        return self.format_data(
+            {
+                "time": [self._parse_datetime(x) for x in raw_data["DateTime"]],
+                "air_temperature": [
+                    self._parse_value(x) for x in raw_data["Air_temperature_C"]
+                ],
+                "air_pressure": [
+                    self._parse_value(x) for x in raw_data["air_pressure_hPA"]
+                ],
+                "relative_humidity": [
+                    self._parse_value(x) for x in raw_data["rel_humidity_pct"]
+                ],
+                "rainfall_rate": [
+                    self._parse_value(x) for x in raw_data["Precipitation_mm"]
+                ],
+                "wind_speed": [self._parse_value(x) for x in raw_data["WS_azimuth_ms"]],
+                "wind_direction": [
+                    self._parse_value(x) for x in raw_data["WD_azimuth_deg"]
+                ],
+            }
+        )
+
+    def _parse_datetime(self, value: str) -> datetime.datetime:
+        return datetime.datetime.strptime(value, "%d.%m.%y %H:%M:%S.%f").replace(
+            tzinfo=datetime.timezone.utc
+        )
+
+    def _parse_value(self, value: str) -> float:
+        value = value.strip()
+        return float(value) if value else math.nan
+
+    def convert_rainfall_rate(self) -> None:
+        rainfall_rate = self.data["rainfall_rate"][:]
+        self.data["rainfall_rate"].data = rainfall_rate / (
+            1000 * 600
+        )  # mm/10min => m/s


 ATTRIBUTES = {
-    "air_temperature": MetaData(
-        long_name="Air temperature",
-        standard_name="air_temperature",
-        units="K",
-    ),
-    "air_pressure": MetaData(
-        long_name="Air pressure",
-        standard_name="air_pressure",
-        units="Pa",
-    ),
-    "rainfall_amount": MetaData(
-        long_name="Rainfall amount",
-        standard_name="thickness_of_rainfall_amount",
+    "visibility": MetaData(
+        long_name="Meteorological optical range (MOR) visibility",
         units="m",
-        comment="Cumulated precipitation since 00:00 UTC",
+        standard_name="visibility_in_air",
+        dimensions=("time",),
     ),
 }
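
For reference, a minimal sketch of how the reworked ws2nc entry point could be called after this change. The import path and the geolocation keys in site_meta (latitude, longitude, altitude) are assumptions based on how other CloudnetPy instrument converters are typically used; the file names are placeholders, and only the "name" key and the new signature (file sequence accepted, UUID returned) come from this diff:

    from cloudnetpy.instruments import ws2nc  # assumed re-export; otherwise import from cloudnetpy.instruments.weather_station

    # "name" selects the site-specific reader class (e.g. PalaiseauWS).
    site_meta = {"name": "Palaiseau", "latitude": 48.7, "longitude": 2.2, "altitude": 156}

    # Several daily ASCII files can now be passed at once; the return value is a UUID object.
    file_uuid = ws2nc(
        ["ws-20240601.txt", "ws-20240602.txt"],
        "palaiseau-ws.nc",
        site_meta,
        date="2024-06-01",
    )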