disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/data_transfer/download_data.py +145 -14
  5. disdrodb/l0/check_standards.py +15 -10
  6. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  7. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  8. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  9. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  10. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  11. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  12. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  13. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  14. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  15. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
  16. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
  17. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  18. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  19. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  20. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  21. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  22. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  23. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  24. disdrodb/l0/l0b_nc_processing.py +1 -1
  25. disdrodb/l0/l0b_processing.py +12 -10
  26. disdrodb/l0/manuals/SWS250.pdf +0 -0
  27. disdrodb/l0/manuals/VPF730.pdf +0 -0
  28. disdrodb/l0/manuals/VPF750.pdf +0 -0
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  30. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  31. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  32. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  33. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  34. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  35. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  36. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  37. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
  38. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
  39. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  42. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  43. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
  44. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
  45. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
  48. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
  49. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  50. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
  51. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  52. disdrodb/l0/standards.py +7 -4
  53. disdrodb/l0/template_tools.py +2 -2
  54. disdrodb/l1/encoding_attrs.py +30 -8
  55. disdrodb/l1/processing.py +6 -4
  56. disdrodb/l1/resampling.py +1 -1
  57. disdrodb/l1/routines.py +9 -7
  58. disdrodb/l2/empirical_dsd.py +100 -2
  59. disdrodb/l2/event.py +3 -3
  60. disdrodb/l2/processing.py +21 -12
  61. disdrodb/l2/processing_options.py +7 -7
  62. disdrodb/l2/routines.py +3 -3
  63. disdrodb/metadata/checks.py +15 -6
  64. disdrodb/metadata/manipulation.py +2 -2
  65. disdrodb/metadata/standards.py +83 -79
  66. disdrodb/metadata/writer.py +2 -2
  67. disdrodb/routines.py +246 -10
  68. disdrodb/scattering/routines.py +1 -1
  69. disdrodb/utils/dataframe.py +342 -0
  70. disdrodb/utils/directories.py +14 -2
  71. disdrodb/utils/xarray.py +83 -0
  72. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
  73. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
  74. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
  75. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
  76. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
  77. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for ENPC PWS100 raw text data."""
20
+ import zipfile
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.utils.logger import log_error, log_warning
26
+
27
+
28
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a daily zip archive of PWS100 one-timestep text files.

    Each ``.txt`` member of the zip archive holds a single timestep; the
    acquisition time is encoded in the member filename
    (``..._YYYY_MM_DD_HH_MM_SS.txt``). Corrupted, empty or error-flagged
    members are logged and skipped.
    """

    ##------------------------------------------------------------------------.
    #### Define function to read each txt file inside each daily zip file
    def read_txt_file(file, filename, logger):  # noqa PLR0911
        """Parse a single txt file within the daily zip file.

        Returns a single-row dataframe, or ``None`` when the file is empty,
        corrupted or reports a sensor error.
        """
        # Read file
        try:
            txt = file.readline().decode("utf-8")
        except Exception:
            # No placeholder needed: plain string instead of f-string (F541)
            log_warning(logger=logger, msg="(unknown) is corrupted", verbose=False)
            return None

        # Check file is not empty
        if txt == "":
            log_warning(logger=logger, msg="(unknown) is empty", verbose=False)
            return None

        if "PSU voltage too low" in txt or "volt" in txt:
            log_warning(logger=logger, msg="PSU voltage too low in (unknown)", verbose=False)
            return None

        if "Error - message" in txt:
            log_warning(logger=logger, msg="Error message in (unknown)", verbose=False)
            return None

        # Clean up the line
        # - Fill missing values with zeros so the whitespace split stays aligned
        txt = txt.replace(" 00 ", " 0 0 ")
        txt = txt.replace(" ", " 0 ")
        txt = txt[1:-8]

        # Split the cleaned line
        buf = txt.split(" ")

        # Helper to convert list of floats to comma-separated string
        def int_list_to_str(lst):
            return ",".join(f"{int(i)}" for i in lst)

        # Try to get the drop_size distribution:
        try:
            drop_size_distribution = int_list_to_str(buf[30:330])  # Drop size distribution (message field 42)
        except Exception:
            log_warning(logger=logger, msg="Corrupted drop_size_distribution field in (unknown)", verbose=False)
            return None

        # Try to get peak_to_pedestal_hist
        try:
            peak_to_pedestal_hist = int_list_to_str(buf[1499:1549])
        except Exception:
            log_warning(
                logger=logger,
                msg="Corrupted raw_drop_number or peak_to_pedestal_hist field in (unknown)",
                verbose=False,
            )
            return None

        # Parse fields
        data = {
            "mor_visibility": float(buf[2]),  # Visibility Range (message field 20)
            "weather_code_synop_4680": float(buf[3]),  # Present Weather Code (WMO) (message field 21)
            "weather_code_metar_4678": buf[4],  # Present Weather Code (METAR) (message field 22)
            "weather_code_nws": buf[5],  # Present Weather Code (NWS) (message field 23)
            "alarms": int_list_to_str(buf[6:22]),  # Alarms (message field (24))
            "sensor_status": buf[22],  # Fault status of PWS100 (message field 25)
            "air_temperature": float(buf[23]),  # Temperature (°C) (message field 30)
            "relative_humidity": float(buf[24]),  # Sampled relative humidity (%) (message field 30)
            "wetbulb_temperature": float(buf[25]),  # Average wetbulb temperature (°C)(message field 30)
            "air_temperature_max": float(buf[26]),  # Maximum temperature (°C)(message field 31)
            "air_temperature_min": float(buf[27]),  # Minimum temperature (°C)(message field 31)
            "rainfall_rate": float(buf[28]),  # Precipitation rate (mm/h)(message field 40)
            "rainfall_accumulated": float(buf[29]),  # Precipitation accumulation (mm/h)(message field 41)
            "drop_size_distribution": drop_size_distribution,  # Drop size distribution (message field 42)
            "average_drop_velocity": float(buf[330]),  # Average velocity (mm/s)(message field 43)
            "average_drop_size": float(buf[331]),  # Average size (mm/h)(message field 43)
            "type_distribution": int_list_to_str(buf[332:343]),  # Type distribution (message field 44)
            "raw_drop_number": int_list_to_str(buf[343:1499]),  # Size/velocity spectrum (34*34) (message field 47)
            "peak_to_pedestal_hist": (
                peak_to_pedestal_hist  # Peak to pedestal ratio distribution histogram (message field 48)
            ),
        }

        # Convert to single-row DataFrame
        df = pd.DataFrame([data])

        # Define datetime "time" column from filename
        # - The last 6 underscore-separated tokens are YYYY MM DD HH MM SS
        datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")

        # # Drop columns not agreeing with DISDRODB L0 standards
        # columns_to_drop = [
        #     "peak_to_pedestal_hist",
        #     "type_distribution",
        # ]
        # df = df.drop(columns=columns_to_drop)
        return df

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily zip archive
    # - Each file contain a single timestep !
    list_df = []
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        filenames = sorted(zip_ref.namelist())
        for filename in filenames:
            if filename.endswith(".txt"):
                # Open file
                with zip_ref.open(filename) as f:
                    try:
                        df = read_txt_file(file=f, filename=filename, logger=logger)
                        if df is not None:
                            list_df.append(df)
                    except Exception as e:
                        msg = f"An error occurred while reading (unknown). The error is: {e}."
                        log_error(logger=logger, msg=msg, verbose=True)

    # Concatenate all dataframes into a single one
    # - Raise an informative error instead of pandas' opaque
    #   "No objects to concatenate" when no member could be parsed
    if not list_df:
        raise ValueError(f"No valid txt files found in {filepath}.")
    df = pd.concat(list_df)

    # ---------------------------------------------------------------------.
    return df
@@ -0,0 +1,291 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB Reader for NOAA PSL RD80 stations."""
20
+ import os
21
+ import re
22
+
23
+ # Convert ParserWarning into an error
24
+ import warnings
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+
29
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
30
+ from disdrodb.l0.l0a_processing import read_raw_text_file
31
+
32
+ warnings.simplefilter("error", pd.errors.ParserWarning)
33
+
34
+
35
def read_new_format(filepath, logger):
    """Read the new NOAA PSL RD80 file format.

    The first file line is a header carrying the date/hour as ``YYJJJHH`` UTC;
    each data row reports a ``MM:SS:SSS-MM:SS:SSS`` time interval relative to
    that hour, 20 drop-count bins, and integral DSD parameters.

    Parameters
    ----------
    filepath : str
        Path to the raw text file.
    logger : logging.Logger
        Logger passed through to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe adhering to DISDRODB L0 standards.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = [
        "time_interval",
        "n1",
        "n2",
        "n3",
        "n4",
        "n5",
        "n6",
        "n7",
        "n8",
        "n9",
        "n10",
        "n11",
        "n12",
        "n13",
        "n14",
        "n15",
        "n16",
        "n17",
        "n18",
        "n19",
        "n20",
        "Dmax",
        "RI",
        "RA",
        "Wg",
        "Z",
        "EF",
        "N0",
        "slope",
        "NumDrops",
        "SumRA",
    ]
    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = r"\s+"
    # Skip header
    reader_kwargs["header"] = None
    # Skip first row as columns names
    reader_kwargs["skiprows"] = 2
    # - Define encoding
    reader_kwargs["encoding"] = "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Retrieve date and hour information from the first header line
    with open(filepath) as f:
        date_header = f.readline().strip()
    match = re.search(r":\s*(\d+)\s*UTC", date_header)
    if match:
        date_hour_str = match.group(1)
    else:
        raise ValueError("Date information not found.")

    # Replace -99.9900 values with NaN
    # NOTE(review): assumes these columns are read as strings — verify read_raw_text_file dtypes
    columns_to_replace = ["Dmax", "RI", "RA", "Wg", "Z", "EF", "N0", "slope"]
    df[columns_to_replace] = df[columns_to_replace].replace("-99.9900", "NaN")

    # Replace 'Z' -Inf with NaN
    df["Z"] = df["Z"].str.replace("-Inf", "NaN")

    # Deal with time interval column
    # - Split into start and end time
    df_time = df["time_interval"].str.split("-", expand=True)
    df_time.columns = ["start", "end"]

    # - Convert start/end MM:SS:SSS to timedelta (NaT when unparsable)
    def parse_time(t):
        try:
            minutes, seconds, milliseconds = map(int, t.split(":"))
            timedelta = pd.Timedelta(minutes=minutes, seconds=seconds, milliseconds=milliseconds)
        except Exception:
            timedelta = pd.Timedelta("NaT")
        return timedelta

    df_time["start"] = df_time["start"].apply(parse_time)
    df_time["end"] = df_time["end"].apply(parse_time)

    # Define time
    date_hour = pd.to_datetime(date_hour_str, format="%y%j%H")
    df["time"] = date_hour + df_time["start"]

    # Drop invalid timesteps
    # - np.isnan is not defined for datetime64 data (it raises TypeError);
    #   use the pandas notna() mask instead, computed once for both frames
    valid_time = df["time"].notna()
    df_time = df_time[valid_time]
    df = df[valid_time]

    # Compute sample_interval in seconds as integer
    # - Wrap end time if it's less than start time (i.e., crosses 60:00 boundary)
    #   --> 00:00 --> 60:00
    df_time.loc[df_time["end"] < df_time["start"], "end"] += pd.Timedelta(minutes=60)
    df["sample_interval"] = (df_time["end"] - df_time["start"]).dt.total_seconds().astype(int)

    # Create raw_drop_number column
    bin_columns = ["n" + str(i) for i in range(1, 21)]
    df_arr = df[bin_columns]
    df_raw_drop_number = df_arr.agg(";".join, axis=1)
    df["raw_drop_number"] = df_raw_drop_number

    # Remove bins columns
    df = df.drop(columns=bin_columns)

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "time_interval",
        "NumDrops",
        "SumRA",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
177
+
178
+
179
def read_old_format(filepath, logger):
    """Read old format."""
    ##------------------------------------------------------------------------.
    #### Define column names
    # - 20 drop-count bins followed by the integral DSD parameters
    bin_columns = [f"n{i}" for i in range(1, 21)]
    column_names = [
        "date",
        "time",
        *bin_columns,
        "Dmax",
        "RI",
        "RA",
        "Wg",
        "Z",
        "EF",
        "N0",
        "slope",
    ]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # Delimiter: any run of whitespace
        "delimiter": r"\s+",
        # No header row within the data
        "header": None,
        # Skip first row as columns names
        "skiprows": 1,
        # File encoding
        "encoding": "ISO-8859-1",
        # Avoid first column to become df index !!!
        "index_col": False,
        # Behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # Reader engine (python engine is more feature-complete than the faster C engine)
        "engine": "python",
        # On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # Strings to recognize as NA/NaN, in addition to the pandas defaults
        # ('#N/A', '-NaN', 'NULL', 'nan', ...)
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards

    # Replace -99.9900 sentinel values with NaN
    sentinel_columns = ["Dmax", "RI", "RA", "Wg", "Z", "EF", "N0", "slope"]
    df[sentinel_columns] = df[sentinel_columns].replace("-99.9900", "NaN")

    # Replace 'Z' -Inf with NaN
    df["Z"] = df["Z"].str.replace("-Inf", "NaN")

    # Define 'time' datetime column by combining the date and time fields
    df["time"] = pd.to_datetime(
        df["date"].astype(str) + " " + df["time"].astype(str),
        format="%Y/%m/%d %H:%M:%S",
        errors="coerce",
    )
    df = df.drop(columns=["date"])

    # Create raw_drop_number column by joining the bin counts with ';'
    df["raw_drop_number"] = df[bin_columns].agg(";".join, axis=1)

    # Remove bins columns
    df = df.drop(columns=bin_columns)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
278
+
279
+
280
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # The character right after the 3-letter station code discriminates the
    # two file layouts:
    # - a dash means the old format  (e.g. czc-050101-0052.txt)
    # - otherwise it is the new one  (e.g. czc2201220b20.txt)
    is_old_format = os.path.basename(filepath)[3] == "-"
    read_function = read_old_format if is_old_format else read_new_format
    return read_function(filepath, logger=logger)
@@ -50,7 +50,7 @@ def reader(
50
50
  sensor_name = "LPM" # [SPECIFY HERE THE SENSOR FOR WHICH THE READER IS DESIGNED]
51
51
  ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name=sensor_name)
52
52
 
53
- # Replace occureence of NaN flags with np.nan
53
+ # Replace occurrence of NaN flags with np.nan
54
54
  # - Define a dictionary specifying the value(s) of NaN flags for each variable
55
55
  # - The code here below is just an example that requires to be adapted !
56
56
  # - This step might not be required with your data !
disdrodb/l0/standards.py CHANGED
@@ -51,8 +51,9 @@ def get_sensor_logged_variables(sensor_name: str) -> list:
51
51
  def allowed_l0_variables(sensor_name: str) -> list:
52
52
  """Get the list of allowed L0 variables for a given sensor."""
53
53
  sensor_variables = list(get_l0a_dtype(sensor_name))
54
- allowed_variables = [*sensor_variables, "time", "latitude", "longitude", "altitude"]
55
- # TODO: add air_temperature, relative_humidity, wind_speed, wind_direction
54
+ weather_variables = ["air_temperature", "relative_humidity", "wind_speed", "wind_direction"]
55
+ allowed_variables = [*sensor_variables, *weather_variables, "time", "latitude", "longitude", "altitude"]
56
+ allowed_variables = sorted(np.unique(allowed_variables).tolist())
56
57
  return allowed_variables
57
58
 
58
59
 
@@ -659,9 +660,11 @@ def get_raw_array_dims_order(sensor_name: str) -> dict:
659
660
 
660
661
  Examples
661
662
  --------
662
- OTT Parsivel spectrum [v1d1 ... v1d32, v2d1, ..., v2d32]
663
+ OTT Parsivel spectrum [d1v1 ... d32v1, d1v2, ..., d32v2] (diameter increases first)
663
664
  --> dimension_order = ["velocity_bin_center", "diameter_bin_center"]
664
- Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2]
665
+ Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2] (velocity increases first)
666
+ --> dimension_order = ["diameter_bin_center", "velocity_bin_center"]
667
+ PWS 100 spectrum [d1v1 ... d1v34, d2v1, ..., d2v34] (velocity increases first)
665
668
  --> dimension_order = ["diameter_bin_center", "velocity_bin_center"]
666
669
 
667
670
  Parameters
@@ -500,7 +500,7 @@ def _search_possible_columns(string: str, sensor_name: str) -> list:
500
500
  #### Infer column names and checks validity
501
501
 
502
502
 
503
- def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 1):
503
+ def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 0):
504
504
  """Try to guess the dataframe columns names based on string characteristics.
505
505
 
506
506
  Parameters
@@ -511,7 +511,7 @@ def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 1):
511
511
  name of the sensor.
512
512
  row_idx : int, optional
513
513
  The row index of the dataframe to use to infer the column names.
514
- The default row index is 1.
514
+ The default row index is 0.
515
515
 
516
516
  Returns
517
517
  -------
@@ -51,21 +51,36 @@ def get_attrs_dict():
51
51
  "long_name": "Measured average drop fall velocity",
52
52
  "units": "m s-1",
53
53
  },
54
- "n_drops_selected": {
54
+ "N": {
55
55
  "description": "Total number of selected drops",
56
56
  "long_name": "Total number of selected drops",
57
57
  "units": "",
58
58
  },
59
- "n_drops_discarded": {
59
+ "Nremoved": {
60
60
  "description": "Total number of discarded drops",
61
61
  "long_name": "Total number of discarded drops",
62
62
  "units": "",
63
63
  },
64
- "n_bins_with_drops": {
64
+ "Nbins": {
65
65
  "description": "Number of diameter bins with drops",
66
66
  "long_name": "Number of diameter bins with drops",
67
67
  "units": "",
68
68
  },
69
+ "Nbins_missing": {
70
+ "description": "Number of diameter bins with no drops",
71
+ "long_name": "Number of diameter bins with no drops",
72
+ "units": "",
73
+ },
74
+ "Nbins_missing_fraction": {
75
+ "description": "Fraction of diameter bins with no drops",
76
+ "long_name": "Fraction of diameter bins with no drops",
77
+ "units": "",
78
+ },
79
+ "Nbins_missing_consecutive": {
80
+ "description": "Maximum number of consecutive diameter bins with no drops",
81
+ "long_name": "Maximum number of consecutive diameter bins with no drops",
82
+ "units": "",
83
+ },
69
84
  #### L2
70
85
  "drop_number_concentration": {
71
86
  "description": "Number concentration of drops per diameter class per unit volume",
@@ -97,13 +112,20 @@ def get_attrs_dict():
97
112
  "units": "J",
98
113
  "long_name": "Maximum Drop Kinetic Energy",
99
114
  },
100
- "E": {
115
+ "TKE": {
116
+ "description": "Total Kinetic Energy",
117
+ "standard_name": "",
118
+ "units": "J m-2",
119
+ "long_name": "Total Kinetic Energy",
120
+ },
121
+ "KED": {
101
122
  "description": "Kinetic energy per unit rainfall depth",
102
123
  "standard_name": "",
103
124
  "units": "J m-2 mm-1",
104
125
  "long_name": "Rainfall Kinetic Energy",
105
126
  },
106
- "KE": {
127
+ "KEF": {
128
+ "description": "Kinetic energy per unit time",
107
129
  "standard_name": "",
108
130
  "units": "J m-2 h-1",
109
131
  "long_name": "Kinetic Energy Density Flux",
@@ -436,7 +458,7 @@ def get_encoding_dict():
436
458
  "contiguous": False,
437
459
  "_FillValue": 4294967295,
438
460
  },
439
- "n_drops_selected": {
461
+ "N": {
440
462
  "dtype": "uint32",
441
463
  "zlib": True,
442
464
  "complevel": 3,
@@ -445,7 +467,7 @@ def get_encoding_dict():
445
467
  "contiguous": False,
446
468
  "_FillValue": 4294967295,
447
469
  },
448
- "n_drops_discarded": {
470
+ "Nremoved": {
449
471
  "dtype": "uint32",
450
472
  "zlib": True,
451
473
  "complevel": 3,
@@ -454,7 +476,7 @@ def get_encoding_dict():
454
476
  "contiguous": False,
455
477
  "_FillValue": 4294967295,
456
478
  },
457
- "n_bins_with_drops": {
479
+ "Nbins": {
458
480
  "dtype": "uint8",
459
481
  "_FillValue": 255,
460
482
  "zlib": True,
disdrodb/l1/processing.py CHANGED
@@ -26,7 +26,7 @@ from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filt
26
26
  from disdrodb.l1.resampling import add_sample_interval
27
27
  from disdrodb.l1_env.routines import load_env_dataset
28
28
  from disdrodb.l2.empirical_dsd import ( # TODO: maybe move out of L2
29
- count_bins_with_drops,
29
+ compute_qc_bins_metrics,
30
30
  get_min_max_diameter,
31
31
  )
32
32
  from disdrodb.utils.attrs import set_attrs
@@ -172,9 +172,11 @@ def generate_l1(
172
172
  # Add drop statistics
173
173
  ds_l1["Dmin"] = min_drop_diameter
174
174
  ds_l1["Dmax"] = max_drop_diameter
175
- ds_l1["n_drops_selected"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
176
- ds_l1["n_drops_discarded"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["n_drops_selected"]
177
- ds_l1["n_bins_with_drops"] = count_bins_with_drops(ds_l1)
175
+ ds_l1["N"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
176
+ ds_l1["Nremoved"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["N"]
177
+
178
+ # Add bins statistics
179
+ ds_l1.update(compute_qc_bins_metrics(ds_l1))
178
180
 
179
181
  # -------------------------------------------------------------------------------------------
180
182
  # Add quality flags
disdrodb/l1/resampling.py CHANGED
@@ -141,7 +141,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
141
141
 
142
142
  # Retrieve variables to average/sum
143
143
  var_to_average = ["fall_velocity"]
144
- var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "n_drops_selected", "n_drops_discarded"]
144
+ var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
145
145
  var_to_min = ["Dmin"]
146
146
  var_to_max = ["Dmax"]
147
147
 
disdrodb/l1/routines.py CHANGED
@@ -61,21 +61,23 @@ def get_l1_options():
61
61
  # - TODO: as function of sensor name
62
62
 
63
63
  # minimum_diameter
64
- # --> PARSIVEL: 0.2495
65
- # --> RD80: 0.313
66
- # --> LPM: 0.125 (we currently discard first bin with this setting)
64
+ # --> PWS100: 0 (0.05)
65
+ # --> PARSIVEL: 0.2495 (0.312)
66
+ # --> RD80: 0.313 (0.359)
67
+ # --> LPM: 0.125 (0.1875) (we currently discard first bin with default settings !)
67
68
 
68
69
  # maximum_diameter
69
- # LPM: 8 mm
70
- # RD80: 5.6 mm
71
- # OTT: 26 mm
70
+ # LPM: 9 (10) mm
71
+ # RD80: 5.373 (5.6) mm
72
+ # OTT: 24.5 (26) mm
73
+ # PWS100: 27.2 (28.8) mm
72
74
 
73
75
  l1_options = {
74
76
  # Fall velocity option
75
77
  "fall_velocity_method": "Beard1976",
76
78
  # Diameter-Velocity Filtering Options
77
79
  "minimum_diameter": 0.2495, # OTT PARSIVEL first two bin no data !
78
- "maximum_diameter": 8,
80
+ "maximum_diameter": 10,
79
81
  "minimum_velocity": 0,
80
82
  "maximum_velocity": 12,
81
83
  "above_velocity_fraction": 0.5,