disdrodb 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. disdrodb/_version.py +2 -2
  2. disdrodb/accessor/methods.py +10 -3
  3. disdrodb/api/checks.py +1 -1
  4. disdrodb/api/io.py +6 -1
  5. disdrodb/constants.py +1 -1
  6. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  7. disdrodb/etc/products/L1/global.yaml +1 -1
  8. disdrodb/etc/products/L2E/global.yaml +1 -1
  9. disdrodb/etc/products/L2M/global.yaml +1 -1
  10. disdrodb/issue/checks.py +2 -2
  11. disdrodb/l0/check_configs.py +1 -1
  12. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  13. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  14. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  15. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  16. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  17. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  18. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  19. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  20. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  21. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  22. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  23. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  24. disdrodb/l0/l0a_processing.py +6 -2
  25. disdrodb/l0/l0b_processing.py +26 -19
  26. disdrodb/l0/l0c_processing.py +10 -0
  27. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  28. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  29. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  30. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  31. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  32. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  33. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  34. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  35. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  36. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  37. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  38. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  39. disdrodb/l0/readers/PARSIVEL/NASA/LPVEX.py +25 -13
  40. disdrodb/l0/readers/PARSIVEL/NASA/MC3E.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -1
  42. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  43. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  44. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  45. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/{NASA/GCPEX.py → NORWAY/UIB.py} +54 -29
  48. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +6 -3
  49. disdrodb/l0/readers/{PARSIVEL/NASA/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  50. disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +48 -21
  51. disdrodb/l0/readers/{PARSIVEL/NASA/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  52. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  53. disdrodb/l1/beard_model.py +45 -1
  54. disdrodb/l1/fall_velocity.py +1 -6
  55. disdrodb/l1/filters.py +2 -0
  56. disdrodb/l2/empirical_dsd.py +12 -8
  57. disdrodb/routines/l0.py +2 -2
  58. disdrodb/routines/options.py +2 -0
  59. disdrodb/scattering/axis_ratio.py +3 -0
  60. disdrodb/scattering/routines.py +1 -1
  61. disdrodb/summary/routines.py +63 -61
  62. disdrodb/utils/compression.py +4 -2
  63. disdrodb/utils/dask.py +31 -11
  64. disdrodb/utils/manipulations.py +7 -1
  65. disdrodb/viz/plots.py +5 -3
  66. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  67. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/RECORD +71 -54
  68. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  69. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  70. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  71. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for the MANCHESTER Withworth Meteorological Observatory LPM sensor."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Each physical line is read as a single field to be parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Delimiter: one record per physical line
        "delimiter": "\\n",
        # Avoid first column to become df index !!!
        "index_col": False,
        # File encoding
        "encoding": "latin",
        # Column names are passed explicitly, so no header row is expected
        "header": None,
        # No rows skipped at the beginning of the file
        "skiprows": None,
        # Skip malformed lines instead of raising
        "on_bad_lines": "skip",
        # Python engine (more feature-complete than the faster C engine)
        "engine": "python",
        # On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings to recognize as NA/NaN, on top of the pandas defaults
        # ('#N/A', 'N/A', 'NA', 'NULL', 'NaN', 'nan', 'null', ...)
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    dataframe = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # An empty file is an error
    if len(dataframe) == 0:
        raise ValueError(f"{filepath} is empty.")

    # Keep only rows with one of the two expected numbers of ';' separators
    separator_counts = dataframe["TO_PARSE"].str.count(";")
    dataframe = dataframe[separator_counts.isin([45, 522])]

    # Raise error if no data left
    if len(dataframe) == 0:
        raise ValueError(f"No valid data in {filepath}.")

    # Split by the ';' delimiter (up to the raw drop number field).
    # A dummy row with 82 fields (81 separators) guarantees that str.split
    # expands to the full set of columns even when every real row has only
    # 45 separators.
    dummy_row = ";".join(["DUMMY"] * 82)
    dataframe = pd.concat([dataframe, pd.DataFrame({"TO_PARSE": [dummy_row]})], ignore_index=True)
    dataframe = dataframe["TO_PARSE"].str.split(";", expand=True, n=81)
    # Discard the dummy row appended above (it is the last row)
    dataframe = dataframe.iloc[:-1, :]
    # Fill the columns left empty by the short (45-separator) records
    dataframe.loc[:, 46:50] = dataframe.loc[:, 46:50].fillna("NaN")
    dataframe.loc[:, 51:] = dataframe.loc[:, 51:].fillna("0")

    # Assign column names
    names = [
        "date",
        "time",
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal_status",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserved_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    dataframe.columns = names

    # Build the datetime "time" column from the 'date' and 'time' fields
    timestamps = dataframe["date"] + " " + dataframe["time"]
    dataframe["time"] = pd.to_datetime(timestamps, format="%d/%m/%Y %H:%M:%S", errors="coerce")

    # Strip the two trailing checksum fields from raw_drop_number
    dataframe["raw_drop_number"] = dataframe["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Keep only rows whose raw_drop_number has one of the expected lengths
    # (1 character for the '0'-filled short records, 1759 for full spectra)
    valid_lengths = dataframe["raw_drop_number"].astype(str).str.len().isin([1, 1759])
    dataframe = dataframe[valid_lengths]

    # Drop row if start_identifier different than 00
    # df["start_identifier"] = df["start_identifier"].astype(str).str[-2:]
    # df = df[df["start_identifier"] == "00"]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "date",
    ]
    return dataframe.drop(columns=columns_to_drop)
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for CHARLESTON experiment LPM sensors."""
20
+ import os
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
26
+ from disdrodb.l0.l0a_processing import read_raw_text_file
27
+
28
+
29
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Each physical line is read as a single field to be parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Delimiter: one record per physical line
        "delimiter": "\\n",
        # Avoid first column to become df index !!!
        "index_col": False,
        # File encoding
        "encoding": "ISO-8859-1",
        # Column names are passed explicitly, so no header row is expected
        "header": None,
        # No rows skipped at the beginning of the file
        "skiprows": None,
        # Skip malformed lines instead of raising
        "on_bad_lines": "skip",
        # Python engine (more feature-complete than the faster C engine)
        "engine": "python",
        # On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings to recognize as NA/NaN, on top of the pandas defaults
        # ('#N/A', 'N/A', 'NA', 'NULL', 'NaN', 'nan', 'null', ...)
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # An empty file is an error
    if len(df) == 0:
        raise ValueError(f"{filepath} is empty.")

    # Keep only rows with the expected number of ';' separators
    df = df[df["TO_PARSE"].str.count(";") == 520]

    # Raise error if no data left
    if len(df) == 0:
        raise ValueError(f"No valid data in {filepath}.")

    # Split by the ';' delimiter (up to the raw drop number field)
    df = df["TO_PARSE"].str.split(";", expand=True, n=79)

    # Assign column names
    names = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal_status",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserved_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = names

    # Strip the two trailing checksum fields from raw_drop_number
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Remove corrupted characters
    df = df.replace("°", "", regex=True)  # station N

    # Keep only rows where sensor_time matches the HH:MM:SS format
    df = df[df["sensor_time"].astype(str).str.match(r"^\d{2}:\d{2}:\d{2}$")]

    # Keep only rows whose spectrum is a ';'-separated list of 3-digit counts
    df = df[df["raw_drop_number"].astype(str).str.match(r"^(?:\d{3};)*\d{3};?$")]
    if len(df) == 0:
        raise ValueError("Spectra is corrupted")

    # Define datetime "time" column
    # - Derive the start time from the filename: <prefix>_<offset-seconds>_<doy>_<yy>
    filename = os.path.basename(filepath)
    _, delta_dt, doy, year = filename.split(".")[0].split("_")
    start_time = pd.to_datetime(f"{year}_{doy}", format="%y_%j") + pd.to_timedelta(int(delta_dt), unit="s")
    # - Compute per-row offsets from sensor_time, adding +24h to all rows
    #   following each midnight rollover (sensor_time resets to 00:00:00)
    dt = pd.to_timedelta(df["sensor_time"]).to_numpy().astype("m8[s]")
    rollover_indices = np.where(np.diff(dt) < np.timedelta64(0, "s"))[0]
    for idx in rollover_indices:  # no-op when there is no rollover
        dt[idx + 1 :] += np.timedelta64(24, "h")
    dt = dt - dt[0]
    # - Define measurement datetime
    df["time"] = start_time + dt

    # Keep only rows whose raw_drop_number has the expected full-spectrum length
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Drop columns not agreeing with DISDRODB L0 standards
    variables_to_drop = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
    ]
    return df.drop(columns=variables_to_drop)
@@ -88,31 +88,22 @@ def read_txt_file(file, filename, logger):
88
88
  df = df["TO_PARSE"].str.split(";", expand=True, n=43)
89
89
 
90
90
  # Assign column names
91
- column_names = [
92
- "id",
93
- "sample_interval",
94
- "weather_code_synop_4677_5min", # or "weather_code_synop_4680_5min",
91
+ names = [
92
+ "start_identifier",
93
+ "sensor_serial_number",
94
+ "weather_code_synop_4680_5min",
95
95
  "weather_code_metar_4678_5min",
96
96
  "precipitation_rate_5min",
97
- "weather_code_synop_4677", # or "weather_code_synop_4680",
97
+ "weather_code_synop_4680",
98
98
  "weather_code_metar_4678",
99
99
  "precipitation_rate",
100
100
  "precipitation_accumulated",
101
101
  "sensor_time",
102
- # "mor_visibility",
103
- # "reflectivity",
104
- # "quality_index",
105
- # "max_hail_diameter",
106
- # "laser_status",
107
- "dummy1",
108
- "dummy2",
109
- # "laser_temperature",
102
+ "temperature_interior",
103
+ "laser_temperature",
110
104
  "laser_current_average",
111
105
  "control_voltage",
112
106
  "optical_control_voltage_output",
113
- # "current_heating_house",
114
- # "current_heating_heads",
115
- # "current_heating_carriers",
116
107
  "number_particles",
117
108
  "number_particles_internal_data",
118
109
  "number_particles_min_speed",
@@ -123,29 +114,27 @@ def read_txt_file(file, filename, logger):
123
114
  "number_particles_min_diameter_internal_data",
124
115
  "number_particles_no_hydrometeor",
125
116
  "number_particles_no_hydrometeor_internal_data",
126
- # "number_particles_unknown_classification", # ????
127
- # "number_particles_unknown_classification_internal_data",
128
- "number_particles_class_1",
129
- "number_particles_class_1_internal_data",
130
- "number_particles_class_2",
131
- "number_particles_class_2_internal_data",
132
- "number_particles_class_3",
133
- "number_particles_class_3_internal_data",
134
- "number_particles_class_4",
135
- "number_particles_class_4_internal_data",
136
- "number_particles_class_5",
137
- "number_particles_class_5_internal_data",
138
- "number_particles_class_6",
139
- "number_particles_class_6_internal_data",
140
- "number_particles_class_7",
141
- "number_particles_class_7_internal_data",
142
- "number_particles_class_8",
143
- "number_particles_class_8_internal_data",
144
- "number_particles_class_9",
145
- "number_particles_class_9_internal_data",
117
+ "number_particles_unknown_classification",
118
+ "total_gross_volume_unknown_classification",
119
+ "number_particles_hail",
120
+ "total_gross_volume_hail",
121
+ "number_particles_solid_precipitation",
122
+ "total_gross_volume_solid_precipitation",
123
+ "number_particles_great_pellet",
124
+ "total_gross_volume_great_pellet",
125
+ "number_particles_small_pellet",
126
+ "total_gross_volume_small_pellet",
127
+ "number_particles_snowgrain",
128
+ "total_gross_volume_snowgrain",
129
+ "number_particles_rain",
130
+ "total_gross_volume_rain",
131
+ "number_particles_small_rain",
132
+ "total_gross_volume_small_rain",
133
+ "number_particles_drizzle",
134
+ "total_gross_volume_drizzle",
146
135
  "raw_drop_number",
147
136
  ]
148
- df.columns = column_names
137
+ df.columns = names
149
138
 
150
139
  # Deal with case if there are 61 timesteps
151
140
  # - Occurs sometimes when previous hourly file miss timesteps
@@ -163,7 +152,12 @@ def read_txt_file(file, filename, logger):
163
152
  start_time = pd.to_datetime(start_time_str, format="%Y%m%d%H")
164
153
 
165
154
  # - Define timedelta based on sensor_time
155
+ # --> Add +24h to subsequent times when time resets
166
156
  dt = pd.to_timedelta(df["sensor_time"] + ":00").to_numpy().astype("m8[s]")
157
+ rollover_indices = np.where(np.diff(dt) < np.timedelta64(0, "s"))[0]
158
+ if rollover_indices.size > 0:
159
+ for idx in rollover_indices:
160
+ dt[idx + 1 :] += np.timedelta64(24, "h")
167
161
  dt = dt - dt[0]
168
162
 
169
163
  # - Define approximate time
@@ -173,25 +167,15 @@ def read_txt_file(file, filename, logger):
173
167
  valid_rows = dt <= np.timedelta64(3540, "s")
174
168
  df = df[valid_rows]
175
169
 
176
- # Drop rows where sample interval is not 60 seconds
177
- df = df[df["sample_interval"] == "000060"]
178
-
179
170
  # Drop rows with invalid raw_drop_number
180
- # --> 440 value # 22x20
181
171
  # --> 400 here # 20x20
182
172
  df = df[df["raw_drop_number"].astype(str).str.len() == 1599]
183
173
 
184
- # Deal with old LPM version 20x20 spectrum
185
- # - Add 000 in first two velocity bins
186
- df["raw_drop_number"] = df["raw_drop_number"] + ";" + ";".join(["000"] * 40)
187
-
188
174
  # Drop columns not agreeing with DISDRODB L0 standards
189
175
  columns_to_drop = [
190
- "sample_interval",
176
+ "start_identifier",
177
+ "sensor_serial_number",
191
178
  "sensor_time",
192
- "dummy1",
193
- "dummy2",
194
- "id",
195
179
  ]
196
180
  df = df.drop(columns=columns_to_drop)
197
181
  return df