disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/data_transfer/download_data.py +145 -14
  5. disdrodb/l0/check_standards.py +15 -10
  6. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  7. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  8. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  9. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  10. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  11. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  12. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  13. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  14. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  15. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
  16. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
  17. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  18. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  19. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  20. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  21. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  22. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  23. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  24. disdrodb/l0/l0b_nc_processing.py +1 -1
  25. disdrodb/l0/l0b_processing.py +12 -10
  26. disdrodb/l0/manuals/SWS250.pdf +0 -0
  27. disdrodb/l0/manuals/VPF730.pdf +0 -0
  28. disdrodb/l0/manuals/VPF750.pdf +0 -0
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  30. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  31. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  32. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  33. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  34. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  35. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  36. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  37. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
  38. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
  39. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  42. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  43. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
  44. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
  45. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
  48. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
  49. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  50. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
  51. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  52. disdrodb/l0/standards.py +7 -4
  53. disdrodb/l0/template_tools.py +2 -2
  54. disdrodb/l1/encoding_attrs.py +30 -8
  55. disdrodb/l1/processing.py +6 -4
  56. disdrodb/l1/resampling.py +1 -1
  57. disdrodb/l1/routines.py +9 -7
  58. disdrodb/l2/empirical_dsd.py +100 -2
  59. disdrodb/l2/event.py +3 -3
  60. disdrodb/l2/processing.py +21 -12
  61. disdrodb/l2/processing_options.py +7 -7
  62. disdrodb/l2/routines.py +3 -3
  63. disdrodb/metadata/checks.py +15 -6
  64. disdrodb/metadata/manipulation.py +2 -2
  65. disdrodb/metadata/standards.py +83 -79
  66. disdrodb/metadata/writer.py +2 -2
  67. disdrodb/routines.py +246 -10
  68. disdrodb/scattering/routines.py +1 -1
  69. disdrodb/utils/dataframe.py +342 -0
  70. disdrodb/utils/directories.py +14 -2
  71. disdrodb/utils/xarray.py +83 -0
  72. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
  73. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
  74. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
  75. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
  76. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
  77. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for GID LPM sensors measuring also wind."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw LPM telegram file that also carries wind fields.

    Every line of the file is read as one string, validated by counting its
    ';' delimiters, and then split into the individual telegram fields.

    Parameters
    ----------
    filepath
        Path of the raw text file; forwarded to ``read_raw_text_file``.
    logger
        Optional logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per valid telegram, with a datetime ``time`` column and
        without the identifier, date/time, checksum and relative humidity
        columns.

    Raises
    ------
    ValueError
        If no line of the file has the expected number of ';' delimiters.
    """
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Each line is first read as a single string and split afterwards.
    column_names = ["TO_BE_SPLITTED"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # - Define delimiter (one line = one value)
        "delimiter": "\\n",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Column names are passed explicitly, so there is no header row
        "header": None,
        # - Number of rows to be skipped at the beginning of the file
        "skiprows": None,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Strings to recognize as NA/NaN in addition to the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Keep only rows with the expected number of delimiters:
    # 79 (leading fields) + 439 (inside raw_drop_number) + 5 (trailing fields) = 523
    df = df[df["TO_BE_SPLITTED"].str.count(";") == 523]

    # Fail early with an explicit message: splitting an empty column yields a
    # dataframe with no columns, and assigning the column names below would
    # otherwise raise an opaque "Length mismatch" ValueError.
    if len(df) == 0:
        raise ValueError(f"No valid rows in {filepath}.")

    # Split by ; delimiter (before raw drop number): n=79 gives 80 columns,
    # the last one holding the unsplit remainder of the line.
    df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)

    # Assign column names
    column_names = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserve_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "TO_BE_FURTHER_PROCESSED",
    ]
    df.columns = column_names

    # Extract the last variables remained in raw_drop_number:
    # splitting from the right (n=5) peels off the five trailing fields.
    df_parsed = df["TO_BE_FURTHER_PROCESSED"].str.rsplit(";", n=5, expand=True)
    df_parsed.columns = [
        "raw_drop_number",
        "air_temperature",
        "relative_humidity",
        "wind_speed",
        "wind_direction",
        "checksum",
    ]

    # Assign columns to the original dataframe
    df[df_parsed.columns] = df_parsed

    # Define datetime "time" column (unparsable values become NaT)
    df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")

    # Drop row if start_identifier different than 00
    df = df[df["start_identifier"].astype(str) == "00"]

    # Drop rows with invalid raw_drop_number
    # (1759 characters = 440 * 3 + 439; presumably 440 three-character
    #  values joined by ';' - TODO confirm against the sensor manual)
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "checksum",
        "relative_humidity",  # TO DROP? ALWAYS NOT AVAILABLE?
        "TO_BE_FURTHER_PROCESSED",
    ]
    df = df.drop(columns=columns_to_drop)

    return df
@@ -0,0 +1,225 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for KIT CHWALA LPM sensors not measuring wind."""
20
+ import os
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.l0.l0a_processing import read_raw_text_file
26
+
27
+
28
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw LPM telegram file whose timestamp is taken from the file name.

    Parameters
    ----------
    filepath
        Path of the raw text file. Its base name is also used to derive the
        ``time`` column.
    logger
        Optional logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per valid telegram, with a datetime ``time`` column and
        without the identifier and sensor date/time columns.

    Raises
    ------
    ValueError
        If no line of the file has the expected number of ';' delimiters.
    """

    def read_txt_file(file, filename):
        """Parse one text file; ``filename`` supplies the timestamp."""
        ##------------------------------------------------------------------------.
        #### - Define raw data headers
        # Each line is first read as a single string and split afterwards.
        column_names = ["TO_BE_SPLITTED"]

        ##------------------------------------------------------------------------.
        #### Define reader options
        # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
        reader_kwargs = {
            # - Define delimiter (one line = one value)
            "delimiter": "\\n",
            # - Avoid first column to become df index !!!
            "index_col": False,
            # - Column names are passed explicitly, so there is no header row
            "header": None,
            # - Number of rows to be skipped at the beginning of the file
            "skiprows": None,
            # - Define behaviour when encountering bad lines
            "on_bad_lines": "skip",
            # - Python engine is more feature-complete than the (faster) C engine
            "engine": "python",
            # - Define on-the-fly decompression of on-disk data (gzip, bz2, zip)
            "compression": "infer",
            # - Strings to recognize as NA/NaN in addition to the pandas defaults
            "na_values": ["na", "", "error"],
        }

        ##------------------------------------------------------------------------.
        #### Read the data
        df = read_raw_text_file(
            filepath=file,
            column_names=column_names,
            reader_kwargs=reader_kwargs,
            logger=logger,
        )

        ##------------------------------------------------------------------------.
        #### Adapt the dataframe to adhere to DISDRODB L0 standards
        # Keep only rows with the expected number of delimiters:
        # 79 (leading fields) + 439 (inside raw_drop_number) + 2 (trailing fields) = 520
        df = df[df["TO_BE_SPLITTED"].str.count(";") == 520]

        # Fail early with an explicit message: splitting an empty column yields
        # a dataframe with no columns, and assigning the column names below
        # would otherwise raise an opaque "Length mismatch" ValueError.
        if len(df) == 0:
            raise ValueError(f"No valid rows in {filename}.")

        # Split by ; delimiter (before raw drop number): n=79 gives 80 columns,
        # the last one holding the unsplit remainder of the line.
        df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)

        # Assign column names
        column_names = [
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
            "weather_code_synop_4677_5min",
            "weather_code_synop_4680_5min",
            "weather_code_metar_4678_5min",
            "precipitation_rate_5min",
            "weather_code_synop_4677",
            "weather_code_synop_4680",
            "weather_code_metar_4678",
            "precipitation_rate",
            "rainfall_rate",
            "snowfall_rate",
            "precipitation_accumulated",
            "mor_visibility",
            "reflectivity",
            "quality_index",
            "max_hail_diameter",
            "laser_status",
            "static_signal",
            "laser_temperature_analog_status",
            "laser_temperature_digital_status",
            "laser_current_analog_status",
            "laser_current_digital_status",
            "sensor_voltage_supply_status",
            "current_heating_pane_transmitter_head_status",
            "current_heating_pane_receiver_head_status",
            "temperature_sensor_status",
            "current_heating_voltage_supply_status",
            "current_heating_house_status",
            "current_heating_heads_status",
            "current_heating_carriers_status",
            "control_output_laser_power_status",
            "reserve_status",
            "temperature_interior",
            "laser_temperature",
            "laser_current_average",
            "control_voltage",
            "optical_control_voltage_output",
            "sensor_voltage_supply",
            "current_heating_pane_transmitter_head",
            "current_heating_pane_receiver_head",
            "temperature_ambient",
            "current_heating_voltage_supply",
            "current_heating_house",
            "current_heating_heads",
            "current_heating_carriers",
            "number_particles",
            "number_particles_internal_data",
            "number_particles_min_speed",
            "number_particles_min_speed_internal_data",
            "number_particles_max_speed",
            "number_particles_max_speed_internal_data",
            "number_particles_min_diameter",
            "number_particles_min_diameter_internal_data",
            "number_particles_no_hydrometeor",
            "number_particles_no_hydrometeor_internal_data",
            "number_particles_unknown_classification",
            "number_particles_unknown_classification_internal_data",
            "number_particles_class_1",
            "number_particles_class_1_internal_data",
            "number_particles_class_2",
            "number_particles_class_2_internal_data",
            "number_particles_class_3",
            "number_particles_class_3_internal_data",
            "number_particles_class_4",
            "number_particles_class_4_internal_data",
            "number_particles_class_5",
            "number_particles_class_5_internal_data",
            "number_particles_class_6",
            "number_particles_class_6_internal_data",
            "number_particles_class_7",
            "number_particles_class_7_internal_data",
            "number_particles_class_8",
            "number_particles_class_8_internal_data",
            "number_particles_class_9",
            "number_particles_class_9_internal_data",
            "raw_drop_number",
        ]
        df.columns = column_names

        # Remove checksum from raw_drop_number
        # (splitting from the right with n=2 drops the two trailing fields)
        df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

        # Define datetime "time" column from file name:
        # the 2nd and 3rd '_'-separated tokens are joined and parsed as
        # YYYYmmddHHMM; an unparsable name yields NaT for every row.
        datetime_str = "".join(filename.split("_")[1:3])
        df["time"] = pd.to_datetime(datetime_str, format="%Y%m%d%H%M", errors="coerce")

        # Drop row if start_identifier different than 00
        df = df[df["start_identifier"].astype(str) == "00"]

        # Drop rows with invalid raw_drop_number
        # (1759 characters = 440 * 3 + 439; presumably 440 three-character
        #  values joined by ';' - TODO confirm against the sensor manual)
        df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

        # Drop columns not agreeing with DISDRODB L0 standards
        columns_to_drop = [
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
        ]
        df = df.drop(columns=columns_to_drop)
        # Return the dataframe adhering to DISDRODB L0 standards
        return df

    #### TEMPORARY: to read just a single 1-min timestep
    df = read_txt_file(file=filepath, filename=os.path.basename(filepath))

    #### FUTURE: Iterate over all files (aka 1-min timesteps) in the daily zip archive
    # - Each file contain a single timestep !
    # list_df = []
    # with zipfile.ZipFile(filepath, "r") as zip_ref:
    #     filenames = sorted(zip_ref.namelist())
    #     for filename in filenames:
    #         if filename.endswith(".dat"):
    #             # Open file
    #             with zip_ref.open(filename) as file:
    #                 try:
    #                     df = read_txt_file(file=file, filename=filename)
    #                     list_df.append(df)
    #                 except Exception as e:
    #                     msg = f"An error occurred while reading {filename}. The error is: {e}."
    #                     log_error(logger=logger, msg=msg, verbose=True)

    # Concatenate all dataframes into a single one
    # df = pd.concat(list_df)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -0,0 +1,197 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for ARSO LPM sensors not measuring wind."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw LPM telegram file whose lines start with a timestamp field.

    Every line of the file is read as one string, validated by counting its
    ';' delimiters, and then split into the individual telegram fields. The
    first field supplies the ``time`` column.

    Parameters
    ----------
    filepath
        Path of the raw text file; forwarded to ``read_raw_text_file``.
    logger
        Optional logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per valid telegram, with a datetime ``time`` column and
        without the identifier and sensor date/time columns.

    Raises
    ------
    ValueError
        If no line of the file has the expected number of ';' delimiters.
    """
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Each line is first read as a single string and split afterwards.
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # - Define delimiter (one line = one value)
        "delimiter": "\\n",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Column names are passed explicitly, so there is no header row
        "header": None,
        # - Number of rows to be skipped at the beginning of the file
        "skiprows": None,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Strings to recognize as NA/NaN in addition to the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Keep only rows with the expected number of delimiters:
    # 80 (leading fields) + 439 (inside raw_drop_number) + 2 (trailing fields) = 521
    df = df[df["TO_PARSE"].str.count(";") == 521]

    # Fail early with an explicit message: splitting an empty column yields a
    # dataframe with no columns, and assigning the column names below would
    # otherwise raise an opaque "Length mismatch" ValueError.
    if len(df) == 0:
        raise ValueError(f"No valid rows in {filepath}.")

    # Split by ; delimiter (before raw drop number): n=80 gives 81 columns,
    # the last one holding the unsplit remainder of the line.
    df = df["TO_PARSE"].str.split(";", expand=True, n=80)

    # Assign column names
    column_names = [
        "time",
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserve_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = column_names

    # Remove checksum from raw_drop_number
    # (splitting from the right with n=2 drops the two trailing fields)
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Define datetime "time" column: pull the "dd/mm/YYYY HH:MM:SS" pattern out
    # of the first field; rows without a match or with an invalid date get NaT.
    time = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
    df["time"] = pd.to_datetime(time, format="%d/%m/%Y %H:%M:%S", errors="coerce")

    # Drop row if start_identifier different than 00
    # (only the last two characters of the field are compared)
    df["start_identifier"] = df["start_identifier"].astype(str).str[-2:]
    df = df[df["start_identifier"] == "00"]

    # Drop rows with invalid raw_drop_number
    # (1759 characters = 440 * 3 + 439; presumably 440 three-character
    #  values joined by ';' - TODO confirm against the sensor manual)
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
    ]
    df = df.drop(columns=columns_to_drop)
    return df