disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/data_transfer/download_data.py +145 -14
  5. disdrodb/l0/check_standards.py +15 -10
  6. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  7. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  8. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  9. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  10. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  11. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  12. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  13. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  14. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  15. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
  16. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
  17. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  18. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  19. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  20. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  21. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  22. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  23. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  24. disdrodb/l0/l0b_nc_processing.py +1 -1
  25. disdrodb/l0/l0b_processing.py +12 -10
  26. disdrodb/l0/manuals/SWS250.pdf +0 -0
  27. disdrodb/l0/manuals/VPF730.pdf +0 -0
  28. disdrodb/l0/manuals/VPF750.pdf +0 -0
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  30. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  31. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  32. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  33. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  34. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  35. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  36. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  37. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
  38. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
  39. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  42. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  43. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
  44. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
  45. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
  48. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
  49. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  50. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
  51. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  52. disdrodb/l0/standards.py +7 -4
  53. disdrodb/l0/template_tools.py +2 -2
  54. disdrodb/l1/encoding_attrs.py +30 -8
  55. disdrodb/l1/processing.py +6 -4
  56. disdrodb/l1/resampling.py +1 -1
  57. disdrodb/l1/routines.py +9 -7
  58. disdrodb/l2/empirical_dsd.py +100 -2
  59. disdrodb/l2/event.py +3 -3
  60. disdrodb/l2/processing.py +21 -12
  61. disdrodb/l2/processing_options.py +7 -7
  62. disdrodb/l2/routines.py +3 -3
  63. disdrodb/metadata/checks.py +15 -6
  64. disdrodb/metadata/manipulation.py +2 -2
  65. disdrodb/metadata/standards.py +83 -79
  66. disdrodb/metadata/writer.py +2 -2
  67. disdrodb/routines.py +246 -10
  68. disdrodb/scattering/routines.py +1 -1
  69. disdrodb/utils/dataframe.py +342 -0
  70. disdrodb/utils/directories.py +14 -2
  71. disdrodb/utils/xarray.py +83 -0
  72. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
  73. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
  74. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
  75. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
  76. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
  77. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,197 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for GID LPM sensors not measuring wind."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # -------------------------------------------------------------------------.
    # 1. Read the raw file: every telegram is loaded into one string column
    #    and is parsed manually further below.
    raw_columns = ["TO_BE_SPLITTED"]

    # Options forwarded to the text reader
    # - See https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Newline pattern as separator (presumably so that a telegram is never
        # split on read - the real ';' split is performed afterwards)
        "delimiter": "\\n",
        # Never promote the first column to the dataframe index
        "index_col": False,
        # Column names are passed explicitly, so the file has no header row
        "header": None,
        # Do not skip any leading row
        "skiprows": None,
        # Silently discard lines that cannot be parsed
        "on_bad_lines": "skip",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # Decompress on the fly based on the file extension (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings to be treated as missing values, on top of the
        # defaults already recognised by pandas ('NA', 'NaN', 'null', ...)
        "na_values": ["na", "", "error"],
    }

    raw = read_raw_text_file(
        filepath=filepath,
        column_names=raw_columns,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # -------------------------------------------------------------------------.
    # 2. Keep only the telegrams holding exactly 521 ';' delimiters
    n_delimiters = raw["TO_BE_SPLITTED"].str.count(";")
    raw = raw[n_delimiters == 521]

    # 3. Split the first 81 fields; everything that follows (raw drop number
    #    and checksum) stays together in the last column
    df = raw["TO_BE_SPLITTED"].str.split(";", expand=True, n=81)

    # 4. Name the columns
    leading_columns = [
        "id",
        "time",
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserve_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
    ]
    # Every particle counter is immediately followed by its
    # '<counter>_internal_data' companion field
    particle_counters = [
        "number_particles",
        "number_particles_min_speed",
        "number_particles_max_speed",
        "number_particles_min_diameter",
        "number_particles_no_hydrometeor",
        "number_particles_unknown_classification",
        *(f"number_particles_class_{class_id}" for class_id in range(1, 10)),
    ]
    particle_columns = [
        name for counter in particle_counters for name in (counter, f"{counter}_internal_data")
    ]
    df.columns = [*leading_columns, *particle_columns, "raw_drop_number"]

    # 5. Strip the trailing checksum from the raw drop number
    spectrum_and_checksum = df["raw_drop_number"].str.rsplit(";", n=1, expand=True)
    df["raw_drop_number"] = spectrum_and_checksum[0]

    # 6. Parse the timestamp (unparsable values become NaT)
    df["time"] = pd.to_datetime(df["time"], format="%d/%m/%Y %H.%M.%S", errors="coerce")

    # 7. Keep only rows with start identifier "00" and a raw drop number
    #    string of exactly 1759 characters
    has_valid_start = df["start_identifier"].astype(str) == "00"
    has_valid_spectrum = df["raw_drop_number"].astype(str).str.len() == 1759
    df = df[has_valid_start & has_valid_spectrum]

    # 8. Discard the columns not agreeing with DISDRODB L0 standards
    return df.drop(
        columns=[
            "id",
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
        ],
    )
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # -------------------------------------------------------------------------.
    # 1. Read the raw file as two ';'-separated fields: the timestamp and the
    #    comma-separated telegram that is parsed further below.
    raw_columns = ["time", "TO_BE_SPLITTED"]

    # Options forwarded to the text reader
    reader_kwargs = {
        # Field separator between timestamp and telegram
        "delimiter": ";",
        # Column names are passed explicitly, so no row is used as header
        "header": None,
        # Do not skip any leading row
        "skiprows": 0,
        # Ignore characters that cannot be decoded
        "encoding_errors": "ignore",
        # Never promote the first column to the dataframe index
        "index_col": False,
        # Silently discard lines that cannot be parsed
        "on_bad_lines": "skip",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # Decompress on the fly based on the file extension (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings to be treated as missing values, on top of the
        # defaults already recognised by pandas ('NA', 'NaN', 'null', ...)
        "na_values": ["na", "", "error", "NA", "-.-"],
    }

    raw = read_raw_text_file(
        filepath=filepath,
        column_names=raw_columns,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # -------------------------------------------------------------------------.
    # 2. Parse the timestamp (unparsable values become NaT)
    timestamps = pd.to_datetime(raw["time"], format="%Y%m%d%H%M%S", errors="coerce")

    # 3. Split the telegram into its first 9 fields; the remainder of the
    #    line is kept together as the raw drop number
    df = raw["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
    df.columns = [
        "station_name",
        "sensor_status",
        "sensor_temperature",
        "number_particles",
        "rainfall_rate_32bit",
        "reflectivity_16bit",
        "mor_visibility",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "raw_drop_number",
    ]

    # 4. Attach the parsed timestamps (aligned on the dataframe index)
    df["time"] = timestamps

    # 5. Discard the columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["station_name"])

    # 6. Keep only the rows whose weather_code_synop_4677 has 2 characters
    #    --> When a previous field is missing, this column holds '000' instead
    synop_4677_length = df["weather_code_synop_4677"].str.len()
    return df[synop_4677_length == 2]
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # -------------------------------------------------------------------------.
    # 1. Read the raw file: every record is loaded into one string column
    #    and is parsed manually further below.
    raw_columns = ["TO_SPLIT"]

    # Options forwarded to the text reader
    reader_kwargs = {
        # Newline pattern as separator (presumably so that a record is never
        # split on read - the real ';' split is performed afterwards)
        "delimiter": "\\n",
        # Column names are passed explicitly, so no row is used as header
        "header": None,
        # Do not skip any leading row
        "skiprows": 0,
        # Text encoding of the raw files
        "encoding": "ISO-8859-1",
        # Never promote the first column to the dataframe index
        "index_col": False,
        # Silently discard lines that cannot be parsed
        "on_bad_lines": "skip",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # Extra strings to be treated as missing values, on top of the
        # defaults already recognised by pandas ('NA', 'NaN', 'null', ...)
        "na_values": ["na", "error", "-.-", " NA"],
    }

    raw = read_raw_text_file(
        filepath=filepath,
        column_names=raw_columns,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # -------------------------------------------------------------------------.
    # 2. Discard records shorter than 97 characters
    #    (a record with an empty spectrum is 97 characters long)
    record_length = raw["TO_SPLIT"].str.len()
    raw = raw[record_length >= 97]

    # 3. Split the first 14 fields; the remainder of the record is kept
    #    together as the raw drop number
    df = raw["TO_SPLIT"].str.split(";", expand=True, n=14)
    df.columns = [
        "date",
        "time",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "weather_code_nws",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "raw_drop_number",
    ]

    # 4. Merge date and time into one datetime column (unparsable --> NaT)
    timestamp_text = df["date"] + "-" + df["time"]
    df["time"] = pd.to_datetime(timestamp_text, format="%Y/%m/%d-%H:%M:%S", errors="coerce")
    df = df.drop(columns=["date"])

    # 5. Clean the raw spectrum
    # - '<SPECTRUM>ZERO</SPECTRUM>' indicates that no drops were detected
    #   --> "" generates an array of zeros in L0B processing
    # - The remaining <SPECTRUM> and </SPECTRUM> tags are then stripped
    spectrum = df["raw_drop_number"]
    for tag in ("<SPECTRUM>ZERO</SPECTRUM>", "<SPECTRUM>", "</SPECTRUM>"):
        spectrum = spectrum.str.replace(tag, "")

    # - Add 0 before every ; that is not preceded by a digit
    #   Example: ';;1;;' --> '0;0;1;0;'
    df["raw_drop_number"] = spectrum.str.replace(r"(?<!\d);", "0;", regex=True)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -111,7 +111,7 @@ def reader(
111
111
  # Preprocess the raw spectrum
112
112
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
113
113
  # --> "" generates an array of zeros in L0B processing
114
- df["raw_drop_number"] = df["raw_drop_number"].replace("<SPECTRUM>ZERO</SPECTRUM>", "")
114
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
115
115
 
116
116
  # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
117
117
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
@@ -100,7 +100,7 @@ def reader(
100
100
  # Preprocess the raw spectrum
101
101
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
102
102
  # --> "" generates an array of zeros in L0B processing
103
- df["raw_drop_number"] = df["raw_drop_number"].replace("<SPECTRUM>ZERO</SPECTRUM>", "")
103
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
104
104
 
105
105
  # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
106
106
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """DISDRODB reader for the UL_FGG (Slovenia) PARSIVEL raw text files."""
19
+ import pandas as pd
20
+
21
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
+ from disdrodb.l0.l0a_processing import read_raw_text_file
23
+
24
+
25
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # -------------------------------------------------------------------------.
    # 1. Read the raw file: every record is loaded into one string column
    #    and is parsed manually further below.
    raw_columns = ["TO_PARSE"]

    # Options forwarded to the text reader
    reader_kwargs = {
        # Newline pattern as separator (presumably so that a record is never
        # split on read - the real ';' split is performed afterwards)
        "delimiter": "\\n",
        # Never promote the first column to the dataframe index
        "index_col": False,
        # Silently discard lines that cannot be parsed
        "on_bad_lines": "skip",
        # Do not skip any leading row
        "skiprows": 0,
        # Text encoding of the raw files
        "encoding": "latin",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # Decompress on the fly based on the file extension (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings to be treated as missing values, on top of the
        # defaults already recognised by pandas ('NA', 'NaN', 'null', ...)
        "na_values": ["na", "", "error"],
        # Column names are passed explicitly, so no row is used as header
        "header": None,
    }

    raw = read_raw_text_file(
        filepath=filepath,
        column_names=raw_columns,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # -------------------------------------------------------------------------.
    # 2. Split the first 14 fields; the remainder of the record is kept
    #    together as the raw drop number
    df = raw["TO_PARSE"].str.split(";", expand=True, n=14)
    df.columns = [
        "id",
        "time",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "reflectivity_32bit",
        "mor_visibility",
        "sensor_temperature",  # maybe
        "laser_amplitude",  # probably
        "number_particles",
        "sensor_status",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "error_code",
        "raw_drop_number",
    ]

    # 3. Parse the timestamp (unparsable values become NaT)
    df["time"] = pd.to_datetime(df["time"], format="%d/%m/%Y %H.%M.%S", errors="coerce")

    # 4. Clean the raw spectrum
    # - Remove every 'R;' occurrence
    spectrum = df["raw_drop_number"].str.replace("R;", "")
    # - Add 0 before every ; that is not preceded by a digit
    #   Example: ';;1;;' --> '0;0;1;0;'
    df["raw_drop_number"] = spectrum.str.replace(r"(?<!\d);", "0;", regex=True)

    # 5. Discard the columns not agreeing with DISDRODB L0 standards
    #    and return the dataframe adhering to DISDRODB L0 standards
    return df.drop(columns=["id"])