disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,239 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Reader for the HYDROX PARSIVEL2 disdrometer located at Trafoi (Italy)."""
+ import os
+
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ def read_old_format(filepath, logger):
+     """Read the old data format."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_SPLIT"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Skip first row as columns names
+     reader_kwargs["header"] = None
+
+     # - Skip header
+     reader_kwargs["skiprows"] = 0
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "ISO-8859-1"
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     # reader_kwargs['compression'] = 'xz'
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove corrupted rows
+     df = df[df["TO_SPLIT"].str.count(";").isin([7, 1031])]
+
+     # Split into columns
+     df = df["TO_SPLIT"].str.split(";", expand=True, n=7)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "raw_drop_number",
+     ]
+     df.columns = names
+
+     # Add datetime time column
+     df["time"] = df["date"] + "-" + df["time"]
+     df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")
+     df = df.drop(columns=["date"])
+
+     # Correct for UTC time (from UTC+1)
+     df["time"] = df["time"] - pd.Timedelta(hours=1)
+
+     # Preprocess the raw spectrum
+     # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
+     #   --> "" generates an array of zeros in L0B processing
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
+
+     # Remove the <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
+
+     # Add a 0 before every ';' not preceded by a digit
+     # Example: ';;1;;' --> '0;0;1;0;'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
+
+     # Replace a trailing '999;' with '0'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"999;$", "0", regex=True)
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
+
+
+ def read_new_format(filepath, logger):
+     """Read the new data format."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_SPLIT"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Skip first row as columns names
+     reader_kwargs["header"] = None
+
+     # - Skip header
+     reader_kwargs["skiprows"] = 0
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "ISO-8859-1"
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     # reader_kwargs['compression'] = 'xz'
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove corrupted rows
+     df = df[df["TO_SPLIT"].str.count(";").isin([11, 1035])]
+
+     # Split into columns
+     df = df["TO_SPLIT"].str.split(";", expand=True, n=11)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "reflectivity_32bit",
+         "mor_visibility",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "sensor_heating_current",
+         "sensor_battery_voltage",
+         "raw_drop_number",
+     ]
+     df.columns = names
+
+     # Add datetime time column
+     df["time"] = df["date"] + "-" + df["time"]
+     df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")
+     df = df.drop(columns=["date"])
+
+     # Preprocess the raw spectrum
+     # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
+     #   --> "" generates an array of zeros in L0B processing
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
+
+     # Remove the <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
+
+     # Add a 0 before every ';' not preceded by a digit
+     # Example: ';;1;;' --> '0;0;1;0;'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
+
+     # Replace a trailing '999;' with '0'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"999;$", "0", regex=True)
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     date = int(os.path.basename(filepath)[-12:-4])
+     if date > 20140000:
+         return read_new_format(filepath, logger)
+     return read_old_format(filepath, logger)
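
A quick way to see what the raw-spectrum cleanup in the HYDROX reader above does is to run the same chain of str.replace calls on a couple of synthetic <SPECTRUM> strings (the sample values below are invented; only the replacement logic mirrors the reader): the ZERO sentinel collapses to an empty string, every ';' not preceded by a digit gains an explicit leading 0, and a trailing '999;' marker becomes '0'. The reader() at the end then simply dispatches on the date embedded in the filename, sending post-2014 files to the longer 12-field format.

import pandas as pd

# Invented spectrum strings, for illustration only
s = pd.Series(["<SPECTRUM>ZERO</SPECTRUM>", "<SPECTRUM>;;1;;2;999;</SPECTRUM>"])
s = s.str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")                  # no drops -> empty string
s = s.str.replace("<SPECTRUM>", "").str.replace("</SPECTRUM>", "")  # strip the XML-like tags
s = s.str.replace(r"(?<!\d);", "0;", regex=True)                    # ';;1;' -> '0;0;1;'
s = s.str.replace(r"999;$", "0", regex=True)                        # trailing '999;' -> '0'
print(s.tolist())  # ['', '0;0;1;0;2;0']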
@@ -0,0 +1,136 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_BE_PARSED"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "/\n"
+
+     # Skip first row as columns names
+     reader_kwargs["header"] = None
+
+     # Skip the first row
+     reader_kwargs["skiprows"] = 1
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     reader_kwargs["compression"] = "infer"
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "", "error", "NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove rows with invalid length
+     # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]
+
+     # Count the number of delimiters to select valid rows
+     df = df[df["TO_BE_PARSED"].str.count(";") == 1107]
+
+     # Split by the ; delimiter
+     df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "weather_code_synop_4680",
+         # "weather_code_synop_4677",
+         # "weather_code_metar_4678",
+         "reflectivity_32bit",
+         "mor_visibility",
+         "sample_interval",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "sensor_serial_number",
+         "firmware_iop",
+         "sensor_heating_current",
+         "sensor_battery_voltage",
+         "sensor_status",
+         "station_name",
+         "rainfall_amount_absolute_32bit",
+         "error_code",
+         "ARRAY_TO_SPLIT",
+     ]
+
+     df.columns = names
+
+     # Define time in datetime format
+     time_str = df["date"] + " " + df["time"]
+     df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
+
+     # Add the raw arrays
+     df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
+     df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
+     df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]
+
+     # Drop columns not agreeing with DISDRODB L0 standards
+     columns_to_drop = [
+         "date",
+         "station_name",
+         "firmware_iop",
+         "ARRAY_TO_SPLIT",
+         "sensor_serial_number",
+         "sample_interval",
+     ]
+     df = df.drop(columns=columns_to_drop)
+
+     return df
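
The fixed-offset slicing of ARRAY_TO_SPLIT in the reader above (this hunk's +136 line count matches disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py in the file list) is easiest to follow with a synthetic telegram tail. The offsets 224 and 448 are consistent with the 32 drop-concentration and 32 mean-velocity fields each occupying 7 characters (a 6-character value plus ';'), followed by the 32x32 raw counts; that 7-character field width is an assumption inferred from the offsets, not something stated in the diff.

import pandas as pd

field = " 0.000;"  # assumed 7-character array field: 6-character value + ';'
tail = field * 32 + field * 32 + "000;" * 1024  # concentrations, velocities, raw counts
df = pd.DataFrame({"ARRAY_TO_SPLIT": [tail]})

# Fixed-offset slicing, as in the reader above
df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]        # 32 * 7 characters
df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]  # next 32 * 7 characters
df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]               # remaining 32 x 32 counts
assert (df["raw_drop_concentration"].str.len() == 224).all()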
@@ -0,0 +1,220 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ import os
+
+ import numpy as np
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+ TRACKS_DICT = {
+     "M203": ("2024-08-16 02:53:21", "2024-09-24 07:59:57"),
+     "M204": ("2024-09-27 08:00:00", "2024-10-20 07:59:57"),
+     "M205": ("2024-10-23 08:00:01", "2024-11-28 13:00:01"),
+     "M206": ("2024-12-01 08:00:02", "2024-12-30 07:59:57"),
+     "M207": ("2025-01-04 08:00:01", "2025-02-11 10:25:15"),
+ }
+
+
+ def get_track_for_dataframe(df):
+     """Retrieve the ship track identifiers overlapping the dataframe time range."""
+     df_start, df_end = df["time"].min(), df["time"].max()
+
+     overlaps = []
+     for key, (start, end) in TRACKS_DICT.items():
+         start, end = pd.to_datetime(start), pd.to_datetime(end)
+         # Check if the df time range overlaps the track coverage
+         if df_start <= end and df_end >= start:
+             overlaps.append(key)
+     return overlaps
+
+
+ def read_tracks_file(tracks_filepath):
+     """Read a GPS master track file."""
+     df = pd.read_csv(
+         tracks_filepath,
+         names=["time", "latitude", "longitude", "flag"],
+         dtype={"time": str, "latitude": float, "longitude": float, "flag": str},
+         sep="\t",  # tab-separated
+         skiprows=1,  # skip the weird first line
+         engine="c",  # speed up reading
+     )
+     df["time"] = pd.to_datetime(df["time"])
+     return df
+
+
+ def add_gps_coordinates(df, filepath):
+     """Add GPS coordinates to the dataframe."""
+     # Retrieve the relevant track ids
+     tracks_ids = get_track_for_dataframe(df)
+
+     if len(tracks_ids) == 0:
+         df["latitude"] = np.nan
+         df["longitude"] = np.nan
+         return df
+
+     # Retrieve the station base directory
+     station_base_dir = os.path.join(os.path.sep, *filepath.split(os.path.sep)[:-2])
+     # Define the GPS files to read
+     tracks_filepaths = [os.path.join(station_base_dir, f"{tracks_id}_mastertrack.zip") for tracks_id in tracks_ids]
+     # Read the GPS files
+     list_df_tracks = [read_tracks_file(fpath) for fpath in tracks_filepaths]
+     df_tracks = pd.concat(list_df_tracks)
+     df_tracks = df_tracks.dropna(subset=["time"])
+
+     # Ensure the dataframes are sorted by time
+     df = df.sort_values("time")
+     df_tracks = df_tracks.sort_values("time")
+
+     # Remove bad flags
+     # df_tracks = df_tracks[df_tracks["flag"] == "1"]
+
+     # Remove the flag column
+     df_tracks = df_tracks.drop(columns="flag")
+
+     # Add the GPS coordinates to the dataframe
+     df = pd.merge_asof(
+         df,
+         df_tracks,
+         on="time",
+         direction="nearest",
+         tolerance=pd.Timedelta("5min"),
+     )
+     return df
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_BE_PARSED"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "/\n"
+
+     # Skip first row as columns names
+     reader_kwargs["header"] = None
+
+     # Skip the first row
+     reader_kwargs["skiprows"] = 1
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     reader_kwargs["compression"] = "infer"
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "", "error", "NA"]
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "latin1"
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove rows with invalid length
+     # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]
+
+     # Count the number of delimiters to select valid rows
+     df = df[df["TO_BE_PARSED"].str.count(";") == 1107]
+
+     # Split by the ; delimiter
+     df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "weather_code_synop_4680",
+         # "weather_code_synop_4677",
+         # "weather_code_metar_4678",
+         "reflectivity_32bit",
+         "mor_visibility",
+         "sample_interval",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "sensor_serial_number",
+         "firmware_iop",
+         "sensor_heating_current",
+         "sensor_battery_voltage",
+         "sensor_status",
+         "station_name",
+         "rainfall_amount_absolute_32bit",
+         "error_code",
+         "ARRAY_TO_SPLIT",
+     ]
+
+     df.columns = names
+
+     # Define time in datetime format
+     time_str = df["date"] + " " + df["time"]
+     df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
+     df = df.dropna(subset=["time"])
+
+     # Add the raw arrays
+     df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
+     df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
+     df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]
+
+     # Drop columns not agreeing with DISDRODB L0 standards
+     columns_to_drop = [
+         "date",
+         "station_name",
+         "firmware_iop",
+         "ARRAY_TO_SPLIT",
+         "sensor_serial_number",
+         "sample_interval",
+     ]
+     df = df.drop(columns=columns_to_drop)
+
+     # Add GPS coordinates
+     df = add_gps_coordinates(df, filepath=filepath)
+     return df
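
The GPS matching in the hunk above (220 added lines, matching disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py in the file list) hinges on pd.merge_asof with direction="nearest" and a 5-minute tolerance: each disdrometer timestamp takes the coordinates of the closest track fix, and rows farther than the tolerance from any fix keep NaN latitude/longitude. A self-contained sketch with invented timestamps and coordinates:

import numpy as np
import pandas as pd

# Invented data: disdrometer timestamps every 30 s, a coarser 1-minute GPS track
df = pd.DataFrame({"time": pd.date_range("2024-08-16 03:00", periods=5, freq="30s")})
df_tracks = pd.DataFrame(
    {
        "time": pd.date_range("2024-08-16 02:58", periods=10, freq="1min"),
        "latitude": np.linspace(13.16, 13.17, 10),
        "longitude": np.linspace(-59.43, -59.42, 10),
    }
)

# merge_asof requires both frames to be sorted by the join key
df = df.sort_values("time")
df_tracks = df_tracks.sort_values("time")

# Nearest GPS fix within 5 minutes; unmatched rows get NaN coordinates
merged = pd.merge_asof(df, df_tracks, on="time", direction="nearest", tolerance=pd.Timedelta("5min"))
print(merged)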