disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/info.py +3 -3
  6. disdrodb/api/io.py +48 -8
  7. disdrodb/api/path.py +116 -133
  8. disdrodb/api/search.py +12 -3
  9. disdrodb/cli/disdrodb_create_summary.py +103 -0
  10. disdrodb/cli/disdrodb_create_summary_station.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  13. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  17. disdrodb/data_transfer/download_data.py +123 -7
  18. disdrodb/issue/writer.py +2 -0
  19. disdrodb/l0/l0a_processing.py +10 -5
  20. disdrodb/l0/l0b_nc_processing.py +10 -6
  21. disdrodb/l0/l0b_processing.py +26 -61
  22. disdrodb/l0/l0c_processing.py +369 -251
  23. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  24. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  25. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  26. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  27. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  28. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  29. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  30. disdrodb/l1/fall_velocity.py +46 -0
  31. disdrodb/l1/processing.py +1 -1
  32. disdrodb/l2/processing.py +1 -1
  33. disdrodb/metadata/checks.py +132 -125
  34. disdrodb/psd/fitting.py +172 -205
  35. disdrodb/psd/models.py +1 -1
  36. disdrodb/routines/__init__.py +54 -0
  37. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  38. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  39. disdrodb/{l2/routines.py → routines/l2.py} +249 -462
  40. disdrodb/{routines.py → routines/wrappers.py} +95 -7
  41. disdrodb/scattering/axis_ratio.py +5 -1
  42. disdrodb/scattering/permittivity.py +18 -0
  43. disdrodb/scattering/routines.py +56 -36
  44. disdrodb/summary/routines.py +110 -34
  45. disdrodb/utils/archiving.py +434 -0
  46. disdrodb/utils/cli.py +5 -5
  47. disdrodb/utils/dask.py +62 -1
  48. disdrodb/utils/decorators.py +31 -0
  49. disdrodb/utils/encoding.py +5 -1
  50. disdrodb/{l2 → utils}/event.py +1 -66
  51. disdrodb/utils/logger.py +1 -1
  52. disdrodb/utils/manipulations.py +22 -12
  53. disdrodb/utils/routines.py +166 -0
  54. disdrodb/utils/time.py +3 -291
  55. disdrodb/utils/xarray.py +3 -0
  56. disdrodb/viz/plots.py +85 -14
  57. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
  58. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
  59. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
  60. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  61. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  62. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -30,6 +30,13 @@ def reader(
30
30
  #### Open the netCDF
31
31
  ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
32
32
 
33
+ ##------------------------------------------------------------------------.
34
+ # Check correct dimensions
35
+ if ds.sizes["particle_diameter"] != 22 or ds.sizes["particle_fall_velocity"] != 20:
36
+ raise ValueError(
37
+ f"Dimensions of {filepath} {ds.sizes} do not match the expected dimensions for LPM sensor.",
38
+ )
39
+
33
40
  ##------------------------------------------------------------------------.
34
41
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
35
42
  # Define dictionary mapping dataset variables to select and rename
@@ -32,6 +32,10 @@ def reader(
32
32
 
33
33
  ##------------------------------------------------------------------------.
34
34
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
35
+ # Correct for inverted raw_spectrum axis
36
+ ds["raw_spectrum"] = ds["raw_spectrum"].transpose("time", "particle_size", "raw_fall_velocity")
37
+ ds["raw_spectrum"].data = ds["raw_spectrum"].data.swapaxes(1, 2)
38
+
35
39
  # Define dictionary mapping dataset variables to select and rename
36
40
  dict_names = {
37
41
  ### Dimensions
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for UQAM OTT PARSIVEL2 sensor in netCDF format."""
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset
22
+
23
+
24
+ @is_documented_by(reader_generic_docstring)
25
+ def reader(
26
+ filepath,
27
+ logger=None,
28
+ ):
29
+ """Reader."""
30
+ ##------------------------------------------------------------------------.
31
+ #### Open the netCDF
32
+ ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
33
+
34
+ ##------------------------------------------------------------------------.
35
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
36
+ # Add time coordinate
37
+ ds["time"] = ds["time_as_string"].astype("M8[s]")
38
+ ds["time"].attrs.pop("comment", None)
39
+ ds["time"].attrs.pop("units", None)
40
+ ds = ds.set_coords("time")
41
+
42
+ # Define dictionary mapping dataset variables to select and rename
43
+ dict_names = {
44
+ ### Dimensions
45
+ "diameter_classes": "diameter_bin_center",
46
+ "velocity_classes": "velocity_bin_center",
47
+ ### Variables
48
+ "rainfall_rate_32bit": "rainfall_rate_32bit",
49
+ "weather_code_synop_4680": "weather_code_synop_4680",
50
+ "weather_code_synop_4677": "weather_code_synop_4677",
51
+ "weather_code_metar_4678": "weather_code_metar_4678",
52
+ "weather_code_nws": "weather_code_nws",
53
+ "reflectivity_32bit": "reflectivity_32bit",
54
+ "mor_visibility": "mor_visibility",
55
+ "laser_amplitude": "laser_amplitude",
56
+ "number_particles_validated": "number_particles",
57
+ "sensor_temperature": "sensor_temperature",
58
+ "error_code": "error_code",
59
+ "kinetic_energy": "rain_kinetic_energy",
60
+ "fieldV": "raw_drop_average_velocity",
61
+ "fieldN": "raw_drop_concentration",
62
+ "raw_data": "raw_drop_number",
63
+ }
64
+
65
+ # Rename dataset variables and columns and infill missing variables
66
+ ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="PARSIVEL2")
67
+
68
+ # Return the dataset adhering to DISDRODB L0B standards
69
+ return ds
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
+ @is_documented_by(reader_generic_docstring)
25
+ def reader(
26
+ filepath,
27
+ logger=None,
28
+ ):
29
+ """Reader."""
30
+ ##------------------------------------------------------------------------.
31
+ #### Define column names
32
+ column_names = ["TO_BE_PARSED"]
33
+
34
+ ##------------------------------------------------------------------------.
35
+ #### Define reader options
36
+ reader_kwargs = {}
37
+
38
+ # - Define delimiter
39
+ reader_kwargs["delimiter"] = "/\n"
40
+
41
+ # No row is used as header (column names are passed explicitly)
42
+ reader_kwargs["header"] = None
43
+
44
+ # Skip the first row
45
+ reader_kwargs["skiprows"] = 1
46
+
47
+ # - Avoid first column to become df index !!!
48
+ reader_kwargs["index_col"] = False
49
+
50
+ # - Define behaviour when encountering bad lines
51
+ reader_kwargs["on_bad_lines"] = "skip"
52
+
53
+ # - Define reader engine
54
+ # - C engine is faster
55
+ # - Python engine is more feature-complete
56
+ reader_kwargs["engine"] = "python"
57
+
58
+ # - Define on-the-fly decompression of on-disk data
59
+ # - Available: gzip, bz2, zip
60
+ reader_kwargs["compression"] = "infer"
61
+
62
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
63
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
64
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
65
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
66
+ reader_kwargs["na_values"] = ["na", "", "error", "NA"]
67
+
68
+ ##------------------------------------------------------------------------.
69
+ #### Read the data
70
+ df = read_raw_text_file(
71
+ filepath=filepath,
72
+ column_names=column_names,
73
+ reader_kwargs=reader_kwargs,
74
+ logger=logger,
75
+ )
76
+
77
+ ##------------------------------------------------------------------------.
78
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
79
+ # Remove rows with invalid length
80
+ # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]
81
+
82
+ # Count number of delimiters to select valid rows
83
+ df = df[df["TO_BE_PARSED"].str.count(";") == 1107]
84
+
85
+ # Split by ; delimiter
86
+ df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)
87
+
88
+ # Assign column names
89
+ names = [
90
+ "date",
91
+ "time",
92
+ "rainfall_rate_32bit",
93
+ "rainfall_accumulated_32bit",
94
+ "weather_code_synop_4680",
95
+ # "weather_code_synop_4677",
96
+ # "weather_code_metar_4678",
97
+ "reflectivity_32bit",
98
+ "mor_visibility",
99
+ "sample_interval",
100
+ "laser_amplitude",
101
+ "number_particles",
102
+ "sensor_temperature",
103
+ "sensor_serial_number",
104
+ "firmware_iop",
105
+ "sensor_heating_current",
106
+ "sensor_battery_voltage",
107
+ "sensor_status",
108
+ "station_name",
109
+ "rainfall_amount_absolute_32bit",
110
+ "error_code",
111
+ "ARRAY_TO_SPLIT",
112
+ ]
113
+
114
+ df.columns = names
115
+
116
+ # Define time in datetime format
117
+ time_str = df["date"] + " " + df["time"]
118
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
119
+
120
+ # Add raw array
121
+ df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
122
+ df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
123
+ df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]
124
+
125
+ # Drop columns not agreeing with DISDRODB L0 standards
126
+ columns_to_drop = [
127
+ "date",
128
+ "station_name",
129
+ "firmware_iop",
130
+ "ARRAY_TO_SPLIT",
131
+ "sensor_serial_number",
132
+ "sample_interval",
133
+ ]
134
+ df = df.drop(columns=columns_to_drop)
135
+
136
+ return df
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import os
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
24
+ from disdrodb.l0.l0a_processing import read_raw_text_file
25
+
26
+ TRACKS_DICT = {
27
+ "M203": ("2024-08-16 02:53:21", "2024-09-24 07:59:57"),
28
+ "M204": ("2024-09-27 08:00:00", "2024-10-20 07:59:57"),
29
+ "M205": ("2024-10-23 08:00:01", "2024-11-28 13:00:01"),
30
+ "M206": ("2024-12-01 08:00:02", "2024-12-30 07:59:57"),
31
+ "M207": ("2025-01-04 08:00:01", "2025-02-11 10:25:15"),
32
+ }
33
+
34
+
35
+ def get_track_for_dataframe(df):
36
+ """Retrieve the identifiers of the ship tracks overlapping the dataframe time range."""
37
+ df_start, df_end = df["time"].min(), df["time"].max()
38
+
39
+ overlaps = []
40
+ for key, (start, end) in TRACKS_DICT.items():
41
+ start, end = pd.to_datetime(start), pd.to_datetime(end)
42
+ # Check if the dataframe time range overlaps the track time range
43
+ if df_start <= end and df_end >= start:
44
+ overlaps.append(key)
45
+ return overlaps
46
+
47
+
48
+ def read_tracks_file(tracks_filepath):
49
+ """Read GPS master track file."""
50
+ df = pd.read_csv(
51
+ tracks_filepath,
52
+ names=["time", "latitude", "longitude", "flag"],
53
+ dtype={"time": str, "latitude": float, "longitude": float, "flag": str},
54
+ sep="\t", # tab-separated
55
+ skiprows=1, # skip the weird first line
56
+ engine="c", # speed up reading
57
+ )
58
+ df["time"] = pd.to_datetime(df["time"])
59
+ return df
60
+
61
+
62
+ def add_gps_coordinates(df, filepath):
63
+ """Add GPS coordinates to dataframe."""
64
+ # Retrieve useful tracks ids
65
+ tracks_ids = get_track_for_dataframe(df)
66
+
67
+ if len(tracks_ids) == 0:
68
+ df["latitude"] = np.nan
69
+ df["longitude"] = np.nan
70
+ return df
71
+
72
+ # Retrieve station base directory
73
+ station_base_dir = os.path.join(os.path.sep, *filepath.split(os.path.sep)[:-2])
74
+ # Define GPS files to read
75
+ tracks_filepaths = [os.path.join(station_base_dir, f"{tracks_id}_mastertrack.zip") for tracks_id in tracks_ids]
76
+ # Read GPS files
77
+ list_df_tracks = [read_tracks_file(fpath) for fpath in tracks_filepaths]
78
+ df_tracks = pd.concat(list_df_tracks)
79
+ df_tracks = df_tracks.dropna(subset=["time"])
80
+
81
+ # Ensure dataframes are sorted by time
82
+ df = df.sort_values("time")
83
+ df_tracks = df_tracks.sort_values("time")
84
+
85
+ # Remove bad flags
86
+ # df_tracks = df_tracks[df_tracks["flag"] == "1"]
87
+
88
+ # Remove flag column
89
+ df_tracks = df_tracks.drop(columns="flag")
90
+
91
+ # Add GPS coordinate to dataframe
92
+ df = pd.merge_asof(
93
+ df,
94
+ df_tracks,
95
+ on="time",
96
+ direction="nearest",
97
+ tolerance=pd.Timedelta("5min"),
98
+ )
99
+ return df
100
+
101
+
102
+ @is_documented_by(reader_generic_docstring)
103
+ def reader(
104
+ filepath,
105
+ logger=None,
106
+ ):
107
+ """Reader."""
108
+ ##------------------------------------------------------------------------.
109
+ #### Define column names
110
+ column_names = ["TO_BE_PARSED"]
111
+
112
+ ##------------------------------------------------------------------------.
113
+ #### Define reader options
114
+ reader_kwargs = {}
115
+
116
+ # - Define delimiter
117
+ reader_kwargs["delimiter"] = "/\n"
118
+
119
+ # No row is used as header (column names are passed explicitly)
120
+ reader_kwargs["header"] = None
121
+
122
+ # Skip the first row
123
+ reader_kwargs["skiprows"] = 1
124
+
125
+ # - Avoid first column to become df index !!!
126
+ reader_kwargs["index_col"] = False
127
+
128
+ # - Define behaviour when encountering bad lines
129
+ reader_kwargs["on_bad_lines"] = "skip"
130
+
131
+ # - Define reader engine
132
+ # - C engine is faster
133
+ # - Python engine is more feature-complete
134
+ reader_kwargs["engine"] = "python"
135
+
136
+ # - Define on-the-fly decompression of on-disk data
137
+ # - Available: gzip, bz2, zip
138
+ reader_kwargs["compression"] = "infer"
139
+
140
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
141
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
142
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
143
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
144
+ reader_kwargs["na_values"] = ["na", "", "error", "NA"]
145
+
146
+ # - Define encoding
147
+ reader_kwargs["encoding"] = "latin1"
148
+
149
+ ##------------------------------------------------------------------------.
150
+ #### Read the data
151
+ df = read_raw_text_file(
152
+ filepath=filepath,
153
+ column_names=column_names,
154
+ reader_kwargs=reader_kwargs,
155
+ logger=logger,
156
+ )
157
+
158
+ ##------------------------------------------------------------------------.
159
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
160
+ # Remove rows with invalid length
161
+ # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]
162
+
163
+ # Count number of delimiters to select valid rows
164
+ df = df[df["TO_BE_PARSED"].str.count(";") == 1107]
165
+
166
+ # Split by ; delimiter
167
+ df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)
168
+
169
+ # Assign column names
170
+ names = [
171
+ "date",
172
+ "time",
173
+ "rainfall_rate_32bit",
174
+ "rainfall_accumulated_32bit",
175
+ "weather_code_synop_4680",
176
+ # "weather_code_synop_4677",
177
+ # "weather_code_metar_4678",
178
+ "reflectivity_32bit",
179
+ "mor_visibility",
180
+ "sample_interval",
181
+ "laser_amplitude",
182
+ "number_particles",
183
+ "sensor_temperature",
184
+ "sensor_serial_number",
185
+ "firmware_iop",
186
+ "sensor_heating_current",
187
+ "sensor_battery_voltage",
188
+ "sensor_status",
189
+ "station_name",
190
+ "rainfall_amount_absolute_32bit",
191
+ "error_code",
192
+ "ARRAY_TO_SPLIT",
193
+ ]
194
+
195
+ df.columns = names
196
+
197
+ # Define time in datetime format
198
+ time_str = df["date"] + " " + df["time"]
199
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
200
+ df = df.dropna(subset=["time"])
201
+
202
+ # Add raw array
203
+ df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
204
+ df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
205
+ df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]
206
+
207
+ # Drop columns not agreeing with DISDRODB L0 standards
208
+ columns_to_drop = [
209
+ "date",
210
+ "station_name",
211
+ "firmware_iop",
212
+ "ARRAY_TO_SPLIT",
213
+ "sensor_serial_number",
214
+ "sample_interval",
215
+ ]
216
+ df = df.drop(columns=columns_to_drop)
217
+
218
+ # Add GPS coordinates
219
+ df = add_gps_coordinates(df, filepath=filepath)
220
+ return df
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for raw data from NASA APU stations.
19
+
20
+ The reader can read raw APU data from the following NASA campaigns:
21
+
22
+ - HYMEX
23
+ - IFLOODS
24
+ - IPHEX
25
+ - OLYMPEX
26
+ - ICEPOP
27
+ - IMPACTS
28
+ - GCPEX
29
+ - WFF
30
+
31
+ """
32
+
33
+ import pandas as pd
34
+
35
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
36
+ from disdrodb.l0.l0a_processing import read_raw_text_file
37
+
38
+
39
+ @is_documented_by(reader_generic_docstring)
40
+ def reader(
41
+ filepath,
42
+ logger=None,
43
+ ):
44
+ """Reader."""
45
+ ##------------------------------------------------------------------------.
46
+ #### Define column names
47
+ column_names = ["time", "TO_BE_SPLITTED"]
48
+
49
+ ##------------------------------------------------------------------------.
50
+ #### Define reader options
51
+ reader_kwargs = {}
52
+ # - Define delimiter
53
+ reader_kwargs["delimiter"] = ";"
54
+ # - No row is used as header (column names are passed explicitly)
55
+ reader_kwargs["header"] = None
56
+ reader_kwargs["skiprows"] = 0
57
+ # - Ignore encoding errors when decoding the file
58
+ reader_kwargs["encoding_errors"] = "ignore"
59
+ # - Avoid first column to become df index !!!
60
+ reader_kwargs["index_col"] = False
61
+ # - Define behaviour when encountering bad lines
62
+ reader_kwargs["on_bad_lines"] = "skip"
63
+ # - Define reader engine
64
+ # - C engine is faster
65
+ # - Python engine is more feature-complete
66
+ reader_kwargs["engine"] = "python"
67
+ # - Define on-the-fly decompression of on-disk data
68
+ # - Available: gzip, bz2, zip
69
+ reader_kwargs["compression"] = "infer"
70
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
71
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
72
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
73
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
74
+ reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
75
+
76
+ ##------------------------------------------------------------------------.
77
+ #### Read the data
78
+ df = read_raw_text_file(
79
+ filepath=filepath,
80
+ column_names=column_names,
81
+ reader_kwargs=reader_kwargs,
82
+ logger=logger,
83
+ )
84
+
85
+ ##------------------------------------------------------------------------.
86
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
87
+ # Convert time column to datetime
88
+ df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
89
+
90
+ # Split the 'TO_BE_SPLITTED' column
91
+ df = df["TO_BE_SPLITTED"].str.split(",", n=3, expand=True)
92
+
93
+ # Assign column names
94
+ names = [
95
+ "station_name",
96
+ "unknown",
97
+ "unknown2",
98
+ "raw_drop_number",
99
+ ]
100
+ df.columns = names
101
+
102
+ # Add the time column
103
+ df["time"] = df_time
104
+
105
+ # Drop columns not agreeing with DISDRODB L0 standards
106
+ df = df.drop(columns=["station_name", "unknown", "unknown2"])
107
+
108
+ # Return the dataframe adhering to DISDRODB L0 standards
109
+ return df
@@ -65,5 +65,8 @@ def reader(
65
65
  # Rename dataset variables and columns and infill missing variables
66
66
  ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="PARSIVEL2")
67
67
 
68
+ # Replace NaN flags
69
+ ds["raw_drop_number"] = ds["raw_drop_number"].where(ds["raw_drop_number"] < 65437) # dummy flag 65437.0
70
+
68
71
  # Return the dataset adhering to DISDRODB L0B standards
69
72
  return ds
@@ -195,6 +195,7 @@ def ensure_valid_coordinates(ds, default_altitude=0, default_latitude=0, default
195
195
  The dataset with invalid coordinates replaced by default values.
196
196
 
197
197
  """
198
+ # TODO raise error if not present
198
199
  invalid_altitude = np.logical_or(np.isnan(ds["altitude"]), ds["altitude"] == -9999)
199
200
  ds["altitude"] = ds["altitude"].where(~invalid_altitude, default_altitude)
200
201
 
@@ -250,6 +251,12 @@ def get_raindrop_fall_velocity(diameter, method, ds_env=None):
250
251
  raise ValueError(f"{method} is an invalid fall velocity method. Valid methods: {available_methods}.")
251
252
  # Copy diameter
252
253
  diameter = diameter.copy()
254
+ # Initialize ds_env if None
255
+ # if ds_env is None:
256
+ # ds_env = load_env_dataset(ds_env)
257
+
258
+ # TODO: wrapper for DISDRODB product !
259
+
253
260
  # Ensure valid altitude and geolocation (if missing set defaults)
254
261
  # - altitude required by Beard
255
262
  # - latitude required for gravity
@@ -258,3 +265,42 @@ def get_raindrop_fall_velocity(diameter, method, ds_env=None):
258
265
  func = dict_methods[method]
259
266
  fall_velocity = func(diameter, ds_env=ds_env) if method == "Beard1976" else func(diameter)
260
267
  return fall_velocity
268
+
269
+
270
+ def get_dataset_fall_velocity(ds, method="Brandes2002"):
271
+ """Compute the raindrop fall velocity for the diameter bins of the dataset.
272
+
273
+ Parameters
274
+ ----------
275
+ ds : xarray.Dataset
276
+ DISDRODB L0C dataset.
277
+ method : str, optional
278
+ Method to compute fall velocity. The default method is ``"Brandes2002"``.
279
+
280
+ Returns
281
+ -------
282
+ xarray.DataArray
283
+ Raindrop fall velocity at the dataset diameter bin centers (returned, not added to the dataset).
284
+ """
285
+ from disdrodb.constants import DIAMETER_DIMENSION
286
+ from disdrodb.l1_env.routines import load_env_dataset
287
+
288
+ # Check if diameter dimension exists
289
+ if DIAMETER_DIMENSION not in ds.dims:
290
+ raise ValueError(f"Diameter dimension '{DIAMETER_DIMENSION}' not found in dataset dimensions.")
291
+
292
+ # Retrieve diameter values (in mm)
293
+ diameter_bin_center = ds["diameter_bin_center"]
294
+
295
+ # Ensure valid altitude and geolocation (if missing set defaults)
296
+ # TODO: MOBILE CASE !
297
+ default_geolocation = {"altitude": 0, "latitude": 0, "longitude": 0}
298
+ dataset_coords = {key: ds[key] for key in default_geolocation if key in ds}
299
+ default_geolocation.update(dataset_coords)
300
+ ds = ds.assign_coords(default_geolocation)
301
+
302
+ # TODO: deal with ENV dataset
303
+ ds_env = load_env_dataset(ds)
304
+
305
+ fall_velocity = get_raindrop_fall_velocity(diameter_bin_center, method=method, ds_env=ds_env)
306
+ return fall_velocity
disdrodb/l1/processing.py CHANGED
@@ -83,7 +83,7 @@ def generate_l1(
83
83
  Returns
84
84
  -------
85
85
  xarray.Dataset
86
- DISRODB L1 dataset.
86
+ DISDRODB L1 dataset.
87
87
  """
88
88
  # Retrieve source attributes
89
89
  attrs = ds.attrs.copy()
disdrodb/l2/processing.py CHANGED
@@ -219,7 +219,7 @@ def generate_l2e(
219
219
  Returns
220
220
  -------
221
221
  xarray.Dataset
222
- DISRODB L2E dataset.
222
+ DISDRODB L2E dataset.
223
223
  """
224
224
  # Check and prepapre input dataset
225
225
  ds = check_l2e_input_dataset(ds)