disdrodb 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/l0/check_standards.py +15 -10
  5. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  6. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  7. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  8. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  9. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  10. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  11. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +4 -4
  12. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +10 -10
  13. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  14. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  15. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  16. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  17. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  18. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  19. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  20. disdrodb/l0/l0b_nc_processing.py +1 -1
  21. disdrodb/l0/l0b_processing.py +12 -10
  22. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  23. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  24. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  25. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  26. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  27. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  28. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  29. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  30. disdrodb/l0/readers/PARSIVEL/KIT/BURKINA_FASO.py +133 -0
  31. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  32. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  33. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  34. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  35. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  36. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  37. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  38. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +274 -0
  39. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  40. disdrodb/l0/standards.py +7 -4
  41. disdrodb/l0/template_tools.py +2 -2
  42. disdrodb/l1/encoding_attrs.py +21 -6
  43. disdrodb/l1/processing.py +6 -4
  44. disdrodb/l1/resampling.py +1 -1
  45. disdrodb/l1/routines.py +2 -1
  46. disdrodb/l2/empirical_dsd.py +100 -2
  47. disdrodb/l2/event.py +3 -3
  48. disdrodb/l2/processing.py +21 -12
  49. disdrodb/l2/processing_options.py +7 -7
  50. disdrodb/l2/routines.py +3 -3
  51. disdrodb/metadata/checks.py +15 -6
  52. disdrodb/metadata/manipulation.py +2 -2
  53. disdrodb/metadata/standards.py +83 -79
  54. disdrodb/metadata/writer.py +2 -2
  55. disdrodb/routines.py +246 -10
  56. disdrodb/scattering/routines.py +1 -1
  57. disdrodb/utils/dataframe.py +342 -0
  58. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/METADATA +34 -61
  59. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/RECORD +63 -47
  60. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/WHEEL +1 -1
  61. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/entry_points.txt +3 -3
  62. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/licenses/LICENSE +0 -0
  63. {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for ENPC PARSIVEL2 raw text data."""
20
+ import zipfile
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
26
+ from disdrodb.l0.l0a_processing import read_raw_text_file
27
+ from disdrodb.utils.logger import log_error
28
+
29
+
30
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a daily ENPC PARSIVEL2 zip archive into a single dataframe.

    Every ``.txt`` member of the archive is parsed independently. A member
    that cannot be parsed is reported through ``log_error`` and skipped, so
    one corrupted file does not discard the whole day.

    Parameters
    ----------
    filepath : str
        Path of the daily zip archive.
    logger : optional
        Logger forwarded to ``read_raw_text_file`` and ``log_error``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the dataframes parsed from each ``.txt`` member.

    Raises
    ------
    ValueError
        If no ``.txt`` member of the archive could be parsed.
    """
    ##------------------------------------------------------------------------.
    #### Define lookup tables shared by all files of the archive
    # - Built once here instead of once per archive member
    # - Valid telegram field IDs: "00", "01", ..., "93"
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")

    # - Mapping from telegram field ID to output column name
    # - IDs left commented out are intentionally not kept in the output
    column_dict = {
        "01": "rainfall_rate_32bit",
        "02": "rainfall_accumulated_32bit",
        "03": "weather_code_synop_4680",
        "04": "weather_code_synop_4677",
        "05": "weather_code_metar_4678",
        "06": "weather_code_nws",
        "07": "reflectivity_32bit",
        "08": "mor_visibility",
        "09": "sample_interval",
        "10": "laser_amplitude",
        "11": "number_particles",
        "12": "sensor_temperature",
        # "13": "sensor_serial_number",
        # "14": "firmware_iop",
        # "15": "firmware_dsp",
        "16": "sensor_heating_current",
        "17": "sensor_battery_voltage",
        "18": "sensor_status",
        # "19": "start_time",
        # "20": "sensor_time",
        # "21": "sensor_date",
        # "22": "station_name",
        # "23": "station_number",
        "24": "rainfall_amount_absolute_32bit",
        "25": "error_code",
        "26": "sensor_temperature_pcb",
        "27": "sensor_temperature_receiver",
        "28": "sensor_temperature_trasmitter",
        "30": "rainfall_rate_16_bit_30",
        "31": "rainfall_rate_16_bit_1200",
        "32": "rainfall_accumulated_16bit",
        "34": "rain_kinetic_energy",
        "35": "snowfall_rate",
        "90": "raw_drop_concentration",
        "91": "raw_drop_average_velocity",
        "93": "raw_drop_number",
    }

    ##------------------------------------------------------------------------.
    #### Define function to read each txt file inside each daily zip file
    def read_txt_file(file, filename):
        """Parse a single txt file within the daily zip file.

        ``file`` is the opened archive member and ``filename`` its name; the
        last six ``_``-separated tokens of the name (without ``.txt``) are
        parsed as year, month, day, hour, minute and second.
        """
        ##--------------------------------------------------------------------.
        #### Define column names
        # - Each raw line is read as one string and split afterwards
        column_names = ["TO_PARSE"]

        ##--------------------------------------------------------------------.
        #### Define reader options
        reader_kwargs = {
            # - Define delimiter
            "delimiter": "\\n",
            # - Define encoding
            "encoding": "latin",  # "ISO-8859-1"
            # - Avoid first column to become df index !!!
            "index_col": False,
            # - Define behaviour when encountering bad lines
            "on_bad_lines": "skip",
            # - Define reader engine
            #   - C engine is faster
            #   - Python engine is more feature-complete
            "engine": "python",
            # - Define on-the-fly decompression of on-disk data
            #   - Available: gzip, bz2, zip
            "compression": "infer",
            # - Strings to recognize as NA/NaN and replace with standard NA flags
            #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
            #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
            #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
            "na_values": ["na", "", "error"],
        }

        ##--------------------------------------------------------------------.
        #### Read the data
        df = read_raw_text_file(
            filepath=file,
            column_names=column_names,
            reader_kwargs=reader_kwargs,
            logger=logger,
        )

        ##--------------------------------------------------------------------.
        #### Adapt the dataframe to adhere to DISDRODB L0 standards
        # Create ID and Value columns (split on the first ':' only)
        df = df["TO_PARSE"].str.split(":", expand=True, n=1)
        df.columns = ["ID", "Value"]

        # Select only rows with values
        df = df[df["Value"].apply(lambda x: x is not None)]

        # Drop rows with invalid IDs
        df = df[df["ID"].astype(str).isin(valid_id_str)]

        # Fail with an explicit message instead of pandas' generic
        # "No objects to concatenate" when nothing usable is left
        if df.empty:
            raise ValueError("no valid telegram field found")

        # Create the dataframe with each row corresponding to a timestep
        # - Group rows based on when ID values restart
        groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())

        # Reshape the dataframe: one row per group, one column per ID
        group_dfs = [group.set_index("ID").T for _, group in groups]

        # Merge each timestep dataframe
        # --> Missing columns are infilled by NaN
        df = pd.concat(group_dfs, axis=0)

        # Assign column names
        df = df.rename(column_dict, axis=1)

        # Keep only columns defined in the dictionary
        df = df[list(column_dict.values())]

        # Define datetime "time" column from filename
        datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")
        return df

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily zip archive
    # - Each file contains a single timestep !
    list_df = []
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        for filename in sorted(zip_ref.namelist()):
            if not filename.endswith(".txt"):
                continue
            # Open file
            with zip_ref.open(filename) as file:
                # Best-effort: log the failure and carry on with the next member
                try:
                    list_df.append(read_txt_file(file=file, filename=filename))
                except Exception as e:
                    msg = f"An error occurred while reading {filename}. The error is: {e}."
                    log_error(logger=logger, msg=msg, verbose=True)

    # Raise an explicit error instead of the opaque pd.concat failure
    if not list_df:
        raise ValueError(f"No valid txt file could be read from {filepath}.")

    # Concatenate all dataframes into a single one
    df = pd.concat(list_df)

    # ---------------------------------------------------------------------.
    return df
@@ -106,7 +106,7 @@ def reader(
106
106
  # Preprocess the raw spectrum
107
107
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
108
108
  # --> "" generates an array of zeros in L0B processing
109
- df["raw_drop_number"] = df["raw_drop_number"].replace("<SPECTRUM>ZERO</SPECTRUM>", "")
109
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
110
110
 
111
111
  # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
112
112
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
@@ -100,7 +100,7 @@ def reader(
100
100
  # --> "" generates an array of zeros in L0B processing
101
101
  df["raw_drop_number"] = df["raw_drop_number"].astype("string")
102
102
  df["raw_drop_number"] = df["raw_drop_number"].str.strip()
103
- df["raw_drop_number"] = df["raw_drop_number"].replace("<SPECTRUM>ZERO</SPECTRUM>", "")
103
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
104
104
 
105
105
  # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
106
106
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for ENPC PWS100 raw text data."""
20
+ import zipfile
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.utils.logger import log_error, log_warning
26
+
27
+
28
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a daily ENPC PWS100 zip archive into a single dataframe.

    Every ``.txt`` member of the archive is parsed independently. Members
    that are corrupted, empty, or carry an instrument error message are
    reported through ``log_warning`` and skipped; unexpected parsing failures
    are reported through ``log_error`` and skipped.

    Parameters
    ----------
    filepath : str
        Path of the daily zip archive.
    logger : optional
        Logger forwarded to ``log_warning`` and ``log_error``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the single-row dataframes parsed from each member.

    Raises
    ------
    ValueError
        If no ``.txt`` member of the archive could be parsed.
    """

    # Helper to convert a list of numeric strings to a comma-separated string
    def int_list_to_str(lst):
        return ",".join(f"{int(i)}" for i in lst)

    ##------------------------------------------------------------------------.
    #### Define function to read each txt file inside each daily zip file
    def read_txt_file(file, filename, logger):  # noqa PLR0911
        """Parse a single txt file within the daily zip file.

        Returns a single-row dataframe, or ``None`` when the file must be
        skipped (a warning is logged in that case).
        """
        # Read file (only the first line is used)
        try:
            txt = file.readline().decode("utf-8")
        except Exception:
            log_warning(logger=logger, msg=f"{filename} is corrupted", verbose=False)
            return None

        # Check file is not empty
        if txt == "":
            log_warning(logger=logger, msg=f"{filename} is empty", verbose=False)
            return None

        # "volt" also matches the full "PSU voltage too low" message
        if "volt" in txt:
            log_warning(logger=logger, msg=f"PSU voltage too low in {filename}", verbose=False)
            return None

        if "Error - message" in txt:
            log_warning(logger=logger, msg=f"Error message in {filename}", verbose=False)
            return None

        # Clean up the line
        txt = txt.replace(" 00 ", " 0 0 ")
        # Fill empty fields (two consecutive separators) with 0.
        # Only the double space is padded: padding every single space would
        # interleave a "0" between all fields and invalidate the fixed
        # offsets used below.
        # NOTE(review): presumably an empty field shows up as a double space
        # in the raw message - confirm against a raw file.
        txt = txt.replace("  ", " 0 ")
        # Drop the first character and the last eight characters
        # NOTE(review): presumably message framing/checksum - confirm
        txt = txt[1:-8]

        # Split the cleaned line
        buf = txt.split(" ")

        # Try to get the drop_size distribution:
        try:
            drop_size_distribution = int_list_to_str(buf[30:330])  # Drop size distribution (message field 42)
        except ValueError:
            log_warning(logger=logger, msg=f"Corrupted drop_size_distribution field in {filename}", verbose=False)
            return None

        # Try to get peak_to_pedestal_hist
        try:
            peak_to_pedestal_hist = int_list_to_str(buf[1499:1549])
        except ValueError:
            log_warning(
                logger=logger,
                msg=f"Corrupted raw_drop_number or peak_to_pedestal_hist field in {filename}",
                verbose=False,
            )
            return None

        # Parse fields
        # - A conversion failure here propagates to the caller, which logs it
        data = {
            "mor_visibility": float(buf[2]),  # Visibility Range (message field 20)
            "weather_code_synop_4680": float(buf[3]),  # Present Weather Code (WMO) (message field 21)
            "weather_code_metar_4678": buf[4],  # Present Weather Code (METAR) (message field 22)
            "weather_code_nws": buf[5],  # Present Weather Code (NWS) (message field 23)
            "alarms": int_list_to_str(buf[6:22]),  # Alarms (message field (24))
            "sensor_status": buf[22],  # Fault status of PWS100 (message field 25)
            "air_temperature": float(buf[23]),  # Temperature (°C) (message field 30)
            "relative_humidity": float(buf[24]),  # Sampled relative humidity (%) (message field 30)
            "wetbulb_temperature": float(buf[25]),  # Average wetbulb temperature (°C)(message field 30)
            "air_temperature_max": float(buf[26]),  # Maximum temperature (°C)(message field 31)
            "air_temperature_min": float(buf[27]),  # Minimum temperature (°C)(message field 31)
            "rainfall_rate": float(buf[28]),  # Precipitation rate (mm/h)(message field 40)
            "rainfall_accumulated": float(buf[29]),  # Precipitation accumulation (mm/h)(message field 41)
            "drop_size_distribution": drop_size_distribution,  # Drop size distribution (message field 42)
            "average_drop_velocity": float(buf[330]),  # Average velocity (mm/s)(message field 43)
            "average_drop_size": float(buf[331]),  # Average size (mm/h)(message field 43)
            "type_distribution": int_list_to_str(buf[332:343]),  # Type distribution (message field 44)
            "raw_drop_number": int_list_to_str(buf[343:1499]),  # Size/velocity spectrum (34*34) (message field 47)
            "peak_to_pedestal_hist": (
                peak_to_pedestal_hist  # Peak to pedestal ratio distribution histogram (message field 48)
            ),
        }

        # Convert to single-row DataFrame
        df = pd.DataFrame([data])

        # Define datetime "time" column from filename
        datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")
        return df

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily zip archive
    # - Each file contains a single timestep !
    list_df = []
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        for filename in sorted(zip_ref.namelist()):
            if not filename.endswith(".txt"):
                continue
            # Open file
            with zip_ref.open(filename) as f:
                # Best-effort: log the failure and carry on with the next member
                try:
                    df = read_txt_file(file=f, filename=filename, logger=logger)
                    if df is not None:
                        list_df.append(df)
                except Exception as e:
                    msg = f"An error occurred while reading {filename}. The error is: {e}."
                    log_error(logger=logger, msg=msg, verbose=True)

    # Raise an explicit error instead of the opaque pd.concat failure
    if not list_df:
        raise ValueError(f"No valid txt file could be read from {filepath}.")

    # Concatenate all dataframes into a single one
    df = pd.concat(list_df)

    # ---------------------------------------------------------------------.
    return df
@@ -0,0 +1,274 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB Reader for NOAA PSL RD80 stations."""
20
+ import os
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.l0.l0a_processing import read_raw_text_file
26
+
27
+
28
def read_new_format(filepath, logger):
    """Read an RD80 file written in the new format.

    The file is loaded with ``read_raw_text_file`` and reshaped: the 20 bin
    columns are merged into ``raw_drop_number``, the ``time_interval`` column
    is turned into ``time`` and ``sample_interval``, and the columns
    ``time_interval``, ``NumDrops`` and ``SumRA`` are removed.

    Parameters
    ----------
    filepath : str
        Path of the raw file. Its basename encodes the date and hour.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    bin_columns = [f"n{i}" for i in range(1, 21)]
    integral_columns = ["Dmax", "RI", "RA", "Wg", "Z", "EF", "N0", "slope"]
    column_names = ["time_interval", *bin_columns, *integral_columns, "NumDrops", "SumRA"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - Whitespace-delimited columns
        "delimiter": r"\s+",
        # - No header row is used for column names
        "header": None,
        # - Skip the first two rows of the file
        "skiprows": 2,
        # - Define encoding
        "encoding": "ISO-8859-1",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Extra strings to recognize as NA/NaN, on top of the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Flag -99.9900 entries as NaN
    df[integral_columns] = df[integral_columns].replace("-99.9900", "NaN")

    # Flag -Inf entries of 'Z' as NaN
    df["Z"] = df["Z"].str.replace("-Inf", "NaN")

    # Convert a "MM:SS:SSS" string to a timedelta
    def to_timedelta(text):
        mm, ss, ms = (int(part) for part in text.split(":"))
        return pd.Timedelta(minutes=mm, seconds=ss, milliseconds=ms)

    # Split the time interval into start and end offsets
    interval = df["time_interval"].str.split("-", expand=True)
    interval.columns = ["start", "end"]
    interval["start"] = interval["start"].apply(to_timedelta)
    interval["end"] = interval["end"].apply(to_timedelta)

    # An end before the start means the interval crossed the 60:00 boundary
    # --> 00:00 --> 60:00
    wrapped = interval["end"] < interval["start"]
    interval.loc[wrapped, "end"] += pd.Timedelta(minutes=60)

    # Sample interval in seconds, as integer
    elapsed = interval["end"] - interval["start"]
    df["sample_interval"] = elapsed.dt.total_seconds().astype(int)

    # Define time from the date-hour encoded in the filename
    # - Some prefixes shift the date-hour field by one character
    filename = os.path.basename(filepath)
    offset = 4 if filename.startswith(("lab", "bao0", "mdt0")) else 3
    date_hour = pd.to_datetime(filename[offset : offset + 7], format="%y%j%H")
    df["time"] = date_hour + interval["start"]

    # Merge the bin columns into the raw_drop_number column
    df["raw_drop_number"] = df[bin_columns].agg(";".join, axis=1)

    # Remove the bin columns and the columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=[*bin_columns, "time_interval", "NumDrops", "SumRA"])

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
160
+
161
+
162
def read_old_format(filepath, logger):
    """Read an RD80 file written in the old format.

    The file is loaded with ``read_raw_text_file`` and reshaped: the ``date``
    and ``time`` columns are combined into a datetime ``time`` column, and
    the 20 bin columns are merged into ``raw_drop_number``.

    Parameters
    ----------
    filepath : str
        Path of the raw file.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    bin_columns = [f"n{i}" for i in range(1, 21)]
    integral_columns = ["Dmax", "RI", "RA", "Wg", "Z", "EF", "N0", "slope"]
    column_names = ["date", "time", *bin_columns, *integral_columns]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - Whitespace-delimited columns
        "delimiter": r"\s+",
        # - No header row is used for column names
        "header": None,
        # - Skip the first row of the file
        "skiprows": 1,
        # - Define encoding
        "encoding": "ISO-8859-1",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Extra strings to recognize as NA/NaN, on top of the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Flag -99.9900 entries as NaN
    df[integral_columns] = df[integral_columns].replace("-99.9900", "NaN")

    # Flag -Inf entries of 'Z' as NaN
    df["Z"] = df["Z"].str.replace("-Inf", "NaN")

    # Define 'time' datetime column from the date and time strings
    # - Strings not matching the format become NaT (errors="coerce")
    timestamp_str = df["date"].astype(str) + " " + df["time"].astype(str)
    df["time"] = pd.to_datetime(timestamp_str, format="%Y/%m/%d %H:%M:%S", errors="coerce")

    # Merge the bin columns into the raw_drop_number column
    df["raw_drop_number"] = df[bin_columns].agg(";".join, axis=1)

    # Remove the date column and the bin columns
    df = df.drop(columns=["date", *bin_columns])

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
261
+
262
+
263
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Dispatch to the old- or new-format RD80 parser based on the filename."""
    basename = os.path.basename(filepath)
    # A '-' as fourth character marks the old naming scheme
    # - old: czc-050101-0052.txt
    # - new: czc2201220b20.txt
    uses_old_naming = basename[3] == "-"
    parse = read_old_format if uses_old_naming else read_new_format
    return parse(filepath, logger=logger)
@@ -50,7 +50,7 @@ def reader(
50
50
  sensor_name = "LPM" # [SPECIFY HERE THE SENSOR FOR WHICH THE READER IS DESIGNED]
51
51
  ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name=sensor_name)
52
52
 
53
- # Replace occureence of NaN flags with np.nan
53
+ # Replace occurrence of NaN flags with np.nan
54
54
  # - Define a dictionary specifying the value(s) of NaN flags for each variable
55
55
  # - The code here below is just an example that requires to be adapted !
56
56
  # - This step might not be required with your data !
disdrodb/l0/standards.py CHANGED
@@ -51,8 +51,9 @@ def get_sensor_logged_variables(sensor_name: str) -> list:
51
51
  def allowed_l0_variables(sensor_name: str) -> list:
52
52
  """Get the list of allowed L0 variables for a given sensor."""
53
53
  sensor_variables = list(get_l0a_dtype(sensor_name))
54
- allowed_variables = [*sensor_variables, "time", "latitude", "longitude", "altitude"]
55
- # TODO: add air_temperature, relative_humidity, wind_speed, wind_direction
54
+ weather_variables = ["air_temperature", "relative_humidity", "wind_speed", "wind_direction"]
55
+ allowed_variables = [*sensor_variables, *weather_variables, "time", "latitude", "longitude", "altitude"]
56
+ allowed_variables = sorted(np.unique(allowed_variables).tolist())
56
57
  return allowed_variables
57
58
 
58
59
 
@@ -659,9 +660,11 @@ def get_raw_array_dims_order(sensor_name: str) -> dict:
659
660
 
660
661
  Examples
661
662
  --------
662
- OTT Parsivel spectrum [v1d1 ... v1d32, v2d1, ..., v2d32]
663
+ OTT Parsivel spectrum [d1v1 ... d32v1, d1v2, ..., d32v2] (diameter increases first)
663
664
  --> dimension_order = ["velocity_bin_center", "diameter_bin_center"]
664
- Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2]
665
+ Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2] (velocity increases first)
666
+ --> dimension_order = ["diameter_bin_center", "velocity_bin_center"]
667
+ PWS 100 spectrum [d1v1 ... d1v34, d2v1, ..., d2v34] (velocity increases first)
665
668
  --> dimension_order = ["diameter_bin_center", "velocity_bin_center"]
666
669
 
667
670
  Parameters