disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
#!/usr/bin/env python3

# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""DISDRODB reader for GID LPM sensors not measuring wind."""
import pandas as pd

from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
from disdrodb.l0.l0a_processing import read_raw_text_file


@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define raw file ingestion options
    # Each raw telegram is ingested as a single string and split afterwards.
    column_names = ["TO_BE_SPLITTED"]

    # pandas.read_csv options
    # - See https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # One raw telegram per dataframe row
        "delimiter": "\\n",
        # Prevent the first column from becoming the dataframe index
        "index_col": False,
        # Column names are passed explicitly, so no header row is expected
        "header": None,
        # No leading rows to skip
        "skiprows": None,
        # Silently discard malformed lines
        "on_bad_lines": "skip",
        # The python engine is required by the multi-character delimiter
        "engine": "python",
        # Transparently decompress gzip, bz2 and zip files
        "compression": "infer",
        # Strings to treat as NaN, in addition to the pandas defaults
        # ('#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
        #  '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
        #  'nan', 'null')
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Keep only rows with the expected number of ';' separators (valid telegrams)
    df = df[df["TO_BE_SPLITTED"].str.count(";") == 520]

    # Split each telegram into its first 80 fields; the remainder of the line
    # (the raw drop spectrum) is left intact in the last column
    df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)

    # Attach the field names
    df.columns = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserve_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]

    # Strip the trailing checksum fields from the raw drop spectrum
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Build the datetime "time" column from the sensor date and time fields
    # NOTE(review): assumes the logger timestamps are UTC — confirm with the data provider
    df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")

    # Keep only telegrams whose start identifier is '00'
    df = df[df["start_identifier"].astype(str) == "00"]

    # Keep only rows whose raw drop spectrum has the expected string length
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Discard fields that are not part of the DISDRODB L0 standards
    df = df.drop(
        columns=[
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
        ],
    )
    return df
@@ -38,8 +38,6 @@ def reader(
38
38
  reader_kwargs["delimiter"] = ";"
39
39
  # - Skip first row as columns names
40
40
  reader_kwargs["header"] = None
41
- # Skip first row as columns names
42
- reader_kwargs["header"] = None
43
41
  reader_kwargs["skiprows"] = 0
44
42
  # - Skip file with encoding errors
45
43
  reader_kwargs["encoding_errors"] = "ignore"
@@ -108,12 +108,15 @@ def reader(
108
108
  df["time"] = pd.to_datetime(df["time"], format="%Y/%m/%d-%H:%M:%S", errors="coerce")
109
109
  df = df.drop(columns=["date"])
110
110
 
111
+ # Convert timezone from JST to UTC
112
+ df = df.set_index("time").tz_localize("Asia/Tokyo").tz_convert(None).reset_index()
113
+
111
114
  # Preprocess the raw spectrum
112
115
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
113
116
  # --> "" generates an array of zeros in L0B processing
114
117
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
115
118
 
116
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
119
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
117
120
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
118
121
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
119
122
 
@@ -113,7 +113,7 @@ def reader(
113
113
  # --> "" generates an array of zeros in L0B processing
114
114
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
115
115
 
116
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
116
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
117
117
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
118
118
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
119
119
 
@@ -102,7 +102,7 @@ def reader(
102
102
  # --> "" generates an array of zeros in L0B processing
103
103
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
104
104
 
105
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
105
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
106
106
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
107
107
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
108
108
 
#!/usr/bin/env python3

# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""DISDRODB reader for UGENT ILVO Parsivel2 raw text data."""
import tarfile

import numpy as np
import pandas as pd

from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
from disdrodb.l0.l0a_processing import read_raw_text_file
from disdrodb.utils.logger import log_error


@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""

    ##------------------------------------------------------------------------.
    #### Define function to read each txt file inside each daily tar.gz archive
    def read_txt_file(file, filename):
        """Parse a single txt file within the daily archive.

        Each txt file contains a single Parsivel2 telegram made of
        ``<ID>:<Value>`` lines; the timestep is encoded in the filename
        (``<prefix>_<YYYYmmddHHMMSS>.txt``).
        """
        #### Define column names
        # - The whole line is ingested as a single column and split afterwards
        column_names = ["TO_PARSE"]

        #### Define reader options
        # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
        reader_kwargs = {}
        # - Define delimiter (one telegram line per dataframe row)
        reader_kwargs["delimiter"] = "\\n"
        # - Define encoding
        reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
        # - Avoid first column to become df index !!!
        reader_kwargs["index_col"] = False
        # - Define behaviour when encountering bad lines
        reader_kwargs["on_bad_lines"] = "skip"
        # - Define reader engine (required by the multi-character delimiter)
        reader_kwargs["engine"] = "python"
        # - Define on-the-fly decompression of on-disk data
        # - Available: gzip, bz2, zip
        reader_kwargs["compression"] = "infer"
        # - Strings to recognize as NA/NaN, in addition to the pandas defaults
        reader_kwargs["na_values"] = ["na", "", "error"]

        #### Read the data
        df = read_raw_text_file(
            filepath=file,
            column_names=column_names,
            reader_kwargs=reader_kwargs,
            logger=logger,
        )

        #### Adapt the dataframe to adhere to DISDRODB L0 standards
        # Create ID and Value columns
        df = df["TO_PARSE"].str.split(":", expand=True, n=1)
        df.columns = ["ID", "Value"]

        # Select only rows with values
        df = df[df["Value"].apply(lambda x: x is not None)]

        # Drop rows with invalid (corrupted) IDs: valid IDs are '00'..'93'
        valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
        df = df[df["ID"].astype(str).isin(valid_id_str)]

        # Reshape the dataframe (one row = one timestep)
        df = df.set_index("ID").T

        # Map telegram field IDs to DISDRODB L0 column names
        column_dict = {
            "01": "rainfall_rate_32bit",
            "02": "rainfall_accumulated_32bit",
            "03": "weather_code_synop_4680",
            "04": "weather_code_synop_4677",
            "05": "weather_code_metar_4678",
            "06": "weather_code_nws",
            "07": "reflectivity_32bit",
            "08": "mor_visibility",
            "09": "sample_interval",
            "10": "laser_amplitude",
            "11": "number_particles",
            "12": "sensor_temperature",
            # "13": "sensor_serial_number",
            # "14": "firmware_iop",
            # "15": "firmware_dsp",
            "16": "sensor_heating_current",
            "17": "sensor_battery_voltage",
            "18": "sensor_status",
            # "19": "start_time",
            # "20": "sensor_time",
            # "21": "sensor_date",
            # "22": "station_name",
            # "23": "station_number",
            "24": "rainfall_amount_absolute_32bit",
            "25": "error_code",
            "26": "sensor_temperature_pcb",
            "27": "sensor_temperature_receiver",
            "28": "sensor_temperature_trasmitter",
            "30": "rainfall_rate_16_bit_30",
            "31": "rainfall_rate_16_bit_1200",
            "32": "rainfall_accumulated_16bit",
            "34": "rain_kinetic_energy",
            "35": "snowfall_rate",
            "90": "raw_drop_concentration",
            "91": "raw_drop_average_velocity",
            "93": "raw_drop_number",
        }

        df = df.rename(column_dict, axis=1)

        # Keep only columns defined in the dictionary
        df = df[list(column_dict.values())]

        # Define datetime "time" column from the filename
        datetime_str = filename.replace(".txt", "").split("_")[1]
        df["time"] = pd.to_datetime(datetime_str, format="%Y%m%d%H%M%S")

        return df

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily archive
    # - Each file contains a single timestep !
    list_df = []
    with tarfile.open(filepath, "r:gz") as tar:
        members = sorted(tar.getmembers(), key=lambda m: m.name)
        for member in members:
            filename = member.name
            if member.isfile() and filename.endswith(".txt"):
                # Open the archive member and parse it; a corrupted member is
                # logged (with its name, so it can be located) and skipped
                with tar.extractfile(member) as file:
                    try:
                        df = read_txt_file(file=file, filename=filename)
                        list_df.append(df)
                    except Exception as e:
                        msg = f"An error occurred while reading {filename}. The error is: {e}."
                        log_error(logger=logger, msg=msg, verbose=True)

    # Raise an informative error if no member could be parsed
    # (pd.concat([]) would otherwise raise an opaque ValueError)
    if not list_df:
        raise ValueError(f"No valid txt file could be read from {filepath}.")

    # Concatenate all timestep dataframes into a single one
    df = pd.concat(list_df)

    # ---------------------------------------------------------------------.
    return df
#!/usr/bin/env python3
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
import numpy as np
import pandas as pd

from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
from disdrodb.l0.l0a_processing import read_raw_text_file


@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define raw file ingestion options
    # The raw file is a stream of 'ID:Value' telegram lines; each full
    # telegram (a run of increasing IDs) corresponds to one timestep.
    column_names = ["TO_PARSE"]

    # pandas.read_csv options
    reader_kwargs = {
        # One telegram line per dataframe row
        "delimiter": "\\n",
        # Raw files are latin-encoded ("ISO-8859-1")
        "encoding": "latin",
        # Prevent the first column from becoming the dataframe index
        "index_col": False,
        # Silently discard malformed lines
        "on_bad_lines": "skip",
        # The python engine is required by the multi-character delimiter
        "engine": "python",
        # Transparently decompress gzip, bz2 and zip files
        "compression": "infer",
        # Strings to treat as NaN, in addition to the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split each line into its ID and Value parts
    df = df["TO_PARSE"].str.split(":", expand=True, n=1)
    df.columns = ["ID", "Value"]

    # Keep only rows carrying an actual value
    df = df[df["Value"].astype(bool)]
    df = df[df["Value"].apply(lambda x: x is not None)]

    # Discard corrupted rows: valid IDs are '00'..'93'
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
    df = df[df["ID"].astype(str).isin(valid_id_str)]

    # Group the rows into telegrams: a new telegram starts whenever the
    # ID sequence stops increasing
    groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())

    # Pivot each telegram into a single row (columns = IDs)
    group_dfs = [group.set_index("ID").T for _, group in groups]

    # Stack the telegram rows; columns missing in a telegram become NaN
    df = pd.concat(group_dfs, axis=0)
    df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")

    # Map telegram field IDs to DISDRODB L0 column names
    column_dict = {
        "01": "rainfall_rate_32bit",
        "02": "rainfall_accumulated_32bit",
        "03": "weather_code_synop_4680",
        "04": "weather_code_synop_4677",
        "05": "weather_code_metar_4678",
        "06": "weather_code_nws",
        "07": "reflectivity_32bit",
        "08": "mor_visibility",
        "09": "sample_interval",
        "10": "laser_amplitude",
        "11": "number_particles",
        "12": "sensor_temperature",
        # "13": "sensor_serial_number",
        # "14": "firmware_iop",
        # "15": "firmware_dsp",
        "16": "sensor_heating_current",
        "17": "sensor_battery_voltage",
        "18": "sensor_status",
        # "19": "start_time",
        "20": "sensor_time",
        "21": "sensor_date",
        # "22": "station_name",
        # "23": "station_number",
        "24": "rainfall_amount_absolute_32bit",
        "25": "error_code",
        # "30": "rainfall_rate_16_bit_30",
        # "31": "rainfall_rate_16_bit_1200",
        # "32": "rainfall_accumulated_16bit",
        "34": "rain_kinetic_energy",
        "35": "snowfall_rate",
        "90": "raw_drop_concentration",
        "91": "raw_drop_average_velocity",
        "93": "raw_drop_number",
    }

    # Infill columns that never appeared in any telegram
    # (the "NaN" string is parsed as missing downstream)
    for column in column_dict:
        if column not in df.columns:
            df[column] = "NaN"

    # Rename columns
    df = df.rename(column_dict, axis=1)

    # Keep only columns defined in the dictionary
    df = df[list(column_dict.values())]

    # Build the datetime "time" column from the sensor date and time fields
    df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")

    # Discard fields that are not part of the DISDRODB L0 standards
    df = df.drop(
        columns=[
            "sensor_date",
            "sensor_time",
        ],
    )

    return df
#!/usr/bin/env python3
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""Reader for DELFT OTT PARSIVEL2 sensor in netCDF format."""

from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset


@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Open the raw netCDF file
    ds = open_raw_netcdf_file(filepath=filepath, logger=logger)

    ##------------------------------------------------------------------------.
    #### Adapt the dataset to adhere to DISDRODB L0 standards
    # Map the raw netCDF dimensions/variables onto the DISDRODB L0 names
    # (variables not listed here are discarded by standardize_raw_dataset)
    dict_names = {
        ### Dimensions
        "diameter": "diameter_bin_center",
        "velocity": "velocity_bin_center",
        ### Variables
        "rainfall_rate_32bit": "rainfall_rate_32bit",
        "synop_WaWa": "weather_code_synop_4680",
        "synop_WW": "weather_code_synop_4677",
        "radar_reflectivity": "reflectivity_32bit",
        "visibility": "mor_visibility",
        "interval": "sample_interval",
        "sig_laser": "laser_amplitude",
        "n_particles": "number_particles",
        "T_sensor": "sensor_temperature",
        "I_heating": "sensor_heating_current",
        "V_power_supply": "sensor_battery_voltage",
        "state_sensor": "sensor_status",
        "error_code": "error_code",
        "kinetic_energy": "rain_kinetic_energy",
        "snowfall_rate": "snowfall_rate",
        "fall_velocity": "raw_drop_average_velocity",
        "number_concentration": "raw_drop_concentration",
        "data_raw": "raw_drop_number",
    }

    # Select/rename the dataset variables and infill the missing ones
    ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="PARSIVEL2")

    # Convert sensor_temperature to Celsius degrees (as logged by the sensor)
    # NOTE(review): assumes T_sensor is stored in Kelvin — confirm against the raw files
    ds["sensor_temperature"] = ds["sensor_temperature"] - 273.15

    # Return the dataset adhering to DISDRODB L0B standards
    return ds