disdrodb 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. disdrodb/_version.py +2 -2
  2. disdrodb/api/create_directories.py +0 -2
  3. disdrodb/cli/disdrodb_create_summary.py +10 -0
  4. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  5. disdrodb/constants.py +1 -1
  6. disdrodb/etc/products/L1/global.yaml +1 -1
  7. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  8. disdrodb/etc/products/L2E/global.yaml +1 -1
  9. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  10. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  11. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  12. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  13. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  14. disdrodb/etc/products/L2M/global.yaml +11 -3
  15. disdrodb/l0/check_configs.py +49 -16
  16. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  17. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  18. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  19. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  20. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  21. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  22. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  23. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  24. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  25. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  26. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  27. disdrodb/l0/l0b_processing.py +70 -15
  28. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  30. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  31. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  32. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  33. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  34. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  35. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  36. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  37. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  38. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  39. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  41. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  42. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  43. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  44. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  45. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  46. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  47. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  48. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  49. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  50. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  51. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  52. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  53. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  54. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  55. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  56. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  57. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  58. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  59. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  60. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  61. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  62. disdrodb/l1/beard_model.py +31 -129
  63. disdrodb/l1/fall_velocity.py +136 -83
  64. disdrodb/l1/filters.py +25 -28
  65. disdrodb/l1/processing.py +11 -13
  66. disdrodb/l1_env/routines.py +46 -17
  67. disdrodb/l2/empirical_dsd.py +6 -0
  68. disdrodb/l2/processing.py +2 -2
  69. disdrodb/metadata/geolocation.py +0 -2
  70. disdrodb/psd/fitting.py +16 -13
  71. disdrodb/routines/l2.py +35 -23
  72. disdrodb/routines/wrappers.py +5 -0
  73. disdrodb/scattering/axis_ratio.py +90 -84
  74. disdrodb/scattering/permittivity.py +6 -0
  75. disdrodb/summary/routines.py +38 -12
  76. disdrodb/utils/attrs.py +2 -0
  77. disdrodb/utils/encoding.py +5 -0
  78. disdrodb/utils/time.py +2 -2
  79. disdrodb/viz/plots.py +24 -1
  80. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/METADATA +2 -1
  81. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/RECORD +85 -65
  82. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  83. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +0 -0
  84. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  85. {disdrodb-0.1.4.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -16,19 +16,18 @@
16
16
  # You should have received a copy of the GNU General Public License
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
- """DISDRODB reader for GID LPM sensors not measuring wind."""
19
+ """DISDRODB reader for ARSO LPM sensors."""
20
20
  import pandas as pd
21
21
 
22
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
23
  from disdrodb.l0.l0a_processing import read_raw_text_file
24
24
 
25
25
 
26
- @is_documented_by(reader_generic_docstring)
27
- def reader(
26
+ def read_SM03_telegram(
28
27
  filepath,
29
28
  logger=None,
30
29
  ):
31
- """Reader."""
30
+ """Read SM03 telegram."""
32
31
  ##------------------------------------------------------------------------.
33
32
  #### - Define raw data headers
34
33
  column_names = ["TO_PARSE"]
@@ -77,16 +76,99 @@ def reader(
77
76
  logger=logger,
78
77
  )
79
78
 
79
+ ##------------------------------------------------------------------------.
80
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
81
+ # Count number of delimiters to identify valid rows
82
+ df = df[df["TO_PARSE"].str.count(";") == 12]
83
+
84
+ # Check there are valid rows left
85
+ if len(df) == 0:
86
+ raise ValueError(f"No valid data in {filepath}")
87
+
88
+ # Split by ; delimiter (before raw drop number)
89
+ df = df["TO_PARSE"].str.split(";", expand=True)
90
+
91
+ # Assign column names
92
+ names = []
93
+ df.columns = names
94
+
95
+ # Define datetime "time" column
96
+ time = df[0].str[-19:]
97
+ df["time"] = pd.to_datetime(time, format="%d/%m/%Y %H:%M:%S", errors="coerce")
98
+
99
+ return df
100
+
101
+
102
+ def read_SM05_telegram(
103
+ filepath,
104
+ logger=None,
105
+ ):
106
+ """Read SM05 telegram."""
107
+ ##------------------------------------------------------------------------.
108
+ #### - Define raw data headers
109
+ column_names = ["TO_PARSE"]
110
+
111
+ ##------------------------------------------------------------------------.
112
+ #### Define reader options
113
+ # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
114
+ reader_kwargs = {}
115
+
116
+ # - Define delimiter
117
+ reader_kwargs["delimiter"] = "\\n"
118
+
119
+ # - Avoid first column to become df index !!!
120
+ reader_kwargs["index_col"] = False
121
+
122
+ # - Define encoding
123
+ reader_kwargs["encoding"] = "latin"
124
+
125
+ # Since column names are expected to be passed explicitly, header is set to None
126
+ reader_kwargs["header"] = None
127
+
128
+ # - Number of rows to be skipped at the beginning of the file
129
+ reader_kwargs["skiprows"] = None
130
+
131
+ # - Define behaviour when encountering bad lines
132
+ reader_kwargs["on_bad_lines"] = "skip"
133
+
134
+ # - Define reader engine
135
+ # - C engine is faster
136
+ # - Python engine is more feature-complete
137
+ reader_kwargs["engine"] = "python"
138
+
139
+ # - Define on-the-fly decompression of on-disk data
140
+ # - Available: gzip, bz2, zip
141
+ reader_kwargs["compression"] = "infer"
142
+
143
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
144
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
145
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
146
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
147
+ reader_kwargs["na_values"] = ["na", "", "error"]
148
+
149
+ ##------------------------------------------------------------------------.
150
+ #### Read the data
151
+ df = read_raw_text_file(
152
+ filepath=filepath,
153
+ column_names=column_names,
154
+ reader_kwargs=reader_kwargs,
155
+ logger=logger,
156
+ )
157
+
80
158
  ##------------------------------------------------------------------------.
81
159
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
82
160
  # Count number of delimiters to identify valid rows
83
161
  df = df[df["TO_PARSE"].str.count(";") == 521]
84
162
 
163
+ # Check there are valid rows left
164
+ if len(df) == 0:
165
+ raise ValueError(f"No valid data in {filepath}")
166
+
85
167
  # Split by ; delimiter (before raw drop number)
86
168
  df = df["TO_PARSE"].str.split(";", expand=True, n=80)
87
169
 
88
170
  # Assign column names
89
- column_names = [
171
+ names = [
90
172
  "time",
91
173
  "start_identifier",
92
174
  "device_address",
@@ -109,7 +191,7 @@ def reader(
109
191
  "quality_index",
110
192
  "max_hail_diameter",
111
193
  "laser_status",
112
- "static_signal",
194
+ "static_signal_status",
113
195
  "laser_temperature_analog_status",
114
196
  "laser_temperature_digital_status",
115
197
  "laser_current_analog_status",
@@ -123,7 +205,7 @@ def reader(
123
205
  "current_heating_heads_status",
124
206
  "current_heating_carriers_status",
125
207
  "control_output_laser_power_status",
126
- "reserve_status",
208
+ "reserved_status",
127
209
  "temperature_interior",
128
210
  "laser_temperature",
129
211
  "laser_current_average",
@@ -169,15 +251,16 @@ def reader(
169
251
  "number_particles_class_9_internal_data",
170
252
  "raw_drop_number",
171
253
  ]
172
- df.columns = column_names
254
+ df.columns = names
255
+
256
+ # Define datetime "time" column
257
+ time_str = df["time"].str[-19:]
258
+ # time_str = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
259
+ df["time"] = pd.to_datetime(time_str, format="%d/%m/%Y %H:%M:%S", errors="coerce")
173
260
 
174
261
  # Remove checksum from raw_drop_number
175
262
  df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
176
263
 
177
- # Define datetime "time" column
178
- time = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
179
- df["time"] = pd.to_datetime(time, format="%d/%m/%Y %H:%M:%S", errors="coerce")
180
-
181
264
  # Drop row if start_identifier different than 00
182
265
  df["start_identifier"] = df["start_identifier"].astype(str).str[-2:]
183
266
  df = df[df["start_identifier"] == "00"]
@@ -195,3 +278,15 @@ def reader(
195
278
  ]
196
279
  df = df.drop(columns=columns_to_drop)
197
280
  return df
281
+
282
+
283
+ @is_documented_by(reader_generic_docstring)
284
+ def reader(
285
+ filepath,
286
+ logger=None,
287
+ ):
288
+ """Reader."""
289
+ return read_SM05_telegram(
290
+ filepath=filepath,
291
+ logger=logger,
292
+ )
@@ -16,7 +16,7 @@
16
16
  # You should have received a copy of the GNU General Public License
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
- """DISDRODB reader for GID LPM sensors not measuring wind."""
19
+ """DISDRODB reader for UL LPM sensors in Crni Vrh."""
20
20
  import pandas as pd
21
21
 
22
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -113,7 +113,7 @@ def reader(
113
113
  "quality_index",
114
114
  "max_hail_diameter",
115
115
  "laser_status",
116
- "static_signal",
116
+ "static_signal_status",
117
117
  "laser_temperature_analog_status",
118
118
  "laser_temperature_digital_status",
119
119
  "laser_current_analog_status",
@@ -127,7 +127,7 @@ def reader(
127
127
  "current_heating_heads_status",
128
128
  "current_heating_carriers_status",
129
129
  "control_output_laser_power_status",
130
- "reserve_status",
130
+ "reserved_status",
131
131
  "temperature_interior",
132
132
  "laser_temperature",
133
133
  "laser_current_average",
@@ -108,7 +108,7 @@ def reader(
108
108
  "quality_index",
109
109
  "max_hail_diameter",
110
110
  "laser_status",
111
- "static_signal",
111
+ "static_signal_status",
112
112
  "laser_temperature_analog_status",
113
113
  "laser_temperature_digital_status",
114
114
  "laser_current_analog_status",
@@ -122,7 +122,7 @@ def reader(
122
122
  "current_heating_heads_status",
123
123
  "current_heating_carriers_status",
124
124
  "control_output_laser_power_status",
125
- "reserve_status",
125
+ "reserved_status",
126
126
  "temperature_interior",
127
127
  "laser_temperature",
128
128
  "laser_current_average",
@@ -93,20 +93,11 @@ def reader(
93
93
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
94
94
  df = df[df["ID"].astype(str).isin(valid_id_str)]
95
95
 
96
- # Create the dataframe with each row corresponding to a timestep
97
- # - Group rows based on when ID values restart
98
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
99
-
100
- # Reshape the dataframe
101
- group_dfs = []
102
- for _, group in groups:
103
- group_df = group.set_index("ID").T
104
- group_dfs.append(group_df)
105
-
106
- # Merge each timestep dataframe
107
- # --> Missing columns are infilled by NaN
108
- df = pd.concat(group_dfs, axis=0)
109
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
96
+ # Create the dataframe where each row corresponds to a timestep
97
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
98
+ df = df.pivot(index="_group", columns="ID") # noqa
99
+ df.columns = df.columns.get_level_values("ID")
100
+ df = df.reset_index(drop=True)
110
101
 
111
102
  # Define column names
112
103
  column_dict = {
@@ -97,20 +97,11 @@ def reader(
97
97
  if len(df) == 0:
98
98
  raise ValueError("No rows left after removing corrupted ones.")
99
99
 
100
- # Create the dataframe with each row corresponding to a timestep
101
- # - Group rows based on when ID values restart
102
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
103
-
104
- # Reshape the dataframe
105
- group_dfs = []
106
- for _, group in groups:
107
- group_df = group.set_index("ID").T
108
- group_dfs.append(group_df)
109
-
110
- # Merge each timestep dataframe
111
- # --> Missing columns are infilled by NaN
112
- df = pd.concat(group_dfs, axis=0)
113
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
100
+ # Create the dataframe where each row corresponds to a timestep
101
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
102
+ df = df.pivot(index="_group", columns="ID") # noqa
103
+ df.columns = df.columns.get_level_values("ID")
104
+ df = df.reset_index(drop=True)
114
105
 
115
106
  # Define column names
116
107
  column_dict = {
@@ -22,12 +22,8 @@ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
22
  from disdrodb.l0.l0a_processing import read_raw_text_file
23
23
 
24
24
 
25
- @is_documented_by(reader_generic_docstring)
26
- def reader(
27
- filepath,
28
- logger=None,
29
- ):
30
- """Reader."""
25
+ def read_format1(filepath, logger):
26
+ """Read format F1 (before 2020)."""
31
27
  ##------------------------------------------------------------------------.
32
28
  #### Define column names
33
29
  column_names = ["TO_PARSE"]
@@ -91,8 +87,8 @@ def reader(
91
87
  "weather_code_synop_4680",
92
88
  "reflectivity_32bit",
93
89
  "mor_visibility",
94
- "sensor_temperature", # maybe
95
- "laser_amplitude", # probably
90
+ "sensor_temperature",
91
+ "laser_amplitude",
96
92
  "number_particles",
97
93
  "sensor_status",
98
94
  "sensor_heating_current",
@@ -119,3 +115,116 @@ def reader(
119
115
 
120
116
  # Return the dataframe adhering to DISDRODB L0 standards
121
117
  return df
118
+
119
+
120
+ def read_format2(filepath, logger):
121
+ """Read format 2 (July 2012-April 2023)."""
122
+ ##------------------------------------------------------------------------.
123
+ #### Define column names
124
+ column_names = ["TO_PARSE"]
125
+
126
+ ##------------------------------------------------------------------------.
127
+ #### Define reader options
128
+ reader_kwargs = {}
129
+ # - Define delimiter
130
+ reader_kwargs["delimiter"] = "\\n"
131
+
132
+ # - Avoid first column to become df index !!!
133
+ reader_kwargs["index_col"] = False
134
+
135
+ # - Define behaviour when encountering bad lines
136
+ reader_kwargs["on_bad_lines"] = "skip"
137
+
138
+ # Skip the first row (header)
139
+ reader_kwargs["skiprows"] = 0
140
+
141
+ # - Define encoding
142
+ reader_kwargs["encoding"] = "latin"
143
+
144
+ # - Define reader engine
145
+ # - C engine is faster
146
+ # - Python engine is more feature-complete
147
+ reader_kwargs["engine"] = "python"
148
+
149
+ # - Define on-the-fly decompression of on-disk data
150
+ # - Available: gzip, bz2, zip
151
+ reader_kwargs["compression"] = "infer"
152
+
153
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
154
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
155
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
156
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
157
+ reader_kwargs["na_values"] = ["na", "", "error"]
158
+
159
+ # Skip first row as columns names
160
+ reader_kwargs["header"] = None
161
+
162
+ ##------------------------------------------------------------------------.
163
+ #### Read the data
164
+ df_raw = read_raw_text_file(
165
+ filepath=filepath,
166
+ column_names=column_names,
167
+ reader_kwargs=reader_kwargs,
168
+ logger=logger,
169
+ )
170
+
171
+ ##------------------------------------------------------------------------.
172
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
173
+ # Create ID and Value columns
174
+ df = df_raw["TO_PARSE"].str.split(";", expand=True, n=14)
175
+
176
+ # Assign column names
177
+ names = [
178
+ "id",
179
+ "time",
180
+ "rainfall_rate_32bit",
181
+ "rainfall_accumulated_32bit",
182
+ "weather_code_synop_4680",
183
+ "reflectivity_32bit",
184
+ "mor_visibility",
185
+ "sensor_temperature",
186
+ "laser_amplitude",
187
+ "number_particles",
188
+ "sensor_status",
189
+ "sensor_heating_current",
190
+ "sensor_battery_voltage",
191
+ "error_code",
192
+ "raw_drop_number",
193
+ ]
194
+ df.columns = names
195
+
196
+ # Convert time column to datetime
197
+ df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
198
+
199
+ # Preprocess the raw spectrum
200
+ # - Add 0 before every ; if ; not preceded by a digit
201
+ # Example: ';;1;;' --> '0;0;1;0;'
202
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("R;", "")
203
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
204
+
205
+ # Drop rows with invalid spectrum
206
+ df = df[df["raw_drop_number"].str.count(";") == 1024]
207
+
208
+ # df["rainfall_rate_32bit"].max()
209
+
210
+ # Drop columns not agreeing with DISDRODB L0 standards
211
+ columns_to_drop = [
212
+ "id",
213
+ ]
214
+ df = df.drop(columns=columns_to_drop)
215
+
216
+ # Return the dataframe adhering to DISDRODB L0 standards
217
+ return df
218
+
219
+
220
+ @is_documented_by(reader_generic_docstring)
221
+ def reader(
222
+ filepath,
223
+ logger=None,
224
+ ):
225
+ """Reader."""
226
+ if filepath.endswith("F1.dat.gz"):
227
+ return read_format1(filepath, logger)
228
+ if filepath.endswith("F2.dat.gz"):
229
+ return read_format2(filepath, logger)
230
 + raise ValueError(f"Unexpected filename: {os.path.basename(filepath)}")
@@ -81,20 +81,11 @@ def reader(
81
81
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
82
82
  df = df[df["ID"].astype(str).isin(valid_id_str)]
83
83
 
84
- # Create the dataframe with each row corresponding to a timestep
85
- # - Group rows based on when ID values restart
86
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
87
-
88
- # Reshape the dataframe
89
- group_dfs = []
90
- for _, group in groups:
91
- group_df = group.set_index("ID").T
92
- group_dfs.append(group_df)
93
-
94
- # Merge each timestep dataframe
95
- # --> Missing columns are infilled by NaN
96
- df = pd.concat(group_dfs, axis=0)
97
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
84
+ # Create the dataframe where each row corresponds to a timestep
85
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
86
+ df = df.pivot(index="_group", columns="ID") # noqa
87
+ df.columns = df.columns.get_level_values("ID")
88
+ df = df.reset_index(drop=True)
98
89
 
99
90
  # Define available column names
100
91
  column_dict = {
@@ -123,9 +114,14 @@ def reader(
123
114
  # "23": "station_number",
124
115
  "24": "rainfall_amount_absolute_32bit",
125
116
  "25": "error_code",
117
+ # "26": "sensor_temperature_pcb",
118
+ # "27": "sensor_temperature_receiver",
119
 + # "28": "sensor_temperature_transmitter",
126
120
  "30": "rainfall_rate_16_bit_30",
127
121
  "31": "rainfall_rate_16_bit_1200",
128
122
  "32": "rainfall_accumulated_16bit",
123
+ # "34": "rain_kinetic_energy",
124
+ # "35": "snowfall_rate",
129
125
  "90": "raw_drop_concentration",
130
126
  "91": "raw_drop_average_velocity",
131
127
  "93": "raw_drop_number",
@@ -81,20 +81,11 @@ def reader(
81
81
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
82
82
  df = df[df["ID"].astype(str).isin(valid_id_str)]
83
83
 
84
- # Create the dataframe with each row corresponding to a timestep
85
- # - Group rows based on when ID values restart
86
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
87
-
88
- # Reshape the dataframe
89
- group_dfs = []
90
- for _, group in groups:
91
- group_df = group.set_index("ID").T
92
- group_dfs.append(group_df)
93
-
94
- # Merge each timestep dataframe
95
- # --> Missing columns are infilled by NaN
96
- df = pd.concat(group_dfs, axis=0)
97
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
84
+ # Create the dataframe where each row corresponds to a timestep
85
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
86
+ df = df.pivot(index="_group", columns="ID") # noqa
87
+ df.columns = df.columns.get_level_values("ID")
88
+ df = df.reset_index(drop=True)
98
89
 
99
90
  # Assign column names
100
91
  column_dict = {
@@ -123,9 +114,14 @@ def reader(
123
114
  # "23": "station_number",
124
115
  "24": "rainfall_amount_absolute_32bit",
125
116
  "25": "error_code",
117
+ # "26": "sensor_temperature_pcb",
118
+ # "27": "sensor_temperature_receiver",
119
 + # "28": "sensor_temperature_transmitter",
126
120
  "30": "rainfall_rate_16_bit_30",
127
121
  "31": "rainfall_rate_16_bit_1200",
128
122
  "32": "rainfall_accumulated_16bit",
123
+ # "34": "rain_kinetic_energy",
124
+ # "35": "snowfall_rate",
129
125
  "90": "raw_drop_concentration",
130
126
  "91": "raw_drop_average_velocity",
131
127
  "93": "raw_drop_number",
@@ -81,20 +81,11 @@ def reader(
81
81
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
82
82
  df = df[df["ID"].astype(str).isin(valid_id_str)]
83
83
 
84
- # Create the dataframe with each row corresponding to a timestep
85
- # - Group rows based on when ID values restart
86
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
87
-
88
- # Reshape the dataframe
89
- group_dfs = []
90
- for _, group in groups:
91
- group_df = group.set_index("ID").T
92
- group_dfs.append(group_df)
93
-
94
- # Merge each timestep dataframe
95
- # --> Missing columns are infilled by NaN
96
- df = pd.concat(group_dfs, axis=0)
97
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
84
+ # Create the dataframe where each row corresponds to a timestep
85
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
86
+ df = df.pivot(index="_group", columns="ID") # noqa
87
+ df.columns = df.columns.get_level_values("ID")
88
+ df = df.reset_index(drop=True)
98
89
 
99
90
  # Define available column names
100
91
  column_dict = {
@@ -123,6 +114,9 @@ def reader(
123
114
  # "23": "station_number",
124
115
  "24": "rainfall_amount_absolute_32bit",
125
116
  "25": "error_code",
117
+ # "26": "sensor_temperature_pcb",
118
+ # "27": "sensor_temperature_receiver",
119
 + # "28": "sensor_temperature_transmitter",
126
120
  # "30": "rainfall_rate_16_bit_30",
127
121
  # "31": "rainfall_rate_16_bit_1200",
128
122
  # "32": "rainfall_accumulated_16bit",