disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -91,7 +91,7 @@ def format_string_array(string: str, n_values: int) -> np.array:
 
     e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
 
-    If empty string ("") --> Return an arrays of zeros
+    If empty string ("") or "0" --> Return an arrays of zeros
     If the list length is not n_values -> Return an arrays of np.nan
 
     The function strip potential delimiters at start and end before splitting.
@@ -108,31 +108,38 @@ def format_string_array(string: str, n_values: int) -> np.array:
     np.array
         array of float
     """
-    split_str = infer_split_str(string)
-    values = np.array(string.strip(split_str).split(split_str))
-
-    # -------------------------------------------------------------------------.
-    ## Assumptions !!!
-    # If empty list --> Assume no precipitation recorded. Return an arrays of zeros
-    if len(values) == 0:
+    # Check for empty string or "0" case
+    # - Assume no precipitation recorded. Return an arrays of zeros
+    if string in {"", "0"}:
         values = np.zeros(n_values)
         return values
 
-    # -------------------------------------------------------------------------.
+    # Check for NaN case
+    # - Assume no data available. Return an arrays of NaN
+    if string == "NaN":
+        values = np.zeros(n_values) * np.nan
+        return values
+
+    # Retrieve list of values
+    split_str = infer_split_str(string)
+    values = np.array(string.strip(split_str).split(split_str))
+
     # If the length is not as expected --> Assume data corruption
     # --> Return an array with nan
     if len(values) != n_values:
         values = np.zeros(n_values) * np.nan
-    else:
-        # Ensure string type
-        values = values.astype("str")
-        # Replace '' with 0
-        values = replace_empty_strings_with_zeros(values)
-        # Replace "-9.999" with 0
-        values = np.char.replace(values, "-9.999", "0")
-        # Cast values to float type
-        # --> Note: the disk encoding is specified in the l0b_encodings.yml
-        values = values.astype(float)
+        return values
+
+    # Otherwise sanitize the list of value
+    # Ensure string type
+    values = values.astype("str")
+    # Replace '' with 0
+    values = replace_empty_strings_with_zeros(values)
+    # Replace "-9.999" with 0
+    values = np.char.replace(values, "-9.999", "0")
+    # Cast values to float type
+    # --> Note: the disk encoding is specified in the l0b_encodings.yml
+    values = values.astype(float)
     return values
 
 
@@ -117,7 +117,12 @@ def split_dataset_by_sampling_intervals(
 
     # If sample_interval is a dataset variable, use it to define dictionary of datasets
     if "sample_interval" in ds:
-        return {int(interval): ds.isel(time=ds["sample_interval"] == interval) for interval in measurement_intervals}
+        dict_ds = {}
+        for interval in measurement_intervals:
+            ds_subset = ds.isel(time=ds["sample_interval"] == interval)
+            if ds_subset.sizes["time"] > 2:
+                dict_ds[int(interval)] = ds_subset
+        return dict_ds
 
     # ---------------------------------------------------------------------------------------.
     # Otherwise exploit difference between timesteps to identify change point
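The rewritten loop no longer returns every per-interval subset unconditionally: subsets with two or fewer timesteps are silently dropped. A minimal sketch of the same pattern on a toy dataset (the timestamps and interval values are made up for illustration):

```python
import numpy as np
import xarray as xr

# Toy dataset: 5 timesteps, the first two recorded at 30 s, the rest at 60 s
times = np.array(
    ["2024-01-01T00:00:00", "2024-01-01T00:00:30", "2024-01-01T00:01:00",
     "2024-01-01T00:02:00", "2024-01-01T00:03:00"],
    dtype="datetime64[ns]",
)
ds = xr.Dataset(
    {"sample_interval": ("time", [30, 30, 60, 60, 60])},
    coords={"time": times},
)

dict_ds = {}
for interval in [30, 60]:
    ds_subset = ds.isel(time=ds["sample_interval"] == interval)
    if ds_subset.sizes["time"] > 2:  # drop subsets too short to process
        dict_ds[int(interval)] = ds_subset

print(list(dict_ds))  # [60] -- the 30 s subset (only 2 timesteps) is discarded
```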
@@ -460,9 +465,8 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True):
     # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
     #     qc_flag[-1] = 0
 
-    # Assign time quality flag coordinate
+    # Add time quality flag variable
     ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-    ds = ds.set_coords("time_qc")
 
     # Add CF attributes for time_qc
     ds["time_qc"].attrs = {
@@ -674,6 +678,16 @@ def create_l0c_datasets(
         log_info(logger=logger, msg=f"No data between {start_time} and {end_time}.", verbose=verbose)
         return {}
 
+    # If 1 or 2 timesteps per time block, return empty dictionary
+    n_timesteps = len(ds["time"])
+    if n_timesteps < 3:
+        log_info(
+            logger=logger,
+            msg=f"Only {n_timesteps} timesteps between {start_time} and {end_time}.",
+            verbose=verbose,
+        )
+        return {}
+
     # ---------------------------------------------------------------------------------------.
     # If sample interval is a dataset variable, drop timesteps with unexpected measurement intervals !
     if "sample_interval" in ds:
Binary file not shown (disdrodb/l0/manuals/LPM_V0.pdf).
@@ -31,7 +31,7 @@ def reader(
     """Reader."""
     ##------------------------------------------------------------------------.
     #### - Define raw data headers
-    column_names = ["TO_BE_SPLITTED"]
+    column_names = ["TO_PARSE"]
 
     ##------------------------------------------------------------------------.
     #### Define reader options
@@ -79,14 +79,22 @@ def reader(
 
     ##------------------------------------------------------------------------.
     #### Adapt the dataframe to adhere to DISDRODB L0 standards
-    # Count number of delimiters to identify valid rows
-    df = df[df["TO_BE_SPLITTED"].str.count(";") == 519]
+    # Raise error if empty file
+    if len(df) == 0:
+        raise ValueError(f"{filepath} is empty.")
+
+    # Select only rows with expected number of delimiters
+    df = df[df["TO_PARSE"].str.count(";").isin([519, 520])]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filepath}.")
 
     # Split by ; delimiter (before raw drop number)
-    df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)
+    df = df["TO_PARSE"].str.split(";", expand=True, n=79)
 
     # Assign column names
-    column_names = [
+    names = [
         "start_identifier",
         "device_address",
         "sensor_serial_number",
@@ -168,10 +176,10 @@ def reader(
         "number_particles_class_9_internal_data",
         "raw_drop_number",
     ]
-    df.columns = column_names
+    df.columns = names
 
     # Remove checksum from raw_drop_number
-    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=1, expand=True)[0]
+    df["raw_drop_number"] = df["raw_drop_number"].str.strip(";").str.rsplit(";", n=1, expand=True)[0]
 
     # Define datetime "time" column
     df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
@@ -0,0 +1,279 @@
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""DISDRODB reader for GID LPM sensor TC-PI with incorrect reported time."""
+import pandas as pd
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0a_processing import read_raw_text_file
+from disdrodb.utils.logger import log_error
+
+
+def read_txt_file(file, filename, logger):
+    """Parse for TC-PI LPM file."""
+    #### - Define raw data headers
+    column_names = ["TO_PARSE"]
+
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+    reader_kwargs = {}
+
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+
+    # Since column names are expected to be passed explicitly, header is set to None
+    reader_kwargs["header"] = None
+
+    # - Number of rows to be skipped at the beginning of the file
+    reader_kwargs["skiprows"] = 1
+
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+
+    # - Define reader engine
+    #   - C engine is faster
+    #   - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+
+    # - Define on-the-fly decompression of on-disk data
+    #   - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df = read_raw_text_file(
+        filepath=file,
+        column_names=column_names,
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Raise error if empty file
+    if len(df) == 0:
+        raise ValueError(f"{filename} is empty.")
+
+    # Select only rows with expected number of delimiters
+    df = df[df["TO_PARSE"].str.count(" ") == 526]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filename}.")
+
+    # Split by ; delimiter (before raw drop number)
+    df = df["TO_PARSE"].str.split(" ", expand=True, n=82)
+
+    # Assign column names
+    names = [
+        "date",
+        "time",
+        "unknown",
+        "start_identifier",
+        "device_address",
+        "sensor_serial_number",
+        "sensor_date",
+        "sensor_time",
+        "weather_code_synop_4677_5min",
+        "weather_code_synop_4680_5min",
+        "weather_code_metar_4678_5min",
+        "precipitation_rate_5min",
+        "weather_code_synop_4677",
+        "weather_code_synop_4680",
+        "weather_code_metar_4678",
+        "precipitation_rate",
+        "rainfall_rate",
+        "snowfall_rate",
+        "precipitation_accumulated",
+        "mor_visibility",
+        "reflectivity",
+        "quality_index",
+        "max_hail_diameter",
+        "laser_status",
+        "static_signal_status",
+        "laser_temperature_analog_status",
+        "laser_temperature_digital_status",
+        "laser_current_analog_status",
+        "laser_current_digital_status",
+        "sensor_voltage_supply_status",
+        "current_heating_pane_transmitter_head_status",
+        "current_heating_pane_receiver_head_status",
+        "temperature_sensor_status",
+        "current_heating_voltage_supply_status",
+        "current_heating_house_status",
+        "current_heating_heads_status",
+        "current_heating_carriers_status",
+        "control_output_laser_power_status",
+        "reserved_status",
+        "temperature_interior",
+        "laser_temperature",
+        "laser_current_average",
+        "control_voltage",
+        "optical_control_voltage_output",
+        "sensor_voltage_supply",
+        "current_heating_pane_transmitter_head",
+        "current_heating_pane_receiver_head",
+        "temperature_ambient",
+        "current_heating_voltage_supply",
+        "current_heating_house",
+        "current_heating_heads",
+        "current_heating_carriers",
+        "number_particles",
+        "number_particles_internal_data",
+        "number_particles_min_speed",
+        "number_particles_min_speed_internal_data",
+        "number_particles_max_speed",
+        "number_particles_max_speed_internal_data",
+        "number_particles_min_diameter",
+        "number_particles_min_diameter_internal_data",
+        "number_particles_no_hydrometeor",
+        "number_particles_no_hydrometeor_internal_data",
+        "number_particles_unknown_classification",
+        "number_particles_unknown_classification_internal_data",
+        "number_particles_class_1",
+        "number_particles_class_1_internal_data",
+        "number_particles_class_2",
+        "number_particles_class_2_internal_data",
+        "number_particles_class_3",
+        "number_particles_class_3_internal_data",
+        "number_particles_class_4",
+        "number_particles_class_4_internal_data",
+        "number_particles_class_5",
+        "number_particles_class_5_internal_data",
+        "number_particles_class_6",
+        "number_particles_class_6_internal_data",
+        "number_particles_class_7",
+        "number_particles_class_7_internal_data",
+        "number_particles_class_8",
+        "number_particles_class_8_internal_data",
+        "number_particles_class_9",
+        "number_particles_class_9_internal_data",
+        "TO_BE_FURTHER_PROCESSED",
+    ]
+    df.columns = names
+
+    # Define datetime "time" column
+    df["time"] = df["date"] + " " + df["time"]
+    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
+
+    # Drop row if start_identifier different than 00
+    df = df[df["start_identifier"].astype(str) == "00"]
+
+    # Extract the last variables remained in raw_drop_number
+    df_parsed = df["TO_BE_FURTHER_PROCESSED"].str.rsplit(" ", n=5, expand=True)
+    df_parsed.columns = [
+        "raw_drop_number",
+        "air_temperature",
+        "relative_humidity",
+        "wind_speed",
+        "wind_direction",
+        "checksum",
+    ]
+
+    # Assign columns to the original dataframe
+    df[df_parsed.columns] = df_parsed
+
+    # Drop rows with invalid raw_drop_number
+    # --> 440 value # 22x20
+    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    columns_to_drop = [
+        "start_identifier",
+        "device_address",
+        "sensor_serial_number",
+        "sensor_date",
+        "sensor_time",
+        "date",
+        "unknown",
+        "TO_BE_FURTHER_PROCESSED",
+        "air_temperature",
+        "relative_humidity",
+        "wind_speed",
+        "wind_direction",
+        "checksum",
+    ]
+    df = df.drop(columns=columns_to_drop)
+    return df
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    import zipfile
+
+    ##------------------------------------------------------------------------.
+    # filename = os.path.basename(filepath)
+    # return read_txt_file(file=filepath, filename=filename, logger=logger)
+
+    # ---------------------------------------------------------------------.
+    #### Iterate over all files (aka timesteps) in the daily zip archive
+    # - Each file contain a single timestep !
+    # list_df = []
+    # with tempfile.TemporaryDirectory() as temp_dir:
+    #     # Extract all files
+    #     unzip_file_on_terminal(filepath, temp_dir)
+
+    #     # Walk through extracted files
+    #     for root, _, files in os.walk(temp_dir):
+    #         for filename in sorted(files):
+    #             if filename.endswith(".txt"):
+    #                 full_path = os.path.join(root, filename)
+    #                 try:
+    #                     df = read_txt_file(file=full_path, filename=filename, logger=logger)
+    #                     if df is not None:
+    #                         list_df.append(df)
+    #                 except Exception as e:
+    #                     msg = f"An error occurred while reading {filename}: {e}"
+    #                     log_error(logger=logger, msg=msg, verbose=True)
+
+    list_df = []
+    with zipfile.ZipFile(filepath, "r") as zip_ref:
+        filenames = sorted(zip_ref.namelist())
+        for filename in filenames:
+            if filename.endswith(".txt"):
+                # Open file
+                with zip_ref.open(filename) as file:
+                    try:
+                        df = read_txt_file(file=file, filename=filename, logger=logger)
+                        if df is not None:
+                            list_df.append(df)
+                    except Exception as e:
+                        msg = f"An error occurred while reading {filename}. The error is: {e}"
+                        log_error(logger=logger, msg=msg, verbose=True)
+
+    # Check the zip file contains at least some non-empty files
+    if len(list_df) == 0:
+        raise ValueError(f"{filepath} contains only empty files!")
+
+    # Concatenate all dataframes into a single one
+    df = pd.concat(list_df)
+
+    # ---------------------------------------------------------------------.
+    return df
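The active branch of this new reader parses each `.txt` member of the daily zip archive directly from the open `zipfile` handle, without extracting to disk (the commented-out `tempfile` variant it replaces did extract first). A self-contained sketch of the same pattern, using an in-memory zip and toy member names and content instead of real LPM files:

```python
import io
import zipfile

import pandas as pd

# Build a small in-memory zip with two "per-timestep" text files (toy content)
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("20240101_000000.txt", "header\n1 2\n")
    zf.writestr("20240101_000100.txt", "header\n3 4\n")

# Same pattern as the reader: open each member as a file object
# and parse it directly, without extracting to disk
list_df = []
with zipfile.ZipFile(buf, "r") as zip_ref:
    for filename in sorted(zip_ref.namelist()):
        if filename.endswith(".txt"):
            with zip_ref.open(filename) as file:
                list_df.append(pd.read_csv(file, sep=" ", skiprows=1, header=None))

df = pd.concat(list_df)
print(len(df))  # 2 -- one row per timestep file
```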