disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import numpy as np
19
+ import pandas as pd
20
+
21
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
+ from disdrodb.l0.l0a_processing import read_raw_text_file
23
+
24
+
25
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw text file made of ``ID:Value`` telegram lines.

    Every line of the raw file is parsed as ``<two-digit field ID>:<value>``.
    Consecutive lines are grouped into a timestep until the field ID stops
    increasing; each timestep becomes one row of the returned dataframe.

    Parameters
    ----------
    filepath
        Path of the raw file, forwarded to ``read_raw_text_file``.
    logger : optional
        Logger forwarded to ``read_raw_text_file``. The default is ``None``.

    Returns
    -------
    pandas.DataFrame
        One row per timestep, with the variables listed in ``column_dict``
        (minus ``sensor_date`` and ``sensor_time``) plus a ``time`` column.

    Raises
    ------
    ValueError
        If the file does not contain any valid ``ID:Value`` row.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    # - The file is read as a single text column and parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Create ID and Value columns (split only at the first colon)
    df = df["TO_PARSE"].str.split(":", expand=True, n=1)
    df.columns = ["ID", "Value"]

    # Select only rows with values
    # - Falsy values (empty string, or None when the line has no colon) are dropped
    df = df[df["Value"].astype(bool)]

    # Drop rows with invalid IDs
    # - Corrupted rows
    # - Valid IDs are the zero-padded strings "00" ... "93"
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
    df = df[df["ID"].astype(str).isin(valid_id_str)]

    # Fail with an explicit message when nothing is left to parse
    # - Otherwise pd.concat would raise the opaque "No objects to concatenate"
    if df.empty:
        raise ValueError(f"No valid 'ID:Value' rows found in {filepath}.")

    # Create the dataframe with each row corresponding to a timestep
    # - A new group starts every time the ID value does not increase
    timestep_id = (df["ID"].astype(int).diff() <= 0).cumsum()

    # Reshape each timestep to a single row (IDs become the columns)
    group_dfs = [group.set_index("ID").T for _, group in df.groupby(timestep_id)]

    # Merge each timestep dataframe
    # --> Missing columns are infilled by NaN
    df = pd.concat(group_dfs, axis=0)
    df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")

    # Define available column names
    column_dict = {
        "01": "rainfall_rate_32bit",
        "02": "rainfall_accumulated_32bit",
        "03": "weather_code_synop_4680",
        "04": "weather_code_synop_4677",
        "05": "weather_code_metar_4678",
        "06": "weather_code_nws",
        "07": "reflectivity_32bit",
        "08": "mor_visibility",
        "09": "sample_interval",
        "10": "laser_amplitude",
        "11": "number_particles",
        "12": "sensor_temperature",
        # "13": "sensor_serial_number",
        # "14": "firmware_iop",
        # "15": "firmware_dsp",
        "16": "sensor_heating_current",
        "17": "sensor_battery_voltage",
        "18": "sensor_status",
        # "19": "start_time",
        "20": "sensor_time",
        "21": "sensor_date",
        # "22": "station_name",
        # "23": "station_number",
        "24": "rainfall_amount_absolute_32bit",
        "25": "error_code",
        # "30": "rainfall_rate_16_bit_30",
        # "31": "rainfall_rate_16_bit_1200",
        # "32": "rainfall_accumulated_16bit",
        "34": "rain_kinetic_energy",
        "35": "snowfall_rate",
        "90": "raw_drop_concentration",
        "91": "raw_drop_average_velocity",
        "93": "raw_drop_number",
    }

    # Identify missing columns and add NaN
    # - The "NaN" string is used as placeholder value
    for column in column_dict:
        if column not in df.columns:
            df[column] = "NaN"

    # Rename columns
    df = df.rename(column_dict, axis=1)

    # Keep only columns defined in the dictionary
    df = df[list(column_dict.values())]

    # Define datetime "time" column
    # - Unparsable timestamps become NaT (errors="coerce")
    df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "sensor_date",
        "sensor_time",
        # "firmware_iop",
        # "firmware_dsp",
        # "sensor_serial_number",
        # "station_name",
        # "station_number",
    ]
    df = df.drop(columns=columns_to_drop)

    return df
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for DELFT OTT PARSIVEL2 sensor in netCDF format."""
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Open a raw netCDF file and standardize it as a PARSIVEL2 dataset.

    The file is opened with ``open_raw_netcdf_file``, its dimensions and
    variables are selected/renamed by ``standardize_raw_dataset`` and the
    sensor temperature is shifted by -273.15 before the dataset is returned.
    """
    ##------------------------------------------------------------------------.
    #### Open the netCDF
    raw_ds = open_raw_netcdf_file(filepath=filepath, logger=logger)

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Source name --> DISDRODB name, for the dimensions ...
    dimension_names = {
        "diameter": "diameter_bin_center",
        "velocity": "velocity_bin_center",
    }
    # ... and for the variables
    variable_names = {
        "rainfall_rate_32bit": "rainfall_rate_32bit",
        "synop_WaWa": "weather_code_synop_4680",
        "synop_WW": "weather_code_synop_4677",
        "radar_reflectivity": "reflectivity_32bit",
        "visibility": "mor_visibility",
        "interval": "sample_interval",
        "sig_laser": "laser_amplitude",
        "n_particles": "number_particles",
        "T_sensor": "sensor_temperature",
        "I_heating": "sensor_heating_current",
        "V_power_supply": "sensor_battery_voltage",
        "state_sensor": "sensor_status",
        "error_code": "error_code",
        "kinetic_energy": "rain_kinetic_energy",
        "snowfall_rate": "snowfall_rate",
        "fall_velocity": "raw_drop_average_velocity",
        "number_concentration": "raw_drop_concentration",
        "data_raw": "raw_drop_number",
    }
    # Single mapping, dimensions first and variables afterwards
    names_mapping = {**dimension_names, **variable_names}

    # Rename dataset variables and columns and infill missing variables
    ds = standardize_raw_dataset(ds=raw_ds, dict_names=names_mapping, sensor_name="PARSIVEL2")

    # Ensure sensor_temperature in Celsius degree (as logged by sensor)
    # - NOTE(review): the 273.15 offset presumes the file stores Kelvin - confirm
    temperature = ds["sensor_temperature"]
    ds["sensor_temperature"] = temperature - 273.15

    # Return the dataset adhering to DISDRODB L0B standards
    return ds
@@ -17,7 +17,9 @@
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
19
  """DISDRODB reader for ENPC PARSIVEL2 raw text data."""
20
- import zipfile
20
+ # import os
21
+ # import tempfile
22
+ # from disdrodb.utils.compression import unzip_file_on_terminal
21
23
 
22
24
  import numpy as np
23
25
  import pandas as pd
@@ -26,6 +28,232 @@ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
26
28
  from disdrodb.l0.l0a_processing import read_raw_text_file
27
29
  from disdrodb.utils.logger import log_error
28
30
 
31
# Mapping between the two-digit field ID found at the start of each raw
# telegram line (the text before the first ":") and the variable name used
# for the corresponding dataframe column.
# - The parsers below rename the ID columns with this mapping and then keep
#   only the columns whose name is one of its values.
# - IDs that are commented out are therefore not retained by the parsers.
COLUMN_DICT = {
    "01": "rainfall_rate_32bit",
    "02": "rainfall_accumulated_32bit",
    "03": "weather_code_synop_4680",
    "04": "weather_code_synop_4677",
    "05": "weather_code_metar_4678",
    "06": "weather_code_nws",
    "07": "reflectivity_32bit",
    "08": "mor_visibility",
    # "09": "sample_interval",
    "10": "laser_amplitude",
    "11": "number_particles",
    "12": "sensor_temperature",
    # "13": "sensor_serial_number",
    # "14": "firmware_iop",
    # "15": "firmware_dsp",
    "16": "sensor_heating_current",
    "17": "sensor_battery_voltage",
    "18": "sensor_status",
    # "19": "start_time",
    # "20": "sensor_time",
    # "21": "sensor_date",
    # "22": "station_name",
    # "23": "station_number",
    "24": "rainfall_amount_absolute_32bit",
    "25": "error_code",
    "26": "sensor_temperature_pcb",
    "27": "sensor_temperature_receiver",
    "28": "sensor_temperature_trasmitter",
    "30": "rainfall_rate_16_bit_30",
    "31": "rainfall_rate_16_bit_1200",
    "32": "rainfall_accumulated_16bit",
    "34": "rain_kinetic_energy",
    "35": "snowfall_rate",
    "90": "raw_drop_concentration",
    "91": "raw_drop_average_velocity",
    "93": "raw_drop_number",
}
69
+
70
+
71
def parse_single_line_format(df, filename, logger):  # noqa: ARG001
    """Parse a file whose whole telegram is stored on a single line.

    In this format the line breaks between the ``ID:Value`` fields are written
    as escape sequences inside the text, so the line is first unescaped and
    then split into its fields.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a ``TO_PARSE`` column; only its first row is used.
    filename : str
        Name of the raw file. The timestep is derived from it.
        Supported patterns:

        - ``Raw_pars2_2017_12_28_23_58_30.txt``
        - ``Raw_Pars_RW_turb_1_20201211_235930.txt``
    logger
        Unused. Kept so that all parsers share the same call signature.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the ``COLUMN_DICT`` variables and a ``time`` column.
        ``time`` is NaT if the filename does not match the expected pattern.
    """
    import os

    # Split into lines
    # - Encode with latin-1 before decoding the escape sequences: the
    #   "unicode_escape" codec reads bytes as Latin-1, so encoding with the
    #   default UTF-8 would corrupt every non-ASCII character.
    # - "backslashreplace" keeps characters outside Latin-1 round-trippable.
    text = df["TO_PARSE"].iloc[0]
    decoded_text = text.encode("latin-1", errors="backslashreplace").decode("unicode_escape")
    lines = decoded_text.splitlines()  # handles \r\n, \r, \n

    # Split each line at the first colon
    data = [line.split(":", 1) for line in lines if ":" in line]

    # Create the DataFrame
    df = pd.DataFrame(data, columns=["ID", "Value"])

    # Drop rows with invalid IDs
    # - Valid IDs are the zero-padded strings "00" ... "93"
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
    df = df[df["ID"].astype(str).isin(valid_id_str)]

    # Select only rows with values
    df = df[df["Value"].apply(lambda x: x is not None)]

    # Reshape dataframe (IDs become the columns)
    df = df.set_index("ID").T

    # Assign column names
    df = df.rename(COLUMN_DICT, axis=1)

    # Keep only columns defined in the dictionary
    df = df.filter(items=list(COLUMN_DICT.values()))

    # Infill missing columns
    df = infill_missing_columns(df)

    # Define datetime "time" column from filename
    # - Use the basename, so that a directory prefix in the archive member name
    #   does not defeat the filename pattern detection
    basename = os.path.basename(filename)
    name_parts = basename.replace(".txt", "").split("_")
    if basename.startswith("Raw_Pars_RW_turb"):
        datetime_str = " ".join(name_parts[5:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y%m%d %H%M%S", errors="coerce")
    else:
        datetime_str = " ".join(name_parts[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S", errors="coerce")

    return df
115
+
116
+
117
def parse_multiline_format(df, filename):
    """Parse a file storing one ``ID:Value`` telegram field per line.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a ``TO_PARSE`` column holding one raw line per row.
    filename : str
        Name of the raw file. The timestep is derived from it.
        Supported patterns:

        - ``Raw_pars2_2017_12_28_23_58_30.txt``
        - ``Raw_Pars_RW_turb_1_20201211_235930.txt``

    Returns
    -------
    pandas.DataFrame
        Dataframe with the ``COLUMN_DICT`` variables and a ``time`` column.

    Raises
    ------
    ValueError
        Raised by ``pandas.to_datetime`` if the timestep cannot be parsed
        from the filename.
    """
    import os

    # Create ID and Value columns (split only at the first colon)
    df = df["TO_PARSE"].str.split(":", expand=True, n=1)
    df.columns = ["ID", "Value"]

    # Drop rows with invalid IDs
    # - Valid IDs are the zero-padded strings "00" ... "93"
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
    df = df[df["ID"].astype(str).isin(valid_id_str)]

    # Select only rows with values
    df = df[df["Value"].apply(lambda x: x is not None)]

    # Reshape dataframe (IDs become the columns)
    df = df.set_index("ID").T

    # Assign column names
    df = df.rename(COLUMN_DICT, axis=1)

    # Keep only columns defined in the dictionary
    df = df.filter(items=list(COLUMN_DICT.values()))

    # Infill missing columns
    df = infill_missing_columns(df)

    # Define datetime "time" column from filename
    # - Use the basename, so that a directory prefix in the archive member name
    #   does not defeat the filename pattern detection
    basename = os.path.basename(filename)
    name_parts = basename.replace(".txt", "").split("_")
    if basename.startswith("Raw_Pars_RW_turb"):
        datetime_str = " ".join(name_parts[5:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y%m%d %H%M%S")
    else:
        datetime_str = " ".join(name_parts[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")

    return df
154
+
155
+
156
def parse_older_format(df, filename, logger):
    """Parse the old single-line format, where fields are separated by ';'.

    The line is expected to split into exactly 5 fields, the first one being
    at most 9 characters long. Otherwise the file is reported as corrupted
    through ``log_error`` and ``None`` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a ``TO_PARSE`` column.
    filename : str
        Name of the raw file. Its last six "_"-separated tokens are parsed
        as year, month, day, hour, minute and second.
    logger
        Logger passed to ``log_error``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed dataframe, or ``None`` if the file is considered corrupted.
    """
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    fields = df["TO_PARSE"].str.split(";", expand=True)

    # Sanity check on the number of fields and on the length of the first one
    n_fields = len(fields.columns)
    if n_fields != 5 or len(fields[0].iloc[0]) > 9:
        log_error(logger, msg=f"{filename} is corrupted", verbose=False)
        return None

    fields.columns = [
        "rainfall_rate_32bit",
        "raw_drop_concentration",
        "raw_drop_number",
        "unknown",
        "TO_SPLIT",
    ]

    # Extract and clean out additional variables
    rain_rate = fields["rainfall_rate_32bit"]
    fields["rainfall_rate_32bit"] = rain_rate.str.strip(",")
    # - The visibility is the first comma-separated token of the last field
    last_field_tokens = fields["TO_SPLIT"].str.split(",", expand=True)
    fields["mor_visibility"] = last_field_tokens[0]

    # Define datetime "time" column from filename
    name_tokens = filename.replace(".txt", "").split("_")
    timestamp = " ".join(name_tokens[-6:])
    fields["time"] = pd.to_datetime(timestamp, format="%Y %m %d %H %M %S")

    # Drop columns not agreeing with DISDRODB L0 standards
    fields = fields.drop(columns=["TO_SPLIT", "unknown"])

    # Infill missing columns
    return infill_missing_columns(fields)
192
+
193
+
194
def infill_missing_columns(df):
    """Add every ``COLUMN_DICT`` variable missing from ``df``.

    Missing columns are filled with the string ``"NaN"``. The dataframe is
    modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to complete.

    Returns
    -------
    pandas.DataFrame
        The same dataframe, holding all the ``COLUMN_DICT`` variables.
    """
    # Iterate over the dictionary values instead of a set of them: the
    # iteration order of a set of strings changes between interpreter runs,
    # which made the order of the added columns non-deterministic.
    for column in COLUMN_DICT.values():
        if column not in df.columns:
            df[column] = "NaN"
    return df
201
+
202
+
203
def read_txt_file(file, filename, logger):
    """Read one txt file of the daily zip archive and dispatch it to a parser.

    The file is loaded as a single text column and the parser is chosen from
    the number of rows:

    - more than one row: multi-line format,
    - one row containing "TYP": new single-line format,
    - one row otherwise: old single-line format.

    Parameters
    ----------
    file
        File path or file object, forwarded to ``read_raw_text_file``.
    filename : str
        Name of the file, used by the parsers and in the error message.
    logger
        Logger forwarded to ``read_raw_text_file`` and to the parsers.

    Returns
    -------
    pandas.DataFrame or None
        Whatever the selected parser returns.

    Raises
    ------
    ValueError
        If the file does not contain any row.
    """
    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - Define delimiter
        "delimiter": "\\n",
        # - Define encoding
        "encoding": "latin",  # "ISO-8859-1"
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Define reader engine (python engine is more feature-complete)
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data
        "compression": "infer",
        # - Strings to recognize as NA/NaN and replace with standard NA flags
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    # - Everything is read into a single column, parsed afterwards
    df = read_raw_text_file(
        filepath=file,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    n_rows = len(df)

    # Empty file --> raise an error
    if n_rows == 0:
        raise ValueError(f"{filename} is empty.")

    # Several rows --> multi-line format
    if n_rows > 1:
        return parse_multiline_format(df, filename)

    # Single row with TYP --> single-line new format
    first_row = df["TO_PARSE"].iloc[0]
    if "TYP" in first_row:
        return parse_single_line_format(df, filename, logger=logger)

    # Otherwise old format
    return parse_older_format(df, filename, logger=logger)
256
+
29
257
 
30
258
  @is_documented_by(reader_generic_docstring)
31
259
  def reader(
@@ -33,141 +261,29 @@ def reader(
33
261
  logger=None,
34
262
  ):
35
263
  """Reader."""
36
-
37
- ##------------------------------------------------------------------------.
38
- #### Define function to read each txt file inside each daily zip file
39
- def read_txt_file(file, filename):
40
- """Parse a single txt file within the daily zip file."""
41
- ##------------------------------------------------------------------------.
42
- #### Define column names
43
- column_names = ["TO_PARSE"]
44
-
45
- ##------------------------------------------------------------------------.
46
- #### Define reader options
47
- reader_kwargs = {}
48
- # - Define delimiter
49
- reader_kwargs["delimiter"] = "\\n"
50
- # - Skip first row as columns names
51
- # - Define encoding
52
- reader_kwargs["encoding"] = "latin" # "ISO-8859-1"
53
- # - Avoid first column to become df index !!!
54
- reader_kwargs["index_col"] = False
55
- # - Define behaviour when encountering bad lines
56
- reader_kwargs["on_bad_lines"] = "skip"
57
- # - Define reader engine
58
- # - C engine is faster
59
- # - Python engine is more feature-complete
60
- reader_kwargs["engine"] = "python"
61
- # - Define on-the-fly decompression of on-disk data
62
- # - Available: gzip, bz2, zip
63
- reader_kwargs["compression"] = "infer"
64
- # - Strings to recognize as NA/NaN and replace with standard NA flags
65
- # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
66
- # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
67
- # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
68
- reader_kwargs["na_values"] = ["na", "", "error"]
69
-
70
- ##------------------------------------------------------------------------.
71
- #### Read the data
72
- df = read_raw_text_file(
73
- filepath=file,
74
- column_names=column_names,
75
- reader_kwargs=reader_kwargs,
76
- logger=logger,
77
- )
78
-
79
- ##------------------------------------------------------------------------.
80
- #### Adapt the dataframe to adhere to DISDRODB L0 standards
81
- # Create ID and Value columns
82
- df = df["TO_PARSE"].str.split(":", expand=True, n=1)
83
- df.columns = ["ID", "Value"]
84
-
85
- # Select only rows with values
86
- df = df[df["Value"].apply(lambda x: x is not None)]
87
-
88
- # Drop rows with invalid IDs
89
- valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
90
- df = df[df["ID"].astype(str).isin(valid_id_str)]
91
-
92
- # Create the dataframe with each row corresponding to a timestep
93
- # - Group rows based on when ID values restart
94
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
95
-
96
- # Reshape the dataframe
97
- group_dfs = []
98
- for _, group in groups:
99
- group_df = group.set_index("ID").T
100
- group_dfs.append(group_df)
101
-
102
- # Merge each timestep dataframe
103
- # --> Missing columns are infilled by NaN
104
- df = pd.concat(group_dfs, axis=0)
105
-
106
- # Assign column names
107
- column_dict = {
108
- "01": "rainfall_rate_32bit",
109
- "02": "rainfall_accumulated_32bit",
110
- "03": "weather_code_synop_4680",
111
- "04": "weather_code_synop_4677",
112
- "05": "weather_code_metar_4678",
113
- "06": "weather_code_nws",
114
- "07": "reflectivity_32bit",
115
- "08": "mor_visibility",
116
- "09": "sample_interval",
117
- "10": "laser_amplitude",
118
- "11": "number_particles",
119
- "12": "sensor_temperature",
120
- # "13": "sensor_serial_number",
121
- # "14": "firmware_iop",
122
- # "15": "firmware_dsp",
123
- "16": "sensor_heating_current",
124
- "17": "sensor_battery_voltage",
125
- "18": "sensor_status",
126
- # "19": "start_time",
127
- # "20": "sensor_time",
128
- # "21": "sensor_date",
129
- # "22": "station_name",
130
- # "23": "station_number",
131
- "24": "rainfall_amount_absolute_32bit",
132
- "25": "error_code",
133
- "26": "sensor_temperature_pcb",
134
- "27": "sensor_temperature_receiver",
135
- "28": "sensor_temperature_trasmitter",
136
- "30": "rainfall_rate_16_bit_30",
137
- "31": "rainfall_rate_16_bit_1200",
138
- "32": "rainfall_accumulated_16bit",
139
- "34": "rain_kinetic_energy",
140
- "35": "snowfall_rate",
141
- "90": "raw_drop_concentration",
142
- "91": "raw_drop_average_velocity",
143
- "93": "raw_drop_number",
144
- }
145
-
146
- df = df.rename(column_dict, axis=1)
147
-
148
- # Keep only columns defined in the dictionary
149
- df = df[list(column_dict.values())]
150
-
151
- # Define datetime "time" column from filename
152
- datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
153
- df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")
154
-
155
- # # Drop columns not agreeing with DISDRODB L0 standards
156
- # columns_to_drop = [
157
- # "sensor_date",
158
- # "sensor_time",
159
- # "firmware_iop",
160
- # "firmware_dsp",
161
- # "sensor_serial_number",
162
- # "station_name",
163
- # "station_number",
164
- # ]
165
- # df = df.drop(columns=columns_to_drop)
166
- return df
264
+ import zipfile
167
265
 
168
266
  # ---------------------------------------------------------------------.
169
267
  #### Iterate over all files (aka timesteps) in the daily zip archive
170
268
  # - Each file contain a single timestep !
269
+ # list_df = []
270
+ # with tempfile.TemporaryDirectory() as temp_dir:
271
+ # # Extract all files
272
+ # unzip_file_on_terminal(filepath, temp_dir)
273
+
274
+ # # Walk through extracted files
275
+ # for root, _, files in os.walk(temp_dir):
276
+ # for filename in sorted(files):
277
+ # if filename.endswith(".txt"):
278
+ # full_path = os.path.join(root, filename)
279
+ # try:
280
+ # df = read_txt_file(file=full_path, filename=filename, logger=logger)
281
+ # if df is not None:
282
+ # list_df.append(df)
283
+ # except Exception as e:
284
+ # msg = f"An error occurred while reading {filename}: {e}"
285
+ # log_error(logger=logger, msg=msg, verbose=True)
286
+
171
287
  list_df = []
172
288
  with zipfile.ZipFile(filepath, "r") as zip_ref:
173
289
  filenames = sorted(zip_ref.namelist())
@@ -176,12 +292,17 @@ def reader(
176
292
  # Open file
177
293
  with zip_ref.open(filename) as file:
178
294
  try:
179
- df = read_txt_file(file=file, filename=filename)
180
- list_df.append(df)
295
+ df = read_txt_file(file=file, filename=filename, logger=logger)
296
+ if df is not None:
297
+ list_df.append(df)
181
298
  except Exception as e:
182
299
  msg = f"An error occurred while reading {filename}. The error is: {e}."
183
300
  log_error(logger=logger, msg=msg, verbose=True)
184
301
 
302
+ # Check the zip file contains at least some non-empty files
303
+ if len(list_df) == 0:
304
+ raise ValueError(f"{filepath} contains only empty files!")
305
+
185
306
  # Concatenate all dataframes into a single one
186
307
  df = pd.concat(list_df)
187
308