disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
@@ -16,19 +16,18 @@
16
16
  # You should have received a copy of the GNU General Public License
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
- """DISDRODB reader for GID LPM sensors not measuring wind."""
19
+ """DISDRODB reader for ARSO LPM sensors."""
20
20
  import pandas as pd
21
21
 
22
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
23
  from disdrodb.l0.l0a_processing import read_raw_text_file
24
24
 
25
25
 
26
- @is_documented_by(reader_generic_docstring)
27
- def reader(
26
+ def read_SM03_telegram(
28
27
  filepath,
29
28
  logger=None,
30
29
  ):
31
- """Reader."""
30
+ """Read SM03 telegram."""
32
31
  ##------------------------------------------------------------------------.
33
32
  #### - Define raw data headers
34
33
  column_names = ["TO_PARSE"]
@@ -77,16 +76,99 @@ def reader(
77
76
  logger=logger,
78
77
  )
79
78
 
79
+ ##------------------------------------------------------------------------.
80
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
81
+ # Count number of delimiters to identify valid rows
82
+ df = df[df["TO_PARSE"].str.count(";") == 12]
83
+
84
+ # Check there are valid rows left
85
+ if len(df) == 0:
86
+ raise ValueError(f"No valid data in {filepath}")
87
+
88
+ # Split by ; delimiter (before raw drop number)
89
+ df = df["TO_PARSE"].str.split(";", expand=True)
90
+
91
+ # Assign column names
92
+ names = []
93
+ df.columns = names
94
+
95
+ # Define datetime "time" column
96
+ time = df[0].str[-19:]
97
+ df["time"] = pd.to_datetime(time, format="%d/%m/%Y %H:%M:%S", errors="coerce")
98
+
99
+ return df
100
+
101
+
102
+ def read_SM05_telegram(
103
+ filepath,
104
+ logger=None,
105
+ ):
106
+ """Read SM05 telegram."""
107
+ ##------------------------------------------------------------------------.
108
+ #### - Define raw data headers
109
+ column_names = ["TO_PARSE"]
110
+
111
+ ##------------------------------------------------------------------------.
112
+ #### Define reader options
113
+ # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
114
+ reader_kwargs = {}
115
+
116
+ # - Define delimiter
117
+ reader_kwargs["delimiter"] = "\\n"
118
+
119
+ # - Avoid first column to become df index !!!
120
+ reader_kwargs["index_col"] = False
121
+
122
+ # - Define encoding
123
+ reader_kwargs["encoding"] = "latin"
124
+
125
+ # Since column names are expected to be passed explicitly, header is set to None
126
+ reader_kwargs["header"] = None
127
+
128
+ # - Number of rows to be skipped at the beginning of the file
129
+ reader_kwargs["skiprows"] = None
130
+
131
+ # - Define behaviour when encountering bad lines
132
+ reader_kwargs["on_bad_lines"] = "skip"
133
+
134
+ # - Define reader engine
135
+ # - C engine is faster
136
+ # - Python engine is more feature-complete
137
+ reader_kwargs["engine"] = "python"
138
+
139
+ # - Define on-the-fly decompression of on-disk data
140
+ # - Available: gzip, bz2, zip
141
+ reader_kwargs["compression"] = "infer"
142
+
143
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
144
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
145
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
146
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
147
+ reader_kwargs["na_values"] = ["na", "", "error"]
148
+
149
+ ##------------------------------------------------------------------------.
150
+ #### Read the data
151
+ df = read_raw_text_file(
152
+ filepath=filepath,
153
+ column_names=column_names,
154
+ reader_kwargs=reader_kwargs,
155
+ logger=logger,
156
+ )
157
+
80
158
  ##------------------------------------------------------------------------.
81
159
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
82
160
  # Count number of delimiters to identify valid rows
83
161
  df = df[df["TO_PARSE"].str.count(";") == 521]
84
162
 
163
+ # Check there are valid rows left
164
+ if len(df) == 0:
165
+ raise ValueError(f"No valid data in {filepath}")
166
+
85
167
  # Split by ; delimiter (before raw drop number)
86
168
  df = df["TO_PARSE"].str.split(";", expand=True, n=80)
87
169
 
88
170
  # Assign column names
89
- column_names = [
171
+ names = [
90
172
  "time",
91
173
  "start_identifier",
92
174
  "device_address",
@@ -109,7 +191,7 @@ def reader(
109
191
  "quality_index",
110
192
  "max_hail_diameter",
111
193
  "laser_status",
112
- "static_signal",
194
+ "static_signal_status",
113
195
  "laser_temperature_analog_status",
114
196
  "laser_temperature_digital_status",
115
197
  "laser_current_analog_status",
@@ -123,7 +205,7 @@ def reader(
123
205
  "current_heating_heads_status",
124
206
  "current_heating_carriers_status",
125
207
  "control_output_laser_power_status",
126
- "reserve_status",
208
+ "reserved_status",
127
209
  "temperature_interior",
128
210
  "laser_temperature",
129
211
  "laser_current_average",
@@ -169,15 +251,16 @@ def reader(
169
251
  "number_particles_class_9_internal_data",
170
252
  "raw_drop_number",
171
253
  ]
172
- df.columns = column_names
254
+ df.columns = names
255
+
256
+ # Define datetime "time" column
257
+ time_str = df["time"].str[-19:]
258
+ # time_str = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
259
+ df["time"] = pd.to_datetime(time_str, format="%d/%m/%Y %H:%M:%S", errors="coerce")
173
260
 
174
261
  # Remove checksum from raw_drop_number
175
262
  df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
176
263
 
177
- # Define datetime "time" column
178
- time = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
179
- df["time"] = pd.to_datetime(time, format="%d/%m/%Y %H:%M:%S", errors="coerce")
180
-
181
264
  # Drop row if start_identifier different than 00
182
265
  df["start_identifier"] = df["start_identifier"].astype(str).str[-2:]
183
266
  df = df[df["start_identifier"] == "00"]
@@ -195,3 +278,15 @@ def reader(
195
278
  ]
196
279
  df = df.drop(columns=columns_to_drop)
197
280
  return df
281
+
282
+
283
@is_documented_by(reader_generic_docstring)
def reader(filepath, logger=None):
    """Reader."""
    # Delegate the whole parsing to the SM05 telegram reader.
    df = read_SM05_telegram(filepath=filepath, logger=logger)
    return df
+ )
@@ -16,7 +16,7 @@
16
16
  # You should have received a copy of the GNU General Public License
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
- """DISDRODB reader for GID LPM sensors not measuring wind."""
19
+ """DISDRODB reader for UL LPM sensors in Crni Vrh."""
20
20
  import pandas as pd
21
21
 
22
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -113,7 +113,7 @@ def reader(
113
113
  "quality_index",
114
114
  "max_hail_diameter",
115
115
  "laser_status",
116
- "static_signal",
116
+ "static_signal_status",
117
117
  "laser_temperature_analog_status",
118
118
  "laser_temperature_digital_status",
119
119
  "laser_current_analog_status",
@@ -127,7 +127,7 @@ def reader(
127
127
  "current_heating_heads_status",
128
128
  "current_heating_carriers_status",
129
129
  "control_output_laser_power_status",
130
- "reserve_status",
130
+ "reserved_status",
131
131
  "temperature_interior",
132
132
  "laser_temperature",
133
133
  "laser_current_average",
@@ -108,7 +108,7 @@ def reader(
108
108
  "quality_index",
109
109
  "max_hail_diameter",
110
110
  "laser_status",
111
- "static_signal",
111
+ "static_signal_status",
112
112
  "laser_temperature_analog_status",
113
113
  "laser_temperature_digital_status",
114
114
  "laser_current_analog_status",
@@ -122,7 +122,7 @@ def reader(
122
122
  "current_heating_heads_status",
123
123
  "current_heating_carriers_status",
124
124
  "control_output_laser_power_status",
125
- "reserve_status",
125
+ "reserved_status",
126
126
  "temperature_interior",
127
127
  "laser_temperature",
128
128
  "laser_current_average",
@@ -0,0 +1,227 @@
1
+ # -----------------------------------------------------------------------------.
2
+ # Copyright (c) 2021-2023 DISDRODB developers
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ # -----------------------------------------------------------------------------.
17
+ """DISDRODB reader for EUSKALMET OTT Parsivel raw data."""
18
+ # import os
19
+ # import tempfile
20
+ # from disdrodb.utils.compression import unzip_file_on_terminal
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
26
+ from disdrodb.l0.l0a_processing import read_raw_text_file
27
+ from disdrodb.utils.logger import log_error
28
+
29
# Maps the two-character field ID found before the colon on each telegram line
# to the DISDRODB L0 column name. Commented-out IDs are not retained.
COLUMN_DICT = {
    "01": "rainfall_rate_32bit",
    "02": "rainfall_accumulated_32bit",
    "03": "weather_code_synop_4680",
    "04": "weather_code_synop_4677",
    "05": "weather_code_metar_4678",  # empty
    "06": "weather_code_nws",  # empty
    "07": "reflectivity_32bit",
    "08": "mor_visibility",
    "09": "sample_interval",
    "10": "laser_amplitude",
    "11": "number_particles",
    "12": "sensor_temperature",
    # "13": "sensor_serial_number",
    # "14": "firmware_iop",
    # "15": "firmware_dsp",
    "16": "sensor_heating_current",
    "17": "sensor_battery_voltage",
    "18": "sensor_status",
    # "19": "start_time",
    # "20": "sensor_time",
    # "21": "sensor_date",
    # "22": "station_name",
    # "23": "station_number",
    "24": "rainfall_amount_absolute_32bit",
    "25": "error_code",
    "30": "rainfall_rate_16bit",
    "31": "rainfall_rate_12bit",
    "32": "rainfall_accumulated_16bit",
    "90": "raw_drop_concentration",
    "91": "raw_drop_average_velocity",
    "93": "raw_drop_number",
}


def infill_missing_columns(df):
    """Add every column listed in ``COLUMN_DICT`` that is missing from ``df``.

    Missing columns are filled with the string ``"NaN"``. Columns are added
    following the order of ``COLUMN_DICT`` so that the resulting column order
    is identical across runs (iterating over a ``set`` of strings is subject
    to hash randomisation).

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to complete. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, with the missing columns appended.
    """
    for column in COLUMN_DICT.values():
        if column not in df.columns:
            df[column] = "NaN"
    return df
71
+
72
+
73
def read_txt_file(file, filename, logger):
    """Parse a single txt file within the daily zip file.

    The telegram is taken from the fifth row of the file. It is split into
    ``ID:Value`` entries which are reshaped into a one-row dataframe.

    Parameters
    ----------
    file : str or file-like
        Path or open file handle forwarded to ``read_raw_text_file``.
    filename : str
        Name of the file. Only used to build error messages.
    logger
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One-row dataframe with the columns listed in ``COLUMN_DICT``
        plus the ``time`` column.

    Raises
    ------
    ValueError
        If the file is empty or has fewer than the five rows required
        to reach the telegram.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=file,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Empty file, raise error
    if len(df) == 0:
        raise ValueError(f"{filename} is empty.")

    # Raise error if corrupted file
    # - The telegram is read from the fifth row, so any file with fewer
    #   than five rows cannot be parsed.
    if len(df) < 5:
        raise ValueError(f"{filename} is corrupted.")

    # Extract string
    string = df["TO_PARSE"].iloc[4]

    # Split into lines
    # - Escape sequences stored literally in the text are decoded first
    decoded_text = string.encode().decode("unicode_escape")
    decoded_text = decoded_text.replace("'", "").replace('"', "")
    lines = decoded_text.split()

    # Extract time (second comma-separated field of the first line)
    time_str = lines[0].split(",")[1]

    # Split each line at the first colon
    data = [line.split(":", 1) for line in lines if ":" in line]

    # Create the DataFrame
    df = pd.DataFrame(data, columns=["ID", "Value"])

    # Drop rows with invalid IDs (valid IDs are "00" to "93")
    valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
    df = df[df["ID"].astype(str).isin(valid_id_str)]

    # Select only rows with values
    df = df[df["Value"].apply(lambda x: x is not None)]

    # Reshape dataframe (IDs become columns, single row of values)
    df = df.set_index("ID").T

    # Assign column names
    df = df.rename(COLUMN_DICT, axis=1)

    # Keep only columns defined in the dictionary
    df = df.filter(items=list(COLUMN_DICT.values()))

    # Infill missing columns
    df = infill_missing_columns(df)

    # Add time column as datetime dtype
    df["time"] = pd.to_datetime(time_str, format="%Y%m%d%H%M%S", errors="coerce")

    # Preprocess the raw spectrum and raw_drop_average_velocity
    # - Add 0 before every ; if ; not preceded by a digit
    # - Example: ';;1;;' --> '0;0;1;0;'
    df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
    df["raw_drop_average_velocity"] = df["raw_drop_average_velocity"].str.replace(r"(?<!\d);", "0;", regex=True)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
173
+
174
+
175
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    import zipfile

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily zip archive
    # - Each ".dat" member contains a single timestep
    # - Members that cannot be parsed are logged and skipped
    frames = []
    with zipfile.ZipFile(filepath, "r") as archive:
        for member in sorted(archive.namelist()):
            if not member.endswith(".dat"):
                continue
            with archive.open(member) as handle:
                try:
                    parsed = read_txt_file(file=handle, filename=member, logger=logger)
                    if parsed is not None:
                        frames.append(parsed)
                except Exception as e:
                    msg = f"An error occurred while reading {member}. The error is: {e}."
                    log_error(logger=logger, msg=msg, verbose=True)

    # Check that at least one member of the archive could be read
    if not frames:
        raise ValueError(f"{filepath} contains only empty files!")

    # ---------------------------------------------------------------------.
    # Concatenate all timesteps into a single dataframe
    return pd.concat(frames)
@@ -40,7 +40,7 @@ def reader(
40
40
  reader_kwargs["header"] = None
41
41
  # - Skip file with encoding errors
42
42
  reader_kwargs["encoding_errors"] = "ignore"
43
- # - Need for zipped raw file (GPM files)
43
+ # - Need for zipped raw file (NASA files)
44
44
  reader_kwargs["zipped"] = True
45
45
  # - Searched file into tar files
46
46
  reader_kwargs["filename_to_read_zipped"] = "spectrum.txt"
@@ -93,20 +93,11 @@ def reader(
93
93
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
94
94
  df = df[df["ID"].astype(str).isin(valid_id_str)]
95
95
 
96
- # Create the dataframe with each row corresponding to a timestep
97
- # - Group rows based on when ID values restart
98
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
99
-
100
- # Reshape the dataframe
101
- group_dfs = []
102
- for _, group in groups:
103
- group_df = group.set_index("ID").T
104
- group_dfs.append(group_df)
105
-
106
- # Merge each timestep dataframe
107
- # --> Missing columns are infilled by NaN
108
- df = pd.concat(group_dfs, axis=0)
109
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
96
+ # Create the dataframe where each row corresponds to a timestep
97
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
98
+ df = df.pivot(index="_group", columns="ID") # noqa
99
+ df.columns = df.columns.get_level_values("ID")
100
+ df = df.reset_index(drop=True)
110
101
 
111
102
  # Define column names
112
103
  column_dict = {
@@ -97,20 +97,11 @@ def reader(
97
97
  if len(df) == 0:
98
98
  raise ValueError("No rows left after removing corrupted ones.")
99
99
 
100
- # Create the dataframe with each row corresponding to a timestep
101
- # - Group rows based on when ID values restart
102
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
103
-
104
- # Reshape the dataframe
105
- group_dfs = []
106
- for _, group in groups:
107
- group_df = group.set_index("ID").T
108
- group_dfs.append(group_df)
109
-
110
- # Merge each timestep dataframe
111
- # --> Missing columns are infilled by NaN
112
- df = pd.concat(group_dfs, axis=0)
113
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
100
+ # Create the dataframe where each row corresponds to a timestep
101
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
102
+ df = df.pivot(index="_group", columns="ID") # noqa
103
+ df.columns = df.columns.get_level_values("ID")
104
+ df = df.reset_index(drop=True)
114
105
 
115
106
  # Define column names
116
107
  column_dict = {
@@ -139,8 +130,8 @@ def reader(
139
130
  # "23": "station_number",
140
131
  "24": "rainfall_amount_absolute_32bit",
141
132
  # "25": "error_code",
142
- # "30": "rainfall_rate_16_bit_30",
143
- # "31": "rainfall_rate_16_bit_1200",
133
+ # "30": "rainfall_rate_16bit",
134
+ # "31": "rainfall_rate_12bit",
144
135
  "32": "rainfall_accumulated_16bit",
145
136
  # "90": "raw_drop_concentration",
146
137
  # "91": "raw_drop_average_velocity",
@@ -177,7 +168,7 @@ def reader(
177
168
  df = df.drop(columns=columns_to_drop)
178
169
 
179
170
  # Stations UF4-7 have NAN at the end of the raw drop number
180
- df["raw_drop_number"] = df["raw_drop_number"].str.replace("NaN;", "").iloc[0]
171
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("NaN;", "")
181
172
 
182
173
  # Return the dataframe adhering to DISDRODB L0 standards
183
174
  return df
@@ -22,12 +22,8 @@ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
22
  from disdrodb.l0.l0a_processing import read_raw_text_file
23
23
 
24
24
 
25
- @is_documented_by(reader_generic_docstring)
26
- def reader(
27
- filepath,
28
- logger=None,
29
- ):
30
- """Reader."""
25
+ def read_format1(filepath, logger):
26
+ """Read format F1 (before 2020)."""
31
27
  ##------------------------------------------------------------------------.
32
28
  #### Define column names
33
29
  column_names = ["TO_PARSE"]
@@ -91,8 +87,8 @@ def reader(
91
87
  "weather_code_synop_4680",
92
88
  "reflectivity_32bit",
93
89
  "mor_visibility",
94
- "sensor_temperature", # maybe
95
- "laser_amplitude", # probably
90
+ "sensor_temperature",
91
+ "laser_amplitude",
96
92
  "number_particles",
97
93
  "sensor_status",
98
94
  "sensor_heating_current",
@@ -119,3 +115,116 @@ def reader(
119
115
 
120
116
  # Return the dataframe adhering to DISDRODB L0 standards
121
117
  return df
118
+
119
+
120
def read_format2(filepath, logger):
    """Read format 2 (July 2012-April 2023).

    Each raw line is read as a single string and then split at the ``;``
    delimiter into the scalar fields and the raw spectrum.

    Parameters
    ----------
    filepath : str
        Path of the raw file, forwarded to ``read_raw_text_file``.
    logger
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with one row per valid timestep.

    Raises
    ------
    ValueError
        If no line holds all the expected ``;``-separated fields.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Do not skip any row at the beginning of the file
    reader_kwargs["skiprows"] = 0

    # - Define encoding
    reader_kwargs["encoding"] = "latin"

    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    # - Do not interpret any row as header (column names are passed explicitly)
    reader_kwargs["header"] = None

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Define column names
    names = [
        "id",
        "time",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "reflectivity_32bit",
        "mor_visibility",
        "sensor_temperature",
        "laser_amplitude",
        "number_particles",
        "sensor_status",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "error_code",
        "raw_drop_number",
    ]
    n_splits = len(names) - 1

    # Keep only rows holding all the expected fields
    # - Otherwise the split below may return fewer columns than names
    #   and the column assignment fails with an unclear length mismatch
    df_raw = df_raw[df_raw["TO_PARSE"].str.count(";") >= n_splits]
    if len(df_raw) == 0:
        raise ValueError(f"No valid data in {filepath}")

    # Split by ; delimiter (before raw drop number)
    df = df_raw["TO_PARSE"].str.split(";", expand=True, n=n_splits)

    # Assign column names
    df.columns = names

    # Convert time column to datetime
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Preprocess the raw spectrum
    # - Remove the 'R;' marker
    # - Add 0 before every ; if ; not preceded by a digit
    #   Example: ';;1;;' --> '0;0;1;0;'
    df["raw_drop_number"] = df["raw_drop_number"].str.replace("R;", "")
    df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)

    # Drop rows with invalid spectrum (exactly 1024 ';' delimiters expected)
    df = df[df["raw_drop_number"].str.count(";") == 1024]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "id",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
218
+
219
+
220
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    import os

    # Accept both str and os.PathLike inputs and dispatch on the file name
    filename = os.path.basename(os.fspath(filepath))
    if filename.endswith("F1.dat.gz"):
        return read_format1(filepath, logger)
    if filename.endswith("F2.dat.gz"):
        return read_format2(filepath, logger)
    # The message must be an f-string, otherwise the placeholder is printed literally
    raise ValueError(f"Unexpected filename: {filename}")