disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """This reader allows to read raw data from NASA APU stations.
19
+
20
+ The reader can read raw APU data from the following NASA campaigns:
21
+
22
+ - HYMEX
23
+ - IFLOODS
24
+ - IPHEX
25
+ - OLYMPEX
26
+ - ICEPOP
27
+ - IMPACTS
28
+ - GCPEX
29
+ - WFF
30
+
31
+ """
32
+
33
+ import pandas as pd
34
+
35
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
36
+ from disdrodb.l0.l0a_processing import read_raw_text_file
37
+
38
+
39
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Reader options
    # - Fields are separated by ";" and the file has no header row
    #   (header=None, skiprows=0), so the first line is read as data.
    # - Undecodable bytes are ignored and malformed lines are skipped.
    # - index_col=False prevents the first column from becoming the index.
    # - The python engine is used; compression is inferred from the file.
    # - "na", "", "error", "NA" and "-.-" are added to the NA markers.
    reader_kwargs = {
        "delimiter": ";",
        "header": None,
        "skiprows": 0,
        "encoding_errors": "ignore",
        "index_col": False,
        "on_bad_lines": "skip",
        "engine": "python",
        "compression": "infer",
        "na_values": ["na", "", "error", "NA", "-.-"],
    }

    ##------------------------------------------------------------------------.
    #### Read the raw file as two columns: the timestamp and the remainder
    raw = read_raw_text_file(
        filepath=filepath,
        column_names=["time", "TO_BE_SPLITTED"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Parse the timestamps before the frame is replaced by the split result;
    # unparsable values become NaT (errors="coerce").
    timestamps = pd.to_datetime(raw["time"], format="%Y%m%d%H%M%S", errors="coerce")

    # Split on the first three commas only: three leading fields, then the
    # rest of the string is kept intact as raw_drop_number.
    df = raw["TO_BE_SPLITTED"].str.split(",", n=3, expand=True)
    df.columns = ["station_name", "unknown", "unknown2", "raw_drop_number"]

    # Re-attach the time column (aligned on the original row index)
    df["time"] = timestamps

    # Drop the leading fields, leaving raw_drop_number and time
    return df.drop(columns=["station_name", "unknown", "unknown2"])
@@ -109,17 +109,11 @@ def reader(
109
109
  df.columns = names
110
110
 
111
111
  # Derive raw drop arrays
112
- def split_string(s):
113
- vals = [v.strip() for v in s.split(",")]
114
- c1 = ", ".join(vals[:32])
115
- c2 = ", ".join(vals[32:64])
116
- c3 = ", ".join(vals[64:])
117
- return pd.Series({"raw_drop_concentration": c1, "raw_drop_average_velocity": c2, "raw_drop_number": c3})
118
-
119
- splitted_string = df["TO_SPLIT"].apply(split_string)
120
- df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
121
- df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
122
- df["raw_drop_number"] = splitted_string["raw_drop_number"]
112
+ df_split = df["TO_SPLIT"].str.split(",", expand=True)
113
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
114
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
115
+ df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
116
+ del df_split
123
117
 
124
118
  # Define datetime "time" column
125
119
  df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
@@ -89,23 +89,10 @@ def reader(
89
89
  df.columns = names
90
90
 
91
91
  # Derive raw drop arrays
92
- def split_string(s):
93
- vals = [v.strip() for v in s.split(",")]
94
- c1 = ", ".join(vals[:32])
95
- c2 = ", ".join(vals[32:64])
96
- # c3 = ", ".join(vals[64:])
97
- series = pd.Series(
98
- {
99
- "raw_drop_concentration": c1,
100
- "raw_drop_average_velocity": c2,
101
- # "raw_drop_number": c3,
102
- },
103
- )
104
- return series
105
-
106
- splitted_string = df["TO_SPLIT"].apply(split_string)
107
- df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
108
- df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
92
+ # - raw_drop_number is missing !
93
+ df_split = df["TO_SPLIT"].str.split(",", expand=True)
94
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
95
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
109
96
 
110
97
  # Define datetime time column
111
98
  df["year"] = df["year"].str.replace(".0", "")
@@ -90,20 +90,11 @@ def reader(
90
90
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
91
91
  df = df[df["ID"].astype(str).isin(valid_id_str)]
92
92
 
93
- # Create the dataframe with each row corresponding to a timestep
94
- # - Group rows based on when ID values restart
95
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
96
-
97
- # Reshape the dataframe
98
- group_dfs = []
99
- for _, group in groups:
100
- group_df = group.set_index("ID").T
101
- group_dfs.append(group_df)
102
-
103
- # Merge each timestep dataframe
104
- # --> Missing columns are infilled by NaN
105
- df = pd.concat(group_dfs, axis=0)
106
- df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
93
+ # Create the dataframe where each row corresponds to a timestep
94
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
95
+ df = df.pivot(index="_group", columns="ID") # noqa
96
+ df.columns = df.columns.get_level_values("ID")
97
+ df = df.reset_index(drop=True)
107
98
 
108
99
  # Assign column names
109
100
  column_dict = {
@@ -81,19 +81,11 @@ def reader(
81
81
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
82
82
  df = df[df["ID"].astype(str).isin(valid_id_str)]
83
83
 
84
- # Create the dataframe with each row corresponding to a timestep
85
- # - Group rows based on when ID values restart
86
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
87
-
88
- # Reshape the dataframe
89
- group_dfs = []
90
- for _, group in groups:
91
- group_df = group.set_index("ID").T
92
- group_dfs.append(group_df)
93
-
94
- # Merge each timestep dataframe
95
- # --> Missing columns are infilled by NaN
96
- df = pd.concat(group_dfs, axis=0)
84
+ # Create the dataframe where each row corresponds to a timestep
85
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
86
+ df = df.pivot(index="_group", columns="ID") # noqa
87
+ df.columns = df.columns.get_level_values("ID")
88
+ df = df.reset_index(drop=True)
97
89
 
98
90
  # Assign column names
99
91
  column_dict = {
@@ -122,9 +114,14 @@ def reader(
122
114
  # "23": "station_number",
123
115
  "24": "rainfall_amount_absolute_32bit",
124
116
  "25": "error_code",
117
+ # "26": "sensor_temperature_pcb",
118
+ # "27": "sensor_temperature_receiver",
119
+ # "28": "sensor_temperature_trasmitter",
125
120
  "30": "rainfall_rate_16_bit_30",
126
121
  "31": "rainfall_rate_16_bit_1200",
127
122
  "32": "rainfall_accumulated_16bit",
123
+ # "34": "rain_kinetic_energy",
124
+ # "35": "snowfall_rate",
128
125
  "90": "raw_drop_concentration",
129
126
  "91": "raw_drop_average_velocity",
130
127
  "93": "raw_drop_number",
@@ -81,19 +81,11 @@ def reader(
81
81
  valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
82
82
  df = df[df["ID"].astype(str).isin(valid_id_str)]
83
83
 
84
- # Create the dataframe with each row corresponding to a timestep
85
- # - Group rows based on when ID values restart
86
- groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
87
-
88
- # Reshape the dataframe
89
- group_dfs = []
90
- for _, group in groups:
91
- group_df = group.set_index("ID").T
92
- group_dfs.append(group_df)
93
-
94
- # Merge each timestep dataframe
95
- # --> Missing columns are infilled by NaN
96
- df = pd.concat(group_dfs, axis=0)
84
+ # Create the dataframe where each row corresponds to a timestep
85
+ df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
86
+ df = df.pivot(index="_group", columns="ID") # noqa
87
+ df.columns = df.columns.get_level_values("ID")
88
+ df = df.reset_index(drop=True)
97
89
 
98
90
  # Assign column names
99
91
  column_dict = {
@@ -122,9 +114,14 @@ def reader(
122
114
  # "23": "station_number",
123
115
  "24": "rainfall_amount_absolute_32bit",
124
116
  "25": "error_code",
117
+ # "26": "sensor_temperature_pcb",
118
+ # "27": "sensor_temperature_receiver",
119
+ # "28": "sensor_temperature_trasmitter",
125
120
  "30": "rainfall_rate_16_bit_30",
126
121
  "31": "rainfall_rate_16_bit_1200",
127
122
  "32": "rainfall_accumulated_16bit",
123
+ # "34": "rain_kinetic_energy",
124
+ # "35": "snowfall_rate",
128
125
  "90": "raw_drop_concentration",
129
126
  "91": "raw_drop_average_velocity",
130
127
  "93": "raw_drop_number",
@@ -65,5 +65,8 @@ def reader(
65
65
  # Rename dataset variables and columns and infill missing variables
66
66
  ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="PARSIVEL2")
67
67
 
68
+ # Replace NaN flags
69
+ ds["raw_drop_number"] = ds["raw_drop_number"].where(ds["raw_drop_number"] < 65437) # dummy flag 65437.0
70
+
68
71
  # Return the dataset adhering to DISDRODB L0B standards
69
72
  return ds
@@ -0,0 +1,232 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for PANGASA PARSIVEL2 raw text data."""
20
+ # import os
21
+ # import tempfile
22
+ # from disdrodb.utils.compression import unzip_file_on_terminal
23
+
24
+ import numpy as np
25
+ import pandas as pd
26
+
27
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
28
+ from disdrodb.l0.l0a_processing import read_raw_text_file
29
+ from disdrodb.utils.logger import log_error
30
+
31
+
32
def read_txt_file(file, filename, logger):
    """Parse a single txt file within the daily zip file.

    Parameters
    ----------
    file : path or file-like object
        Passed as ``filepath`` to ``read_raw_text_file``.
    filename : str
        Name of the txt file. The text after the last underscore, with
        ``.txt`` removed, is parsed as a ``%Y%m%d%H%M%S`` timestamp.
    logger : logger or None
        Forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per group of ``ID:Value`` lines, with the columns listed in
        the ID-to-name mapping below plus ``time``.

    Raises
    ------
    ValueError
        If no rows are read from the file, or if no row has a
        ``raw_drop_number`` containing exactly 1024 ";" characters.
    """
    # Field ID --> DISDRODB L0 variable name.
    # Commented-out IDs are not retained in the output.
    id_to_name = {
        "01": "rainfall_rate_32bit",
        "02": "rainfall_accumulated_32bit",
        "03": "weather_code_synop_4680",
        "04": "weather_code_synop_4677",
        "05": "weather_code_metar_4678",
        "06": "weather_code_nws",
        "07": "reflectivity_32bit",
        "08": "mor_visibility",
        "09": "sample_interval",
        "10": "laser_amplitude",
        "11": "number_particles",
        "12": "sensor_temperature",
        # "13": "sensor_serial_number",
        # "14": "firmware_iop",
        # "15": "firmware_dsp",
        "16": "sensor_heating_current",
        "17": "sensor_battery_voltage",
        "18": "sensor_status",
        # "19": "start_time",
        # "20": "sensor_time",
        # "21": "sensor_date",
        # "22": "station_name",
        # "23": "station_number",
        "24": "rainfall_amount_absolute_32bit",
        "25": "error_code",
        "26": "sensor_temperature_pcb",
        "27": "sensor_temperature_receiver",
        "28": "sensor_temperature_trasmitter",
        "30": "rainfall_rate_16_bit_30",
        "31": "rainfall_rate_16_bit_1200",
        "32": "rainfall_accumulated_16bit",
        # "33": "reflectivity_16bit",
        "34": "rain_kinetic_energy",
        "35": "snowfall_rate",
        # 60: "number_particles_all",
        # 61: "list_particles",
        "90": "raw_drop_concentration",
        "91": "raw_drop_average_velocity",
        "93": "raw_drop_number",
    }

    ##------------------------------------------------------------------------.
    #### Reader options
    # - The delimiter is the two-character sequence backslash + "n";
    #   presumably this makes every line land in the single TO_PARSE
    #   column -- TODO confirm against read_raw_text_file.
    # - index_col=False prevents the first column from becoming the index.
    # - Malformed lines are skipped; compression is inferred from the file.
    reader_kwargs = {
        "delimiter": "\\n",
        "encoding": "latin",  # "ISO-8859-1"
        "index_col": False,
        "on_bad_lines": "skip",
        "engine": "python",
        "compression": "infer",
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=file,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # An empty file is reported as an error (nothing is returned)
    if len(df) == 0:
        raise ValueError(f"{filename} is empty.")

    # Split each line at the first ":" into an ID and a Value
    df = df["TO_PARSE"].str.split(":", expand=True, n=1)
    df.columns = ["ID", "Value"]

    # Keep only rows that actually carry a value (falsy, then None)
    df = df[df["Value"].astype(bool)]
    df = df[df["Value"].apply(lambda value: value is not None)]

    # Discard rows whose ID is not one of "00" ... "93" (corrupted rows)
    valid_ids = [f"{number:02d}" for number in range(94)]
    df = df[df["ID"].astype(str).isin(valid_ids)]

    # A new group starts whenever the ID stops increasing; every group
    # becomes one output row with one column per ID.
    id_restarts = df["ID"].astype(int).diff() <= 0
    df["_group"] = id_restarts.cumsum()
    df = df.pivot(index="_group", columns="ID")  # noqa
    df.columns = df.columns.get_level_values("ID")
    df = df.reset_index(drop=True)

    # Add expected IDs that are absent from the file, filled with "NaN"
    absent_ids = [field_id for field_id in id_to_name if field_id not in df.columns]
    for field_id in absent_ids:
        df[field_id] = "NaN"

    # Rename IDs to variable names and keep only the mapped columns
    df = df.rename(columns=id_to_name)
    df = df[list(id_to_name.values())]

    # The timestamp comes from the filename, not from the file content
    timestamp_str = filename.split("_")[-1].replace(".txt", "")
    df["time"] = pd.to_datetime(timestamp_str, format="%Y%m%d%H%M%S", errors="coerce")

    # Keep only rows whose raw_drop_number has exactly 1024 ";" characters
    df = df[df["raw_drop_number"].str.count(";") == 1024]
    if len(df) == 0:
        raise ValueError("Invalid raw drop number field.")

    return df
173
+
174
+
175
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    import zipfile

    # ---------------------------------------------------------------------.
    #### Iterate over all txt files in the zip archive, in sorted name order
    list_df = []
    with zipfile.ZipFile(filepath, "r") as archive:
        txt_members = [name for name in sorted(archive.namelist()) if name.endswith(".txt")]
        for filename in txt_members:
            with archive.open(filename) as file:
                # A failure on one member is logged and the member skipped,
                # so one bad file does not discard the whole archive.
                try:
                    df = read_txt_file(file=file, filename=filename, logger=logger)
                except Exception as e:
                    msg = f"An error occurred while reading {filename}. The error is: {e}."
                    log_error(logger=logger, msg=msg, verbose=True)
                else:
                    if df is not None:
                        list_df.append(df)

    # Fail if no member could be parsed into a dataframe
    if not list_df:
        raise ValueError(f"{filepath} contains only empty files!")

    # ---------------------------------------------------------------------.
    # Concatenate the per-file dataframes into a single one
    return pd.concat(list_df)
@@ -111,31 +111,19 @@ def reader(
111
111
  df_raw_spectrum = df[df["TO_PARSE"].str.len() == 4545]
112
112
 
113
113
  # Derive raw drop arrays
114
- def split_string(s):
115
- vals = [v.strip() for v in s.split(",")]
116
- c1 = ",".join(vals[:32])
117
- c2 = ",".join(vals[32:64])
118
- c3 = ",".join(vals[64].replace("r", "").split("/"))
119
- series = pd.Series(
120
- {
121
- "raw_drop_concentration": c1,
122
- "raw_drop_average_velocity": c2,
123
- "raw_drop_number": c3,
124
- },
125
- )
126
- return series
127
-
128
- splitted_string = df_raw_spectrum["TO_PARSE"].apply(split_string)
129
- df_raw_spectrum["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
130
- df_raw_spectrum["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
131
- df_raw_spectrum["raw_drop_number"] = splitted_string["raw_drop_number"]
114
+ df_split = df["TO_PARSE"].str.split(",", expand=True)
115
+ df_raw_spectrum["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
116
+ df_raw_spectrum["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
117
+ df_raw_spectrum["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
132
118
  df_raw_spectrum = df_raw_spectrum.drop(columns=["date", "TO_PARSE"])
119
+ del df_split
133
120
 
134
121
  # Add raw array
135
122
  df = df_data.set_index("time")
136
123
  df_raw_spectrum = df_raw_spectrum.set_index("time")
137
124
 
138
125
  df.update(df_raw_spectrum)
126
+ del df_raw_spectrum
139
127
 
140
128
  # Set back time as column
141
129
  df = df.reset_index()
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for Granada Parsivel2 raw text data."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define column names
    # Each line is read as one string and parsed below
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Skip the first 4 rows of the file
    reader_kwargs["skiprows"] = 4
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Remove corrupted rows: a valid row has exactly 1106 commas (1107 fields)
    df_raw = df_raw[df_raw["TO_PARSE"].str.count(",") == 1106]

    # Split off the 19 leading scalar fields; the remaining 1088
    # comma-separated values stay together in the last column (TO_SPLIT)
    df = df_raw["TO_PARSE"].str.split(",", expand=True, n=19)

    # Assign names
    names = [
        "time",
        "id",
        "rainfall_rate_32bit",
        "snowfall_rate",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "reflectivity_32bit",
        "mor_visibility",
        "rain_kinetic_energy",
        "laser_amplitude",
        "sensor_temperature",
        "sensor_temperature_pcb",
        "sensor_temperature_receiver",
        "sensor_temperature_trasmitter",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "error_code",
        "number_particles",
        "TO_SPLIT",
    ]
    df.columns = names

    # Define datetime "time" column (the timestamp is wrapped in double quotes)
    df["time"] = df["time"].str.replace('"', "")
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")

    # Retrieve raw arrays
    # - Values   0-31 : raw_drop_concentration
    # - Values  32-63 : raw_drop_average_velocity
    # - Values  64-end: raw_drop_number (1024 values in a valid row)
    # The velocity slice must stop at 64, otherwise the raw_drop_number
    # values are appended to raw_drop_average_velocity as well.
    df_split = df["TO_SPLIT"].str.split(",", expand=True)
    df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
    df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
    df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
    del df_split

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["TO_SPLIT", "id"])

    # Return the dataframe adhering to DISDRODB L0 standards
    return df