disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
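A report like this can be approximated locally by unpacking both wheels and comparing their file lists. The sketch below is only an illustration using the Python standard library; the wheel paths are hypothetical local downloads, and the registry's own diff tooling may work differently.

    import zipfile

    def wheel_names(path):
        """Return the set of file names contained in a wheel archive."""
        with zipfile.ZipFile(path) as zf:
            return set(zf.namelist())

    # Hypothetical local paths to the two released wheels
    old = wheel_names("disdrodb-0.1.5-py3-none-any.whl")
    new = wheel_names("disdrodb-0.2.1-py3-none-any.whl")
    print("added:", sorted(new - old))
    print("removed:", sorted(old - new))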
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
- #!/usr/bin/env python3
  # -----------------------------------------------------------------------------.
  # Copyright (c) 2021-2023 DISDRODB developers
  #
@@ -15,7 +14,8 @@
  # You should have received a copy of the GNU General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
- """This reader allows to read raw data from NASA GCPEX, OLYMPEX and IPHEX campaigns."""
+ """DISDRODB reader for University of Bergen OTT Parsivel 2 raw data."""
+
  import pandas as pd

  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
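Every reader in these hunks imports is_documented_by and reader_generic_docstring so that all station readers share one generic docstring. The decorator lives in disdrodb.l0.l0_reader; the snippet below is only a minimal sketch of the pattern it presumably implements, not the package's actual code.

    def is_documented_by(original):
        """Copy the docstring of `original` onto the decorated function."""
        def decorator(target):
            target.__doc__ = original.__doc__ if callable(original) else str(original)
            return target
        return decorator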
@@ -30,34 +30,44 @@ def reader(
  """Reader."""
  ##------------------------------------------------------------------------.
  #### Define column names
- column_names = ["time", "TO_BE_SPLITTED"]
+ column_names = ["TO_PARSE"]

  ##------------------------------------------------------------------------.
  #### Define reader options
  reader_kwargs = {}
+
  # - Define delimiter
- reader_kwargs["delimiter"] = ";"
+ reader_kwargs["delimiter"] = "\\n"
+
  # - Skip first row as columns names
  reader_kwargs["header"] = None
+
+ # - Skip header
  reader_kwargs["skiprows"] = 0
- # - Skip file with encoding errors
- reader_kwargs["encoding_errors"] = "ignore"
+
+ # - Define encoding
+ reader_kwargs["encoding"] = "ISO-8859-1"
+
  # - Avoid first column to become df index !!!
  reader_kwargs["index_col"] = False
+
  # - Define behaviour when encountering bad lines
  reader_kwargs["on_bad_lines"] = "skip"
+
  # - Define reader engine
  # - C engine is faster
  # - Python engine is more feature-complete
  reader_kwargs["engine"] = "python"
+
  # - Define on-the-fly decompression of on-disk data
  # - Available: gzip, bz2, zip
- reader_kwargs["compression"] = "infer"
+ # reader_kwargs['compression'] = 'xz'
+
  # - Strings to recognize as NA/NaN and replace with standard NA flags
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+ reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]

  ##------------------------------------------------------------------------.
  #### Read the data
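These options are collected into reader_kwargs and handed to read_raw_text_file, which presumably forwards them to pandas.read_csv (an assumption; the helper is not shown in this diff). A standalone equivalent of the new configuration, with a hypothetical input path, would look roughly like:

    import pandas as pd

    df = pd.read_csv(
        "raw_telegrams.txt",            # hypothetical input file
        names=["TO_PARSE"],             # keep each record as a single string column
        delimiter="\\n",                # one record per physical line
        header=None,
        skiprows=0,
        encoding="ISO-8859-1",
        index_col=False,
        on_bad_lines="skip",
        engine="python",
        na_values=["na", "error", "-.-", " NA"],
    )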
@@ -70,37 +80,52 @@ def reader(

  ##------------------------------------------------------------------------.
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
- # Convert time column to datetime
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
+ # Remove corrupted rows
+ df = df[df["TO_PARSE"].str.count(";") == 1101]

- # Split the 'TO_BE_SPLITTED' column
- df = df["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
+ # Split into columns
+ df = df["TO_PARSE"].str.split(";", expand=True, n=13)

- # Assign column names
+ # Assign columns names
  names = [
- "station_name",
- "sensor_status",
- "sensor_temperature",
- "number_particles",
+ "date",
+ "time",
  "rainfall_rate_32bit",
- "reflectivity_16bit",
- "mor_visibility",
+ "rainfall_accumulated_32bit",
+ "snowfall_rate",
  "weather_code_synop_4680",
- "weather_code_synop_4677",
- "raw_drop_number",
+ "reflectivity_32bit",
+ "mor_visibility",
+ "rain_kinetic_energy",
+ "sensor_temperature",
+ "laser_amplitude",
+ "number_particles",
+ "sensor_battery_voltage",
+ "TO_SPLIT",
  ]
  df.columns = names

- # Add the time column
- df["time"] = df_time
+ # Sanitize date
+ date = pd.to_datetime(df["date"], format="%d.%m.%Y", errors="coerce")
+ date = date.ffill().bfill()

- # Drop columns not agreeing with DISDRODB L0 standards
- df = df.drop(columns=["station_name"])
+ # Add datetime time column
+ time_str = date.astype(str) + "T" + df["time"]
+ df["time"] = pd.to_datetime(time_str, format="%Y-%m-%dT%H:%M:%S", errors="coerce")
+ df = df.drop(columns=["date"])
+
+ # Derive raw drop arrays
+ df_split = df["TO_SPLIT"].str.split(";", expand=True)
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(";".join, axis=1)
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(";".join, axis=1)
+ df["raw_drop_number"] = df_split.iloc[:, 64:].agg(";".join, axis=1)
+ del df_split

- # Drop rows with invalid values
- # --> Ensure that weather_code_synop_4677 has length 2
- # --> If a previous column is missing it will have 000
- df = df[df["weather_code_synop_4677"].str.len() == 2]
+ # Drop columns not agreeing with DISDRODB L0 standards
+ columns_to_drop = [
+ "TO_SPLIT",
+ ]
+ df = df.drop(columns=columns_to_drop)

  # Return the dataframe adhering to DISDRODB L0 standards
  return df
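The hunk above switches the reader to the single-column TO_PARSE pattern: filter rows by separator count, split off the leading metadata fields, then slice the trailing payload into the raw drop arrays. A toy, self-contained illustration of the same steps (4 metadata fields and a 7-value payload instead of the real 14 fields and full telegram; the column names here are made up):

    import pandas as pd

    df = pd.DataFrame({"TO_PARSE": ["01.01.2024;00:00:00;1.2;0;10;20;30;40;50;60"]})
    df = df[df["TO_PARSE"].str.count(";") == 9]            # keep only well-formed rows
    df = df["TO_PARSE"].str.split(";", expand=True, n=3)   # metadata fields + payload
    df.columns = ["date", "time", "rainfall_rate", "TO_SPLIT"]
    df_split = df["TO_SPLIT"].str.split(";", expand=True)
    df["first_array"] = df_split.iloc[:, :3].agg(";".join, axis=1)
    df["second_array"] = df_split.iloc[:, 3:].agg(";".join, axis=1)
    print(df[["date", "time", "first_array", "second_array"]])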
@@ -154,9 +154,12 @@ def read_txt_file(file, filename, logger):
  df["time"] = pd.to_datetime(time_str, format="%Y%m%d%H%M%S", errors="coerce")

  # Keep only rows with valid raw_drop_number
- df = df[df["raw_drop_number"].str.count(";") == 1024]
- if len(df) == 0:
- raise ValueError("Invalid raw drop number field.")
+ invalid_data = df["raw_drop_number"].str.count(";") != 1024
+ df.loc[invalid_data, "raw_drop_number"] = "NaN" # TODO: if number_particles = 0, could be set to "0".
+
+ # df = df[df["raw_drop_number"].str.count(";") == 1024]
+ # if len(df) == 0:
+ # raise ValueError("Invalid raw drop number field.")

  # Drop columns not agreeing with DISDRODB L0 standards
  # columns_to_drop = [
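Instead of discarding rows whose spectrum field is malformed (and raising when nothing survives), the new code keeps the rows and blanks only the spectrum. A small self-contained illustration of that masking behaviour on toy data:

    import pandas as pd

    df = pd.DataFrame({"raw_drop_number": [";".join(["0"] * 1025), "corrupted;record"]})
    invalid_data = df["raw_drop_number"].str.count(";") != 1024
    df.loc[invalid_data, "raw_drop_number"] = "NaN"
    print(invalid_data.tolist())   # [False, True] -> both rows are kept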
@@ -109,7 +109,7 @@ def reader(
  # Retrieve raw array
  df_split = df["TO_SPLIT"].str.split(",", expand=True)
  df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
- df["raw_drop_average_velocity"] = df_split.iloc[:, 32:].agg(",".join, axis=1)
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
  df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
  del df_split

@@ -0,0 +1,189 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Reader for SHMI OTT Parsivel2."""
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ def parse_old_format(df):
+ """Reformat old format."""
+ # Remove rows with invalid number of separators
+ df = df[df["TO_PARSE"].str.count(";") == 1106]
+
+ # Split the columns
+ df = df["TO_PARSE"].str.split(";", n=18, expand=True)
+
+ # Assign column names
+ names = [
+ "time",
+ "rainfall_rate_32bit",
+ "rainfall_accumulated_32bit",
+ "weather_code_synop_4680", # wawa
+ "reflectivity_32bit",
+ "mor_visibility",
+ "sample_interval",
+ "laser_amplitude",
+ "number_particles",
+ "sensor_temperature",
+ "sensor_serial_number",
+ "firmware_iop",
+ "sensor_heating_current",
+ "sensor_battery_voltage",
+ "sensor_status",
+ "station_id",
+ "rainfall_amount_absolute_32bit",
+ "error_code",
+ "TO_SPLIT",
+ ]
+
+ df.columns = names
+
+ # Derive raw arrays
+ df_split = df["TO_SPLIT"].str.split(";", expand=True)
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
+ df["raw_drop_number"] = df_split.iloc[:, 64:1088].agg(",".join, axis=1)
+
+ # Ensure the time column is datetime dtype
+ df["time"] = df["time"].str[0:12] + "00"
+ df["time"] = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
+
+ # Drop columns not agreeing with DISDRODB L0 standards
+ columns_to_drop = [
+ "sensor_serial_number",
+ "firmware_iop",
+ "station_id",
+ "TO_SPLIT",
+ ]
+ df = df.drop(columns=columns_to_drop)
+
+ # Return the dataframe adhering to DISDRODB L0 standards
+ return df
+
+
+ def parse_new_format(df):
+ """Reformat new format."""
+ # Remove rows with invalid number of separators
+ df = df[df["TO_PARSE"].str.count(";") == 1106]
+
+ # Split the columns
+ df = df["TO_PARSE"].str.split(";", n=18, expand=True)
+
+ # Assign column names
+ names = [
+ "time",
+ "rainfall_rate_32bit",
+ "rainfall_accumulated_32bit",
+ "weather_code_synop_4680", # wawa
+ "reflectivity_32bit",
+ "mor_visibility",
+ "sample_interval",
+ "laser_amplitude",
+ "number_particles",
+ "sensor_temperature",
+ "sensor_serial_number",
+ "firmware_iop",
+ "sensor_heating_current",
+ "sensor_battery_voltage",
+ "sensor_status",
+ "station_id",
+ "rainfall_amount_absolute_32bit",
+ "error_code",
+ "TO_SPLIT",
+ ]
+
+ df.columns = names
+
+ # Derive raw arrays
+ df_split = df["TO_SPLIT"].str.split(";", expand=True)
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
+ df["raw_drop_number"] = df_split.iloc[:, 64:1088].agg(",".join, axis=1)
+
+ # Add the time column
+ df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
+
+ # Drop columns not agreeing with DISDRODB L0 standards
+ columns_to_drop = [
+ "sensor_serial_number",
+ "firmware_iop",
+ "station_id",
+ "TO_SPLIT",
+ ]
+ df = df.drop(columns=columns_to_drop)
+
+ # Return the dataframe adhering to DISDRODB L0 standards
+ return df
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+ filepath,
+ logger=None,
+ ):
+ """Reader."""
+ ##------------------------------------------------------------------------.
+ #### Define column names
+ column_names = ["TO_PARSE"]
+
+ ##------------------------------------------------------------------------.
+ #### Define reader options
+ reader_kwargs = {}
+ # Skip first row as columns names
+ reader_kwargs["header"] = None
+ # Skip file with encoding errors
+ reader_kwargs["encoding_errors"] = "ignore"
+ # - Define delimiter
+ reader_kwargs["delimiter"] = "\\n"
+ # - Avoid first column to become df index !!!
+ reader_kwargs["index_col"] = False
+ # - Define behaviour when encountering bad lines
+ reader_kwargs["on_bad_lines"] = "skip"
+
+ # - Define reader engine
+ # - C engine is faster
+ # - Python engine is more feature-complete
+ reader_kwargs["engine"] = "python"
+ # - Define on-the-fly decompression of on-disk data
+ # - Available: gzip, bz2, zip
+ reader_kwargs["compression"] = "infer"
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+ reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+
+ ##------------------------------------------------------------------------.
+ #### Read the data
+ df = read_raw_text_file(
+ filepath=filepath,
+ column_names=column_names,
+ reader_kwargs=reader_kwargs,
+ logger=logger,
+ )
+
+ ##------------------------------------------------------------------------.
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
+ if df["TO_PARSE"].iloc[0].startswith("datetime_utc"):
+ # Remove header if present (2025 onward)
+ df = df.iloc[1:]
+ # Parse new format
+ return parse_new_format(df)
+ return parse_old_format(df)
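A hypothetical usage of this newly added reader, with the module path inferred from the file list above (disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py) and a made-up data path:

    from disdrodb.l0.readers.PARSIVEL2.SWEDEN.SMHI import reader

    df = reader("/data/SMHI/station_01/telegrams_2024.txt")   # hypothetical raw file
    print(len(df), df["time"].min(), df["time"].max())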
@@ -1,4 +1,3 @@
- #!/usr/bin/env python3
  # -----------------------------------------------------------------------------.
  # Copyright (c) 2021-2023 DISDRODB developers
  #
@@ -15,6 +14,8 @@
  # You should have received a copy of the GNU General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
+ """DISDRODB reader for Colorado State University OTT Parsivel 2 raw data."""
+
  import pandas as pd

  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -29,34 +30,44 @@ def reader(
  """Reader."""
  ##------------------------------------------------------------------------.
  #### Define column names
- column_names = ["time", "TO_BE_SPLITTED"]
+ column_names = ["TO_PARSE"]

  ##------------------------------------------------------------------------.
  #### Define reader options
  reader_kwargs = {}
+
  # - Define delimiter
- reader_kwargs["delimiter"] = ";"
+ reader_kwargs["delimiter"] = "\\n"
+
  # - Skip first row as columns names
  reader_kwargs["header"] = None
+
+ # - Skip header
  reader_kwargs["skiprows"] = 0
- # - Skip file with encoding errors
- reader_kwargs["encoding_errors"] = "ignore"
+
+ # - Define encoding
+ reader_kwargs["encoding"] = "ISO-8859-1"
+
  # - Avoid first column to become df index !!!
  reader_kwargs["index_col"] = False
+
  # - Define behaviour when encountering bad lines
  reader_kwargs["on_bad_lines"] = "skip"
+
  # - Define reader engine
  # - C engine is faster
  # - Python engine is more feature-complete
  reader_kwargs["engine"] = "python"
+
  # - Define on-the-fly decompression of on-disk data
  # - Available: gzip, bz2, zip
- reader_kwargs["compression"] = "infer"
+ # reader_kwargs['compression'] = 'xz'
+
  # - Strings to recognize as NA/NaN and replace with standard NA flags
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+ reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]

  ##------------------------------------------------------------------------.
  #### Read the data
@@ -69,37 +80,59 @@ def reader(

  ##------------------------------------------------------------------------.
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
- # Convert time column to datetime
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
+ # Remove corrupted rows
+ df = df[df["TO_PARSE"].str.count(",").isin([15, 1040])]

- # Split the 'TO_BE_SPLITTED' column
- df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
+ # Split into columns
+ df["TO_PARSE"] = df["TO_PARSE"] + ",0"
+ df = df["TO_PARSE"].str.split(",", expand=True, n=16)

- # Assign column names
- columns_names = [
- "station_name",
- "sensor_status",
- "sensor_temperature",
- "number_particles",
+ # Assign columns names
+ names = [
+ "date",
+ "time",
  "rainfall_rate_32bit",
- "reflectivity_16bit",
- "mor_visibility",
+ "rainfall_accumulated_32bit",
  "weather_code_synop_4680",
- "weather_code_synop_4677",
+ "weather_code_metar_4678",
+ "weather_code_nws",
+ "reflectivity_32bit",
+ "mor_visibility",
+ "laser_amplitude",
+ "number_particles",
+ "sensor_temperature",
+ "sensor_heating_current",
+ "sensor_battery_voltage",
+ "rain_kinetic_energy",
+ "snowfall_rate",
  "raw_drop_number",
  ]
- df.columns = columns_names
+ df.columns = names

- # Add the time column
- df["time"] = df_time
+ # Add datetime time column
+ time_str = df["date"] + "-" + df["time"]
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y-%H:%M:%S", errors="coerce")

- # Drop columns not agreeing with DISDRODB L0 standards
- df = df.drop(columns=["station_name"])
+ # Derive the raw spectrum
+ # - When no drops detected, None
+ # - After conversion to string, becomes NaN
+ df["raw_drop_number"] = df["raw_drop_number"].astype("string")
+ df["raw_drop_number"] = df["raw_drop_number"].str.strip()
+
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>,0", "")
+
+ # Preprocess the raw spectrum and raw_drop_average_velocity
+ # - Add 0 before every ; if ; not preceded by a digit
+ # - Example: ';;1;;' --> '0;0;1;0;'
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d),", "0,", regex=True)

- # Drop rows with invalid values
- # --> Ensure that weather_code_synop_4677 has length 2
- # --> If a previous column is missing it will have 000
- df = df[df["weather_code_synop_4677"].str.len() == 2]
+ # Drop columns not agreeing with DISDRODB L0 standards
+ columns_to_drop = [
+ "date",
+ ]
+ df = df.drop(columns=columns_to_drop)

  # Return the dataframe adhering to DISDRODB L0 standards
  return df
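The zero-filling regex in this hunk inserts a "0" in front of any comma that is not preceded by a digit, so empty cells in the telegram become explicit zeros. A short demonstration on toy data:

    import pandas as pd

    s = pd.Series(["1,,2,,"])
    print(s.str.replace(r"(?<!\d),", "0,", regex=True).iloc[0])   # -> "1,0,2,0,"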
@@ -15,6 +15,7 @@
  # You should have received a copy of the GNU General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
+ """Reader for the OTT Parsivel2 sensors of the CW3E network."""
  import pandas as pd

  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -68,39 +69,65 @@ def reader(

  ##------------------------------------------------------------------------.
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
- # Define 'time' datetime
+ # Remove rows with invalid number of separators
+ df = df[df["TO_PARSE"].str.count(";").isin([1104, 1105])]
+ if len(df) == 0:
+ raise ValueError(f"No valid data in {filepath}")

- # Split the columns
- df["TO_PARSE"].iloc[0:5].str.split(";", n=16, expand=True).iloc[0]
+ n_delimiters = int(df["TO_PARSE"].str.count(";").iloc[0])
+ if n_delimiters == 1104:
+ names = [
+ "sensor_serial_number",
+ "sensor_status",
+ "laser_amplitude",
+ "sensor_heating_current",
+ "sensor_battery_voltage",
+ "dummy_date",
+ "sensor_time",
+ "sensor_date",
+ # "sensor_temperature",
+ "number_particles",
+ "rainfall_rate_32bit",
+ "reflectivity_32bit",
+ "rainfall_accumulated_16bit",
+ "mor_visibility",
+ "weather_code_synop_4680",
+ "weather_code_synop_4677",
+ "TO_SPLIT",
+ ]
+ n = 15
+ else:
+ names = [
+ "sensor_serial_number",
+ "sensor_status",
+ "laser_amplitude",
+ "sensor_heating_current",
+ "sensor_battery_voltage",
+ "dummy_date",
+ "sensor_time",
+ "sensor_date",
+ "sensor_temperature",
+ "number_particles",
+ "rainfall_rate_32bit",
+ "reflectivity_32bit",
+ "rainfall_accumulated_16bit",
+ "mor_visibility",
+ "weather_code_synop_4680",
+ "weather_code_synop_4677",
+ "TO_SPLIT",
+ ]
+ n = 16

- df = df["TO_PARSE"].str.split(";", n=16, expand=True)
+ # Split the columns
+ df = df["TO_PARSE"].str.split(";", n=n, expand=True)

  # Assign column names
- names = [
- "sensor_serial_number",
- "sensor_status",
- "laser_amplitude",
- "sensor_heating_current",
- "sensor_battery_voltage",
- "dummy_date",
- "sensor_time",
- "sensor_date",
- "sensor_temperature",
- "number_particles",
- "rainfall_rate_32bit",
- "reflectivity_32bit",
- "rainfall_accumulated_16bit",
- "mor_visibility",
- "weather_code_synop_4680",
- "weather_code_synop_4677",
- "TO_SPLIT",
- ]
  df.columns = names

  # Derive raw arrays
  df_split = df["TO_SPLIT"].str.split(";", expand=True)
  df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
- df["raw_drop_average_velocity"] = df_split.iloc[:, 32:].agg(",".join, axis=1)
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
  df["raw_drop_number"] = df_split.iloc[:, 64:1088].agg(",".join, axis=1)
  df["rain_kinetic_energy"] = df_split.iloc[:, 1088]
  df["CHECK_EMPTY"] = df_split.iloc[:, 1089]