disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +145 -14
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  37. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  38. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  39. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  40. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  41. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  42. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  43. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
  44. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
  45. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  46. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  47. disdrodb/l0/l0a_processing.py +30 -30
  48. disdrodb/l0/l0b_nc_processing.py +108 -2
  49. disdrodb/l0/l0b_processing.py +4 -4
  50. disdrodb/l0/l0c_processing.py +5 -13
  51. disdrodb/l0/manuals/SWS250.pdf +0 -0
  52. disdrodb/l0/manuals/VPF730.pdf +0 -0
  53. disdrodb/l0/manuals/VPF750.pdf +0 -0
  54. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  55. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  56. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  57. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
  58. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
  59. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  62. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  63. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  64. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  65. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  66. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  68. disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  70. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  71. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
  72. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  73. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
  77. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
  78. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  79. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  80. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  81. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  82. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  83. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  84. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
  85. disdrodb/l0/routines.py +105 -14
  86. disdrodb/l1/__init__.py +5 -0
  87. disdrodb/l1/filters.py +34 -20
  88. disdrodb/l1/processing.py +45 -44
  89. disdrodb/l1/resampling.py +77 -66
  90. disdrodb/l1/routines.py +35 -42
  91. disdrodb/l1_env/routines.py +18 -3
  92. disdrodb/l2/__init__.py +7 -0
  93. disdrodb/l2/empirical_dsd.py +58 -10
  94. disdrodb/l2/event.py +27 -120
  95. disdrodb/l2/processing.py +267 -116
  96. disdrodb/l2/routines.py +618 -254
  97. disdrodb/metadata/standards.py +3 -1
  98. disdrodb/psd/fitting.py +463 -144
  99. disdrodb/psd/models.py +8 -5
  100. disdrodb/routines.py +3 -3
  101. disdrodb/scattering/__init__.py +16 -4
  102. disdrodb/scattering/axis_ratio.py +56 -36
  103. disdrodb/scattering/permittivity.py +486 -0
  104. disdrodb/scattering/routines.py +701 -159
  105. disdrodb/summary/__init__.py +17 -0
  106. disdrodb/summary/routines.py +4120 -0
  107. disdrodb/utils/attrs.py +68 -125
  108. disdrodb/utils/compression.py +30 -1
  109. disdrodb/utils/dask.py +59 -8
  110. disdrodb/utils/dataframe.py +63 -9
  111. disdrodb/utils/directories.py +49 -17
  112. disdrodb/utils/encoding.py +33 -19
  113. disdrodb/utils/logger.py +13 -6
  114. disdrodb/utils/manipulations.py +71 -0
  115. disdrodb/utils/subsetting.py +214 -0
  116. disdrodb/utils/time.py +165 -19
  117. disdrodb/utils/writer.py +20 -7
  118. disdrodb/utils/xarray.py +85 -4
  119. disdrodb/viz/__init__.py +13 -0
  120. disdrodb/viz/plots.py +327 -0
  121. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  122. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
  123. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  124. disdrodb/l1/encoding_attrs.py +0 -635
  125. disdrodb/l2/processing_options.py +0 -213
  126. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  127. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  128. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  129. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw file where each measurement is spread over several ``#<id>#,`` lines.

    The file is read line by line into a single text column. A measurement
    starts at a line beginning with ``#0#,`` and is expected to span 6 or 7
    lines. The first 6 lines of each measurement are reshaped into a single
    row, and the two composite lines are then split into individual variables.

    Parameters
    ----------
    filepath : str
        Path of the raw text file to read.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per measurement, with a datetime64 ``time`` column.

    Raises
    ------
    ValueError
        If the file does not contain any complete measurement.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter (each line is read as a single field)
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Identify groups of lines corresponding to same measurement
    # - na=False: lines parsed as NaN are never a measurement start and
    #   must not propagate NaN into the cumulative sum
    is_start = df["TO_PARSE"].str.startswith("#0#,", na=False)
    df["observation_id"] = is_start.cumsum()

    # Discard lines preceding the first "#0#," marker: they belong to a
    # measurement whose beginning is not in the file
    df = df[df["observation_id"] > 0]

    # Loop over groups and collect a single row of fields for each measurement
    obs_columns = [
        "time",
        "TO_SPLIT1",
        "TO_SPLIT2",
        "raw_drop_concentration",
        "raw_drop_average_velocity",
        "raw_drop_number",
    ]
    list_obs = []
    for _, df_obs in df.groupby("observation_id", sort=False):
        # Skip incomplete or corrupted measurements instead of failing later
        # when assigning the 6 expected column names
        if len(df_obs) not in (6, 7):
            continue

        # Remove #<id># and last comma
        # - .str[1] yields NaN (instead of raising) for a line without comma
        fields = df_obs["TO_PARSE"].str.split(",", n=1).str[1].str.rstrip(",")

        # Keep only the first 6 lines (a 7th line, when present, is not used)
        list_obs.append(fields.iloc[0:6].tolist())

    if not list_obs:
        raise ValueError(f"No valid measurement found in {filepath}.")

    # Build a single dataframe with one row (and a unique index label) per timestep
    df = pd.DataFrame(list_obs, columns=obs_columns)

    # Split and rename remaining variables
    # NOTE(review): the number of comma-separated values in these two lines is
    # assumed to be exactly 4 and 5 respectively -- confirm against the raw files
    df_split1 = df["TO_SPLIT1"].str.split(",", expand=True)
    df_split1.columns = [
        "weather_code_synop_4680",
        "rainfall_accumulated_32bit",
        "rainfall_rate_32bit",  # was "rainfall_rate32bit" (inconsistent with the other readers)
        "reflectivity_32bit",
    ]
    df_split2 = df["TO_SPLIT2"].str.split(",", expand=True)
    df_split2.columns = ["parsivel_id", "sensor_serial_number", "mor_visibility", "laser_amplitude", "sensor_status"]

    # Merge everything into a single dataframe
    df = pd.concat([df, df_split1, df_split2], axis=1)

    # Define time as datetime64 (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y %H:%M:%S", errors="coerce")

    # Remove unused variables
    df = df.drop(columns=["TO_SPLIT1", "TO_SPLIT2", "parsivel_id", "sensor_serial_number"])

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """This reader allows to read raw data from NASA APU stations.
19
+
20
+ The reader allows to read raw APU data from the following NASA campaigns:
21
+
22
+ - HYMEX
23
+ - IFLOODS
24
+ - IPHEX
25
+ - OLYMPEX
26
+ - ICEPOP
27
+ - IMPACTS
28
+ - GCPEX
29
+ - WFF
30
+
31
+ """
32
+
33
+ import pandas as pd
34
+
35
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
36
+ from disdrodb.l0.l0a_processing import read_raw_text_file
37
+
38
+
39
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw NASA APU text file into a dataframe.

    Each line holds a timestamp and, after a ``;``, a comma-separated record
    that is split into 10 variables. Rows whose ``weather_code_synop_4677``
    field is not exactly 2 characters long are discarded.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["time", "TO_BE_SPLITTED"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - Define delimiter
        "delimiter": ";",
        # - The file has no header row and no row is skipped
        "header": None,
        "skiprows": 0,
        # - Ignore characters that cannot be decoded
        "encoding_errors": "ignore",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Define reader engine
        #   - C engine is faster
        #   - Python engine is more feature-complete
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data
        #   - Available: gzip, bz2, zip
        "compression": "infer",
        # - Strings to recognize as NA/NaN and replace with standard NA flags
        #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
        #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
        #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
        "na_values": ["na", "", "error", "NA", "-.-"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Parse the timestamps (unparsable values become NaT)
    timesteps = pd.to_datetime(df_raw["time"], format="%Y%m%d%H%M%S", errors="coerce")

    # Split the record into 10 fields: everything after the 9th comma stays
    # together in the last field
    df = df_raw["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
    df.columns = [
        "station_name",
        "sensor_status",
        "sensor_temperature",
        "number_particles",
        "rainfall_rate_32bit",
        "reflectivity_16bit",
        "mor_visibility",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "raw_drop_number",
    ]

    # Attach the timestamps to the split record
    df["time"] = timesteps

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["station_name"])

    # Drop rows with invalid values
    # --> Keep only rows where weather_code_synop_4677 has length 2
    # --> If a previous column is missing it will have 000
    has_valid_synop_4677 = df["weather_code_synop_4677"].str.len() == 2
    df = df[has_valid_synop_4677]

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -15,6 +15,7 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  # -----------------------------------------------------------------------------.
18
+ """Reader for CSWR FARM disdrometer data (used in PERILS and RELAMPAGO campaign)."""
18
19
  import pandas as pd
19
20
 
20
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -29,33 +30,43 @@ def reader(
29
30
  """Reader."""
30
31
  ##------------------------------------------------------------------------.
31
32
  #### Define column names
32
- column_names = ["time", "epoch_time", "TO_BE_PARSED"]
33
+ column_names = ["TO_PARSE"]
33
34
 
34
35
  ##------------------------------------------------------------------------.
35
36
  #### Define reader options
36
37
  reader_kwargs = {}
38
+
37
39
  # - Define delimiter
38
- reader_kwargs["delimiter"] = ";"
39
- # - Skip first row as columns names
40
+ reader_kwargs["delimiter"] = "\\n"
41
+
42
+ # - Define encoding
43
+ reader_kwargs["encoding"] = "ISO-8859-1"
44
+
45
+ # Skip first row as columns names
40
46
  reader_kwargs["header"] = None
41
- # - Avoid first column to become df index
47
+ reader_kwargs["skiprows"] = 2
48
+
49
+ # - Avoid first column to become df index !!!
42
50
  reader_kwargs["index_col"] = False
51
+
43
52
  # - Define behaviour when encountering bad lines
44
53
  reader_kwargs["on_bad_lines"] = "skip"
45
- # - Define parser engine
54
+
55
+ # - Define reader engine
46
56
  # - C engine is faster
47
57
  # - Python engine is more feature-complete
48
58
  reader_kwargs["engine"] = "python"
59
+
49
60
  # - Define on-the-fly decompression of on-disk data
50
61
  # - Available: gzip, bz2, zip
51
62
  reader_kwargs["compression"] = "infer"
52
- # reader_kwargs['zipped'] = False
53
- # reader_kwargs['zipped'] = True
63
+
54
64
  # - Strings to recognize as NA/NaN and replace with standard NA flags
55
65
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
56
66
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
57
67
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
58
- reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"]
68
+ reader_kwargs["na_values"] = ["na", "", "error"]
69
+
59
70
  ##------------------------------------------------------------------------.
60
71
  #### Read the data
61
72
  df = read_raw_text_file(
@@ -67,28 +78,13 @@ def reader(
67
78
 
68
79
  ##------------------------------------------------------------------------.
69
80
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
70
- # NOTE: Station 8 has all raw_drop_number corrupted, so it can't be used
71
-
72
- # Remove rows that have a corrupted "TO_BE_PARSED" column
73
- # - PAR001, PAR002 have length 3726 (no station_name)
74
- # - PAR007 have length 3736 ()
75
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() >= 3726]
76
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() <= 3736]
77
-
78
- # Convert 'time' column to datetime
79
- df_time = pd.to_datetime(df["time"], format="%Y%m%d-%H%M%S", errors="coerce")
80
-
81
- # Strip values from start and end of the string
82
- df["TO_BE_PARSED"] = df["TO_BE_PARSED"].str.lstrip("b'").str.rstrip("'").str.rstrip("\\r\\n'") # noqa: B005
83
-
84
- # Split the column 'TO_BE_PARSED'
85
- df_to_parse = df["TO_BE_PARSED"].str.split(";", expand=True, n=99)
81
+ # Split and assign integrated variables names
82
+ df = df["TO_PARSE"].str.split(",", expand=True, n=22)
86
83
 
87
- # Retrieve DISDRODB compliant columns
88
- df = df_to_parse.iloc[:, 0:35]
89
-
90
- # Assign column names
91
- column_names = [
84
+ names = [
85
+ "time",
86
+ "station_name",
87
+ "station_number",
92
88
  "rainfall_rate_32bit",
93
89
  "rainfall_accumulated_32bit",
94
90
  "weather_code_synop_4680",
@@ -107,58 +103,35 @@ def reader(
107
103
  "sensor_heating_current",
108
104
  "sensor_battery_voltage",
109
105
  "sensor_status",
110
- "sensor_time_measurement_start",
111
- "sensor_time",
112
- "sensor_date",
113
- "station_name",
114
- "station_number",
115
- "rainfall_amount_absolute_32bit",
116
- "error_code",
117
- "sensor_temperature_pcb",
118
- "sensor_temperature_receiver",
119
- "sensor_temperature_trasmitter",
120
- "rainfall_rate_16_bit_30",
121
- "rainfall_rate_16_bit_1200",
122
- "rainfall_accumulated_16bit",
123
- "reflectivity_16bit",
124
106
  "rain_kinetic_energy",
125
- "snowfall_rate",
126
- "number_particles_all",
127
- # "number_particles_all_detected",
107
+ "TO_SPLIT",
128
108
  ]
129
- df.columns = column_names
109
+ df.columns = names
130
110
 
131
- # Add time column
132
- df["time"] = df_time
111
+ # Derive raw drop arrays
112
+ def split_string(s):
113
+ vals = [v.strip() for v in s.split(",")]
114
+ c1 = ", ".join(vals[:32])
115
+ c2 = ", ".join(vals[32:64])
116
+ c3 = ", ".join(vals[64:])
117
+ return pd.Series({"raw_drop_concentration": c1, "raw_drop_average_velocity": c2, "raw_drop_number": c3})
133
118
 
134
- # Retrieve raw_drop_concentration
135
- df["raw_drop_concentration"] = df_to_parse.iloc[:, 35:67].apply(
136
- lambda x: ",".join(x.dropna().astype(str)),
137
- axis=1,
138
- )
139
- # Retrieve raw_drop_average_velocity
140
- df["raw_drop_average_velocity"] = df_to_parse.iloc[:, 67:99].apply(
141
- lambda x: ",".join(x.dropna().astype(str)),
142
- axis=1,
143
- )
119
+ splitted_string = df["TO_SPLIT"].apply(split_string)
120
+ df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
121
+ df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
122
+ df["raw_drop_number"] = splitted_string["raw_drop_number"]
144
123
 
145
- # Retrieve raw_drop_number
146
- df_raw_drop_number = df_to_parse.iloc[:, 99].squeeze()
147
- df_raw_drop_number = df_raw_drop_number.str.replace(r"(\w{3})", r"\1,", regex=True)
148
- df["raw_drop_number"] = df_raw_drop_number
124
+ # Define datetime "time" column
125
+ df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
149
126
 
150
127
  # Drop columns not agreeing with DISDRODB L0 standards
151
128
  columns_to_drop = [
152
- "firmware_iop",
153
- "firmware_dsp",
154
- "sensor_time_measurement_start",
155
- "sensor_time",
156
- "sensor_date",
157
129
  "station_name",
158
130
  "station_number",
159
131
  "sensor_serial_number",
160
- "sensor_serial_number",
161
- # "number_particles_all_detected",
132
+ "firmware_iop",
133
+ "firmware_dsp",
134
+ "TO_SPLIT",
162
135
  ]
163
136
  df = df.drop(columns=columns_to_drop)
164
137
 
@@ -108,7 +108,7 @@ def reader(
108
108
  # --> "" generates an array of zeros in L0B processing
109
109
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
110
110
 
111
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
111
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
112
112
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
113
113
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
114
114
 
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for the PERILS 2022 and 2023 MIPS and RADAPS files."""
19
+ import pandas as pd
20
+
21
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
+ from disdrodb.l0.l0a_processing import read_raw_text_file
23
+
24
+
25
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw PERILS MIPS/RADAPS text file into a dataframe.

    Each line is a comma-separated record: year, day of year, time,
    sensor temperature, number of particles, rainfall rate and then the
    spectra values. The first 32 spectra values are stored as
    ``raw_drop_concentration`` and the following 32 as
    ``raw_drop_average_velocity``. Lines without the spectra field are discarded.

    Parameters
    ----------
    filepath : str
        Path of the raw text file to read.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per record, with a datetime64 ``time`` column
        (NaT where the timestamp cannot be parsed).
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter (each line is read as a single field)
    reader_kwargs["delimiter"] = "\\n"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - The file has no header row
    reader_kwargs["header"] = None

    # - Define encoding
    reader_kwargs["encoding"] = "ISO-8859-1"

    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split and assign integrated variables names
    # - Everything after the 6th comma stays together in "TO_SPLIT"
    df = df["TO_PARSE"].str.split(",", expand=True, n=6)
    names = [
        "year",
        "doy",
        "time",
        "sensor_temperature",
        "number_particles",
        "rainfall_rate_32bit",
        "TO_SPLIT",
    ]
    df.columns = names

    # Discard truncated lines: without the spectra field the record is
    # unusable and the string operations below would fail on missing values
    df = df[df["TO_SPLIT"].notna()].copy()

    # Derive raw drop arrays
    # - First 32 values: raw_drop_concentration
    # - Next 32 values: raw_drop_average_velocity
    # - Remaining values are not used by this reader
    spectra = df["TO_SPLIT"].str.split(",")

    def join_values(values, start, stop):
        return ", ".join(v.strip() for v in values[start:stop])

    df["raw_drop_concentration"] = spectra.map(lambda values: join_values(values, 0, 32))
    df["raw_drop_average_velocity"] = spectra.map(lambda values: join_values(values, 32, 64))

    # Define datetime time column
    def to_integer_string(series, width):
        # Remove only a trailing ".0" (explicit anchored regex, so the result
        # does not depend on the pandas default for 'regex') and left-pad
        # with zeros to the width expected by the datetime format.
        # NOTE(review): assumes the fields are float-formatted integers
        # (e.g. "500.0" meaning 00:05:00) -- confirm against the raw files.
        return series.str.strip().str.replace(r"\.0$", "", regex=True).str.zfill(width)

    year = to_integer_string(df["year"], 4)
    doy = to_integer_string(df["doy"], 3)
    hhmmss = to_integer_string(df["time"], 6)
    df["time"] = pd.to_datetime(year + "-" + doy + " " + hhmmss, format="%Y-%j %H%M%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "doy",
        "year",
        "TO_SPLIT",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df