disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +68 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +177 -24
- disdrodb/api/configs.py +3 -3
- disdrodb/api/info.py +13 -13
- disdrodb/api/io.py +281 -22
- disdrodb/api/path.py +184 -195
- disdrodb/api/search.py +18 -9
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +127 -11
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +37 -32
- disdrodb/l0/l0b_nc_processing.py +118 -8
- disdrodb/l0/l0b_processing.py +30 -65
- disdrodb/l0/l0c_processing.py +369 -259
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +46 -45
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/processing.py +268 -117
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +631 -345
- disdrodb/psd/models.py +9 -6
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +316 -355
- disdrodb/{l1/routines.py → routines/l1.py} +76 -116
- disdrodb/routines/l2.py +1019 -0
- disdrodb/{routines.py → routines/wrappers.py} +98 -10
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +61 -37
- disdrodb/scattering/permittivity.py +504 -0
- disdrodb/scattering/routines.py +746 -184
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4196 -0
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +121 -9
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +37 -19
- disdrodb/{l2 → utils}/event.py +15 -173
- disdrodb/utils/logger.py +14 -7
- disdrodb/utils/manipulations.py +81 -0
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +35 -177
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +5 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +398 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- disdrodb/l2/routines.py +0 -868
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -47,7 +47,7 @@ number_particles:
|
|
|
47
47
|
sensor_temperature:
|
|
48
48
|
description: Temperature in sensor housing
|
|
49
49
|
long_name: Temperature of the sensor
|
|
50
|
-
units: "
|
|
50
|
+
units: "degC"
|
|
51
51
|
sensor_serial_number:
|
|
52
52
|
description: Sensor serial number
|
|
53
53
|
long_name: Serial number of the sensor
|
|
@@ -105,15 +105,15 @@ error_code:
|
|
|
105
105
|
sensor_temperature_pcb:
|
|
106
106
|
description: Temperature in printed circuit board
|
|
107
107
|
long_name: Sensor PCB temperature
|
|
108
|
-
units: "
|
|
108
|
+
units: "degC"
|
|
109
109
|
sensor_temperature_receiver:
|
|
110
110
|
description: Temperature in right sensor head
|
|
111
111
|
long_name: Sensor receiver temperature
|
|
112
|
-
units: "
|
|
112
|
+
units: "degC"
|
|
113
113
|
sensor_temperature_trasmitter:
|
|
114
114
|
description: Temperature in left sensor head
|
|
115
115
|
long_name: Sensor trasmitter temperature
|
|
116
|
-
units: "
|
|
116
|
+
units: "degC"
|
|
117
117
|
rainfall_rate_16_bit_30:
|
|
118
118
|
description: Rainfall rate
|
|
119
119
|
long_name: Rainfall rate max 30 mm/h 16 bit
|
|
@@ -161,7 +161,7 @@ raw_drop_number:
|
|
|
161
161
|
air_temperature:
|
|
162
162
|
description: "Air temperature in degrees Celsius (C)"
|
|
163
163
|
long_name: Air temperature
|
|
164
|
-
units: "
|
|
164
|
+
units: "degC"
|
|
165
165
|
relative_humidity:
|
|
166
166
|
description: "Relative humidity in percent (%)"
|
|
167
167
|
long_name: Relative humidity
|
|
@@ -102,7 +102,7 @@ sensor_temperature:
|
|
|
102
102
|
chunksizes: 5000
|
|
103
103
|
_FillValue: 127
|
|
104
104
|
sensor_serial_number:
|
|
105
|
-
dtype:
|
|
105
|
+
dtype: str
|
|
106
106
|
zlib: false
|
|
107
107
|
complevel: 3
|
|
108
108
|
shuffle: true
|
|
@@ -110,7 +110,7 @@ sensor_serial_number:
|
|
|
110
110
|
contiguous: false
|
|
111
111
|
chunksizes: 5000
|
|
112
112
|
firmware_iop:
|
|
113
|
-
dtype:
|
|
113
|
+
dtype: str
|
|
114
114
|
zlib: false
|
|
115
115
|
complevel: 3
|
|
116
116
|
shuffle: true
|
|
@@ -118,7 +118,7 @@ firmware_iop:
|
|
|
118
118
|
contiguous: false
|
|
119
119
|
chunksizes: 5000
|
|
120
120
|
firmware_dsp:
|
|
121
|
-
dtype:
|
|
121
|
+
dtype: str
|
|
122
122
|
zlib: false
|
|
123
123
|
complevel: 3
|
|
124
124
|
shuffle: true
|
|
@@ -25,7 +25,7 @@ sensor_status:
|
|
|
25
25
|
air_temperature:
|
|
26
26
|
description: "Air temperature in degrees Celsius"
|
|
27
27
|
long_name: Air temperature
|
|
28
|
-
units: "
|
|
28
|
+
units: "degC"
|
|
29
29
|
relative_humidity:
|
|
30
30
|
description: "Relative humidity in percent (%)"
|
|
31
31
|
long_name: Relative humidity
|
|
@@ -33,15 +33,15 @@ relative_humidity:
|
|
|
33
33
|
wetbulb_temperature:
|
|
34
34
|
description: "Wet bulb temperature in degrees Celsius"
|
|
35
35
|
long_name: Wet bulb temperature
|
|
36
|
-
units: "
|
|
36
|
+
units: "degC"
|
|
37
37
|
air_temperature_max:
|
|
38
38
|
description: "Maximum air temperature in degrees Celsius"
|
|
39
39
|
long_name: Maximum air temperature
|
|
40
|
-
units: "
|
|
40
|
+
units: "degC"
|
|
41
41
|
air_temperature_min:
|
|
42
42
|
description: "Minimum air temperature in degrees Celsius"
|
|
43
43
|
long_name: Minimum air temperature
|
|
44
|
-
units: "
|
|
44
|
+
units: "degC"
|
|
45
45
|
rainfall_rate:
|
|
46
46
|
description: Rainfall rate
|
|
47
47
|
long_name: Rainfall rate
|
disdrodb/l0/l0a_processing.py
CHANGED
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
# -----------------------------------------------------------------------------.
|
|
19
19
|
"""Functions to process raw text files into DISDRODB L0A Apache Parquet."""
|
|
20
20
|
|
|
21
|
-
|
|
22
21
|
import logging
|
|
23
22
|
import os
|
|
24
23
|
from typing import Union
|
|
25
24
|
|
|
26
25
|
import numpy as np
|
|
27
26
|
import pandas as pd
|
|
27
|
+
import pyarrow.parquet as pq
|
|
28
28
|
|
|
29
29
|
from disdrodb.l0.check_standards import check_l0a_column_names, check_l0a_standards
|
|
30
30
|
from disdrodb.l0.l0b_processing import infer_split_str
|
|
@@ -130,11 +130,15 @@ def read_raw_text_file(
|
|
|
130
130
|
try:
|
|
131
131
|
df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
|
|
132
132
|
except pd.errors.EmptyDataError:
|
|
133
|
+
# if isinstance(filepath, zipfile.ZipExtFile):
|
|
134
|
+
# filepath = filepath.name
|
|
133
135
|
msg = f"The following file is empty: {filepath}"
|
|
134
136
|
raise ValueError(msg)
|
|
135
137
|
|
|
136
138
|
# Check the dataframe is not empty
|
|
137
139
|
if len(df.index) == 0:
|
|
140
|
+
# if isinstance(filepath, zipfile.ZipExtFile):
|
|
141
|
+
# filepath = filepath.name
|
|
138
142
|
msg = f"The following file is empty: {filepath}"
|
|
139
143
|
raise ValueError(msg)
|
|
140
144
|
|
|
@@ -265,13 +269,15 @@ def remove_issue_timesteps(df, issue_dict, logger=None, verbose=False):
|
|
|
265
269
|
# Retrieve timesteps and time_periods
|
|
266
270
|
timesteps = issue_dict.get("timesteps", None)
|
|
267
271
|
time_periods = issue_dict.get("time_periods", None)
|
|
272
|
+
timesteps = [] if timesteps is None else timesteps
|
|
273
|
+
time_periods = [] if time_periods is None else time_periods
|
|
268
274
|
|
|
269
275
|
# Drop rows of specified timesteps
|
|
270
|
-
if timesteps:
|
|
276
|
+
if len(timesteps) > 0:
|
|
271
277
|
df = drop_timesteps(df=df, timesteps=timesteps)
|
|
272
278
|
|
|
273
279
|
# Drop rows within specified time_period
|
|
274
|
-
if time_periods:
|
|
280
|
+
if len(time_periods) > 0:
|
|
275
281
|
df = drop_time_periods(df, time_periods=time_periods)
|
|
276
282
|
|
|
277
283
|
# Report number of dropped rows
|
|
@@ -413,6 +419,8 @@ def is_raw_array_string_not_corrupted(string):
|
|
|
413
419
|
"""Check if the raw array is corrupted."""
|
|
414
420
|
if not isinstance(string, str):
|
|
415
421
|
return False
|
|
422
|
+
if string in ["", "NAN", "NaN"]:
|
|
423
|
+
return True
|
|
416
424
|
split_str = infer_split_str(string=string)
|
|
417
425
|
list_values = string.split(split_str)
|
|
418
426
|
values = pd.to_numeric(list_values, errors="coerce")
|
|
@@ -625,6 +633,9 @@ def sanitize_df(
|
|
|
625
633
|
# - Sort by time
|
|
626
634
|
df = df.sort_values("time")
|
|
627
635
|
|
|
636
|
+
# - Drop index
|
|
637
|
+
df = df.reset_index(drop=True)
|
|
638
|
+
|
|
628
639
|
# ------------------------------------------------------.
|
|
629
640
|
# - Check column names agrees to DISDRODB standards
|
|
630
641
|
check_l0a_column_names(df, sensor_name=sensor_name)
|
|
@@ -755,24 +766,8 @@ def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) ->
|
|
|
755
766
|
return df
|
|
756
767
|
|
|
757
768
|
|
|
758
|
-
def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
|
|
759
|
-
# Log
|
|
760
|
-
msg = f"Reading L0 Apache Parquet file at {filepath} started."
|
|
761
|
-
log_info(logger=logger, msg=msg, verbose=verbose)
|
|
762
|
-
# Open file
|
|
763
|
-
df = pd.read_parquet(filepath)
|
|
764
|
-
if debugging_mode:
|
|
765
|
-
df = df.iloc[0:100]
|
|
766
|
-
# Log
|
|
767
|
-
msg = f"Reading L0 Apache Parquet file at {filepath} ended."
|
|
768
|
-
log_info(logger=logger, msg=msg, verbose=verbose)
|
|
769
|
-
return df
|
|
770
|
-
|
|
771
|
-
|
|
772
769
|
def read_l0a_dataframe(
|
|
773
770
|
filepaths: Union[str, list],
|
|
774
|
-
verbose: bool = False,
|
|
775
|
-
logger=None,
|
|
776
771
|
debugging_mode: bool = False,
|
|
777
772
|
) -> pd.DataFrame:
|
|
778
773
|
"""Read DISDRODB L0A Apache Parquet file(s).
|
|
@@ -781,13 +776,10 @@ def read_l0a_dataframe(
|
|
|
781
776
|
----------
|
|
782
777
|
filepaths : str or list
|
|
783
778
|
Either a list or a single filepath.
|
|
784
|
-
verbose : bool
|
|
785
|
-
Whether to print detailed processing information into terminal.
|
|
786
|
-
The default is ``False``.
|
|
787
779
|
debugging_mode : bool
|
|
788
780
|
If ``True``, it reduces the amount of data to process.
|
|
789
781
|
If filepaths is a list, it reads only the first 3 files.
|
|
790
|
-
|
|
782
|
+
It selects only 100 rows sampled from the first 3 files.
|
|
791
783
|
The default is ``False``.
|
|
792
784
|
|
|
793
785
|
Returns
|
|
@@ -796,8 +788,6 @@ def read_l0a_dataframe(
|
|
|
796
788
|
L0A Dataframe.
|
|
797
789
|
|
|
798
790
|
"""
|
|
799
|
-
from disdrodb.l0.l0a_processing import concatenate_dataframe
|
|
800
|
-
|
|
801
791
|
# ----------------------------------------
|
|
802
792
|
# Check filepaths validity
|
|
803
793
|
if not isinstance(filepaths, (list, str)):
|
|
@@ -814,16 +804,22 @@ def read_l0a_dataframe(
|
|
|
814
804
|
|
|
815
805
|
# ---------------------------------------------------
|
|
816
806
|
# Define the list of dataframe
|
|
817
|
-
|
|
818
|
-
_read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
|
|
819
|
-
]
|
|
807
|
+
df = pq.ParquetDataset(filepaths).read().to_pandas()
|
|
820
808
|
|
|
821
|
-
#
|
|
822
|
-
|
|
809
|
+
# Reduce rows
|
|
810
|
+
if debugging_mode:
|
|
811
|
+
n_rows = min(100, len(df))
|
|
812
|
+
df = df.sample(n=n_rows)
|
|
823
813
|
|
|
824
814
|
# Ensure time is in nanoseconds
|
|
825
815
|
df["time"] = df["time"].astype("M8[ns]")
|
|
826
816
|
|
|
817
|
+
# Ensure sorted by time
|
|
818
|
+
df = df.sort_values(by="time")
|
|
819
|
+
|
|
820
|
+
# Ensure no index
|
|
821
|
+
df = df.reset_index(drop=True)
|
|
822
|
+
|
|
827
823
|
# ---------------------------------------------------
|
|
828
824
|
# Return dataframe
|
|
829
825
|
return df
|
|
@@ -833,14 +829,15 @@ def read_l0a_dataframe(
|
|
|
833
829
|
#### L0A Utility
|
|
834
830
|
|
|
835
831
|
|
|
836
|
-
def
|
|
832
|
+
def generate_l0a(
|
|
837
833
|
filepaths: Union[list, str],
|
|
838
834
|
reader,
|
|
839
835
|
sensor_name,
|
|
836
|
+
issue_dict=None,
|
|
840
837
|
verbose=True,
|
|
841
838
|
logger=None,
|
|
842
839
|
) -> pd.DataFrame:
|
|
843
|
-
"""Read and parse a list
|
|
840
|
+
"""Read and parse a list of raw files and generate a DISDRODB L0A dataframe.
|
|
844
841
|
|
|
845
842
|
Parameters
|
|
846
843
|
----------
|
|
@@ -851,6 +848,13 @@ def read_raw_text_files(
|
|
|
851
848
|
Format: reader(filepath, logger=None)
|
|
852
849
|
sensor_name : str
|
|
853
850
|
Name of the sensor.
|
|
851
|
+
issue_dict : dict, optional
|
|
852
|
+
Issue dictionary providing information on timesteps to remove.
|
|
853
|
+
The default is an empty dictionary ``{}``.
|
|
854
|
+
Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
|
|
855
|
+
Valid issue_dict values are list of datetime64 values (with second accuracy).
|
|
856
|
+
To correctly format and check the validity of the ``issue_dict``, use
|
|
857
|
+
the ``disdrodb.l0.issue.check_issue_dict`` function.
|
|
854
858
|
verbose : bool
|
|
855
859
|
Whether to verbose the processing. The default is ``True``.
|
|
856
860
|
|
|
@@ -886,6 +890,7 @@ def read_raw_text_files(
|
|
|
886
890
|
df = sanitize_df(
|
|
887
891
|
df=df,
|
|
888
892
|
sensor_name=sensor_name,
|
|
893
|
+
issue_dict=issue_dict,
|
|
889
894
|
logger=logger,
|
|
890
895
|
verbose=verbose,
|
|
891
896
|
)
|
disdrodb/l0/l0b_nc_processing.py
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
"""Functions to process DISDRODB raw netCDF files into DISDRODB L0B netCDF files."""
|
|
20
20
|
|
|
21
21
|
import logging
|
|
22
|
+
from typing import Union
|
|
22
23
|
|
|
23
24
|
import numpy as np
|
|
24
25
|
|
|
@@ -33,8 +34,8 @@ from disdrodb.l0.standards import (
|
|
|
33
34
|
get_valid_variable_names,
|
|
34
35
|
)
|
|
35
36
|
from disdrodb.utils.logger import (
|
|
37
|
+
log_error,
|
|
36
38
|
# log_warning,
|
|
37
|
-
# log_debug,
|
|
38
39
|
log_info,
|
|
39
40
|
)
|
|
40
41
|
|
|
@@ -169,6 +170,8 @@ def standardize_raw_dataset(ds, dict_names, sensor_name):
|
|
|
169
170
|
|
|
170
171
|
# If missing variables, infill with NaN array
|
|
171
172
|
missing_vars = _get_missing_variables(ds, dict_names, sensor_name)
|
|
173
|
+
if "raw_drop_number" in missing_vars:
|
|
174
|
+
raise ValueError("The raw drop spectrum is not present in the netCDF file!")
|
|
172
175
|
if len(missing_vars) > 0:
|
|
173
176
|
ds = add_dataset_missing_variables(ds=ds, missing_vars=missing_vars, sensor_name=sensor_name)
|
|
174
177
|
|
|
@@ -343,7 +346,7 @@ def drop_timesteps(ds, timesteps: list):
|
|
|
343
346
|
# Ensure there's at least one timestep left
|
|
344
347
|
if ds_filtered.sizes.get("time", 0) == 0:
|
|
345
348
|
raise ValueError(
|
|
346
|
-
"No timesteps left after removing problematic timesteps.
|
|
349
|
+
"No timesteps left after removing problematic timesteps. Maybe you need to adjust the issue YAML file.",
|
|
347
350
|
)
|
|
348
351
|
return ds_filtered
|
|
349
352
|
|
|
@@ -419,16 +422,21 @@ def remove_issue_timesteps(
|
|
|
419
422
|
ValueError
|
|
420
423
|
If after removing specified timesteps/periods no data remains.
|
|
421
424
|
"""
|
|
425
|
+
# Retrieve number of initial rows
|
|
422
426
|
n_initial = ds.sizes.get("time", 0)
|
|
423
|
-
|
|
424
|
-
|
|
427
|
+
|
|
428
|
+
# Retrieve timesteps and time_periods
|
|
429
|
+
timesteps = issue_dict.get("timesteps")
|
|
430
|
+
time_periods = issue_dict.get("time_periods")
|
|
431
|
+
timesteps = [] if timesteps is None else timesteps
|
|
432
|
+
time_periods = [] if time_periods is None else time_periods
|
|
425
433
|
|
|
426
434
|
# Drop individual timesteps
|
|
427
|
-
if timesteps:
|
|
435
|
+
if len(timesteps) > 0:
|
|
428
436
|
ds = drop_timesteps(ds, timesteps)
|
|
429
437
|
|
|
430
438
|
# Drop intervals of time
|
|
431
|
-
if time_periods:
|
|
439
|
+
if len(time_periods) > 0:
|
|
432
440
|
ds = drop_time_periods(ds, time_periods)
|
|
433
441
|
|
|
434
442
|
# Report number dropped
|
|
@@ -454,8 +462,8 @@ def sanitize_ds(
|
|
|
454
462
|
----------
|
|
455
463
|
ds : xarray.Dataset
|
|
456
464
|
Raw xarray dataset
|
|
457
|
-
|
|
458
|
-
|
|
465
|
+
metadata: dict
|
|
466
|
+
Station metadata to attach as global attributes to the xr.Dataset.
|
|
459
467
|
sensor_name : str
|
|
460
468
|
Name of the sensor.
|
|
461
469
|
verbose : bool
|
|
@@ -525,3 +533,105 @@ def open_raw_netcdf_file(
|
|
|
525
533
|
# Log information
|
|
526
534
|
log_info(logger=logger, msg=f"netCDF file {filepath} has been loaded successively into xarray.", verbose=False)
|
|
527
535
|
return ds
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
def generate_l0b_from_nc(
|
|
539
|
+
filepaths: Union[list, str],
|
|
540
|
+
reader,
|
|
541
|
+
sensor_name,
|
|
542
|
+
metadata,
|
|
543
|
+
issue_dict=None,
|
|
544
|
+
verbose=True,
|
|
545
|
+
logger=None,
|
|
546
|
+
):
|
|
547
|
+
"""Read and parse a list of raw netCDF files and generate a DISDRODB L0B dataset.
|
|
548
|
+
|
|
549
|
+
Parameters
|
|
550
|
+
----------
|
|
551
|
+
filepaths : Union[list,str]
|
|
552
|
+
File(s) path(s)
|
|
553
|
+
reader:
|
|
554
|
+
DISDRODB reader function.
|
|
555
|
+
Format: reader(filepath, logger=None)
|
|
556
|
+
sensor_name : str
|
|
557
|
+
Name of the sensor.
|
|
558
|
+
metadata: dict
|
|
559
|
+
Station metadata to attach as global attributes to the xr.Dataset.
|
|
560
|
+
issue_dict : dict, optional
|
|
561
|
+
Issue dictionary providing information on timesteps to remove.
|
|
562
|
+
The default is an empty dictionary ``{}``.
|
|
563
|
+
Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
|
|
564
|
+
Valid issue_dict values are list of datetime64 values (with second accuracy).
|
|
565
|
+
To correctly format and check the validity of the ``issue_dict``, use
|
|
566
|
+
the ``disdrodb.l0.issue.check_issue_dict`` function.
|
|
567
|
+
verbose : bool
|
|
568
|
+
Whether to verbose the processing. The default is ``True``.
|
|
569
|
+
|
|
570
|
+
Returns
|
|
571
|
+
-------
|
|
572
|
+
xarray.Dataset
|
|
573
|
+
DISDRODB L0B Dataset.
|
|
574
|
+
|
|
575
|
+
Raises
|
|
576
|
+
------
|
|
577
|
+
ValueError
|
|
578
|
+
Input parameters can not be used or the raw file can not be processed.
|
|
579
|
+
|
|
580
|
+
"""
|
|
581
|
+
import xarray as xr
|
|
582
|
+
|
|
583
|
+
# Check input list
|
|
584
|
+
if isinstance(filepaths, str):
|
|
585
|
+
filepaths = [filepaths]
|
|
586
|
+
if len(filepaths) == 0:
|
|
587
|
+
raise ValueError("'filepaths' must contains at least 1 filepath.")
|
|
588
|
+
|
|
589
|
+
# ------------------------------------------------------.
|
|
590
|
+
# Loop over all raw files
|
|
591
|
+
n_files = len(filepaths)
|
|
592
|
+
processed_file_counter = 0
|
|
593
|
+
list_skipped_files_msg = []
|
|
594
|
+
list_ds = []
|
|
595
|
+
for filepath in filepaths:
|
|
596
|
+
# Try read the raw netCDF file
|
|
597
|
+
try:
|
|
598
|
+
ds = reader(filepath, logger=logger)
|
|
599
|
+
# Sanitize the dataframe
|
|
600
|
+
ds = sanitize_ds(
|
|
601
|
+
ds=ds,
|
|
602
|
+
sensor_name=sensor_name,
|
|
603
|
+
metadata=metadata,
|
|
604
|
+
issue_dict=issue_dict,
|
|
605
|
+
verbose=verbose,
|
|
606
|
+
logger=logger,
|
|
607
|
+
)
|
|
608
|
+
# Append dataframe to the list
|
|
609
|
+
list_ds.append(ds)
|
|
610
|
+
# Update the logger
|
|
611
|
+
processed_file_counter += 1
|
|
612
|
+
msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
|
|
613
|
+
log_info(logger=logger, msg=msg, verbose=verbose)
|
|
614
|
+
|
|
615
|
+
# Skip the file if the processing fails
|
|
616
|
+
except Exception as e:
|
|
617
|
+
# Update the logger
|
|
618
|
+
msg = f"{filepath} has been skipped. The error is: {e}."
|
|
619
|
+
log_error(logger=logger, msg=msg, verbose=verbose)
|
|
620
|
+
list_skipped_files_msg.append(msg)
|
|
621
|
+
|
|
622
|
+
# Update logger
|
|
623
|
+
msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
|
|
624
|
+
log_info(logger=logger, msg=msg, verbose=verbose)
|
|
625
|
+
|
|
626
|
+
# Check if there are files to concatenate
|
|
627
|
+
if len(list_ds) == 0:
|
|
628
|
+
raise ValueError("Any raw file could be read!")
|
|
629
|
+
|
|
630
|
+
##----------------------------------------------------------------.
|
|
631
|
+
# Concatenate the datasets
|
|
632
|
+
list_ds = [ds.chunk({"time": -1}) for ds in list_ds]
|
|
633
|
+
ds = xr.concat(list_ds, dim="time", join="outer", compat="no_conflicts", combine_attrs="override").sortby("time")
|
|
634
|
+
ds = ds.compute()
|
|
635
|
+
|
|
636
|
+
# Return the dataframe
|
|
637
|
+
return ds
|
disdrodb/l0/l0b_processing.py
CHANGED
|
@@ -19,7 +19,6 @@
|
|
|
19
19
|
"""Functions to process DISDRODB L0A files into DISDRODB L0B netCDF files."""
|
|
20
20
|
|
|
21
21
|
import logging
|
|
22
|
-
import os
|
|
23
22
|
|
|
24
23
|
import numpy as np
|
|
25
24
|
import pandas as pd
|
|
@@ -43,13 +42,8 @@ from disdrodb.utils.attrs import (
|
|
|
43
42
|
set_coordinate_attributes,
|
|
44
43
|
set_disdrodb_attrs,
|
|
45
44
|
)
|
|
46
|
-
from disdrodb.utils.directories import create_directory, remove_if_exists
|
|
47
45
|
from disdrodb.utils.encoding import set_encodings
|
|
48
|
-
from disdrodb.utils.logger import
|
|
49
|
-
# log_warning,
|
|
50
|
-
# log_debug,
|
|
51
|
-
log_info,
|
|
52
|
-
)
|
|
46
|
+
from disdrodb.utils.logger import log_info
|
|
53
47
|
from disdrodb.utils.time import ensure_sorted_by_time
|
|
54
48
|
|
|
55
49
|
logger = logging.getLogger(__name__)
|
|
@@ -246,12 +240,20 @@ def retrieve_l0b_arrays(
|
|
|
246
240
|
unavailable_keys.append(key)
|
|
247
241
|
continue
|
|
248
242
|
|
|
249
|
-
# Ensure is a string
|
|
250
|
-
|
|
243
|
+
# Ensure is a string, get a numpy array for each row and then stack
|
|
244
|
+
# - Option 1: Clear but lot of copies
|
|
245
|
+
# df_series = df[key].astype(str)
|
|
246
|
+
# list_arr = df_series.apply(_format_string_array, n_values=n_values)
|
|
247
|
+
# arr = np.stack(list_arr, axis=0)
|
|
248
|
+
|
|
249
|
+
# - Option 2: still copies
|
|
250
|
+
# arr = np.vstack(_format_string_array(s, n_values=n_values) for s in df_series.astype(str))
|
|
251
251
|
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
arr = np.
|
|
252
|
+
# - Option 3: more memory efficient
|
|
253
|
+
n_timesteps = len(df[key])
|
|
254
|
+
arr = np.empty((n_timesteps, n_values), dtype=float) # preallocates
|
|
255
|
+
for i, s in enumerate(df[key].astype(str)):
|
|
256
|
+
arr[i, :] = _format_string_array(s, n_values=n_values)
|
|
255
257
|
|
|
256
258
|
# Retrieve dimensions
|
|
257
259
|
dims_order = dims_order_dict[key]
|
|
@@ -333,18 +335,6 @@ def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
|
|
|
333
335
|
return ds
|
|
334
336
|
|
|
335
337
|
|
|
336
|
-
def _set_dataset_attrs(ds, sensor_name):
|
|
337
|
-
"""Set variable and coordinates attributes."""
|
|
338
|
-
# - Add netCDF variable attributes
|
|
339
|
-
# --> Attributes: long_name, units, descriptions, valid_min, valid_max
|
|
340
|
-
ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
|
|
341
|
-
# - Add netCDF coordinate attributes
|
|
342
|
-
ds = set_coordinate_attributes(ds=ds)
|
|
343
|
-
# - Set DISDRODB global attributes
|
|
344
|
-
ds = set_disdrodb_attrs(ds=ds, product="L0B")
|
|
345
|
-
return ds
|
|
346
|
-
|
|
347
|
-
|
|
348
338
|
def add_dataset_crs_coords(ds):
|
|
349
339
|
"""Add the CRS coordinate to the xr.Dataset."""
|
|
350
340
|
# TODO: define CF-compliant CRS !
|
|
@@ -386,13 +376,13 @@ def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
|
|
|
386
376
|
return data_vars
|
|
387
377
|
|
|
388
378
|
|
|
389
|
-
def
|
|
379
|
+
def generate_l0b(
|
|
390
380
|
df: pd.DataFrame,
|
|
391
381
|
metadata: dict,
|
|
392
382
|
logger=None,
|
|
393
383
|
verbose: bool = False,
|
|
394
384
|
) -> xr.Dataset:
|
|
395
|
-
"""Transform the L0A dataframe to the L0B xr.Dataset.
|
|
385
|
+
"""Transform the DISDRODB L0A dataframe to the DISDRODB L0B xr.Dataset.
|
|
396
386
|
|
|
397
387
|
Parameters
|
|
398
388
|
----------
|
|
@@ -475,16 +465,25 @@ def finalize_dataset(ds, sensor_name, metadata):
|
|
|
475
465
|
ds = add_dataset_crs_coords(ds)
|
|
476
466
|
|
|
477
467
|
# Set netCDF dimension order
|
|
468
|
+
# --> Required for correct encoding !
|
|
478
469
|
ds = ds.transpose("time", "diameter_bin_center", ...)
|
|
479
470
|
|
|
480
|
-
# Add netCDF variable and coordinate attributes
|
|
481
|
-
ds = _set_dataset_attrs(ds, sensor_name)
|
|
482
|
-
|
|
483
471
|
# Ensure variables with dtype object are converted to string
|
|
484
472
|
ds = _convert_object_variables_to_string(ds)
|
|
485
473
|
|
|
474
|
+
# Add netCDF variable and coordinate attributes
|
|
475
|
+
# - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
|
|
476
|
+
ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
|
|
477
|
+
# - Add netCDF coordinate attributes
|
|
478
|
+
ds = set_coordinate_attributes(ds=ds)
|
|
479
|
+
# - Set DISDRODB global attributes
|
|
480
|
+
ds = set_disdrodb_attrs(ds=ds, product="L0B")
|
|
481
|
+
|
|
486
482
|
# Check L0B standards
|
|
487
483
|
check_l0b_standards(ds)
|
|
484
|
+
|
|
485
|
+
# Set L0B encodings
|
|
486
|
+
ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
|
|
488
487
|
return ds
|
|
489
488
|
|
|
490
489
|
|
|
@@ -503,43 +502,9 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
|
|
|
503
502
|
xarray.Dataset
|
|
504
503
|
Output xarray dataset.
|
|
505
504
|
"""
|
|
506
|
-
|
|
507
|
-
ds = set_encodings(ds=ds,
|
|
505
|
+
encodings_dict = get_l0b_encodings_dict(sensor_name)
|
|
506
|
+
ds = set_encodings(ds=ds, encodings_dict=encodings_dict)
|
|
508
507
|
return ds
|
|
509
508
|
|
|
510
509
|
|
|
511
|
-
def write_l0b(ds: xr.Dataset, filepath: str, force=False) -> None:
|
|
512
|
-
"""Save the xarray dataset into a NetCDF file.
|
|
513
|
-
|
|
514
|
-
Parameters
|
|
515
|
-
----------
|
|
516
|
-
ds : xarray.Dataset
|
|
517
|
-
Input xarray dataset.
|
|
518
|
-
filepath : str
|
|
519
|
-
Output file path.
|
|
520
|
-
sensor_name : str
|
|
521
|
-
Name of the sensor.
|
|
522
|
-
force : bool, optional
|
|
523
|
-
Whether to overwrite existing data.
|
|
524
|
-
If ``True``, overwrite existing data into destination directories.
|
|
525
|
-
If ``False``, raise an error if there are already data into destination directories. This is the default.
|
|
526
|
-
"""
|
|
527
|
-
# Create station directory if does not exist
|
|
528
|
-
create_directory(os.path.dirname(filepath))
|
|
529
|
-
|
|
530
|
-
# Check if the file already exists
|
|
531
|
-
# - If force=True --> Remove it
|
|
532
|
-
# - If force=False --> Raise error
|
|
533
|
-
remove_if_exists(filepath, force=force)
|
|
534
|
-
|
|
535
|
-
# Get sensor name from dataset
|
|
536
|
-
sensor_name = ds.attrs.get("sensor_name")
|
|
537
|
-
|
|
538
|
-
# Set encodings
|
|
539
|
-
ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
|
|
540
|
-
|
|
541
|
-
# Write netcdf
|
|
542
|
-
ds.to_netcdf(filepath, engine="netcdf4")
|
|
543
|
-
|
|
544
|
-
|
|
545
510
|
####--------------------------------------------------------------------------.
|