disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
@@ -47,7 +47,7 @@ number_particles:
47
47
  sensor_temperature:
48
48
  description: Temperature in sensor housing
49
49
  long_name: Temperature of the sensor
50
- units: "C"
50
+ units: "degC"
51
51
  sensor_serial_number:
52
52
  description: Sensor serial number
53
53
  long_name: Serial number of the sensor
@@ -105,15 +105,15 @@ error_code:
105
105
  sensor_temperature_pcb:
106
106
  description: Temperature in printed circuit board
107
107
  long_name: Sensor PCB temperature
108
- units: "C"
108
+ units: "degC"
109
109
  sensor_temperature_receiver:
110
110
  description: Temperature in right sensor head
111
111
  long_name: Sensor receiver temperature
112
- units: "C"
112
+ units: "degC"
113
113
  sensor_temperature_trasmitter:
114
114
  description: Temperature in left sensor head
115
115
  long_name: Sensor trasmitter temperature
116
- units: "C"
116
+ units: "degC"
117
117
  rainfall_rate_16_bit_30:
118
118
  description: Rainfall rate
119
119
  long_name: Rainfall rate max 30 mm/h 16 bit
@@ -161,7 +161,7 @@ raw_drop_number:
161
161
  air_temperature:
162
162
  description: "Air temperature in degrees Celsius (C)"
163
163
  long_name: Air temperature
164
- units: "C"
164
+ units: "degC"
165
165
  relative_humidity:
166
166
  description: "Relative humidity in percent (%)"
167
167
  long_name: Relative humidity
@@ -102,7 +102,7 @@ sensor_temperature:
102
102
  chunksizes: 5000
103
103
  _FillValue: 127
104
104
  sensor_serial_number:
105
- dtype: object
105
+ dtype: str
106
106
  zlib: false
107
107
  complevel: 3
108
108
  shuffle: true
@@ -110,7 +110,7 @@ sensor_serial_number:
110
110
  contiguous: false
111
111
  chunksizes: 5000
112
112
  firmware_iop:
113
- dtype: object
113
+ dtype: str
114
114
  zlib: false
115
115
  complevel: 3
116
116
  shuffle: true
@@ -118,7 +118,7 @@ firmware_iop:
118
118
  contiguous: false
119
119
  chunksizes: 5000
120
120
  firmware_dsp:
121
- dtype: object
121
+ dtype: str
122
122
  zlib: false
123
123
  complevel: 3
124
124
  shuffle: true
@@ -15,7 +15,7 @@ rainfall_accumulated_32bit:
15
15
  n_naturals: 4
16
16
  data_range:
17
17
  - 0
18
- - 300.0
18
+ - 9999.0
19
19
  nan_flags: null
20
20
  field_number: "02"
21
21
  weather_code_synop_4680:
@@ -25,7 +25,7 @@ sensor_status:
25
25
  air_temperature:
26
26
  description: "Air temperature in degrees Celsius"
27
27
  long_name: Air temperature
28
- units: "C"
28
+ units: "degC"
29
29
  relative_humidity:
30
30
  description: "Relative humidity in percent (%)"
31
31
  long_name: Relative humidity
@@ -33,15 +33,15 @@ relative_humidity:
33
33
  wetbulb_temperature:
34
34
  description: "Wet bulb temperature in degrees Celsius"
35
35
  long_name: Wet bulb temperature
36
- units: "C"
36
+ units: "degC"
37
37
  air_temperature_max:
38
38
  description: "Maximum air temperature in degrees Celsius"
39
39
  long_name: Maximum air temperature
40
- units: "C"
40
+ units: "degC"
41
41
  air_temperature_min:
42
42
  description: "Minimum air temperature in degrees Celsius"
43
43
  long_name: Minimum air temperature
44
- units: "C"
44
+ units: "degC"
45
45
  rainfall_rate:
46
46
  description: Rainfall rate
47
47
  long_name: Rainfall rate
@@ -5,7 +5,7 @@ mor_visibility:
5
5
  n_naturals: 4
6
6
  data_range:
7
7
  - 0
8
- - 9999.9
8
+ - 20000
9
9
  nan_flags: null
10
10
  field_number: "20"
11
11
  weather_code_synop_4680:
@@ -18,13 +18,13 @@
18
18
  # -----------------------------------------------------------------------------.
19
19
  """Functions to process raw text files into DISDRODB L0A Apache Parquet."""
20
20
 
21
-
22
21
  import logging
23
22
  import os
24
23
  from typing import Union
25
24
 
26
25
  import numpy as np
27
26
  import pandas as pd
27
+ import pyarrow.parquet as pq
28
28
 
29
29
  from disdrodb.l0.check_standards import check_l0a_column_names, check_l0a_standards
30
30
  from disdrodb.l0.l0b_processing import infer_split_str
@@ -130,11 +130,15 @@ def read_raw_text_file(
130
130
  try:
131
131
  df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
132
132
  except pd.errors.EmptyDataError:
133
+ # if isinstance(filepath, zipfile.ZipExtFile):
134
+ # filepath = filepath.name
133
135
  msg = f"The following file is empty: {filepath}"
134
136
  raise ValueError(msg)
135
137
 
136
138
  # Check the dataframe is not empty
137
139
  if len(df.index) == 0:
140
+ # if isinstance(filepath, zipfile.ZipExtFile):
141
+ # filepath = filepath.name
138
142
  msg = f"The following file is empty: {filepath}"
139
143
  raise ValueError(msg)
140
144
 
@@ -413,6 +417,8 @@ def is_raw_array_string_not_corrupted(string):
413
417
  """Check if the raw array is corrupted."""
414
418
  if not isinstance(string, str):
415
419
  return False
420
+ if string in ["", "NAN", "NaN"]:
421
+ return True
416
422
  split_str = infer_split_str(string=string)
417
423
  list_values = string.split(split_str)
418
424
  values = pd.to_numeric(list_values, errors="coerce")
@@ -625,6 +631,9 @@ def sanitize_df(
625
631
  # - Sort by time
626
632
  df = df.sort_values("time")
627
633
 
634
+ # - Drop index
635
+ df = df.reset_index(drop=True)
636
+
628
637
  # ------------------------------------------------------.
629
638
  # - Check column names agrees to DISDRODB standards
630
639
  check_l0a_column_names(df, sensor_name=sensor_name)
@@ -755,24 +764,8 @@ def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) ->
755
764
  return df
756
765
 
757
766
 
758
- def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
759
- # Log
760
- msg = f"Reading L0 Apache Parquet file at {filepath} started."
761
- log_info(logger=logger, msg=msg, verbose=verbose)
762
- # Open file
763
- df = pd.read_parquet(filepath)
764
- if debugging_mode:
765
- df = df.iloc[0:100]
766
- # Log
767
- msg = f"Reading L0 Apache Parquet file at {filepath} ended."
768
- log_info(logger=logger, msg=msg, verbose=verbose)
769
- return df
770
-
771
-
772
767
  def read_l0a_dataframe(
773
768
  filepaths: Union[str, list],
774
- verbose: bool = False,
775
- logger=None,
776
769
  debugging_mode: bool = False,
777
770
  ) -> pd.DataFrame:
778
771
  """Read DISDRODB L0A Apache Parquet file(s).
@@ -781,13 +774,10 @@ def read_l0a_dataframe(
781
774
  ----------
782
775
  filepaths : str or list
783
776
  Either a list or a single filepath.
784
- verbose : bool
785
- Whether to print detailed processing information into terminal.
786
- The default is ``False``.
787
777
  debugging_mode : bool
788
778
  If ``True``, it reduces the amount of data to process.
789
779
  If filepaths is a list, it reads only the first 3 files.
790
- For each file it select only the first 100 rows.
780
+ It selects only 100 rows sampled from the first 3 files.
791
781
  The default is ``False``.
792
782
 
793
783
  Returns
@@ -796,8 +786,6 @@ def read_l0a_dataframe(
796
786
  L0A Dataframe.
797
787
 
798
788
  """
799
- from disdrodb.l0.l0a_processing import concatenate_dataframe
800
-
801
789
  # ----------------------------------------
802
790
  # Check filepaths validity
803
791
  if not isinstance(filepaths, (list, str)):
@@ -814,12 +802,15 @@ def read_l0a_dataframe(
814
802
 
815
803
  # ---------------------------------------------------
816
804
  # Define the list of dataframe
817
- list_df = [
818
- _read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
819
- ]
805
+ df = pq.ParquetDataset(filepaths).read().to_pandas()
820
806
 
821
- # Concatenate dataframe
822
- df = concatenate_dataframe(list_df, logger=logger, verbose=verbose)
807
+ # Ensure no index
808
+ df = df.reset_index(drop=True)
809
+
810
+ # Reduce rows
811
+ if debugging_mode:
812
+ n_rows = min(100, len(df))
813
+ df = df.sample(n=n_rows)
823
814
 
824
815
  # Ensure time is in nanoseconds
825
816
  df["time"] = df["time"].astype("M8[ns]")
@@ -833,14 +824,15 @@ def read_l0a_dataframe(
833
824
  #### L0A Utility
834
825
 
835
826
 
836
- def read_raw_text_files(
827
+ def generate_l0a(
837
828
  filepaths: Union[list, str],
838
829
  reader,
839
830
  sensor_name,
831
+ issue_dict=None,
840
832
  verbose=True,
841
833
  logger=None,
842
834
  ) -> pd.DataFrame:
843
- """Read and parse a list for raw files into a dataframe.
835
+ """Read and parse a list of raw files and generate a DISDRODB L0A dataframe.
844
836
 
845
837
  Parameters
846
838
  ----------
@@ -851,6 +843,13 @@ def read_raw_text_files(
851
843
  Format: reader(filepath, logger=None)
852
844
  sensor_name : str
853
845
  Name of the sensor.
846
+ issue_dict : dict, optional
847
+ Issue dictionary providing information on timesteps to remove.
848
+ The default is an empty dictionary ``{}``.
849
+ Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
850
+ Valid issue_dict values are list of datetime64 values (with second accuracy).
851
+ To correctly format and check the validity of the ``issue_dict``, use
852
+ the ``disdrodb.l0.issue.check_issue_dict`` function.
854
853
  verbose : bool
855
854
  Whether to verbose the processing. The default is ``True``.
856
855
 
@@ -886,6 +885,7 @@ def read_raw_text_files(
886
885
  df = sanitize_df(
887
886
  df=df,
888
887
  sensor_name=sensor_name,
888
+ issue_dict=issue_dict,
889
889
  logger=logger,
890
890
  verbose=verbose,
891
891
  )
@@ -19,6 +19,7 @@
19
19
  """Functions to process DISDRODB raw netCDF files into DISDRODB L0B netCDF files."""
20
20
 
21
21
  import logging
22
+ from typing import Union
22
23
 
23
24
  import numpy as np
24
25
 
@@ -33,6 +34,7 @@ from disdrodb.l0.standards import (
33
34
  get_valid_variable_names,
34
35
  )
35
36
  from disdrodb.utils.logger import (
37
+ log_error,
36
38
  # log_warning,
37
39
  # log_debug,
38
40
  log_info,
@@ -169,6 +171,8 @@ def standardize_raw_dataset(ds, dict_names, sensor_name):
169
171
 
170
172
  # If missing variables, infill with NaN array
171
173
  missing_vars = _get_missing_variables(ds, dict_names, sensor_name)
174
+ if "raw_drop_number" in missing_vars:
175
+ raise ValueError("The raw drop spectrum is not present in the netCDF file!")
172
176
  if len(missing_vars) > 0:
173
177
  ds = add_dataset_missing_variables(ds=ds, missing_vars=missing_vars, sensor_name=sensor_name)
174
178
 
@@ -454,8 +458,8 @@ def sanitize_ds(
454
458
  ----------
455
459
  ds : xarray.Dataset
456
460
  Raw xarray dataset
457
- attrs: dict
458
- Global metadata to attach as global attributes to the xr.Dataset.
461
+ metadata: dict
462
+ Station metadata to attach as global attributes to the xr.Dataset.
459
463
  sensor_name : str
460
464
  Name of the sensor.
461
465
  verbose : bool
@@ -525,3 +529,105 @@ def open_raw_netcdf_file(
525
529
  # Log information
526
530
  log_info(logger=logger, msg=f"netCDF file {filepath} has been loaded successively into xarray.", verbose=False)
527
531
  return ds
532
+
533
+
534
+ def generate_l0b_from_nc(
535
+ filepaths: Union[list, str],
536
+ reader,
537
+ sensor_name,
538
+ metadata,
539
+ issue_dict=None,
540
+ verbose=True,
541
+ logger=None,
542
+ ):
543
+ """Read and parse a list of raw netCDF files and generate a DISDRODB L0B dataset.
544
+
545
+ Parameters
546
+ ----------
547
+ filepaths : Union[list,str]
548
+ File(s) path(s)
549
+ reader:
550
+ DISDRODB reader function.
551
+ Format: reader(filepath, logger=None)
552
+ sensor_name : str
553
+ Name of the sensor.
554
+ metadata: dict
555
+ Station metadata to attach as global attributes to the xr.Dataset.
556
+ issue_dict : dict, optional
557
+ Issue dictionary providing information on timesteps to remove.
558
+ The default is an empty dictionary ``{}``.
559
+ Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
560
+ Valid issue_dict values are list of datetime64 values (with second accuracy).
561
+ To correctly format and check the validity of the ``issue_dict``, use
562
+ the ``disdrodb.l0.issue.check_issue_dict`` function.
563
+ verbose : bool
564
+ Whether to verbose the processing. The default is ``True``.
565
+
566
+ Returns
567
+ -------
568
+ xarray.Dataset
569
+ DISDRODB L0B Dataset.
570
+
571
+ Raises
572
+ ------
573
+ ValueError
574
+ Input parameters can not be used or the raw file can not be processed.
575
+
576
+ """
577
+ import xarray as xr
578
+
579
+ # Check input list
580
+ if isinstance(filepaths, str):
581
+ filepaths = [filepaths]
582
+ if len(filepaths) == 0:
583
+ raise ValueError("'filepaths' must contains at least 1 filepath.")
584
+
585
+ # ------------------------------------------------------.
586
+ # Loop over all raw files
587
+ n_files = len(filepaths)
588
+ processed_file_counter = 0
589
+ list_skipped_files_msg = []
590
+ list_ds = []
591
+ for filepath in filepaths:
592
+ # Try read the raw netCDF file
593
+ try:
594
+ ds = reader(filepath, logger=logger)
595
+ # Sanitize the dataframe
596
+ ds = sanitize_ds(
597
+ ds=ds,
598
+ sensor_name=sensor_name,
599
+ metadata=metadata,
600
+ issue_dict=issue_dict,
601
+ verbose=verbose,
602
+ logger=logger,
603
+ )
604
+ # Append dataframe to the list
605
+ list_ds.append(ds)
606
+ # Update the logger
607
+ processed_file_counter += 1
608
+ msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
609
+ log_info(logger=logger, msg=msg, verbose=verbose)
610
+
611
+ # Skip the file if the processing fails
612
+ except Exception as e:
613
+ # Update the logger
614
+ msg = f"{filepath} has been skipped. The error is: {e}."
615
+ log_error(logger=logger, msg=msg, verbose=verbose)
616
+ list_skipped_files_msg.append(msg)
617
+
618
+ # Update logger
619
+ msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
620
+ log_info(logger=logger, msg=msg, verbose=verbose)
621
+
622
+ # Check if there are files to concatenate
623
+ if len(list_ds) == 0:
624
+ raise ValueError("Any raw file could be read!")
625
+
626
+ ##----------------------------------------------------------------.
627
+ # Concatenate the datasets
628
+ list_ds = [ds.chunk({"time": -1}) for ds in list_ds]
629
+ ds = xr.concat(list_ds, dim="time", join="outer", compat="no_conflicts", combine_attrs="override").sortby("time")
630
+ ds = ds.compute()
631
+
632
+ # Return the dataframe
633
+ return ds
@@ -386,13 +386,13 @@ def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
386
386
  return data_vars
387
387
 
388
388
 
389
- def create_l0b_from_l0a(
389
+ def generate_l0b(
390
390
  df: pd.DataFrame,
391
391
  metadata: dict,
392
392
  logger=None,
393
393
  verbose: bool = False,
394
394
  ) -> xr.Dataset:
395
- """Transform the L0A dataframe to the L0B xr.Dataset.
395
+ """Transform the DISDRODB L0A dataframe to the DISDRODB L0B xr.Dataset.
396
396
 
397
397
  Parameters
398
398
  ----------
@@ -503,8 +503,8 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
503
503
  xarray.Dataset
504
504
  Output xarray dataset.
505
505
  """
506
- encoding_dict = get_l0b_encodings_dict(sensor_name)
507
- ds = set_encodings(ds=ds, encoding_dict=encoding_dict)
506
+ encodings_dict = get_l0b_encodings_dict(sensor_name)
507
+ ds = set_encodings(ds=ds, encodings_dict=encodings_dict)
508
508
  return ds
509
509
 
510
510
 
@@ -388,11 +388,10 @@ def check_timesteps_regularity(ds, sample_interval, verbose=False, logger=None):
388
388
  return ds
389
389
 
390
390
 
391
- def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True, logger=None):
391
+ def finalize_l0c_dataset(ds, sample_interval, verbose=True, logger=None):
392
392
  """Finalize a L0C dataset with unique sampling interval.
393
393
 
394
- It adds the sampling_interval coordinate and it regularizes
395
- the timesteps for trailing seconds.
394
+ It adds the sampling_interval coordinate and it regularizes the timesteps for trailing seconds.
396
395
  """
397
396
  # Add sample interval as coordinate
398
397
  ds = add_sample_interval(ds, sample_interval=sample_interval)
@@ -409,9 +408,6 @@ def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True,
409
408
 
410
409
  # Performs checks about timesteps regularity
411
410
  ds = check_timesteps_regularity(ds=ds, sample_interval=sample_interval, verbose=verbose, logger=logger)
412
-
413
- # Slice for requested day
414
- ds = ds.sel({"time": slice(start_day, end_day)})
415
411
  return ds
416
412
 
417
413
 
@@ -442,7 +438,7 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
442
438
  - The function adds a tolerance for searching timesteps
443
439
  before and after 00:00 to account for imprecise logging times.
444
440
  - It checks that duplicated timesteps have the same raw drop number values.
445
- - The function infers the time integration sample interval and
441
+ - The function infers the sample interval and
446
442
  regularizes timesteps to handle trailing seconds.
447
443
  - The data is loaded into memory and connections to source files
448
444
  are closed before returning the dataset.
@@ -461,10 +457,8 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
461
457
 
462
458
  # ---------------------------------------------------------------------------------------.
463
459
  # Open files with data within the provided day and concatenate them
464
- # list_ds = [xr.open_dataset(filepath, chunks={}).sel({"time": slice(start_day_tol, end_day_tol)})
465
- # for filepath in filepaths]
466
460
  list_ds = [
467
- xr.open_dataset(filepath, decode_timedelta=False, chunks={}, cache=False).sortby("time")
461
+ xr.open_dataset(filepath, decode_timedelta=False, chunks=-1, cache=False).sortby("time")
468
462
  for filepath in filepaths
469
463
  ]
470
464
  list_ds = [ds.sel({"time": slice(start_day_tol, end_day_tol)}) for ds in list_ds]
@@ -533,11 +527,9 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
533
527
  sample_interval: finalize_l0c_dataset(
534
528
  ds=ds,
535
529
  sample_interval=sample_interval,
536
- start_day=start_day,
537
- end_day=end_day,
538
530
  verbose=verbose,
539
531
  logger=logger,
540
- )
532
+ ).sel({"time": slice(start_day, end_day)})
541
533
  for sample_interval, ds in dict_ds.items()
542
534
  }
543
535
  return dict_ds
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for DELFT Thies LPM sensor in netCDF format."""
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset
22
+
23
+
24
+ @is_documented_by(reader_generic_docstring)
25
+ def reader(
26
+ filepath,
27
+ logger=None,
28
+ ):
29
+ """Reader."""
30
+ ##------------------------------------------------------------------------.
31
+ #### Open the netCDF
32
+ ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
33
+
34
+ ##------------------------------------------------------------------------.
35
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
36
+ # Add time coordinate
37
+ ds["time"] = ds["time_as_string"].astype("M8[s]")
38
+ ds["time"].attrs.pop("comment", None)
39
+ ds["time"].attrs.pop("units", None)
40
+ ds = ds.set_coords("time")
41
+
42
+ # Define dictionary mapping dataset variables to select and rename
43
+ dict_names = {
44
+ ### Dimensions
45
+ "diameter_classes": "diameter_bin_center",
46
+ "velocity_classes": "velocity_bin_center",
47
+ ### Variables
48
+ "liquid_precip_intensity": "rainfall_rate",
49
+ "solid_precip_intensity": "snowfall_rate",
50
+ "all_precip_intensity": "precipitation_rate",
51
+ "weather_code_synop_4680": "weather_code_synop_4680",
52
+ "weather_code_synop_4677": "weather_code_synop_4677",
53
+ "reflectivity": "reflectivity",
54
+ "visibility": "mor_visibility",
55
+ "total_number_particles": "number_particles",
56
+ "ambient_temperature": "temperature_ambient",
57
+ "status_laser": "laser_status",
58
+ "measurement_quality": "quality_index",
59
+ "raw_spectrum": "raw_drop_number",
60
+ }
61
+
62
+ # Rename dataset variables and columns and infill missing variables
63
+ ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="LPM")
64
+
65
+ # Return the dataset adhering to DISDRODB L0B standards
66
+ return ds
@@ -62,6 +62,9 @@ def reader(
62
62
  # - Available: gzip, bz2, zip
63
63
  reader_kwargs["compression"] = "infer"
64
64
 
65
+ # - Skip rows with badly encoded data
66
+ reader_kwargs["encoding_errors"] = "replace"
67
+
65
68
  # - Strings to recognize as NA/NaN and replace with standard NA flags
66
69
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
67
70
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',