disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (129)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +145 -14
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  37. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  38. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  39. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  40. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  41. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  42. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  43. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
  44. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
  45. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  46. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  47. disdrodb/l0/l0a_processing.py +30 -30
  48. disdrodb/l0/l0b_nc_processing.py +108 -2
  49. disdrodb/l0/l0b_processing.py +4 -4
  50. disdrodb/l0/l0c_processing.py +5 -13
  51. disdrodb/l0/manuals/SWS250.pdf +0 -0
  52. disdrodb/l0/manuals/VPF730.pdf +0 -0
  53. disdrodb/l0/manuals/VPF750.pdf +0 -0
  54. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  55. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  56. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  57. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
  58. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
  59. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  62. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  63. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  64. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  65. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  66. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  68. disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  70. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  71. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
  72. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  73. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
  77. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
  78. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  79. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  80. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  81. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  82. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  83. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  84. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
  85. disdrodb/l0/routines.py +105 -14
  86. disdrodb/l1/__init__.py +5 -0
  87. disdrodb/l1/filters.py +34 -20
  88. disdrodb/l1/processing.py +45 -44
  89. disdrodb/l1/resampling.py +77 -66
  90. disdrodb/l1/routines.py +35 -42
  91. disdrodb/l1_env/routines.py +18 -3
  92. disdrodb/l2/__init__.py +7 -0
  93. disdrodb/l2/empirical_dsd.py +58 -10
  94. disdrodb/l2/event.py +27 -120
  95. disdrodb/l2/processing.py +267 -116
  96. disdrodb/l2/routines.py +618 -254
  97. disdrodb/metadata/standards.py +3 -1
  98. disdrodb/psd/fitting.py +463 -144
  99. disdrodb/psd/models.py +8 -5
  100. disdrodb/routines.py +3 -3
  101. disdrodb/scattering/__init__.py +16 -4
  102. disdrodb/scattering/axis_ratio.py +56 -36
  103. disdrodb/scattering/permittivity.py +486 -0
  104. disdrodb/scattering/routines.py +701 -159
  105. disdrodb/summary/__init__.py +17 -0
  106. disdrodb/summary/routines.py +4120 -0
  107. disdrodb/utils/attrs.py +68 -125
  108. disdrodb/utils/compression.py +30 -1
  109. disdrodb/utils/dask.py +59 -8
  110. disdrodb/utils/dataframe.py +63 -9
  111. disdrodb/utils/directories.py +49 -17
  112. disdrodb/utils/encoding.py +33 -19
  113. disdrodb/utils/logger.py +13 -6
  114. disdrodb/utils/manipulations.py +71 -0
  115. disdrodb/utils/subsetting.py +214 -0
  116. disdrodb/utils/time.py +165 -19
  117. disdrodb/utils/writer.py +20 -7
  118. disdrodb/utils/xarray.py +85 -4
  119. disdrodb/viz/__init__.py +13 -0
  120. disdrodb/viz/plots.py +327 -0
  121. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  122. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
  123. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  124. disdrodb/l1/encoding_attrs.py +0 -635
  125. disdrodb/l2/processing_options.py +0 -213
  126. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  127. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  128. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  129. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/api/io.py CHANGED
@@ -17,6 +17,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Routines to list and open DISDRODB products."""
+import datetime
 import os
 import shutil
 import subprocess
@@ -24,6 +25,14 @@ import sys
 from pathlib import Path
 from typing import Optional
 
+import numpy as np
+
+from disdrodb.api.checks import (
+    check_filepaths,
+    check_start_end_time,
+    get_current_utc_time,
+)
+from disdrodb.api.info import get_start_end_time_from_filepaths
 from disdrodb.api.path import (
     define_campaign_dir,
     define_data_dir,
@@ -48,6 +57,75 @@ def filter_filepaths(filepaths, debugging_mode):
     return filepaths
 
 
+def is_within_time_period(l_start_time, l_end_time, start_time, end_time):
+    """Assess which files are within the start and end time."""
+    # - Case 1
+    #     s               e
+    #     |               |
+    #   ---------> (-------->)
+    idx_select1 = np.logical_and(l_start_time <= start_time, l_end_time > start_time)
+    # - Case 2
+    #     s               e
+    #     |               |
+    #        ---------(-.)
+    idx_select2 = np.logical_and(l_start_time >= start_time, l_end_time <= end_time)
+    # - Case 3
+    #     s               e
+    #     |               |
+    #          -------------
+    idx_select3 = np.logical_and(l_start_time < end_time, l_end_time > end_time)
+    # - Get idx where one of the cases occurs
+    idx_select = np.logical_or.reduce([idx_select1, idx_select2, idx_select3])
+    return idx_select
+
+
+def filter_by_time(filepaths, start_time=None, end_time=None):
+    """Filter filepaths by start_time and end_time.
+
+    Parameters
+    ----------
+    filepaths : list
+        List of filepaths.
+    start_time : datetime.datetime
+        Start time.
+        If ``None``, it is set to 1978-01-01.
+    end_time : datetime.datetime
+        End time.
+        If ``None``, it is set to the current UTC time.
+
+    Returns
+    -------
+    filepaths : list
+        List of valid filepaths.
+        If no valid filepaths are found, an empty list is returned.
+
+    """
+    # -------------------------------------------------------------------------.
+    # Check filepaths
+    if isinstance(filepaths, type(None)):
+        return []
+    filepaths = check_filepaths(filepaths)
+    if len(filepaths) == 0:
+        return []
+
+    # -------------------------------------------------------------------------.
+    # Check start_time and end_time
+    if start_time is None:
+        start_time = datetime.datetime(1978, 1, 1, 0, 0, 0)  # Dummy start
+    if end_time is None:
+        end_time = get_current_utc_time()  # Current time
+    start_time, end_time = check_start_end_time(start_time, end_time)
+
+    # -------------------------------------------------------------------------.
+    # - Retrieve start_time and end_time of the files
+    l_start_time, l_end_time = get_start_end_time_from_filepaths(filepaths)
+
+    # -------------------------------------------------------------------------.
+    # Select files with data within the start and end time
+    idx_select = is_within_time_period(l_start_time, l_end_time, start_time=start_time, end_time=end_time)
+    return np.array(filepaths)[idx_select].tolist()
+
+
 def find_files(
     data_source,
     campaign_name,
@@ -55,7 +133,9 @@ def find_files(
     product,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
-    glob_pattern="*",
+    glob_pattern=None,
+    start_time=None,
+    end_time=None,
     **product_kwargs,
 ):
     """Retrieve DISDRODB product files for a give station.
@@ -100,6 +180,8 @@ def find_files(
         List of file paths.
 
     """
+    from disdrodb.metadata import read_station_metadata
+
     # Retrieve data directory
     data_dir = define_data_dir(
         data_archive_dir=data_archive_dir,
@@ -110,8 +192,16 @@ def find_files(
         # Product options
         **product_kwargs,
     )
-
-    # Define or check the specified glob pattern
+    # For the DISDRODB RAW product, retrieve glob_pattern from metadata if not specified
+    if product == "RAW" and glob_pattern is None:
+        metadata = read_station_metadata(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+        glob_pattern = metadata.get("raw_data_glob_pattern", "")
+
+    # For the other DISDRODB products, define the correct glob pattern
     if product != "RAW":
         glob_pattern = "*.parquet" if product == "L0A" else "*.nc"
 
@@ -126,6 +216,13 @@ def find_files(
         msg = f"No {product} files are available in {data_dir}. Run {product} processing first."
         raise ValueError(msg)
 
+    # Filter files by start_time and end_time
+    if product != "RAW":
+        filepaths = filter_by_time(filepaths=filepaths, start_time=start_time, end_time=end_time)
+        if len(filepaths) == 0:
+            msg = f"No {product} files are available between {start_time} and {end_time}."
+            raise ValueError(msg)
+
     # Sort filepaths
     filepaths = sorted(filepaths)
     return filepaths
@@ -133,6 +230,117 @@ def find_files(
 
 ####----------------------------------------------------------------------------------
 #### DISDRODB Open Product Files
+
+
+def open_raw_files(filepaths, data_source, campaign_name, station_name):
+    """Open raw files into the DISDRODB L0A or L0B format.
+
+    Raw text files are opened into a DISDRODB L0A pandas DataFrame.
+    Raw netCDF files are opened into a DISDRODB L0B xarray Dataset.
+    """
+    from disdrodb.issue import read_station_issue
+    from disdrodb.l0 import generate_l0a, generate_l0b_from_nc, get_station_reader
+    from disdrodb.metadata import read_station_metadata
+
+    # Read station metadata
+    metadata = read_station_metadata(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+    sensor_name = metadata["sensor_name"]
+
+    # Read station issue YAML file
+    try:
+        issue_dict = read_station_issue(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+    except Exception:
+        issue_dict = None
+
+    # Get reader
+    reader = get_station_reader(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+    # Return a DISDRODB L0A dataframe if raw text files
+    if metadata["raw_data_format"] == "txt":
+        df = generate_l0a(
+            filepaths=filepaths,
+            reader=reader,
+            sensor_name=sensor_name,
+            issue_dict=issue_dict,
+            verbose=False,
+        )
+        return df
+
+    # Return a DISDRODB L0B dataset if raw netCDF files
+    ds = generate_l0b_from_nc(
+        filepaths=filepaths,
+        reader=reader,
+        sensor_name=sensor_name,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        verbose=False,
+    )
+    return ds
+
+
+def open_netcdf_files(
+    filepaths,
+    chunks=-1,
+    start_time=None,
+    end_time=None,
+    variables=None,
+    parallel=False,
+    compute=True,
+    **open_kwargs,
+):
+    """Open DISDRODB netCDF files using xarray."""
+    import xarray as xr
+
+    # Ensure variables is a list
+    if variables is not None and isinstance(variables, str):
+        variables = [variables]
+    # Define preprocessing function for parallel opening
+    preprocess = (lambda ds: ds[variables]) if parallel and variables is not None else None
+
+    # Open netCDF files
+    ds = xr.open_mfdataset(
+        filepaths,
+        chunks=chunks,
+        combine="nested",
+        concat_dim="time",
+        engine="netcdf4",
+        parallel=parallel,
+        preprocess=preprocess,
+        compat="no_conflicts",
+        combine_attrs="override",
+        coords="different",  # maybe minimal?
+        decode_timedelta=False,
+        cache=False,
+        autoclose=True,
+        **open_kwargs,
+    )
+    # - Subset variables
+    if variables is not None and preprocess is None:
+        ds = ds[variables]
+    # - Subset time
+    ds = ds.sel(time=slice(start_time, end_time))
+    # - If compute=True, load in memory and close connections to files
+    if compute:
+        dataset = ds.compute()
+        ds.close()
+        dataset.close()
+        del ds
+    else:
+        dataset = ds
+    return dataset
+
+
 def open_dataset(
     data_source,
     campaign_name,
@@ -141,7 +349,12 @@ def open_dataset(
     product_kwargs=None,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
+    chunks=-1,
     parallel=False,
+    compute=False,
+    start_time=None,
+    end_time=None,
+    variables=None,
     **open_kwargs,
 ):
     """Retrieve DISDRODB product files for a give station.
@@ -179,13 +392,8 @@ def open_dataset(
     xarray.Dataset
 
     """
-    import xarray as xr
-
     from disdrodb.l0.l0a_processing import read_l0a_dataframe
 
-    # Check product validity
-    if product == "RAW":
-        raise ValueError("It's not possible to open the raw data with this function.")
     product_kwargs = product_kwargs if product_kwargs else {}
 
     # List product files
@@ -196,25 +404,36 @@ def open_dataset(
         station_name=station_name,
         product=product,
         debugging_mode=debugging_mode,
+        start_time=start_time,
+        end_time=end_time,
         **product_kwargs,
     )
 
+    # Open RAW files
+    # - For raw txt files return a DISDRODB L0A dataframe
+    # - For raw netCDF files return a DISDRODB L0B dataset
+    if product == "RAW":
+        obj = open_raw_files(
+            filepaths=filepaths,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+        return obj
+
     # Open L0A Parquet files
     if product == "L0A":
         return read_l0a_dataframe(filepaths)
 
     # Open DISDRODB netCDF files using xarray
-    # - TODO: parallel option and add closers !
-    # - decode_timedelta -- > sample_interval not decoded to timedelta !
-    # list_ds = [xr.open_dataset(fpath, decode_timedelta=False, **open_kwargs) for fpath in filepaths]
-    # ds = xr.concat(list_ds, dim="time")
-    ds = xr.open_mfdataset(
-        filepaths,
-        engine="netcdf4",
-        combine="nested",  # 'by_coords',
-        concat_dim="time",
-        decode_timedelta=False,
+    ds = open_netcdf_files(
+        filepaths=filepaths,
+        chunks=chunks,
+        start_time=start_time,
+        end_time=end_time,
+        variables=variables,
         parallel=parallel,
+        compute=compute,
         **open_kwargs,
     )
     return ds
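
A minimal usage sketch of the time-filtering API introduced above. The data_source/campaign_name/station_name values are placeholders for a station present in the local archive, and raw_drop_number is used only as an example variable name:

    import datetime

    from disdrodb.api.io import find_files, open_dataset

    station = dict(data_source="DATA_SOURCE", campaign_name="CAMPAIGN_NAME", station_name="STATION_NAME")
    window = dict(start_time=datetime.datetime(2022, 1, 1), end_time=datetime.datetime(2022, 2, 1))

    # List only the L0C files overlapping the time window
    # (filter_by_time is applied internally for every product except RAW).
    filepaths = find_files(product="L0C", **station, **window)

    # Open the same window lazily, subsetting to a single variable;
    # compute=True would instead load the data and close the file handles.
    ds = open_dataset(product="L0C", variables="raw_drop_number", compute=False, **station, **window)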
disdrodb/api/path.py CHANGED
@@ -20,11 +20,12 @@
 import os
 
 from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
+from disdrodb.constants import ARCHIVE_VERSION
 from disdrodb.utils.directories import check_directory_exists
 from disdrodb.utils.time import (
     ensure_sample_interval_in_seconds,
     get_file_start_end_time,
-    seconds_to_acronym,
+    seconds_to_temporal_resolution,
 )
 
 ####--------------------------------------------------------------------------.
@@ -68,8 +69,6 @@ def define_disdrodb_path(
     dir_path : str
         Directory path
     """
-    from disdrodb import ARCHIVE_VERSION
-
     if len(campaign_name) > 0 and len(data_source) == 0:
         raise ValueError("If campaign_name is specified, data_source must be specified.")
 
@@ -349,6 +348,55 @@ def define_config_dir(product):
 #### Directory/Filepaths L0A and L0B products
 
 
+def define_partitioning_tree(time, folder_partitioning):
+    """Define the time directory tree given a timestep.
+
+    Parameters
+    ----------
+    time : datetime.datetime
+        Timestep.
+    folder_partitioning : str
+        Define the subdirectory structure where files are saved.
+        Allowed values are:
+        - "": Files are saved directly in data_dir.
+        - "year": Files are saved under a subdirectory for the year.
+        - "year/month": Files are saved under subdirectories for year and month.
+        - "year/month/day": Files are saved under subdirectories for year, month and day.
+        - "year/month_name": Files are saved under subdirectories for year and month name.
+        - "year/quarter": Files are saved under subdirectories for year and quarter.
+
+    Returns
+    -------
+    str
+        A time-partitioned directory tree.
+    """
+    if folder_partitioning == "":
+        return ""
+    if folder_partitioning == "year":
+        year = str(time.year)
+        return year
+    if folder_partitioning == "year/month":
+        year = str(time.year)
+        month = str(time.month).zfill(2)
+        return os.path.join(year, month)
+    if folder_partitioning == "year/month/day":
+        year = str(time.year)
+        month = str(time.month).zfill(2)
+        day = str(time.day).zfill(2)
+        return os.path.join(year, month, day)
+    if folder_partitioning == "year/month_name":
+        year = str(time.year)
+        month = str(time.month_name())
+        return os.path.join(year, month)
+    if folder_partitioning == "year/quarter":
+        year = str(time.year)
+        # Calculate quarter: months 1-3 => Q1, 4-6 => Q2, etc.
+        quarter = (time.month - 1) // 3 + 1
+        quarter_dir = f"Q{quarter}"
+        return os.path.join(year, quarter_dir)
+    raise NotImplementedError(f"Unrecognized '{folder_partitioning}' folder partitioning scheme.")
+
+
 def define_file_folder_path(obj, data_dir, folder_partitioning):
     """
     Define the folder path where saving a file based on the dataset's starting time.
@@ -382,32 +430,9 @@ def define_file_folder_path(obj, data_dir, folder_partitioning):
     # Retrieve the starting time from the dataset.
     starting_time, _ = get_file_start_end_time(obj)
 
-    # Build the folder path based on the chosen partition scheme.
-    if folder_partitioning == "":
-        return data_dir
-    if folder_partitioning == "year":
-        year = str(starting_time.year)
-        return os.path.join(data_dir, year)
-    if folder_partitioning == "year/month":
-        year = str(starting_time.year)
-        month = str(starting_time.month).zfill(2)
-        return os.path.join(data_dir, year, month)
-    if folder_partitioning == "year/month/day":
-        year = str(starting_time.year)
-        month = str(starting_time.month).zfill(2)
-        day = str(starting_time.day).zfill(2)
-        return os.path.join(data_dir, year, month, day)
-    if folder_partitioning == "year/month_name":
-        year = str(starting_time.year)
-        month = str(starting_time.month_name())
-        return os.path.join(data_dir, year, month)
-    if folder_partitioning == "year/quarter":
-        year = str(starting_time.year)
-        # Calculate quarter: months 1-3 => Q1, 4-6 => Q2, etc.
-        quarter = (starting_time.month - 1) // 3 + 1
-        quarter_dir = f"Q{quarter}"
-        return os.path.join(data_dir, year, quarter_dir)
-    raise NotImplementedError(f"Unrecognized '{folder_partitioning}' folder partitioning scheme.")
+    # Build the folder path based on the chosen partition scheme
+    partitioning_tree = define_partitioning_tree(time=starting_time, folder_partitioning=folder_partitioning)
+    return os.path.join(data_dir, partitioning_tree)
 
 
 def define_product_dir_tree(
@@ -448,16 +473,16 @@ def define_product_dir_tree(
         sample_interval = product_kwargs.get("sample_interval")
         check_rolling(rolling)
         check_sample_interval(sample_interval)
-        sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
-        return os.path.join(sample_interval_acronym)
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        return os.path.join(temporal_resolution)
     if product == "L2M":
         rolling = product_kwargs.get("rolling")
         sample_interval = product_kwargs.get("sample_interval")
         model_name = product_kwargs.get("model_name")
         check_rolling(rolling)
         check_sample_interval(sample_interval)
-        sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
-        return os.path.join(model_name, sample_interval_acronym)
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        return os.path.join(model_name, temporal_resolution)
     raise ValueError(f"The product {product} is not defined.")
 
 
@@ -629,15 +654,15 @@ def define_data_dir(
 #### Filenames for DISDRODB products
 
 
-def define_accumulation_acronym(seconds, rolling):
-    """Define the accumulation acronnym.
+def define_temporal_resolution(seconds, rolling):
+    """Define the DISDRODB product temporal resolution.
 
-    Prefix the accumulation interval acronym with ROLL if rolling=True.
+    Prefix the measurement interval with ROLL if rolling=True.
     """
-    accumulation_acronym = seconds_to_acronym(seconds)
+    temporal_resolution = seconds_to_temporal_resolution(seconds)
     if rolling:
-        accumulation_acronym = f"ROLL{accumulation_acronym}"
-    return accumulation_acronym
+        temporal_resolution = f"ROLL{temporal_resolution}"
+    return temporal_resolution
 
 
 ####--------------------------------------------------------------------------.
@@ -685,32 +710,31 @@ def define_filename(
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
     from disdrodb.api.checks import check_product, check_product_kwargs
 
     product = check_product(product)
     product_kwargs = check_product_kwargs(product, product_kwargs)
 
     # -----------------------------------------.
-    # TODO: Define sample_interval_acronym
-    # - ADD sample_interval_acronym also to L0A and L0B
-    # - Add sample_interval_acronym also to L0C and L1
+    # TODO: Define temporal_resolution
+    # - ADD temporal_resolution also to L0A and L0B
+    # - Add temporal_resolution also to L0C and L1
 
     # -----------------------------------------.
-    # Define product acronym
-    product_acronym = f"{product}"
+    # Define product name
+    product_name = f"{product}"
     if product in ["L2E", "L2M"]:
         rolling = product_kwargs.get("rolling")
         sample_interval = product_kwargs.get("sample_interval")
-        sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
-        product_acronym = f"L2E.{sample_interval_acronym}"
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        product_name = f"L2E.{temporal_resolution}"
     if product in ["L2M"]:
         model_name = product_kwargs.get("model_name")
-        product_acronym = f"L2M_{model_name}.{sample_interval_acronym}"
+        product_name = f"L2M_{model_name}.{temporal_resolution}"
 
     # -----------------------------------------.
     # Define base filename
-    filename = f"{product_acronym}.{campaign_name}.{station_name}"
+    filename = f"{product_name}.{campaign_name}.{station_name}"
 
     # -----------------------------------------.
     # Add prefix
@@ -759,8 +783,6 @@ def define_l0a_filename(df, campaign_name: str, station_name: str) -> str:
     str
         L0A file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     starting_time, ending_time = get_file_start_end_time(df)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
@@ -786,8 +808,6 @@ def define_l0b_filename(ds, campaign_name: str, station_name: str) -> str:
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
@@ -813,18 +833,14 @@ def define_l0c_filename(ds, campaign_name: str, station_name: str) -> str:
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     # TODO: add sample_interval as argument
     sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
-    sample_interval_acronym = define_accumulation_acronym(sample_interval, rolling=False)
+    temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L0C.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L0C.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -845,18 +861,14 @@ def define_l1_filename(ds, campaign_name, station_name: str) -> str:
     str
         L1 file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
    # TODO: add sample_interval as argument
     sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
-    sample_interval_acronym = define_accumulation_acronym(sample_interval, rolling=False)
+    temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L1.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L1.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -877,16 +889,12 @@ def define_l2e_filename(ds, campaign_name: str, station_name: str, sample_interv
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
-    sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L2E.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L2E.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -914,15 +922,13 @@ def define_l2m_filename(
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
-    sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
     filename = (
-        f"L2M_{model_name}.{sample_interval_acronym}.{campaign_name}."
+        f"L2M_{model_name}.{temporal_resolution}.{campaign_name}."
        + f"{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     )
     return filename
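
A minimal sketch of the two helpers introduced or renamed above, assuming a pandas Timestamp input (the "year/month_name" branch calls time.month_name(), which plain datetime objects lack) and assuming seconds_to_temporal_resolution renders 600 seconds as "10MIN", consistent with the 10MIN product configs added in this release:

    import pandas as pd

    from disdrodb.api.path import define_partitioning_tree, define_temporal_resolution

    time = pd.Timestamp("2021-07-14 09:30:00")
    define_partitioning_tree(time, "year/month/day")  # '2021/07/14' (joined with os.sep)
    define_partitioning_tree(time, "year/quarter")    # '2021/Q3'    (July falls in Q3)
    define_partitioning_tree(time, "")                # ''           (files go directly in data_dir)

    define_temporal_resolution(seconds=600, rolling=False)  # '10MIN'
    define_temporal_resolution(seconds=600, rolling=True)   # 'ROLL10MIN'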
disdrodb/api/search.py CHANGED
@@ -16,14 +16,13 @@ from disdrodb.api.path import (
     define_station_dir,
 )
 from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
-from disdrodb.utils.directories import contains_files, contains_netcdf_or_parquet_files
+from disdrodb.constants import PRODUCTS_REQUIREMENTS
+from disdrodb.utils.directories import contains_files, contains_netcdf_or_parquet_files, list_directories, list_files
 from disdrodb.utils.yaml import read_yaml
 
 
 def get_required_product(product):
     """Determine the required product for input product processing."""
-    from disdrodb import PRODUCTS_REQUIREMENTS
-
     # Check input
     check_product(product)
     # Determine required product
@@ -37,7 +36,8 @@ def get_required_product(product):
 
 def list_data_sources(metadata_archive_dir, data_sources=None, invalid_fields_policy="raise"):
     """List data sources names in the DISDRODB Metadata Archive."""
-    available_data_sources = os.listdir(os.path.join(metadata_archive_dir, "METADATA"))
+    path = os.path.join(metadata_archive_dir, "METADATA")
+    available_data_sources = sorted(list_directories(path, return_paths=False))
     # Filter by optionally specified data_sources
     if data_sources is not None:
         available_data_sources = check_valid_fields(
@@ -52,7 +52,7 @@ def list_data_sources(metadata_archive_dir, data_sources=None, invalid_fields_po
 
 def _list_campaign_names(metadata_archive_dir, data_source):
     data_source_dir = define_data_source_dir(metadata_archive_dir, product="METADATA", data_source=data_source)
-    campaign_names = os.listdir(data_source_dir)
+    campaign_names = sorted(list_directories(data_source_dir, return_paths=False))
     return campaign_names
 
 
@@ -109,7 +109,7 @@ def _list_station_names(metadata_archive_dir, data_source, campaign_name):
         data_source=data_source,
         campaign_name=campaign_name,
     )
-    metadata_filenames = os.listdir(metadata_dir)
+    metadata_filenames = sorted(list_files(metadata_dir, glob_pattern="*.yml", return_paths=False))
     station_names = [fname.replace(".yml", "").replace(".yaml", "") for fname in metadata_filenames]
     return station_names
 
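
The listing changes above replace os.listdir, which returns files and directories mixed together in arbitrary, platform-dependent order, with sorted and type-filtered helpers. A plausible reimplementation of their behavior, for illustration only (not the package's actual code):

    import glob
    import os


    def list_directories(path, return_paths=False):
        """Return the subdirectories of path, optionally as full paths."""
        paths = [p for p in glob.glob(os.path.join(path, "*")) if os.path.isdir(p)]
        return paths if return_paths else [os.path.basename(p) for p in paths]


    def list_files(path, glob_pattern="*", return_paths=False):
        """Return the regular files in path matching glob_pattern."""
        paths = [p for p in glob.glob(os.path.join(path, glob_pattern)) if os.path.isfile(p)]
        return paths if return_paths else [os.path.basename(p) for p in paths]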