disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/info.py +3 -3
  6. disdrodb/api/io.py +48 -8
  7. disdrodb/api/path.py +116 -133
  8. disdrodb/api/search.py +12 -3
  9. disdrodb/cli/disdrodb_create_summary.py +103 -0
  10. disdrodb/cli/disdrodb_create_summary_station.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  13. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  17. disdrodb/data_transfer/download_data.py +123 -7
  18. disdrodb/issue/writer.py +2 -0
  19. disdrodb/l0/l0a_processing.py +10 -5
  20. disdrodb/l0/l0b_nc_processing.py +10 -6
  21. disdrodb/l0/l0b_processing.py +26 -61
  22. disdrodb/l0/l0c_processing.py +369 -251
  23. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  24. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  25. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  26. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  27. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  28. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  29. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  30. disdrodb/l1/fall_velocity.py +46 -0
  31. disdrodb/l1/processing.py +1 -1
  32. disdrodb/l2/processing.py +1 -1
  33. disdrodb/metadata/checks.py +132 -125
  34. disdrodb/psd/fitting.py +172 -205
  35. disdrodb/psd/models.py +1 -1
  36. disdrodb/routines/__init__.py +54 -0
  37. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  38. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  39. disdrodb/{l2/routines.py → routines/l2.py} +249 -462
  40. disdrodb/{routines.py → routines/wrappers.py} +95 -7
  41. disdrodb/scattering/axis_ratio.py +5 -1
  42. disdrodb/scattering/permittivity.py +18 -0
  43. disdrodb/scattering/routines.py +56 -36
  44. disdrodb/summary/routines.py +110 -34
  45. disdrodb/utils/archiving.py +434 -0
  46. disdrodb/utils/cli.py +5 -5
  47. disdrodb/utils/dask.py +62 -1
  48. disdrodb/utils/decorators.py +31 -0
  49. disdrodb/utils/encoding.py +5 -1
  50. disdrodb/{l2 → utils}/event.py +1 -66
  51. disdrodb/utils/logger.py +1 -1
  52. disdrodb/utils/manipulations.py +22 -12
  53. disdrodb/utils/routines.py +166 -0
  54. disdrodb/utils/time.py +3 -291
  55. disdrodb/utils/xarray.py +3 -0
  56. disdrodb/viz/plots.py +85 -14
  57. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
  58. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
  59. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
  60. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  61. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  62. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -239,7 +239,7 @@ def check_consistent_station_name(metadata_filepath, station_name):
239
239
  return station_name
240
240
 
241
241
 
242
- def download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False) -> None:
242
+ def download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False, verbose=True) -> None:
243
243
  """Download and unzip the station data .
244
244
 
245
245
  Parameters
@@ -275,17 +275,27 @@ def download_station_data(metadata_filepath: str, data_archive_dir: str, force:
275
275
  raise ValueError(f"Invalid disdrodb_data_url '{disdrodb_data_url}' for station {station_name}")
276
276
 
277
277
  # Download files
278
- # - Option 1: Download Zip file containing all station raw data
278
+ # - Option 1: Download ZIP file containing all station raw data
279
279
  if disdrodb_data_url.startswith("https://zenodo.org/") or disdrodb_data_url.startswith("https://cloudnet.fmi.fi/"):
280
280
  download_zip_file(url=disdrodb_data_url, dst_dir=station_dir, force=force)
281
+
281
282
  # - Option 2: Recursive download from a web server via HTTP or HTTPS.
282
283
  elif disdrodb_data_url.startswith("http"):
283
- download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=True)
284
+ download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=verbose)
285
+ # - Retry to be more sure that all data have been downloaded
286
+ download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=True, verbose=verbose)
287
+
288
+ # - Option 3: Recursive download from a ftp server
289
+ elif disdrodb_data_url.startswith("ftp"):
290
+ download_ftp_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=verbose)
291
+ # - Retry to be more sure that all data have been downloaded
292
+ download_ftp_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=True, verbose=verbose)
293
+
284
294
  else:
285
295
  raise NotImplementedError(f"Open a GitHub Issue to enable the download of data from {disdrodb_data_url}.")
286
296
 
287
297
 
288
- ####-----------------------------------------------------------------------------------------.
298
+ ####--------------------------------------------------------------------.
289
299
  #### Download from Web Server via HTTP or HTTPS
290
300
 
291
301
 
@@ -301,9 +311,17 @@ def download_web_server_data(url: str, dst_dir: str, force=True, verbose=True) -
301
311
  3. Compute cut-dirs so that only the last segment of the path remains locally.
302
312
  4. Build and run the wget command.
303
313
 
304
- Example:
305
- download_with_wget("https://ruisdael.citg.tudelft.nl/parsivel/PAR001_Cabauw/2021/202101/")
306
- # Creates a local folder "202101/" with all files and subfolders.
314
+ Parameters
315
+ ----------
316
+ url : str
317
+ HTTPS URL pointing to webserver folder. Example: "https://ruisdael.citg.tudelft.nl/parsivel/PAR001_Cabauw/"
318
+ dst_dir : str
319
+ Local directory where to download the file (DISDRODB station data directory).
320
+ force : bool, optional
321
+ If ``True``, re-download new/updated files (skip unchanged ones).
322
+ If ``False``, keep existing files untouched.
323
+ verbose : bool, optional
324
+ Print wget output (default is True).
307
325
  """
308
326
  # 1. Ensure wget exists
309
327
  ensure_wget_available()
@@ -393,6 +411,104 @@ def build_webserver_wget_command(url: str, cut_dirs: int, dst_dir: str, force: b
393
411
  return cmd
394
412
 
395
413
 
414
+ ####--------------------------------------------------------------------.
415
+ #### Download from FTP Server
416
+
417
+
418
def build_ftp_server_wget_command(
    url: str,
    cut_dirs: int,
    dst_dir: str,
    force: bool,
    verbose: bool,
) -> list[str]:
    """Construct the wget command list for an anonymous FTP recursive download.

    Parameters
    ----------
    url : str
        FTP location to download from. It may be given with or without the
        leading ``ftp://`` scheme (e.g. ``"ftp://ftp.example.com/path/"`` or
        ``"ftp.example.com/path/"``); both forms yield the same command.
    cut_dirs : int
        Number of leading path components to strip.
    dst_dir : str
        Local destination directory.
    force : bool
        If True, re-download newer files (--timestamping).
        If False, keep existing files untouched (--no-clobber).
    verbose : bool
        If False, suppress wget output (-q).

    Returns
    -------
    list[str]
        The wget argument list, with the credential-bearing FTP URL as last element.
    """
    # Drop an already-present scheme so that it is not duplicated when the
    # anonymous credentials are prepended below
    # (otherwise: "ftp://anonymous:disdrodb@ftp://host/...").
    scheme = "ftp://"
    location = url[len(scheme) :] if url.lower().startswith(scheme) else url

    cmd = ["wget"]  # base command

    if not verbose:
        cmd.append("-q")  # quiet mode --> no output except errors

    cmd += [
        "-r",  # recursive --> traverse into subdirectories
        "-np",  # no parent --> don't ascend to higher-level dirs
        "-nH",  # no host dirs --> avoid creating ftp.example.com/ locally
        f"--cut-dirs={cut_dirs}",  # strip N leading path components
    ]

    # The two modes are mutually exclusive wget options
    if force:
        cmd.append("--timestamping")  # download if remote file is newer
    else:
        cmd.append("--no-clobber")  # skip files that already exist

    cmd += [
        "-P",  # specify local destination directory
        dst_dir,
        f"{scheme}anonymous:disdrodb@{location}",  # target FTP URL (anonymous login)
    ]
    return cmd
464
+
465
+
466
def download_ftp_server_data(url: str, dst_dir: str, force: bool = False, verbose: bool = True) -> None:
    """Download data from an FTP server with anonymous login.

    The download is delegated to a ``wget`` subprocess whose command line is
    assembled by ``build_ftp_server_wget_command``.

    Parameters
    ----------
    url : str
        FTP server URL pointing to a folder. Example: "ftp://ftp.example.com/path/to/data/"
    dst_dir : str
        Local directory where to download the file (DISDRODB station data directory).
        It is created if it does not exist yet.
    force : bool, optional
        If ``True``, re-download new/updated files (skip unchanged ones).
        If ``False``, keep existing files untouched.
    verbose : bool, optional
        Print wget output (default is True).

    Raises
    ------
    subprocess.CalledProcessError
        If the wget process exits with a non-zero status.
    """
    # Check wget before doing any work
    # (presumably raises when the executable is missing - confirm in the helper)
    ensure_wget_available()

    # Ensure trailing slash
    url = ensure_trailing_slash(url)

    # Compute cut-dirs so files land directly in dst_dir
    cut_dirs = compute_cut_dirs(url)

    # Make destination directory
    os.makedirs(dst_dir, exist_ok=True)

    # Build wget command
    cmd = build_ftp_server_wget_command(
        url,
        cut_dirs=cut_dirs,
        dst_dir=dst_dir,
        force=force,
        verbose=verbose,
    )

    # Run wget
    # - check=True raises subprocess.CalledProcessError on a non-zero exit status.
    # - The exception is left to propagate unchanged: catching it only to raise an
    #   identical copy adds nothing and clutters the traceback.
    subprocess.run(cmd, check=True)
510
+
511
+
396
512
  ####--------------------------------------------------------------------.
397
513
  #### Download from Zenodo
398
514
 
disdrodb/issue/writer.py CHANGED
@@ -120,9 +120,11 @@ def create_station_issue(data_source, campaign_name, station_name, metadata_arch
120
120
  )
121
121
  if os.path.exists(issue_filepath):
122
122
  raise ValueError("A issue YAML file already exists at {issue_filepath}.")
123
+
123
124
  # Create issue dir if not existing
124
125
  issue_dir = os.path.dirname(issue_filepath)
125
126
  os.makedirs(issue_dir, exist_ok=True)
127
+
126
128
  # Write issue file
127
129
  write_issue(filepath=issue_filepath)
128
130
  print(f"An empty issue YAML file for station {station_name} has been created .")
@@ -269,13 +269,15 @@ def remove_issue_timesteps(df, issue_dict, logger=None, verbose=False):
269
269
  # Retrieve timesteps and time_periods
270
270
  timesteps = issue_dict.get("timesteps", None)
271
271
  time_periods = issue_dict.get("time_periods", None)
272
+ timesteps = [] if timesteps is None else timesteps
273
+ time_periods = [] if time_periods is None else time_periods
272
274
 
273
275
  # Drop rows of specified timesteps
274
- if timesteps:
276
+ if len(timesteps) > 0:
275
277
  df = drop_timesteps(df=df, timesteps=timesteps)
276
278
 
277
279
  # Drop rows within specified time_period
278
- if time_periods:
280
+ if len(time_periods) > 0:
279
281
  df = drop_time_periods(df, time_periods=time_periods)
280
282
 
281
283
  # Report number of dropped rows
@@ -804,9 +806,6 @@ def read_l0a_dataframe(
804
806
  # Define the list of dataframe
805
807
  df = pq.ParquetDataset(filepaths).read().to_pandas()
806
808
 
807
- # Ensure no index
808
- df = df.reset_index(drop=True)
809
-
810
809
  # Reduce rows
811
810
  if debugging_mode:
812
811
  n_rows = min(100, len(df))
@@ -815,6 +814,12 @@ def read_l0a_dataframe(
815
814
  # Ensure time is in nanoseconds
816
815
  df["time"] = df["time"].astype("M8[ns]")
817
816
 
817
+ # Ensure sorted by time
818
+ df = df.sort_values(by="time")
819
+
820
+ # Ensure no index
821
+ df = df.reset_index(drop=True)
822
+
818
823
  # ---------------------------------------------------
819
824
  # Return dataframe
820
825
  return df
@@ -36,7 +36,6 @@ from disdrodb.l0.standards import (
36
36
  from disdrodb.utils.logger import (
37
37
  log_error,
38
38
  # log_warning,
39
- # log_debug,
40
39
  log_info,
41
40
  )
42
41
 
@@ -347,7 +346,7 @@ def drop_timesteps(ds, timesteps: list):
347
346
  # Ensure there's at least one timestep left
348
347
  if ds_filtered.sizes.get("time", 0) == 0:
349
348
  raise ValueError(
350
- "No timesteps left after removing problematic timesteps. " "Maybe you need to adjust the issue YAML file.",
349
+ "No timesteps left after removing problematic timesteps. Maybe you need to adjust the issue YAML file.",
351
350
  )
352
351
  return ds_filtered
353
352
 
@@ -423,16 +422,21 @@ def remove_issue_timesteps(
423
422
  ValueError
424
423
  If after removing specified timesteps/periods no data remains.
425
424
  """
425
+ # Retrieve number of initial rows
426
426
  n_initial = ds.sizes.get("time", 0)
427
- timesteps = issue_dict.get("timesteps", []) or []
428
- time_periods = issue_dict.get("time_periods", []) or []
427
+
428
+ # Retrieve timesteps and time_periods
429
+ timesteps = issue_dict.get("timesteps")
430
+ time_periods = issue_dict.get("time_periods")
431
+ timesteps = [] if timesteps is None else timesteps
432
+ time_periods = [] if time_periods is None else time_periods
429
433
 
430
434
  # Drop individual timesteps
431
- if timesteps:
435
+ if len(timesteps) > 0:
432
436
  ds = drop_timesteps(ds, timesteps)
433
437
 
434
438
  # Drop intervals of time
435
- if time_periods:
439
+ if len(time_periods) > 0:
436
440
  ds = drop_time_periods(ds, time_periods)
437
441
 
438
442
  # Report number dropped
@@ -19,7 +19,6 @@
19
19
  """Functions to process DISDRODB L0A files into DISDRODB L0B netCDF files."""
20
20
 
21
21
  import logging
22
- import os
23
22
 
24
23
  import numpy as np
25
24
  import pandas as pd
@@ -43,13 +42,8 @@ from disdrodb.utils.attrs import (
43
42
  set_coordinate_attributes,
44
43
  set_disdrodb_attrs,
45
44
  )
46
- from disdrodb.utils.directories import create_directory, remove_if_exists
47
45
  from disdrodb.utils.encoding import set_encodings
48
- from disdrodb.utils.logger import (
49
- # log_warning,
50
- # log_debug,
51
- log_info,
52
- )
46
+ from disdrodb.utils.logger import log_info
53
47
  from disdrodb.utils.time import ensure_sorted_by_time
54
48
 
55
49
  logger = logging.getLogger(__name__)
@@ -246,12 +240,20 @@ def retrieve_l0b_arrays(
246
240
  unavailable_keys.append(key)
247
241
  continue
248
242
 
249
- # Ensure is a string
250
- df_series = df[key].astype(str)
243
+ # Ensure is a string, get a numpy array for each row and then stack
244
+ # - Option 1: Clear but lot of copies
245
+ # df_series = df[key].astype(str)
246
+ # list_arr = df_series.apply(_format_string_array, n_values=n_values)
247
+ # arr = np.stack(list_arr, axis=0)
248
+
249
+ # - Option 2: still copies
250
+ # arr = np.vstack(_format_string_array(s, n_values=n_values) for s in df_series.astype(str))
251
251
 
252
- # Get a numpy array for each row and then stack
253
- list_arr = df_series.apply(_format_string_array, n_values=n_values)
254
- arr = np.stack(list_arr, axis=0)
252
+ # - Option 3: more memory efficient
253
+ n_timesteps = len(df[key])
254
+ arr = np.empty((n_timesteps, n_values), dtype=float) # preallocates
255
+ for i, s in enumerate(df[key].astype(str)):
256
+ arr[i, :] = _format_string_array(s, n_values=n_values)
255
257
 
256
258
  # Retrieve dimensions
257
259
  dims_order = dims_order_dict[key]
@@ -333,18 +335,6 @@ def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
333
335
  return ds
334
336
 
335
337
 
336
- def _set_dataset_attrs(ds, sensor_name):
337
- """Set variable and coordinates attributes."""
338
- # - Add netCDF variable attributes
339
- # --> Attributes: long_name, units, descriptions, valid_min, valid_max
340
- ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
341
- # - Add netCDF coordinate attributes
342
- ds = set_coordinate_attributes(ds=ds)
343
- # - Set DISDRODB global attributes
344
- ds = set_disdrodb_attrs(ds=ds, product="L0B")
345
- return ds
346
-
347
-
348
338
  def add_dataset_crs_coords(ds):
349
339
  """Add the CRS coordinate to the xr.Dataset."""
350
340
  # TODO: define CF-compliant CRS !
@@ -475,16 +465,25 @@ def finalize_dataset(ds, sensor_name, metadata):
475
465
  ds = add_dataset_crs_coords(ds)
476
466
 
477
467
  # Set netCDF dimension order
468
+ # --> Required for correct encoding !
478
469
  ds = ds.transpose("time", "diameter_bin_center", ...)
479
470
 
480
- # Add netCDF variable and coordinate attributes
481
- ds = _set_dataset_attrs(ds, sensor_name)
482
-
483
471
  # Ensure variables with dtype object are converted to string
484
472
  ds = _convert_object_variables_to_string(ds)
485
473
 
474
+ # Add netCDF variable and coordinate attributes
475
+ # - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
476
+ ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
477
+ # - Add netCDF coordinate attributes
478
+ ds = set_coordinate_attributes(ds=ds)
479
+ # - Set DISDRODB global attributes
480
+ ds = set_disdrodb_attrs(ds=ds, product="L0B")
481
+
486
482
  # Check L0B standards
487
483
  check_l0b_standards(ds)
484
+
485
+ # Set L0B encodings
486
+ ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
488
487
  return ds
489
488
 
490
489
 
@@ -508,38 +507,4 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
508
507
  return ds
509
508
 
510
509
 
511
- def write_l0b(ds: xr.Dataset, filepath: str, force=False) -> None:
512
- """Save the xarray dataset into a NetCDF file.
513
-
514
- Parameters
515
- ----------
516
- ds : xarray.Dataset
517
- Input xarray dataset.
518
- filepath : str
519
- Output file path.
520
- sensor_name : str
521
- Name of the sensor.
522
- force : bool, optional
523
- Whether to overwrite existing data.
524
- If ``True``, overwrite existing data into destination directories.
525
- If ``False``, raise an error if there are already data into destination directories. This is the default.
526
- """
527
- # Create station directory if does not exist
528
- create_directory(os.path.dirname(filepath))
529
-
530
- # Check if the file already exists
531
- # - If force=True --> Remove it
532
- # - If force=False --> Raise error
533
- remove_if_exists(filepath, force=force)
534
-
535
- # Get sensor name from dataset
536
- sensor_name = ds.attrs.get("sensor_name")
537
-
538
- # Set encodings
539
- ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
540
-
541
- # Write netcdf
542
- ds.to_netcdf(filepath, engine="netcdf4")
543
-
544
-
545
510
  ####--------------------------------------------------------------------------.