disdrodb 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. disdrodb/_version.py +2 -2
  2. disdrodb/accessor/methods.py +10 -3
  3. disdrodb/api/checks.py +1 -1
  4. disdrodb/api/io.py +6 -1
  5. disdrodb/constants.py +1 -1
  6. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  7. disdrodb/etc/products/L1/global.yaml +1 -1
  8. disdrodb/etc/products/L2E/global.yaml +1 -1
  9. disdrodb/etc/products/L2M/global.yaml +1 -1
  10. disdrodb/issue/checks.py +2 -2
  11. disdrodb/l0/check_configs.py +1 -1
  12. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  13. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  14. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  15. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  16. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  17. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  18. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  19. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  20. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  21. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  22. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  23. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  24. disdrodb/l0/l0a_processing.py +6 -2
  25. disdrodb/l0/l0b_processing.py +26 -19
  26. disdrodb/l0/l0c_processing.py +10 -0
  27. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  28. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  29. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  30. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  31. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  32. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  33. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  34. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  35. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  36. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  37. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  38. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  39. disdrodb/l0/readers/PARSIVEL/NASA/LPVEX.py +25 -13
  40. disdrodb/l0/readers/PARSIVEL/NASA/MC3E.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -1
  42. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  43. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  44. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  45. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/{NASA/GCPEX.py → NORWAY/UIB.py} +54 -29
  48. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +6 -3
  49. disdrodb/l0/readers/{PARSIVEL/NASA/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  50. disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +48 -21
  51. disdrodb/l0/readers/{PARSIVEL/NASA/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  52. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  53. disdrodb/l1/beard_model.py +45 -1
  54. disdrodb/l1/fall_velocity.py +1 -6
  55. disdrodb/l1/filters.py +2 -0
  56. disdrodb/l2/empirical_dsd.py +12 -8
  57. disdrodb/routines/l0.py +2 -2
  58. disdrodb/routines/options.py +2 -0
  59. disdrodb/scattering/axis_ratio.py +3 -0
  60. disdrodb/scattering/routines.py +1 -1
  61. disdrodb/summary/routines.py +63 -61
  62. disdrodb/utils/compression.py +4 -2
  63. disdrodb/utils/dask.py +31 -11
  64. disdrodb/utils/manipulations.py +7 -1
  65. disdrodb/viz/plots.py +5 -3
  66. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  67. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/RECORD +71 -54
  68. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  69. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  70. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  71. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
disdrodb/routines/l0.py CHANGED
@@ -696,7 +696,7 @@ def run_l0b_station(
696
696
  # -----------------------------------------------------------------.
697
697
  # Start L0B processing
698
698
  t_i = time.time()
699
- msg = f"{product} processing of station_name {station_name} has started."
699
+ msg = f"{product} processing of station {station_name} has started."
700
700
  log_info(logger=logger, msg=msg, verbose=verbose)
701
701
 
702
702
  # -----------------------------------------------------------------.
@@ -774,7 +774,7 @@ def run_l0b_station(
774
774
  # -----------------------------------------------------------------.
775
775
  # End L0B processing
776
776
  timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
777
- msg = f"{product} processing of station_name {station_name} completed in {timedelta_str}"
777
+ msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
778
778
  log_info(logger=logger, msg=msg, verbose=verbose)
779
779
 
780
780
  # -----------------------------------------------------------------.
@@ -35,6 +35,8 @@ from disdrodb.utils.yaml import read_yaml
35
35
  # get_product_options(product="L1", temporal_resolution="1MIN")
36
36
  # get_product_options(product="L1", temporal_resolution="1MIN", sensor_name="PARSIVEL")
37
37
 
38
+ # test temporal_resolutions are unique
39
+
38
40
  # TODO: test return list
39
41
  # get_product_temporal_resolutions(product="L1")
40
42
  # get_product_temporal_resolutions(product="L2E")
@@ -83,6 +83,9 @@ def get_axis_ratio_battaglia_2010(diameter):
83
83
  """
84
84
  Compute the axis ratio of raindrops using the Battaglia et al. (2010) model.
85
85
 
86
+ This axis ratio is assumed by OTT Parsivel sensors internally to compute the
87
+ reported particle size (Deq).
88
+
86
89
  Parameters
87
90
  ----------
88
91
  diameter : array-like
@@ -973,7 +973,7 @@ def get_radar_parameters(
973
973
  list_ds = [func(ds_subset, **params) for params in list_params]
974
974
 
975
975
  # Merge into a single dataset
976
- ds_radar = xr.merge(list_ds)
976
+ ds_radar = xr.merge(list_ds, compat="no_conflicts", join="outer")
977
977
 
978
978
  # Order frequency from lowest to highest
979
979
  # --> ['S', 'C', 'X', 'Ku', 'K', 'Ka', 'W']
@@ -716,7 +716,7 @@ def create_nd_dataframe(ds, variables=None):
716
716
  "sample_interval",
717
717
  *RADAR_OPTIONS,
718
718
  ]
719
- df_nd = ds_stack.to_dataframe().drop(columns=coords_to_drop, errors="ignore")
719
+ df_nd = ds_stack.to_dask_dataframe().drop(columns=coords_to_drop, errors="ignore").compute()
720
720
  df_nd["D"] = df_nd["diameter_bin_center"]
721
721
  df_nd["N(D)"] = df_nd["drop_number_concentration"]
722
722
  df_nd = df_nd[df_nd["R"] != 0]
@@ -3789,70 +3789,72 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
3789
3789
 
3790
3790
  ####---------------------------------------------------------------------.
3791
3791
  #### Create drop spectrum figures and statistics
3792
- # Compute sum of raw and filtered spectrum over time
3793
- raw_drop_number = ds["raw_drop_number"].sum(dim="time")
3794
- drop_number = ds["drop_number"].sum(dim="time")
3795
-
3796
- # Define theoretical and measured average velocity
3797
- theoretical_average_velocity = ds["fall_velocity"].mean(dim="time")
3798
- measured_average_velocity = get_drop_average_velocity(drop_number)
3799
-
3800
- # Save raw and filtered spectrum over time & theoretical and measured average fall velocity
3801
- ds_stats = xr.Dataset()
3802
- ds_stats["raw_drop_number"] = raw_drop_number
3803
- ds_stats["drop_number"] = raw_drop_number
3804
- ds_stats["theoretical_average_velocity"] = theoretical_average_velocity
3805
- ds_stats["measured_average_velocity"] = measured_average_velocity
3806
- filename = define_filename(
3807
- prefix="SpectrumStats",
3808
- extension="nc",
3809
- data_source=data_source,
3810
- campaign_name=campaign_name,
3811
- station_name=station_name,
3812
- temporal_resolution=temporal_resolution,
3813
- )
3814
- ds_stats.to_netcdf(os.path.join(summary_dir_path, filename))
3792
+ if VELOCITY_DIMENSION in ds.dims:
3793
+ # Compute sum of raw and filtered spectrum over time
3794
+ raw_drop_number = ds["raw_drop_number"].sum(dim="time")
3795
+ drop_number = ds["drop_number"].sum(dim="time")
3796
+
3797
+ # Define theoretical and measured average velocity
3798
+ theoretical_average_velocity = ds["fall_velocity"].mean(dim="time")
3799
+ measured_average_velocity = get_drop_average_velocity(drop_number)
3800
+
3801
+ # Save raw and filtered spectrum over time & theoretical and measured average fall velocity
3802
+ ds_stats = xr.Dataset()
3803
+ ds_stats["raw_drop_number"] = raw_drop_number
3804
+ ds_stats["drop_number"] = raw_drop_number
3805
+ ds_stats["theoretical_average_velocity"] = theoretical_average_velocity
3806
+ if measured_average_velocity is not None:
3807
+ ds_stats["measured_average_velocity"] = measured_average_velocity
3808
+ filename = define_filename(
3809
+ prefix="SpectrumStats",
3810
+ extension="nc",
3811
+ data_source=data_source,
3812
+ campaign_name=campaign_name,
3813
+ station_name=station_name,
3814
+ temporal_resolution=temporal_resolution,
3815
+ )
3816
+ ds_stats.to_netcdf(os.path.join(summary_dir_path, filename))
3815
3817
 
3816
- # Create figures with raw and filtered spectrum
3817
- # - Raw
3818
- filename = define_filename(
3819
- prefix="SpectrumRaw",
3820
- extension="png",
3821
- data_source=data_source,
3822
- campaign_name=campaign_name,
3823
- station_name=station_name,
3824
- temporal_resolution=temporal_resolution,
3825
- )
3826
- p = plot_spectrum(raw_drop_number, title="Raw Drop Spectrum")
3827
- p.figure.savefig(os.path.join(summary_dir_path, filename))
3828
- plt.close()
3818
+ # Create figures with raw and filtered spectrum
3819
+ # - Raw
3820
+ filename = define_filename(
3821
+ prefix="SpectrumRaw",
3822
+ extension="png",
3823
+ data_source=data_source,
3824
+ campaign_name=campaign_name,
3825
+ station_name=station_name,
3826
+ temporal_resolution=temporal_resolution,
3827
+ )
3828
+ p = plot_spectrum(raw_drop_number, title="Raw Drop Spectrum")
3829
+ p.figure.savefig(os.path.join(summary_dir_path, filename))
3830
+ plt.close()
3829
3831
 
3830
- # - Filtered
3831
- filename = define_filename(
3832
- prefix="SpectrumFiltered",
3833
- extension="png",
3834
- data_source=data_source,
3835
- campaign_name=campaign_name,
3836
- station_name=station_name,
3837
- temporal_resolution=temporal_resolution,
3838
- )
3839
- p = plot_spectrum(drop_number, title="Filtered Drop Spectrum")
3840
- p.figure.savefig(os.path.join(summary_dir_path, filename))
3841
- plt.close()
3832
+ # - Filtered
3833
+ filename = define_filename(
3834
+ prefix="SpectrumFiltered",
3835
+ extension="png",
3836
+ data_source=data_source,
3837
+ campaign_name=campaign_name,
3838
+ station_name=station_name,
3839
+ temporal_resolution=temporal_resolution,
3840
+ )
3841
+ p = plot_spectrum(drop_number, title="Filtered Drop Spectrum")
3842
+ p.figure.savefig(os.path.join(summary_dir_path, filename))
3843
+ plt.close()
3842
3844
 
3843
- # Create figure comparing raw and filtered spectrum
3844
- filename = define_filename(
3845
- prefix="SpectrumSummary",
3846
- extension="png",
3847
- data_source=data_source,
3848
- campaign_name=campaign_name,
3849
- station_name=station_name,
3850
- temporal_resolution=temporal_resolution,
3851
- )
3845
+ # Create figure comparing raw and filtered spectrum
3846
+ filename = define_filename(
3847
+ prefix="SpectrumSummary",
3848
+ extension="png",
3849
+ data_source=data_source,
3850
+ campaign_name=campaign_name,
3851
+ station_name=station_name,
3852
+ temporal_resolution=temporal_resolution,
3853
+ )
3852
3854
 
3853
- fig = plot_raw_and_filtered_spectra(ds)
3854
- fig.savefig(os.path.join(summary_dir_path, filename))
3855
- plt.close()
3855
+ fig = plot_raw_and_filtered_spectra(ds)
3856
+ fig.savefig(os.path.join(summary_dir_path, filename))
3857
+ plt.close()
3856
3858
 
3857
3859
  ####---------------------------------------------------------------------.
3858
3860
  #### Create L2E dataframe
@@ -82,7 +82,7 @@ def unzip_file_on_terminal(filepath: str, dest_path: str) -> str:
82
82
  subprocess.run(cmd, check=True)
83
83
 
84
84
 
85
- def _zip_dir(dir_path: str) -> str:
85
+ def _zip_dir(dir_path: str, dst_dir=None) -> str:
86
86
  """Zip a directory into a file located in the same directory.
87
87
 
88
88
  Parameters
@@ -95,7 +95,9 @@ def _zip_dir(dir_path: str) -> str:
95
95
  str
96
96
  Path of the zip archive.
97
97
  """
98
- output_path_without_extension = os.path.join(tempfile.gettempdir(), os.path.basename(dir_path))
98
+ if dst_dir is None:
99
+ dst_dir = tempfile.gettempdir()
100
+ output_path_without_extension = os.path.join(dst_dir, os.path.basename(dir_path))
99
101
  output_path = output_path_without_extension + ".zip"
100
102
  shutil.make_archive(output_path_without_extension, "zip", dir_path)
101
103
  return output_path
disdrodb/utils/dask.py CHANGED
@@ -113,7 +113,13 @@ def close_dask_cluster(cluster, client):
113
113
  logger.setLevel(original_level)
114
114
 
115
115
 
116
- def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
116
+ def _batch_iterable(iterable, n):
117
+ """Yield successive n-sized chunks from iterable."""
118
+ for i in range(0, len(iterable), n):
119
+ yield iterable[i : i + n]
120
+
121
+
122
+ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str, max_tasks_per_batch=5_000):
117
123
  """
118
124
  Execute Dask tasks and skip failed ones.
119
125
 
@@ -125,6 +131,9 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
125
131
  Whether to execute in parallel with Dask or not.
126
132
  logs_dir : str
127
133
  Directory to store FAILED_TASKS.log.
134
+ max_tasks_per_batch : int or None, optional
135
+ Maximum number of tasks to submit to `client.compute()` at once.
136
+ The default is 5000. Dask struggles if more than 10_000 tasks are submitted.
128
137
 
129
138
  Returns
130
139
  -------
@@ -150,18 +159,29 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
150
159
  except ValueError:
151
160
  raise ValueError("No Dask Distributed Client found.")
152
161
 
153
- # Compute tasks (all concurrently)
154
- # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
155
- # - If errors occurs in some, skip it
156
- futures = client.compute(list_tasks)
157
- results = client.gather(futures, errors="skip")
162
+ all_results = []
163
+ failed_futures = []
164
+
165
+ # Batch execution
166
+ task_batches = list(_batch_iterable(list_tasks, max_tasks_per_batch)) if max_tasks_per_batch else [list_tasks]
167
+
168
+ for batch in task_batches:
169
+ # Compute tasks (all concurrently)
170
+ # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
171
+ # - If errors occur in some tasks, skip them
172
+ futures = client.compute(batch)
173
+ results = client.gather(futures, errors="skip")
174
+
175
+ # Identify and collect failed futures
176
+ batch_failed = [f for f in futures if f.status != "finished"]
177
+ failed_futures.extend(batch_failed)
158
178
 
159
- # Collect failed futures
160
- failed_futures = [f for f in futures if f.status != "finished"] # "error"
179
+ # Collect results from successful tasks
180
+ all_results.extend(results)
161
181
 
162
182
  # If no tasks failed, return results
163
183
  if not failed_futures:
164
- return results
184
+ return all_results
165
185
 
166
186
  # Otherwise define log file listing failed tasks
167
187
  with open(failed_log_path, "w") as f:
@@ -170,5 +190,5 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
170
190
  f.write(f"ERROR - DASK TASK FAILURE - Task {fut.key} failed: {err}\n")
171
191
 
172
192
  # Append to list of log filepaths (results) the dask failing log
173
- results.append(failed_log_path)
174
- return results
193
+ all_results.append(failed_log_path)
194
+ return all_results
@@ -26,7 +26,13 @@ from disdrodb.utils.xarray import unstack_datarray_dimension
26
26
 
27
27
  def get_diameter_bin_edges(ds):
28
28
  """Retrieve diameter bin edges."""
29
- bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
29
+ bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
30
+ return bin_edges
31
+
32
+
33
+ def get_velocity_bin_edges(ds):
34
+ """Retrieve velocity bin edges."""
35
+ bin_edges = np.append(ds["velocity_bin_lower"].to_numpy(), ds["velocity_bin_upper"].to_numpy()[-1])
30
36
  return bin_edges
31
37
 
32
38
 
disdrodb/viz/plots.py CHANGED
@@ -96,6 +96,8 @@ def _check_has_diameter_and_velocity_dims(da):
96
96
  def _get_spectrum_variable(xr_obj, variable):
97
97
  if not isinstance(xr_obj, (xr.Dataset, xr.DataArray)):
98
98
  raise TypeError("Expecting xarray object as input.")
99
+ if VELOCITY_DIMENSION not in xr_obj.dims:
100
+ raise ValueError("2D spectrum not available.")
99
101
  if isinstance(xr_obj, xr.Dataset):
100
102
  if variable not in xr_obj:
101
103
  raise ValueError(f"The dataset do not include {variable=}.")
@@ -229,7 +231,7 @@ def plot_raw_and_filtered_spectra(
229
231
  theoretical_average_velocity = ds["fall_velocity"]
230
232
  if "time" in theoretical_average_velocity.dims:
231
233
  theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
232
- if add_measured_average_velocity:
234
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
233
235
  measured_average_velocity = get_drop_average_velocity(drop_number)
234
236
 
235
237
  # Define norm if not specified
@@ -248,7 +250,7 @@ def plot_raw_and_filtered_spectra(
248
250
  # Add velocities if asked
249
251
  if add_theoretical_average_velocity:
250
252
  theoretical_average_velocity.plot(ax=ax1, c="k", linestyle="dashed")
251
- if add_measured_average_velocity:
253
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
252
254
  measured_average_velocity.plot(ax=ax1, c="k", linestyle="dotted")
253
255
 
254
256
  # Improve plot appearance
@@ -262,7 +264,7 @@ def plot_raw_and_filtered_spectra(
262
264
  # Add velocities if asked
263
265
  if add_theoretical_average_velocity:
264
266
  theoretical_average_velocity.plot(ax=ax2, c="k", linestyle="dashed", label="Theoretical velocity")
265
- if add_measured_average_velocity:
267
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
266
268
  measured_average_velocity.plot(ax=ax2, c="k", linestyle="dotted", label="Measured average velocity")
267
269
 
268
270
  # Improve plot appearance
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: disdrodb
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: disdrodb provides tools to download, standardize, share and analyze global disdrometer data.
5
5
  Author: Gionata Ghiggi
6
6
  Project-URL: homepage, https://github.com/ltelab/disdrodb