dclab-0.62.17-cp39-cp39-macosx_11_0_arm64.whl → dclab-0.67.3-cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. dclab/_version.py +16 -3
  2. dclab/cli/task_tdms2rtdc.py +1 -1
  3. dclab/cli/task_verify_dataset.py +3 -3
  4. dclab/definitions/__init__.py +1 -1
  5. dclab/definitions/feat_const.py +6 -4
  6. dclab/definitions/feat_logic.py +27 -28
  7. dclab/downsampling.cpython-39-darwin.so +0 -0
  8. dclab/downsampling.pyx +12 -7
  9. dclab/external/skimage/_find_contours_cy.cpython-39-darwin.so +0 -0
  10. dclab/external/skimage/_pnpoly.cpython-39-darwin.so +0 -0
  11. dclab/external/skimage/_shared/geometry.cpython-39-darwin.so +0 -0
  12. dclab/features/bright.py +11 -2
  13. dclab/features/bright_bc.py +13 -2
  14. dclab/features/bright_perc.py +10 -2
  15. dclab/features/contour.py +12 -7
  16. dclab/features/emodulus/__init__.py +33 -27
  17. dclab/features/emodulus/load.py +8 -6
  18. dclab/features/emodulus/pxcorr.py +33 -15
  19. dclab/features/emodulus/scale_linear.py +79 -52
  20. dclab/features/emodulus/viscosity.py +31 -19
  21. dclab/features/fl_crosstalk.py +19 -10
  22. dclab/features/inert_ratio.py +18 -11
  23. dclab/features/volume.py +24 -14
  24. dclab/http_utils.py +1 -1
  25. dclab/kde/base.py +238 -14
  26. dclab/kde/methods.py +33 -12
  27. dclab/rtdc_dataset/config.py +1 -1
  28. dclab/rtdc_dataset/core.py +22 -8
  29. dclab/rtdc_dataset/export.py +171 -34
  30. dclab/rtdc_dataset/feat_basin.py +250 -33
  31. dclab/rtdc_dataset/fmt_dcor/api.py +69 -7
  32. dclab/rtdc_dataset/fmt_dcor/base.py +103 -4
  33. dclab/rtdc_dataset/fmt_dcor/logs.py +1 -1
  34. dclab/rtdc_dataset/fmt_dcor/tables.py +1 -1
  35. dclab/rtdc_dataset/fmt_hdf5/events.py +20 -1
  36. dclab/rtdc_dataset/fmt_hierarchy/base.py +1 -1
  37. dclab/rtdc_dataset/fmt_s3.py +29 -10
  38. dclab/rtdc_dataset/fmt_tdms/event_trace.py +1 -1
  39. dclab/rtdc_dataset/fmt_tdms/naming.py +1 -1
  40. dclab/rtdc_dataset/writer.py +43 -11
  41. dclab/statistics.py +27 -4
  42. dclab/warn.py +1 -1
  43. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/METADATA +26 -4
  44. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/RECORD +48 -48
  45. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/WHEEL +1 -1
  46. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/entry_points.txt +0 -0
  47. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/licenses/LICENSE +0 -0
  48. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/top_level.txt +0 -0
--- a/dclab/rtdc_dataset/export.py
+++ b/dclab/rtdc_dataset/export.py
@@ -39,6 +39,10 @@ class LimitingExportSizeWarning(UserWarning):
     pass
 
 
+class ContourNotExportedWarning(UserWarning):
+    pass
+
+
 class Export(object):
     def __init__(self, rtdc_ds):
         """Export functionalities for RT-DC datasets"""
@@ -51,6 +55,7 @@ class Export(object):
             pixel_format: str = "yuv420p",
             codec: str = "rawvideo",
             codec_options: dict[str, str] = None,
+            progress_callback: callable = None,
             ):
         """Exports filtered event images to a video file
 
@@ -72,6 +77,10 @@ class Export(object):
         codec_options:
            Additional arguments to give to the codec using ffmpeg,
            e.g. `{'preset': 'slow', 'crf': '0'}` for "libx264" codec.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         Notes
         -----
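Note: the new `progress_callback` parameter follows the same contract in all four exporters (`avi`, `fcs`, `hdf5`, `tsv`): it is called with a float between 0 and 1 and a short status string. A minimal sketch of a compatible callback; the input file name is hypothetical:

    import dclab

    def print_progress(fraction, message):
        # fraction is in [0, 1]; message describes the current step
        print(f"[{fraction:7.2%}] {message}")

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    ds.export.avi("movie.avi", progress_callback=print_progress)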
@@ -103,6 +112,10 @@ class Export(object):
 
             # write the filtered frames to the video file
             for evid in np.arange(len(ds)):
+
+                if progress_callback is not None and evid % 10_000 == 0:
+                    progress_callback(evid / len(ds), "exporting video")
+
                 # skip frames that were filtered out
                 if filtered and not ds.filter.all[evid]:
                     continue
@@ -116,12 +129,22 @@ class Export(object):
 
                 for packet in stream.encode(av_frame):
                     container.mux(packet)
+
+            if progress_callback is not None:
+                progress_callback(1.0, "video export complete")
+
         else:
             msg = "No image data to export: dataset {} !".format(ds.title)
             raise OSError(msg)
 
-    def fcs(self, path, features, meta_data=None, filtered=True,
-            override=False):
+    def fcs(self,
+            path: pathlib.Path | str,
+            features: list[str],
+            meta_data: dict = None,
+            filtered: bool = True,
+            override: bool = False,
+            progress_callback: callable = None,
+            ):
         """Export the data of an RT-DC dataset to an .fcs file
 
         Parameters
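Note: the `fcs` signature is now fully annotated and gains `progress_callback`. A hedged usage sketch (file and feature names chosen for illustration; the optional `fcswrite` dependency must be installed):

    import dclab

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    ds.export.fcs("events.fcs",
                  features=["area_um", "deform"],
                  filtered=True,
                  progress_callback=lambda frac, msg: print(frac, msg))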
@@ -142,6 +165,10 @@ class Export(object):
         override: bool
             If set to `True`, an existing file ``path`` will be overridden.
             If set to `False`, raises `OSError` if ``path`` exists.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         Notes
         -----
@@ -175,12 +202,18 @@ class Export(object):
         # Collect the header
         chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]
 
+        if progress_callback is not None:
+            progress_callback(0.0, "collecting data")
+
         # Collect the data
         if filtered:
             data = [ds[c][ds.filter.all] for c in features]
         else:
             data = [ds[c] for c in features]
 
+        if progress_callback is not None:
+            progress_callback(0.5, "exporting data")
+
         data = np.array(data).transpose()
         meta_data["dclab version"] = version
         fcswrite.write_fcs(filename=str(path),
@@ -189,6 +222,9 @@ class Export(object):
                            text_kw_pr=meta_data,
                            )
 
+        if progress_callback is not None:
+            progress_callback(1.0, "export complete")
+
     def hdf5(self,
              path: str | pathlib.Path,
              features: List[str] = None,
@@ -196,11 +232,14 @@ class Export(object):
              logs: bool = False,
              tables: bool = False,
              basins: bool = False,
+             allow_contour: bool = False,
              meta_prefix: str = "src_",
              override: bool = False,
             compression_kwargs: Dict = None,
             compression: str = "deprecated",
-             skip_checks: bool = False):
+             skip_checks: bool = False,
+             progress_callback: callable = None,
+             ):
         """Export the data of the current instance to an HDF5 file
 
         Parameters
@@ -226,6 +265,14 @@ class Export(object):
            Whether to export basins. If filtering is disabled, basins
            are copied directly to the output file. If filtering is enabled,
            then mapped basins are exported.
+        allow_contour: bool
+            Whether to allow exporting the "contour" feature. Writing this
+            feature to an HDF5 file is extremely inefficient, because it
+            cannot be represented by an ND array and thus must be stored
+            in a group, each contour stored in a separate dataset. The
+            contour can easily be computed via the mask, so actually storing
+            the contour should be avoided. If "contour" is in `features`,
+            it will only be written to the output file if `allow_contour=True`.
         meta_prefix: str
            Prefix for log and table names in the exported file
         override: bool
@@ -234,8 +281,8 @@ class Export(object):
         compression_kwargs: dict
            Dictionary with the keys "compression" and "compression_opts"
            which are passed to :func:`h5py.H5File.create_dataset`. The
-           default is Zstandard compression with the lowest compression
-           level `hdf5plugin.Zstd(clevel=1)`.
+           default is Zstandard compression with the compression
+           level 5 `hdf5plugin.Zstd(clevel=5)`.
         compression: str or None
            Compression method used for data storage;
            one of [None, "lzf", "gzip", "szip"].
@@ -244,7 +291,10 @@ class Export(object):
            Use `compression_kwargs` instead.
         skip_checks: bool
            Disable checking whether all features have the same length.
-
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         .. versionchanged:: 0.58.0
 
@@ -263,7 +313,7 @@ class Export(object):
             # be backwards-compatible
             compression_kwargs = {"compression": compression}
         if compression_kwargs is None:
-            compression_kwargs = hdf5plugin.Zstd(clevel=1)
+            compression_kwargs = hdf5plugin.Zstd(clevel=5)
         path = pathlib.Path(path)
         # Make sure that path ends with .rtdc
         if path.suffix not in [".rtdc", ".rtdc~"]:
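Note: the default compression changed from `hdf5plugin.Zstd(clevel=1)` to `hdf5plugin.Zstd(clevel=5)`. Callers who prefer the previous speed-over-size trade-off can pass the old setting explicitly; a sketch with hypothetical file names:

    import dclab
    import hdf5plugin

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    # restore the pre-0.67 default: fastest Zstandard level
    ds.export.hdf5("out.rtdc", compression_kwargs=hdf5plugin.Zstd(clevel=1))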
@@ -281,8 +331,25 @@ class Export(object):
         # for convenience
         ds = self.rtdc_ds
 
+        # remove contour information from user-specified features
+        if "contour" in (features or []) and not allow_contour:
+            features = list(features)
+            features.remove("contour")
+            warnings.warn(
+                "Feature 'contour' not exported to output file, because "
+                "`allow_contour` is `False`. If you really need the "
+                "'contour' feature in the output file (unlikely, unless you "
+                "are venturing outside the DC Cosmos), you must set "
+                "`allow_contour=True`. Otherwise, you can safely ignore "
+                "this warning or silence it by not providing 'contour' in "
+                "`features`.",
+                ContourNotExportedWarning)
+
         if features is None:
             features = ds.features_innate
+            # silently remove contour information
+            if "contour" in features and not allow_contour:
+                features.remove("contour")
 
         # decide which metadata to export
         meta = {}
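Note: with `allow_contour=False` (the default), requesting "contour" now triggers `ContourNotExportedWarning` instead of silently writing the inefficient per-event datasets. A sketch of both ways to handle this; file names are hypothetical:

    import warnings
    import dclab
    from dclab.rtdc_dataset.export import ContourNotExportedWarning

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file

    # Opt in explicitly if the contour feature is really required...
    ds.export.hdf5("with_contour.rtdc", features=["contour", "mask"],
                   allow_contour=True)

    # ...or export without it and silence the warning.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ContourNotExportedWarning)
        ds.export.hdf5("no_contour.rtdc", features=["contour", "mask"])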
@@ -297,8 +364,8 @@ class Export(object):
         # Define a new measurement identifier, so that we are not running
         # into any problems with basins being defined for filtered data.
         ds_run_id = ds.get_measurement_identifier()
-        random_ap = str(uuid.uuid4())[:4]
-        meta["experiment"]["run identifier"] = f"{ds_run_id}-{random_ap}"
+        random_ap = f"dclab-{str(uuid.uuid4())[:7]}"
+        meta["experiment"]["run identifier"] = f"{ds_run_id}_{random_ap}"
 
         if filtered:
             filter_arr = ds.filter.all
@@ -335,6 +402,8 @@ class Export(object):
         with RTDCWriter(path,
                         mode="append",
                         compression_kwargs=compression_kwargs) as hw:
+            if progress_callback is not None:
+                progress_callback(0.0, "writing metadata")
             # write meta data
             hw.store_metadata(meta)
 
@@ -369,7 +438,10 @@ class Export(object):
                                   ds.tables[tab])
 
             # write each feature individually
-            for feat in features:
+            for ii, feat in enumerate(features):
+                if progress_callback is not None:
+                    progress_callback(ii / len(features), f"exporting {feat}")
+
                 if (filter_arr is None or
                         # This does not work for the .tdms file format
                         # (and probably also not for DCOR).
@@ -393,6 +465,9 @@ class Export(object):
                                      filtarr=filter_arr)
 
             if basins:
+                if progress_callback:
+                    progress_callback(1 - 1 / (len(features) or 1),
+                                      "writing basins")
                 # We have to store basins. There are three options:
                 # - filtering disabled: just copy basins
                 # - filtering enabled
@@ -404,6 +479,8 @@ class Export(object):
                 basin_list = [bn.as_dict() for bn in ds.basins]
                 # In addition to the upstream basins, also store a reference
                 # to the original file from which the export was done.
+                # Get the identifier of the current dataset for the new basins.
+                basin_id = ds.get_measurement_identifier()
                 if ds.format in get_basin_classes():
                     # The dataset has a format that matches a basin format
                     # directly.
@@ -418,17 +495,13 @@ class Export(object):
                         "basin_format": ds.format,
                         "basin_locs": basin_locs,
                         "basin_descr": f"Exported with dclab {version}",
+                        "basin_id": basin_id,
                     })
                 elif (ds.format == "hierarchy"
                         and ds.get_root_parent().format in get_basin_classes()):
-                    # avoid circular imports
-                    from .fmt_hierarchy import map_indices_child2root
                     # The dataset is a hierarchy child, and it is derived
                     # from a dataset that has a matching basin format.
-                    # We have to add the indices of the root parent, which
-                    # identify the child, to the basin dictionary. Note
-                    # that additional basin filtering is applied below
-                    # this case for all basins.
+                    #
                     # For the sake of clarity I wrote this as a separate case,
                     # even if that means duplicating code from the previous
                     # case.
@@ -445,36 +518,83 @@ class Export(object):
                         "basin_locs": basin_locs,
                         "basin_descr": f"Exported with dclab {version} from a "
                                        f"hierarchy dataset",
-                        # This is where this basin differs from the basin
-                        # definition in the previous case.
-                        "basin_map": map_indices_child2root(
-                            child=ds,
-                            child_indices=np.arange(len(ds))
-                        ),
+                        # Here we do not yet treat the conversion from the
+                        # root dataset indices to the child indices,
+                        # because we will fill in the missing values below
+                        # in the basin mapping correction step.
+                        "basin_map": None,
+                        "basin_id": basin_id,
                     })
 
                 for bn_dict in basin_list:
-                    if bn_dict.get("basin_type") == "internal":
+                    if bn_dict.get("basin_format") not in get_basin_classes():
+                        # Whichever software stored this basin in the
+                        # original file, we do not support it or don't want
+                        # to break it.
+                        continue
+                    elif bn_dict.get("basin_type") == "internal":
                         # Internal basins are only valid for files they were
                         # defined in. Since we are exporting, it does not
                         # make sense to store these basins in the output file.
                         continue
+                    elif bn_dict.get("perishable"):
+                        # Perishable basins require secret keys or complicated
+                        # logic to execute in order to refresh them. We do not
+                        # store them in the output file.
+                        continue
+
+                    # Basin mapping correction: If we are filtering, or
+                    # if we are exporting from a hierarchy dataset, we have
+                    # to correct or add basin mapping arrays.
                     basinmap_orig = bn_dict.get("basin_map")
-                    if not filtered:
-                        # filtering disabled: just copy basins
-                        pass
-                    elif basinmap_orig is None:
-                        # basins with "same" mapping: create new mapping
-                        bn_dict["basin_map"] = np.where(filter_arr)[0]
+                    if ds.format == "hierarchy":
+                        # Hierarchy dataset
+                        # Compute mapping from hierarchy root.
+                        from .fmt_hierarchy import map_indices_child2root
+                        map_root = map_indices_child2root(
+                            child=ds,
+                            child_indices=np.arange(len(ds))
+                        )
+
+                        if not filtered and basinmap_orig is None:
+                            # We only have to consider the hierarchy.
+                            bn_dict["basin_map"] = map_root
+                        elif filtered and basinmap_orig is None:
+                            # Filtering must be taken into account.
+                            bn_dict["basin_map"] = map_root[filter_arr]
+                        else:
+                            # The source file has mapping defined which we
+                            # have to take into account.
+                            map_child = basinmap_orig[map_root]
+                            if filtered:
+                                # Subsetting additional filters
+                                bn_dict["basin_map"] = map_child[filter_arr]
+                            else:
+                                bn_dict["basin_map"] = map_child
                     else:
-                        # mapped basins: correct nested mapping
-                        bn_dict["basin_map"] = basinmap_orig[filter_arr]
+                        if not filtered:
+                            # filtering disabled: just copy basins
+                            pass
+                        elif filtered and basinmap_orig is None:
+                            # basins with mapping "same": create new mapping
+                            bn_dict["basin_map"] = np.where(filter_arr)[0]
+                        else:
+                            # filter the source mapping
+                            bn_dict["basin_map"] = basinmap_orig[filter_arr]
 
                     # Do not verify basins, it takes too long.
                     hw.store_basin(**bn_dict, verify=False)
+            if progress_callback is not None:
+                progress_callback(1.0, "export complete")
 
-    def tsv(self, path, features, meta_data=None, filtered=True,
-            override=False):
+    def tsv(self,
+            path: pathlib.Path | str,
+            features: list[str],
+            meta_data: dict = None,
+            filtered: bool = True,
+            override: bool = False,
+            progress_callback: callable = None,
+            ):
         """Export the data of the current instance to a .tsv file
 
         Parameters
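Note: the rewritten basin-mapping correction is plain index-array composition: `map_root` maps child rows to root rows, an existing `basinmap_orig` maps those to rows in the basin file, and `filter_arr` subsets the result. A self-contained numpy sketch with invented arrays:

    import numpy as np

    basinmap_orig = np.array([5, 7, 9, 11, 13])  # root rows -> basin-file rows
    map_root = np.array([0, 2, 4])               # child rows -> root rows
    filter_arr = np.array([True, False, True])   # filter defined on the child

    map_child = basinmap_orig[map_root]          # -> [5, 9, 13]
    basin_map = map_child[filter_arr]            # -> [5, 13]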
@@ -496,6 +616,10 @@ class Export(object):
         override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
         """
         if meta_data is None:
             meta_data = {}
@@ -516,6 +640,10 @@ class Export(object):
             if c not in ds.features_scalar:
                 raise ValueError("Invalid feature name {}".format(c))
         meta_data["dclab version"] = version
+
+        if progress_callback is not None:
+            progress_callback(0.0, "writing metadata")
+
         # Write BOM header
         with path.open("wb") as fd:
             fd.write(codecs.BOM_UTF8)
@@ -539,17 +667,26 @@ class Export(object):
             fd.write("# "+header2+"\n")
 
         with path.open("ab") as fd:
-            # write data
+            if progress_callback is not None:
+                progress_callback(0.1, "collecting data")
+
+            # collect data
             if filtered:
                 data = [ds[c][ds.filter.all] for c in features]
             else:
                 data = [ds[c] for c in features]
 
+            if progress_callback is not None:
+                progress_callback(0.5, "writing data")
+
             np.savetxt(fd,
                        np.array(data).transpose(),
                        fmt=str("%.10e"),
                        delimiter="\t")
 
+        if progress_callback is not None:
+            progress_callback(1.0, "export complete")
+
 
 def yield_filtered_array_stacks(data, indices):
     """Generator returning chunks with the filtered feature data