dclab 0.67.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,223 @@
1
+ """Create .rtdc files with scalar-only features"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+ from typing import List
7
+ import warnings
8
+
9
+ import h5py
10
+ import hdf5plugin
11
+
12
+ from ..rtdc_dataset import (
13
+ fmt_hdf5, new_dataset, rtdc_copy, RTDCWriter, RTDCBase
14
+ )
15
+ from .. import util
16
+ from .._version import version
17
+
18
+ from . import common
19
+
20
+
21
def condense(
        path_in: str | pathlib.Path = None,
        path_out: str | pathlib.Path = None,
        ancillaries: bool = None,
        store_ancillary_features: bool = True,
        store_basin_features: bool = True,
        check_suffix: bool = True,
        ret_path: bool = False
        ):
    """Create a new dataset with all available scalar-only features

    In addition to the innate scalar features, fast-to-compute
    ancillary features and basin features (`features_loaded`) are
    written to the output file.

    Parameters
    ----------
    path_in: str or pathlib.Path
        file to compress
    path_out: str or pathlib
        output file path
    ancillaries: bool
        DEPRECATED, use `store_ancillary_features` instead
    store_ancillary_features: bool
        compute and store ancillary features in the output file
    store_basin_features: bool
        copy basin features from the input path to the output file;
        note that the basin information (including any internal
        basin dataset) is always copied over to the new dataset
    check_suffix: bool
        check suffixes for input and output files
    ret_path: bool
        whether to return the output path

    Returns
    -------
    path_out: pathlib.Path (optional)
        output path (with possibly corrected suffix)
    """
    if ancillaries is not None:
        warnings.warn("Please use `store_ancillary_features` instead of "
                      "`ancillaries`", DeprecationWarning)
        store_ancillary_features = ancillaries

    # Fall back to command-line arguments when paths were not given.
    if path_out is None or path_in is None:
        args = condense_parser().parse_args()
        path_in = args.input
        path_out = args.output
        store_ancillary_features = not args.no_ancillaries
        store_basin_features = not args.no_basins

    suffixes = [".rtdc", ".tdms"]
    if not check_suffix:
        # accept whatever suffix the input file happens to have
        suffixes.append(pathlib.Path(path_in).suffix)

    path_in, path_out, path_temp = common.setup_task_paths(
        path_in, path_out, allowed_input_suffixes=suffixes)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # `store_basin_features` is used during initialization (to avoid
        # conflicts with ancillary features) and again in the actual
        # condensing function, which determines the features to write.
        with new_dataset(path_in, enable_basins=store_basin_features) as ds, \
                h5py.File(path_temp, "w") as h5_cond:
            condense_dataset(ds=ds,
                             h5_cond=h5_cond,
                             store_ancillary_features=store_ancillary_features,
                             store_basin_features=store_basin_features,
                             warnings_list=caught)

    # Move the finished temporary file to the requested output path.
    path_temp.rename(path_out)
    if ret_path:
        return path_out
96
+
97
+
98
def condense_dataset(
        ds: RTDCBase,
        h5_cond: h5py.File,
        ancillaries: bool = None,
        store_ancillary_features: bool = True,
        store_basin_features: bool = True,
        warnings_list: List = None):
    """Condense a dataset using low-level HDF5 methods

    For ancillary and basin features, high-level dclab methods are used.

    Parameters
    ----------
    ds: RTDCBase
        input dataset to condense
    h5_cond: h5py.File
        output HDF5 file opened in write mode
    ancillaries: bool
        DEPRECATED, use `store_ancillary_features` instead
    store_ancillary_features: bool
        compute and store ancillary features in the output file
    store_basin_features: bool
        copy basin features from the input dataset to the output file
    warnings_list: list
        recorded warnings (e.g. from `warnings.catch_warnings(record=True)`)
        that are written to the "dclab-condense-warnings" log
    """
    if ancillaries is not None:
        warnings.warn("Please use `store_ancillary_features` instead of "
                      "`ancillaries`", DeprecationWarning)
        store_ancillary_features = ancillaries

    cmp_kw = hdf5plugin.Zstd(clevel=5)
    cmd_dict = {}

    # If we have an input HDF5 file, then we might readily copy most
    # of the features over using rtdc_copy. If we have a .tdms file,
    # then we have to go the long route.
    if isinstance(ds, fmt_hdf5.RTDC_HDF5):
        rtdc_copy(src_h5file=ds.h5file,
                  dst_h5file=h5_cond,
                  features="scalar",
                  include_basins=True,
                  include_logs=True,
                  include_tables=True,
                  meta_prefix="")

    h5_cond.require_group("logs")

    # scalar features
    feats_sc = ds.features_scalar
    # loaded (computationally cheap) scalar features
    feats_sc_loaded = [f for f in ds.features_loaded if f in feats_sc]
    # internal basin features that have already been copied with `rtdc_copy`
    feats_sc_basint = sorted(h5_cond.get("basin_events", {}).keys())
    # features that are excluded, because we already copied them
    feats_exclude = feats_sc_loaded + feats_sc_basint

    cmd_dict["features_original_innate"] = ds.features_innate

    features = set(feats_sc_loaded)
    if store_basin_features:
        feats_sc_basin = [f for f in ds.features_basin if
                          (f in feats_sc and f not in feats_exclude)]
        cmd_dict["features_basin"] = feats_sc_basin
        if feats_sc_basin:
            print(f"Using basin features {feats_sc_basin}")
            features |= set(feats_sc_basin)

    if store_ancillary_features:
        feats_sc_anc = [f for f in ds.features_ancillary if
                        (f in feats_sc and f not in feats_exclude)]
        cmd_dict["features_ancillary"] = feats_sc_anc
        if feats_sc_anc:
            features |= set(feats_sc_anc)
            print(f"Using ancillary features {feats_sc_anc}")

    # command log
    logs = {"dclab-condense": common.get_command_log(
        paths=[ds.path], custom_dict=cmd_dict)}

    # rename old dclab-condense logs
    # The hash is loop-invariant, so compute it once up-front (it is
    # cached anyway, but hoisting makes the intent explicit).
    md5_cfg = util.hashobj(ds.config)
    for l_key in ["dclab-condense", "dclab-condense-warnings"]:
        if l_key in h5_cond["logs"]:
            new_log_name = f"{l_key}_{md5_cfg}"
            if new_log_name not in h5_cond["logs"]:
                # If the user repeatedly condensed one file, then there is
                # no benefit in storing the log under a different name (the
                # metadata did not change). Only write the log if it does
                # not already exist.
                h5_cond["logs"][new_log_name] = h5_cond["logs"][l_key]
            del h5_cond["logs"][l_key]

    with RTDCWriter(h5_cond,
                    mode="append",
                    compression_kwargs=cmp_kw,
                    ) as hw:
        # Write all remaining scalar features to the file
        # (these are *all* scalar features in the case of .tdms data).
        for feat in features:
            if feat not in h5_cond["events"]:
                hw.store_feature(feat=feat, data=ds[feat])

        # collect warnings log
        if warnings_list:
            logs["dclab-condense-warnings"] = \
                common.assemble_warnings(warnings_list)

        # Write logs
        for name in logs:
            hw.store_log(name, logs[name])
196
+
197
+
198
def condense_parser():
    """Build and return the argument parser for ``dclab-condense``."""
    descr = ("Reduce an RT-DC measurement to its scalar-only features "
             "(i.e. without `contour`, `image`, `mask`, or `trace`). "
             "All available ancillary features are computed.")
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('input', metavar="INPUT", type=str,
                        help='Input path (.tdms or .rtdc file)')
    parser.add_argument('output', metavar="OUTPUT", type=str,
                        help='Output path (.rtdc file)')
    parser.add_argument('--no-ancillary-features',
                        dest='no_ancillaries',
                        action='store_true',
                        help=('Do not compute expensive ancillary features '
                              'such as volume'))
    parser.set_defaults(no_ancillaries=False)
    parser.add_argument('--no-basin-features',
                        dest='no_basins',
                        action='store_true',
                        help=('Do not store basin-based feature data from '
                              'the input file in the output file'))
    parser.set_defaults(no_basins=False)
    parser.add_argument('--version', action='version',
                        version=f'dclab-condense {version}')
    return parser
dclab/cli/task_join.py ADDED
@@ -0,0 +1,229 @@
1
+ """Concatenate .rtdc files"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+ import time
7
+ from typing import Dict, List
8
+ import warnings
9
+
10
+ import hdf5plugin
11
+ import numpy as np
12
+
13
+ from ..rtdc_dataset import new_dataset, RTDCWriter
14
+ from .. import definitions as dfn
15
+ from .._version import version
16
+
17
+ from . import common
18
+
19
+
20
class FeatureSetNotIdenticalJoinWarning(UserWarning):
    """Emitted during join when input files do not share a feature set."""
22
+
23
+
24
def join(
        paths_in: List[str | pathlib.Path] = None,
        path_out: str | pathlib.Path = None,
        metadata: Dict = None,
        ret_path: bool = False,
        ):
    """Join multiple RT-DC measurements into a single .rtdc file

    Parameters
    ----------
    paths_in: list of paths
        input paths to join
    path_out: str or pathlib.Path
        output path
    metadata: dict
        optional metadata dictionary (configuration dict) to store
        in the output file
    ret_path: bool
        whether to return the output path

    Returns
    -------
    path_out: pathlib.Path (optional)
        output path (with corrected path suffix if applicable)

    Notes
    -----
    The first input file defines the metadata written to the output
    file. Only features that are present in all input files are written
    to the output file.
    """
    cmp_kw = hdf5plugin.Zstd(clevel=5)
    if metadata is None:
        metadata = {"experiment": {"run index": 1}}
    if path_out is None or paths_in is None:
        parser = join_parser()
        args = parser.parse_args()
        paths_in = args.input
        path_out = args.output

    if len(paths_in) < 2:
        raise ValueError("At least two input files must be specified!")

    paths_in, path_out, path_temp = common.setup_task_paths(
        paths_in, path_out, allowed_input_suffixes=[".rtdc", ".tdms"])

    # Order input files by date
    key_paths = []
    for pp in paths_in:
        with new_dataset(pp) as dsa:
            # sorting key: date, time, and run index of the experiment
            key = "_".join([dsa.config["experiment"]["date"],
                            dsa.config["experiment"]["time"],
                            str(dsa.config["experiment"]["run index"])
                            ])
            key_paths.append((key, pp))
    sorted_paths = [p[1] for p in sorted(key_paths, key=lambda x: x[0])]

    logs = {"dclab-join": common.get_command_log(paths=sorted_paths)}

    # Determine temporal offsets
    t_offsets = np.zeros(len(sorted_paths), dtype=np.float64)
    for ii, pp in enumerate(sorted_paths):
        with new_dataset(pp) as dsb:
            etime = dsb.config["experiment"]["time"]
            st = time.strptime(dsb.config["experiment"]["date"]
                               + etime[:8],
                               "%Y-%m-%d%H:%M:%S")
            t_offsets[ii] = time.mktime(st)
            if len(etime) > 8:
                # floating point time stored as well (HH:MM:SS.SS)
                t_offsets[ii] += float(etime[8:])
    t_offsets -= t_offsets[0]

    # Determine features to export (based on first file)
    with warnings.catch_warnings(record=True) as w:
        # Catch all FeatureSetNotIdenticalJoinWarnings
        warnings.simplefilter("ignore")
        warnings.simplefilter("always",
                              category=FeatureSetNotIdenticalJoinWarning)
        features = None
        for pp in sorted_paths:
            with new_dataset(pp) as dsc:
                # features present
                if features is None:
                    # The initial features are the innate features of the
                    # first file (sorted by time). If we didn't use the innate
                    # features, then the resulting file might become large
                    # (e.g. if we included ancillary features).
                    features = sorted(dsc.features_innate)
                else:
                    # Remove features from the feature list, if it is not in
                    # this dataset, or cannot be computed on-the-fly.
                    # BUGFIX: iterate over a snapshot of the list; removing
                    # an element while iterating the same list skips the
                    # element that follows each removed feature, which could
                    # leave missing features in the export.
                    for feat in list(features):
                        if feat not in dsc.features:
                            features.remove(feat)
                            warnings.warn(
                                f"Excluding feature '{feat}', because "
                                + f"it is not present in '{pp}'!",
                                FeatureSetNotIdenticalJoinWarning)
                    # Warn the user if this dataset has an innate feature that
                    # is being ignored, because it is not an innate feature of
                    # the first dataset.
                    for feat in dsc.features_innate:
                        if feat not in features:
                            warnings.warn(
                                f"Ignoring feature '{feat}' in '{pp}', "
                                + "because it is not present in the "
                                + "other files being joined!",
                                FeatureSetNotIdenticalJoinWarning)
    if w:
        logs["dclab-join-feature-warnings"] = common.assemble_warnings(w)

    # Create initial output file
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        with new_dataset(sorted_paths[0]) as ds0:
            ds0.export.hdf5(path=path_temp,
                            features=features,
                            filtered=False,
                            override=True,
                            logs=True,
                            tables=True,
                            basins=False,
                            meta_prefix="src-#1_",
                            compression_kwargs=cmp_kw)
            # store configuration
            cfg0 = ds0.config.tostring(
                sections=dfn.CFG_METADATA).split("\n")
    if w:
        logs["dclab-join-warnings-#1"] = common.assemble_warnings(w)

    with RTDCWriter(path_temp, compression_kwargs=cmp_kw) as hw:
        # store configuration of first dataset
        hw.store_log(name="src-#1_cfg", lines=cfg0)
        ii = 1
        # Append data from other files
        for pi, ti in zip(sorted_paths[1:], t_offsets[1:]):
            ii += 1  # we start with the second dataset
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                meta_key = f"src-#{ii}"
                meta_prefix = meta_key + "_"
                with new_dataset(pi) as dsi:
                    for feat in features:
                        if feat == "time":
                            # handle time offset
                            fdata = dsi["time"] + ti
                        elif feat == "frame":
                            # handle frame offset
                            fr = dsi.config["imaging"]["frame rate"]
                            fdata = (np.array(dsi["frame"], dtype=np.uint64)
                                     + np.uint64(round(ti * fr)))
                        elif feat == "index_online":
                            if "events/index_online" in hw.h5file:
                                # index_online is usually larger than index
                                ido0 = hw.h5file["events/index_online"][-1] + 1
                            else:
                                ido0 = 0
                            fdata = dsi["index_online"] + ido0
                        else:
                            fdata = dsi[feat]
                        hw.store_feature(feat=feat, data=fdata)
                    # store logs
                    for log in dsi.logs:
                        hw.store_log(name=meta_prefix + log,
                                     lines=dsi.logs[log])
                    # store tables
                    for tab in dsi.tables:
                        hw.store_table(name=meta_prefix + tab,
                                       cmp_array=dsi.tables[tab])
                    # store configuration
                    cfg = dsi.config.tostring(
                        sections=dfn.CFG_METADATA).split("\n")
                    hw.store_log(name=f"{meta_key}_cfg",
                                 lines=cfg)
            if w:
                hw.store_log(name=f"dclab-join-warnings-#{ii}",
                             lines=common.assemble_warnings(w))

        # Write logs and missing meta data
        for name in logs:
            hw.store_log(name, logs[name])
        hw.store_metadata(metadata)

    # Finally, rename temp to out
    path_temp.rename(path_out)
    if ret_path:
        return path_out
213
+
214
+
215
def join_parser():
    """Build and return the argument parser for ``dclab-join``."""
    descr = ("Join two or more RT-DC measurements. This will produce "
             "one larger .rtdc file. The meta data of the dataset "
             "that was recorded earliest will be used in the output "
             "file. Please only join datasets that were recorded "
             "in the same measurement run.")
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('input', metavar="INPUT", nargs="*", type=str,
                        help='Input paths (.tdms or .rtdc files)')
    required_named = parser.add_argument_group('required named arguments')
    required_named.add_argument('-o', '--output', metavar="OUTPUT", type=str,
                                help='Output path (.rtdc file)',
                                required=True)
    parser.add_argument('--version', action='version',
                        version=f'dclab-join {version}')
    return parser
@@ -0,0 +1,98 @@
1
+ """Repack (similar to h5repack) .rtdc files"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+
7
+ import h5py
8
+
9
+ from ..rtdc_dataset import rtdc_copy
10
+ from .._version import version
11
+
12
+ from . import common
13
+
14
+
15
+ def repack(
16
+ path_in: str | pathlib.Path = None,
17
+ path_out: str | pathlib.Path = None,
18
+ strip_basins: bool = False,
19
+ strip_logs: bool = False,
20
+ check_suffix: bool = True,
21
+ ret_path: bool = False,
22
+ ):
23
+ """Repack/recreate an .rtdc file, optionally stripping the logs
24
+
25
+ Parameters
26
+ ----------
27
+ path_in: str or pathlib.Path
28
+ file to compress
29
+ path_out: str or pathlib
30
+ output file path
31
+ strip_basins: bool
32
+ do not write basin information to the output file
33
+ strip_logs: bool
34
+ do not write logs to the output file
35
+ check_suffix: bool
36
+ check suffixes for input and output files
37
+ ret_path: bool
38
+ whether to return the output path
39
+
40
+ Returns
41
+ -------
42
+ path_out: pathlib.Path
43
+ output path (with possibly corrected suffix)
44
+ """
45
+ if path_in is None and path_out is None:
46
+ parser = repack_parser()
47
+ args = parser.parse_args()
48
+ path_in = args.input
49
+ path_out = args.output
50
+ strip_basins = args.strip_basins
51
+ strip_logs = args.strip_logs
52
+
53
+ allowed_input_suffixes = [".rtdc"]
54
+ if not check_suffix:
55
+ allowed_input_suffixes.append(pathlib.Path(path_in).suffix)
56
+
57
+ path_in, path_out, path_temp = common.setup_task_paths(
58
+ path_in, path_out, allowed_input_suffixes=allowed_input_suffixes)
59
+
60
+ with h5py.File(path_in) as h5, h5py.File(path_temp, "w") as hc:
61
+ rtdc_copy(src_h5file=h5,
62
+ dst_h5file=hc,
63
+ features="all",
64
+ include_basins=not strip_basins,
65
+ include_logs=not strip_logs,
66
+ include_tables=True,
67
+ meta_prefix="")
68
+
69
+ # Finally, rename temp to out
70
+ path_temp.rename(path_out)
71
+ if ret_path:
72
+ return path_out
73
+
74
+
75
+ def repack_parser():
76
+ descr = "Repack an .rtdc file. The difference to dclab-compress " \
77
+ + "is that no logs are added. Other logs can optionally be " \
78
+ + "stripped away. Repacking also gets rid of old clutter " \
79
+ + "data (e.g. previous metadata stored in the HDF5 file)."
80
+ parser = argparse.ArgumentParser(description=descr)
81
+ parser.add_argument('input', metavar="INPUT", type=str,
82
+ help='Input path (.rtdc file)')
83
+ parser.add_argument('output', metavar="OUTPUT", type=str,
84
+ help='Output path (.rtdc file)')
85
+ parser.add_argument('--strip-basins',
86
+ dest='strip_basins',
87
+ action='store_true',
88
+ help='Do not copy any basin information to the '
89
+ 'output file.')
90
+ parser.set_defaults(strip_basins=False)
91
+ parser.add_argument('--strip-logs',
92
+ dest='strip_logs',
93
+ action='store_true',
94
+ help='Do not copy any logs to the output file.')
95
+ parser.set_defaults(strip_logs=False)
96
+ parser.add_argument('--version', action='version',
97
+ version=f'dclab-repack {version}')
98
+ return parser