PyPI - dclab - Versions diffs - 0.67.0__cp314-cp314t-macosx_10_13_x86_64.whl - Mend

dclab 0.67.0__cp314-cp314t-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dclab might be problematic. Click here for more details.

Files changed (142) hide show

dclab/__init__.py +41 -0
dclab/_version.py +34 -0
dclab/cached.py +97 -0
dclab/cli/__init__.py +10 -0
dclab/cli/common.py +237 -0
dclab/cli/task_compress.py +126 -0
dclab/cli/task_condense.py +223 -0
dclab/cli/task_join.py +229 -0
dclab/cli/task_repack.py +98 -0
dclab/cli/task_split.py +154 -0
dclab/cli/task_tdms2rtdc.py +186 -0
dclab/cli/task_verify_dataset.py +75 -0
dclab/definitions/__init__.py +79 -0
dclab/definitions/feat_const.py +202 -0
dclab/definitions/feat_logic.py +182 -0
dclab/definitions/meta_const.py +252 -0
dclab/definitions/meta_logic.py +111 -0
dclab/definitions/meta_parse.py +94 -0
dclab/downsampling.cpython-314t-darwin.so +0 -0
dclab/downsampling.pyx +230 -0
dclab/external/__init__.py +4 -0
dclab/external/packaging/LICENSE +3 -0
dclab/external/packaging/LICENSE.APACHE +177 -0
dclab/external/packaging/LICENSE.BSD +23 -0
dclab/external/packaging/__init__.py +6 -0
dclab/external/packaging/_structures.py +61 -0
dclab/external/packaging/version.py +505 -0
dclab/external/skimage/LICENSE +28 -0
dclab/external/skimage/__init__.py +2 -0
dclab/external/skimage/_find_contours.py +216 -0
dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
dclab/external/skimage/_find_contours_cy.pyx +188 -0
dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
dclab/external/skimage/_pnpoly.pyx +99 -0
dclab/external/skimage/_shared/__init__.py +1 -0
dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
dclab/external/skimage/_shared/geometry.pxd +6 -0
dclab/external/skimage/_shared/geometry.pyx +55 -0
dclab/external/skimage/measure.py +7 -0
dclab/external/skimage/pnpoly.py +53 -0
dclab/external/statsmodels/LICENSE +35 -0
dclab/external/statsmodels/__init__.py +6 -0
dclab/external/statsmodels/nonparametric/__init__.py +1 -0
dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
dclab/external/statsmodels/nonparametric/kernels.py +36 -0
dclab/features/__init__.py +9 -0
dclab/features/bright.py +81 -0
dclab/features/bright_bc.py +93 -0
dclab/features/bright_perc.py +63 -0
dclab/features/contour.py +161 -0
dclab/features/emodulus/__init__.py +339 -0
dclab/features/emodulus/load.py +252 -0
dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
dclab/features/emodulus/pxcorr.py +135 -0
dclab/features/emodulus/scale_linear.py +247 -0
dclab/features/emodulus/viscosity.py +260 -0
dclab/features/fl_crosstalk.py +95 -0
dclab/features/inert_ratio.py +377 -0
dclab/features/volume.py +242 -0
dclab/http_utils.py +322 -0
dclab/isoelastics/__init__.py +468 -0
dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
dclab/kde/__init__.py +1 -0
dclab/kde/base.py +459 -0
dclab/kde/contours.py +222 -0
dclab/kde/methods.py +313 -0
dclab/kde_contours.py +10 -0
dclab/kde_methods.py +11 -0
dclab/lme4/__init__.py +5 -0
dclab/lme4/lme4_template.R +94 -0
dclab/lme4/rsetup.py +204 -0
dclab/lme4/wrapr.py +386 -0
dclab/polygon_filter.py +398 -0
dclab/rtdc_dataset/__init__.py +15 -0
dclab/rtdc_dataset/check.py +902 -0
dclab/rtdc_dataset/config.py +533 -0
dclab/rtdc_dataset/copier.py +353 -0
dclab/rtdc_dataset/core.py +896 -0
dclab/rtdc_dataset/export.py +867 -0
dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
dclab/rtdc_dataset/feat_basin.py +762 -0
dclab/rtdc_dataset/feat_temp.py +102 -0
dclab/rtdc_dataset/filter.py +263 -0
dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
dclab/rtdc_dataset/fmt_dict.py +103 -0
dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
dclab/rtdc_dataset/fmt_http.py +102 -0
dclab/rtdc_dataset/fmt_s3.py +354 -0
dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
dclab/rtdc_dataset/load.py +77 -0
dclab/rtdc_dataset/meta_table.py +25 -0
dclab/rtdc_dataset/writer.py +1019 -0
dclab/statistics.py +226 -0
dclab/util.py +176 -0
dclab/warn.py +15 -0
dclab-0.67.0.dist-info/METADATA +153 -0
dclab-0.67.0.dist-info/RECORD +142 -0
dclab-0.67.0.dist-info/WHEEL +6 -0
dclab-0.67.0.dist-info/entry_points.txt +8 -0
dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
dclab-0.67.0.dist-info/top_level.txt +1 -0

dclab/rtdc_dataset/feat_anc_core/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+from .ancillary_feature import AncillaryFeature  # noqa: F401
+from . import af_basic
+from . import af_emodulus
+from . import af_fl_max_ctc
+from . import af_image_contour
+from . import af_ml_class
+#: names of the features whose computation is fast
+FEATURES_RAPID = [
+    "area_ratio",
+    "area_um",
+    "aspect",
+    "deform",
+    "index",
+    "time",
+]
+af_basic.register()  # module level: each register() below runs on import
+af_emodulus.register()
+af_fl_max_ctc.register()
+af_image_contour.register()
+af_ml_class.register()

dclab/rtdc_dataset/feat_anc_core/af_basic.py ADDED Viewed

@@ -0,0 +1,75 @@
+import numpy as np
+from .ancillary_feature import AncillaryFeature
+def compute_area_ratio(mm):
+    valid = mm["area_msd"] != 0
+    out = np.nan * np.ones(len(mm), dtype=float)
+    return np.divide(mm["area_cvx"], mm["area_msd"], where=valid, out=out)
+def compute_area_um(mm):
+    pxs = mm.config["imaging"]["pixel size"]
+    return mm["area_cvx"] * pxs**2
+def compute_aspect(mm):
+    """Compute the aspect ratio of the bounding box
+    Notes
+    -----
+    If the cell is elongated along the channel, i.e.
+    `size_x` is larger than `size_y`, then the aspect
+    ratio is larger than 1.
+    """
+    out = np.nan * np.ones(len(mm), dtype=float)
+    valid = mm["size_y"] != 0
+    # parallel to flow, perpendicular to flow
+    return np.divide(mm["size_x"], mm["size_y"], where=valid, out=out)
+def compute_deform(mm):
+    return 1 - mm["circ"]
+def compute_index(mm):
+    return np.arange(1, len(mm)+1)
+def compute_time(mm):
+    fr = mm.config["imaging"]["frame rate"]
+    # Since version 0.47.8, we don't "normalize" the time anymore
+    # with the information from mm["frame"][0]. This is important
+    # for cases where it is important to know the time elapsed before
+    # the first event was recorded (issue #207).
+    return np.array(mm["frame"], dtype=float) / fr
+AncillaryFeature(feature_name="time",  # module level: runs on import
+                 method=compute_time,
+                 req_config=[["imaging", ["frame rate"]]],
+                 req_features=["frame"])
+AncillaryFeature(feature_name="index",  # module level; no requirements given
+                 method=compute_index)
+def register():
+    AncillaryFeature(feature_name="area_ratio",
+                     method=compute_area_ratio,
+                     req_features=["area_cvx", "area_msd"])
+    AncillaryFeature(feature_name="area_um",
+                     method=compute_area_um,
+                     req_config=[["imaging", ["pixel size"]]],
+                     req_features=["area_cvx"])
+    AncillaryFeature(feature_name="aspect",
+                     method=compute_aspect,
+                     req_features=["size_x", "size_y"])
+    AncillaryFeature(feature_name="deform",
+                     method=compute_deform,
+                     req_features=["circ"])

dclab/rtdc_dataset/feat_anc_core/af_emodulus.py ADDED Viewed

@@ -0,0 +1,160 @@
+import warnings
+from ... import features
+from .ancillary_feature import AncillaryFeature
+def compute_emodulus(mm):
+    """Wrapper function for computing the Young's modulus
+    Please take a look at the docs :ref:`sec_emodulus_usage`
+    for more details on the three cases A, B, and C.
+    There are also some sanity checks taking place here.
+    """
+    calccfg = mm.config["calculation"]
+    medium = calccfg.get("emodulus medium", "other").lower()
+    temperature = calccfg.get("emodulus temperature", None)
+    viscosity = calccfg.get("emodulus viscosity", None)
+    if viscosity is not None and medium == "other":
+        # sanity checks
+        if temperature is not None:
+            warnings.warn("The 'emodulus temperature' configuration key is "
+                          "ignored if the 'emodulus viscosity' key is set!")
+        # Case B from the docs
+        return compute_emodulus_visc_only(mm)
+    else:
+        # sanity checks
+        if not isinstance(medium, str):
+            raise ValueError(
+                f"'emodulus medium' must be a string, got '{medium}'!")
+        if medium not in features.emodulus.viscosity.KNOWN_MEDIA:
+            raise ValueError(
+                f"Only the following media are supported: "
+                f"{features.emodulus.viscosity.KNOWN_MEDIA}, got '{medium}'!")
+        if viscosity is not None:
+            raise ValueError("You must not set the 'emodulus viscosity' "
+                             "configuration keyword for known media!")
+        # warnings
+        if "emodulus viscosity model" not in calccfg:
+            warnings.warn("Please specify the 'emodulus viscosity model' "
+                          "key in the 'calculation' config segion, falling "
+                          "back to 'herold-2017'!",
+                          DeprecationWarning)
+        # actual function calls
+        if temperature is not None:
+            # case C from the docs
+            temperature = mm.config["calculation"]["emodulus temperature"]
+            return compute_emodulus_known_media(mm, temperature=temperature)
+        elif "temp" in mm:
+            # case A from the docs
+            return compute_emodulus_known_media(mm, temperature=mm["temp"])
+def compute_emodulus_known_media(mm, temperature):
+    """Only use known media and one temperature for all"""
+    calccfg = mm.config["calculation"]
+    # compute elastic modulus
+    emod = features.emodulus.get_emodulus(
+        area_um=mm["area_um"],
+        deform=mm["deform"],
+        medium=calccfg["emodulus medium"],
+        channel_width=mm.config["setup"]["channel width"],
+        flow_rate=mm.config["setup"]["flow rate"],
+        px_um=mm.config["imaging"]["pixel size"],
+        temperature=temperature,
+        lut_data=calccfg["emodulus lut"],
+        visc_model=calccfg.get("emodulus viscosity model", "herold-2017"),
+    )
+    return emod
+def compute_emodulus_visc_only(mm):
+    """The user entered the viscosity directly"""
+    calccfg = mm.config["calculation"]
+    # compute elastic modulus
+    emod = features.emodulus.get_emodulus(
+        area_um=mm["area_um"],
+        deform=mm["deform"],
+        medium=calccfg["emodulus viscosity"],
+        channel_width=mm.config["setup"]["channel width"],
+        flow_rate=mm.config["setup"]["flow rate"],
+        px_um=mm.config["imaging"]["pixel size"],
+        temperature=None,
+        visc_model=None,
+        lut_data=calccfg["emodulus lut"],
+    )
+    return emod
+def is_channel(mm):
+    """Check whether the measurement was performed in the channel
+    If the chip region is not set, then it is assumed to be a
+    channel measurement (for backwards compatibility and user-
+    friendliness).
+    """
+    if "setup" in mm.config and "chip region" in mm.config["setup"]:
+        region = mm.config["setup"]["chip region"]
+        if region == "channel":
+            # measured in the channel
+            return True
+        else:
+            # measured in the reservoir
+            return False
+    else:
+        # This might be a testing dictionary or someone who is
+        # playing around with data. Avoid disappointments here.
+        return True
+def register():
+    # Please note that registering these things is a delicate business,
+    # because the priority has to be chosen carefully.
+    # Note that here we have not included the "emodulus viscosity model"
+    # configuration keyword. This is checked in the `compute_emodulus`
+    # method above and a deprecation warning is issued, so old code
+    # does not break immediately.
+    for pr, vm in [(1, ["emodulus viscosity model"]),
+                   (0, [])  # this is deprecated and should be removed!
+                   ]:
+        AncillaryFeature(feature_name="emodulus",
+                         method=compute_emodulus,
+                         data="case C",
+                         req_features=["area_um", "deform"],
+                         req_config=[["calculation", vm + [
+                                        "emodulus lut",
+                                        "emodulus medium",
+                                        "emodulus temperature"]],
+                                     ["imaging", ["pixel size"]],
+                                     ["setup", ["flow rate", "channel width"]]
+                                     ],
+                         req_func=is_channel,
+                         priority=4 + pr)
+        AncillaryFeature(feature_name="emodulus",
+                         data="case A",
+                         method=compute_emodulus,
+                         req_features=["area_um", "deform", "temp"],
+                         req_config=[["calculation", vm + [
+                                        "emodulus lut",
+                                        "emodulus medium"]],
+                                     ["imaging", ["pixel size"]],
+                                     ["setup", ["flow rate", "channel width"]]
+                                     ],
+                         req_func=is_channel,
+                         priority=0 + pr)
+    AncillaryFeature(feature_name="emodulus",
+                     data="case B",
+                     method=compute_emodulus,
+                     req_features=["area_um", "deform"],
+                     req_config=[["calculation", vm + [
+                                    "emodulus lut",
+                                    "emodulus viscosity"]],
+                                 ["imaging", ["pixel size"]],
+                                 ["setup", ["flow rate", "channel width"]]
+                                 ],
+                     req_func=is_channel,
+                     priority=2)

dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py ADDED Viewed

@@ -0,0 +1,133 @@
+from ... import features
+from .ancillary_feature import AncillaryFeature
+class MissingCrosstalkMatrixElementsError(BaseException):
+    pass
+def compute_ctc(mm, fl_channel):
+    if "fl1_max" in mm:
+        fl1 = mm["fl1_max"]
+    else:
+        fl1 = 0
+    if "fl2_max" in mm:
+        fl2 = mm["fl2_max"]
+    else:
+        fl2 = 0
+    if "fl3_max" in mm:
+        fl3 = mm["fl3_max"]
+    else:
+        fl3 = 0
+    ctdict = {}
+    for i in [1, 2, 3]:
+        for j in [1, 2, 3]:
+            if i == j:
+                continue
+            key = "crosstalk fl{}{}".format(i, j)
+            par = "ct{}{}".format(i, j)
+            if key in mm.config["calculation"]:
+                ctdict[par] = mm.config["calculation"][key]
+    if ("fl1_max" in mm and
+        "fl2_max" in mm and
+        "fl3_max" in mm and
+        ("ct12" not in ctdict or
+         "ct13" not in ctdict or
+         "ct21" not in ctdict or
+         "ct23" not in ctdict or
+         "ct31" not in ctdict or
+         "ct32" not in ctdict)):
+        msg = "{}, has fl1_max, fl2_max, and fl3_max,".format(mm) \
+              + " but not all crosstalk matrix elements are" \
+              + " defined in the 'calculation' configuration section."
+        raise MissingCrosstalkMatrixElementsError(msg)
+    return features.fl_crosstalk.correct_crosstalk(
+        fl1=fl1,
+        fl2=fl2,
+        fl3=fl3,
+        fl_channel=fl_channel,
+        **ctdict)
+def compute_ctc1(mm):
+    return compute_ctc(mm, fl_channel=1)
+def compute_ctc2(mm):
+    return compute_ctc(mm, fl_channel=2)
+def compute_ctc3(mm):
+    return compute_ctc(mm, fl_channel=3)
+def get_method(fl_channel):
+    if fl_channel == 1:
+        return compute_ctc1
+    elif fl_channel == 2:
+        return compute_ctc2
+    elif fl_channel == 3:
+        return compute_ctc3
+def register():
+    opts_all = (["fl1_max",
+                 "fl2_max",
+                 "fl3_max"],
+                ["crosstalk fl21",
+                 "crosstalk fl31",
+                 "crosstalk fl12",
+                 "crosstalk fl32",
+                 "crosstalk fl13",
+                 "crosstalk fl23"])
+    opts_12 = (["fl1_max",
+                "fl2_max"],
+               ["crosstalk fl21",
+                "crosstalk fl12"])
+    opts_13 = (["fl1_max",
+                "fl3_max"],
+               ["crosstalk fl31",
+                "crosstalk fl13"])
+    opts_23 = (["fl2_max",
+                "fl3_max"],
+               ["crosstalk fl32",
+                "crosstalk fl23"])
+    for flch in [1, 2, 3]:
+        AncillaryFeature(feature_name="fl{}_max_ctc".format(flch),
+                         method=get_method(flch),
+                         req_features=opts_all[0],
+                         req_config=[["calculation", opts_all[1]]],
+                         priority=1)
+    for flch in [1, 2]:
+        AncillaryFeature(feature_name="fl{}_max_ctc".format(flch),
+                         method=get_method(flch),
+                         req_features=opts_12[0],
+                         req_config=[["calculation", opts_12[1]]],
+                         priority=0)
+    for flch in [1, 3]:
+        AncillaryFeature(feature_name="fl{}_max_ctc".format(flch),
+                         method=get_method(flch),
+                         req_features=opts_13[0],
+                         req_config=[["calculation", opts_13[1]]],
+                         priority=0)
+    for flch in [2, 3]:
+        AncillaryFeature(feature_name="fl{}_max_ctc".format(flch),
+                         method=get_method(flch),
+                         req_features=opts_23[0],
+                         req_config=[["calculation", opts_23[1]]],
+                         priority=0)

dclab/rtdc_dataset/feat_anc_core/af_image_contour.py ADDED Viewed

@@ -0,0 +1,113 @@
+from ... import features
+from .ancillary_feature import AncillaryFeature
+def compute_contour(mm):
+    cont = features.contour.get_contour_lazily(mask=mm["mask"])
+    return cont
+def compute_bright(mm):
+    bavg, bsd = features.bright.get_bright(
+        mask=mm["mask"],
+        image=mm["image"],
+        ret_data="avg,sd",
+        )
+    return {"bright_avg": bavg, "bright_sd": bsd}
+def compute_bright_bc(mm):
+    bavg, bsd = features.bright_bc.get_bright_bc(
+        mask=mm["mask"],
+        image=mm["image"],
+        image_bg=mm["image_bg"],
+        bg_off=mm["bg_off"] if "bg_off" in mm else None,
+        ret_data="avg,sd",
+        )
+    return {"bright_bc_avg": bavg, "bright_bc_sd": bsd}
+def compute_bright_perc(mm):
+    p10, p90 = features.bright_perc.get_bright_perc(
+        mask=mm["mask"],
+        image=mm["image"],
+        image_bg=mm["image_bg"],
+        bg_off=mm["bg_off"] if "bg_off" in mm else None,
+        )
+    return {"bright_perc_10": p10, "bright_perc_90": p90}
+def compute_inert_ratio_cvx(mm):
+    return features.inert_ratio.get_inert_ratio_cvx(cont=mm["contour"])
+def compute_inert_ratio_prnc(mm):
+    return features.inert_ratio.get_inert_ratio_prnc(cont=mm["contour"])
+def compute_inert_ratio_raw(mm):
+    return features.inert_ratio.get_inert_ratio_raw(cont=mm["contour"])
+def compute_tilt(mm):
+    return features.inert_ratio.get_tilt(cont=mm["contour"])
+def compute_volume(mm):
+    vol = features.volume.get_volume(
+        cont=mm["contour"],
+        pos_x=mm["pos_x"],
+        pos_y=mm["pos_y"],
+        pix=mm.config["imaging"]["pixel size"])
+    return vol
+def register():
+    AncillaryFeature(feature_name="contour",
+                     method=compute_contour,
+                     req_features=["mask"])
+    AncillaryFeature(feature_name="bright_avg",
+                     method=compute_bright,
+                     req_features=["image", "mask"])
+    AncillaryFeature(feature_name="bright_sd",
+                     method=compute_bright,
+                     req_features=["image", "mask"])
+    AncillaryFeature(feature_name="bright_bc_avg",
+                     method=compute_bright_bc,
+                     req_features=["image", "image_bg", "mask"])
+    AncillaryFeature(feature_name="bright_bc_sd",
+                     method=compute_bright_bc,
+                     req_features=["image", "image_bg", "mask"])
+    AncillaryFeature(feature_name="bright_perc_10",
+                     method=compute_bright_perc,
+                     req_features=["image", "image_bg", "mask"])
+    AncillaryFeature(feature_name="bright_perc_90",
+                     method=compute_bright_perc,
+                     req_features=["image", "image_bg", "mask"])
+    AncillaryFeature(feature_name="inert_ratio_cvx",
+                     method=compute_inert_ratio_cvx,
+                     req_features=["contour"])
+    AncillaryFeature(feature_name="inert_ratio_prnc",
+                     method=compute_inert_ratio_prnc,
+                     req_features=["contour"])
+    AncillaryFeature(feature_name="inert_ratio_raw",
+                     method=compute_inert_ratio_raw,
+                     req_features=["contour"])
+    AncillaryFeature(feature_name="tilt",
+                     method=compute_tilt,
+                     req_features=["contour"])
+    AncillaryFeature(feature_name="volume",
+                     method=compute_volume,
+                     req_features=["contour", "pos_x", "pos_y"],
+                     req_config=[["imaging", ["pixel size"]]])

dclab/rtdc_dataset/feat_anc_core/af_ml_class.py ADDED Viewed

@@ -0,0 +1,102 @@
+import numpy as np
+from .ancillary_feature import AncillaryFeature
+def get_ml_score_names(mm):
+    """Return a list of all ml_score_??? features"""
+    feats = []
+    # We cannot loop over mm.features because of infinite recursions
+    for ft in mm._feature_candidates:
+        if ft.startswith("ml_score_") and ft in mm:
+            feats.append(ft)
+    return sorted(feats)
+def compute_ml_class(mm, sanity_checks=True):
+    """Compute the most-probable class membership for all events
+    Parameters
+    ----------
+    mm: .RTDCBase
+        instance with the `ml_score_???` features
+    sanity_checks: bool
+        set to `False` to not perform sanity checks (checks whether
+        the scores are between 0 and 1)
+    Returns
+    -------
+    ml_class: 1D ndarray
+        The most-probable class for each event in `mm`. If no class
+        can be attributed to an event (because the scores are all
+        `np.nan` or `0` for that event), the class `-1` is used.
+    Notes
+    -----
+    I initially thought about also checking whether each feature
+    sums to one, but discarded the idea. Let's assume that a classifier
+    does an awful classification and classifies all events in
+    the same way. If the dataset is cropped at some point (e.g.
+    debris or other events), then this bad classifier has an
+    increased probability compared to another classifier which is
+    perfect at picking out one population. The ml_score values
+    should be just in the range of [0, 1]. This also simplifies
+    export to hdf5 and the work with hierarchy children.
+    """
+    feats = get_ml_score_names(mm)
+    # the score matrix
+    score_matrix = np.zeros((len(mm), len(feats)), dtype=float)
+    for ii, ft in enumerate(feats):
+        if sanity_checks:
+            if np.nanmax(mm[ft]) > 1:
+                raise ValueError("Feature '{}' has values > 1!".format(ft))
+            elif np.nanmin(mm[ft]) < 0:
+                raise ValueError("Feature '{}' has values < 0!".format(ft))
+        score_matrix[:, ii] = mm[ft]
+    # Now compute the maximum for each event. The initial idea was to just
+    # use `ml_class = np.nanargmax(score_matrix, axis=1)`. However, here we
+    # run into these problems:
+    # 1. This does not handle All-NaN slices, e.g. all features are `np.nan`
+    #    for an event.
+    # 2. This does not properly handle manually-rated, zero-valued features,
+    #    e.g. in a situation where we have two features, one with `np.nan`
+    #    and one with `0`, we cannot assign the event to either of the two
+    #    classes.
+    # 3. There is no "unclassified" class (this also becomes apparent in
+    #    point 2). We will set all events that cannot be attributed to a
+    #    class to `-1` in `ml_class`.
+    # Define unusable entries:
+    unusable = np.logical_or(np.isnan(score_matrix), (score_matrix == 0))
+    where_idx_nan = np.sum(~unusable, axis=1) == 0
+    score_matrix[where_idx_nan, :] = -1
+    ml_class = np.nanargmax(score_matrix, axis=1)
+    ml_class[where_idx_nan] = -1
+    return ml_class
+def has_ml_scores(mm):
+    """Check whether the dataset has ml_scores defined"""
+    # Return the sorted score names plus Ancillary feature hashes.
+    # This will be used to determine the hash of the ml_class feature,
+    # which is important in case the user replaces an ML feature
+    # with a new one.
+    features = get_ml_score_names(mm)
+    idlist = []
+    for feat in features:
+        # We also hash any other AncillaryFeature that might implement
+        # this ML score. But this use case is basically non-existent and
+        # the performance impact is probably negligible.
+        candidates = AncillaryFeature.get_instances(feat)
+        idlist.append((feat, [c.hash(mm) for c in candidates]))
+    return idlist
+def register():
+    AncillaryFeature(feature_name="ml_class",
+                     method=compute_ml_class,
+                     req_func=has_ml_scores,
+                     )