dclab 0.62.10__cp39-cp39-macosx_11_0_arm64.whl → 2.18.0__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the changes between package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of dclab might be problematic.

dclab/lme4/wrapr.py CHANGED
@@ -1,19 +1,18 @@
  """R lme4 wrapper"""
- import logging
  import numbers
- import pathlib
- import tempfile
+ import warnings

- import importlib_resources
  import numpy as np

  from .. import definitions as dfn
  from ..rtdc_dataset.core import RTDCBase

+ from .rlibs import rpy2
  from . import rsetup


- logger = logging.getLogger(__name__)
+ class Lme4InstallWarning(UserWarning):
+     pass


  class Rlme4(object):
@@ -39,12 +38,19 @@ class Rlme4(object):
          #: list of [RTDCBase, column, repetition, chip_region]
          self.data = []

+         #: model function
+         self.r_func_model = "feature ~ group + (1 + group | repetition)"
+         #: null model function
+         self.r_func_nullmodel = "feature ~ (1 + group | repetition)"
+
          self.set_options(model=model, feature=feature)

          # Make sure that lme4 is available
          if not rsetup.has_lme4():
-             logger.info("Installing lme4, this may take a while!")
-             rsetup.require_lme4()
+             warnings.warn("Installing lme4, this may take a while!",
+                           Lme4InstallWarning)
+             rsetup.install_lme4()
+             rsetup.import_lme4()

      def add_dataset(self, ds, group, repetition):
          """Add a dataset to the analysis list
@@ -61,8 +67,8 @@ class Rlme4(object):

          Notes
          -----
-         - For each repetition, there must be a "treatment" (``1``) and a
-           "control" (``0``) group.
+         - For each repetition, there must be a "treatment" and a
+           "control" ``group``.
          - If you would like to perform a differential feature analysis,
            then you need to pass at least a reservoir and a channel
            dataset (with same parameters for `group` and `repetition`).
@@ -96,10 +102,10 @@ class Rlme4(object):
          The response variable is modeled using two linear mixed effect
          models:

-         - model: "feature ~ group + (1 + group | repetition)"
-           (random intercept + random slope model)
-         - the null model: "feature ~ (1 + group | repetition)"
-           (without the fixed effect introduced by the "treatment" group).
+         - model :const:`Rlme4.r_func_model` (random intercept +
+           random slope model)
+         - the null model :const:`Rlme4.r_func_nullmodel` (without
+           the fixed effect introduced by the "treatment" group).

          Both models are compared in R using "anova" (from the
          R-package "stats" :cite:`Everitt1992`) which performs a
@@ -127,16 +133,16 @@ class Rlme4(object):
          results: dict
              Dictionary with the results of the fitting process:

-             - "anova p-value": Anova likelihood ratio test (significance)
+             - "anova p-value": Anova likelyhood ratio test (significance)
              - "feature": name of the feature used for the analysis
                ``self.feature``
              - "fixed effects intercept": Mean of ``self.feature`` for all
                controls; In the case of the "glmer+loglink" model, the intercept
-               is already back transformed from log space.
+               is already backtransformed from log space.
              - "fixed effects treatment": The fixed effect size between the mean
                of the controls and the mean of the treatments relative to
                "fixed effects intercept"; In the case of the "glmer+loglink"
-               model, the fixed effect is already back transformed from log
+               model, the fixed effect is already backtransformed from log
                space.
              - "fixed effects repetitions": The effects (intercept and
                treatment) for each repetition. The first axis defines
@@ -153,10 +159,11 @@ class Rlme4(object):
              - "model": model name used for the analysis ``self.model``
              - "model converged": boolean indicating whether the model
                converged
-             - "r model summary": Summary of the model
-             - "r model coefficients": Model coefficient table
-             - "r script": the R script used
-             - "r output": full output of the R script
+             - "r anova": Anova model (exposed from R)
+             - "r model summary": Summary of the model (exposed from R)
+             - "r model coefficients": Model coefficient table (exposed from R)
+             - "r stderr": errors and warnings from R
+             - "r stdout": standard output from R
          """
          self.set_options(model=model, feature=feature)
          self.check_data()
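
For orientation, a typical call sequence against this class, reconstructed from the docstrings in this diff (file names are placeholders; the constructor keywords are inferred from the ``set_options(model=..., feature=...)`` call in ``__init__``):

    import dclab
    from dclab.lme4.wrapr import Rlme4  # path assumed from diff

    rlme4 = Rlme4(model="lmer", feature="deform")
    # one control/treatment pair per repetition (paths are placeholders)
    rlme4.add_dataset(dclab.new_dataset("rep1_ctl.rtdc"),
                      group="control", repetition=1)
    rlme4.add_dataset(dclab.new_dataset("rep1_trt.rtdc"),
                      group="treatment", repetition=1)
    result = rlme4.fit()
    print(result["anova p-value"], result["model converged"])
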
@@ -175,38 +182,105 @@ class Rlme4(object):
              groups.append(dd[1])
              repetitions.append(dd[2])

-         # concatenate and populate arrays for R
-         features_c = np.concatenate(features)
-         groups_c = np.zeros(len(features_c), dtype=str)
-         repetitions_c = np.zeros(len(features_c), dtype=int)
-         pos = 0
-         for ii in range(len(features)):
-             size = len(features[ii])
-             groups_c[pos:pos+size] = groups[ii][0]
-             repetitions_c[pos:pos+size] = repetitions[ii]
-             pos += size
-
-         # Run R with the given template script
-         rscript = importlib_resources.read_text("dclab.lme4",
-                                                 "lme4_template.R")
-         _, script_path = tempfile.mkstemp(prefix="dclab_lme4_", suffix=".R",
-                                           text=True)
-         script_path = pathlib.Path(script_path)
-         rscript = rscript.replace("<MODEL_NAME>", self.model)
-         rscript = rscript.replace("<FEATURES>", arr2str(features_c))
-         rscript = rscript.replace("<REPETITIONS>", arr2str(repetitions_c))
-         rscript = rscript.replace("<GROUPS>", arr2str(groups_c))
-         script_path.write_text(rscript, encoding="utf-8")
-
-         result = rsetup.run_command((rsetup.get_r_script_path(), script_path))
-
-         ret_dict = self.parse_result(result)
-         ret_dict["is differential"] = self.is_differential()
-         ret_dict["feature"] = self.feature
-         ret_dict["r script"] = rscript
-         ret_dict["r output"] = result
-         assert ret_dict["model"] == self.model
-
+         # Fire up R
+         with rsetup.AutoRConsole() as ac:
+             r = rpy2.robjects.r
+
+             # Load lme4
+             rpy2.robjects.packages.importr("lme4")
+
+             # Concatenate huge arrays for R
+             r_features = rpy2.robjects.FloatVector(np.concatenate(features))
+             _groups = []
+             _repets = []
+             for ii in range(len(features)):
+                 _groups.append(np.repeat(groups[ii], len(features[ii])))
+                 _repets.append(np.repeat(repetitions[ii], len(features[ii])))
+             r_groups = rpy2.robjects.StrVector(np.concatenate(_groups))
+             r_repetitions = rpy2.robjects.IntVector(np.concatenate(_repets))
+
+             # Register groups and repetitions
+             rpy2.robjects.globalenv["feature"] = r_features
+             rpy2.robjects.globalenv["group"] = r_groups
+             rpy2.robjects.globalenv["repetition"] = r_repetitions
+
+             # Create a dataframe which contains all the data
+             r_data = r["data.frame"](r_features, r_groups, r_repetitions)
+
+             # Random intercept and random slope model
+             if self.model == 'glmer+loglink':
+                 r_model = r["glmer"](self.r_func_model, r_data,
+                                      family=r["Gamma"](link='log'))
+                 r_nullmodel = r["glmer"](self.r_func_nullmodel, r_data,
+                                          family=r["Gamma"](link='log'))
+             else:  # lmer
+                 r_model = r["lmer"](self.r_func_model, r_data)
+                 r_nullmodel = r["lmer"](self.r_func_nullmodel, r_data)
+
+             # Anova analysis (increase verbosity by making models global)
+             # Using anova is a very conservative way of determining
+             # p values.
+             rpy2.robjects.globalenv["Model"] = r_model
+             rpy2.robjects.globalenv["NullModel"] = r_nullmodel
+             r_anova = r("anova(Model, NullModel)")
+             try:
+                 pvalue = r_anova.rx2["Pr(>Chisq)"][1]
+             except ValueError:  # rpy2 2.9.4
+                 pvalue = r_anova[7][1]
+             r_model_summary = r["summary"](r_model)
+             r_model_coefficients = r["coef"](r_model)
+             try:
+                 fe_reps = np.array(r_model_coefficients.rx2["repetition"])
+             except ValueError:  # rpy2 2.9.4
+                 fe_reps = np.concatenate((
+                     np.array(r_model_coefficients[0][0]).reshape(1, -1),
+                     np.array(r_model_coefficients[0][1]).reshape(1, -1)),
+                     axis=0)
+
+             r_effects = r["data.frame"](r["coef"](r_model_summary))
+             try:
+                 fe_icept = r_effects.rx2["Estimate"][0]
+                 fe_treat = r_effects.rx2["Estimate"][1]
+             except ValueError:  # rpy2 2.9.4
+                 fe_icept = r_effects[0][0]
+                 fe_treat = r_effects[0][1]
+             if self.model == "glmer+loglink":
+                 # transform back from log
+                 fe_treat = np.exp(fe_icept + fe_treat) - np.exp(fe_icept)
+                 fe_icept = np.exp(fe_icept)
+                 fe_reps[:, 1] = np.exp(fe_reps[:, 0] + fe_reps[:, 1]) \
+                     - np.exp(fe_reps[:, 0])
+                 fe_reps[:, 0] = np.exp(fe_reps[:, 0])
+
+             # convergence
+             try:
+                 lme4l = r_model_summary.rx2["optinfo"].rx2["conv"].rx2["lme4"]
+             except ValueError:  # rpy2 2.9.4
+                 lme4l = r_model_summary[17][3][1]
+
+             if lme4l and "code" in lme4l.names:
+                 try:
+                     conv_code = lme4l.rx2["code"]
+                 except ValueError:  # rpy2 2.9.4
+                     conv_code = lme4l[0]
+             else:
+                 conv_code = 0
+
+             ret_dict = {
+                 "anova p-value": pvalue,
+                 "feature": self.feature,
+                 "fixed effects intercept": fe_icept,
+                 "fixed effects treatment": fe_treat,  # aka "fixed effect"
+                 "fixed effects repetitions": fe_reps,
+                 "is differential": self.is_differential(),
+                 "model": self.model,
+                 "model converged": conv_code == 0,
+                 "r anova": r_anova,
+                 "r model summary": r_model_summary,
+                 "r model coefficients": r_model_coefficients,
+                 "r stderr": ac.get_warnerrors(),
+                 "r stdout": ac.get_prints(),
+             }
          return ret_dict

      def get_differential_dataset(self):
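
The repeated try/except ValueError blocks in the new fit() above exist only to support both modern rpy2 (named ``.rx2`` access) and rpy2 2.9.4 (positional access). If that pattern keeps growing, it could be factored into a small helper; a hypothetical sketch, not part of this diff:

    def rx2_get(robj, name, index):
        # Fetch a named component of an R object via rpy2, falling back
        # to positional access for old rpy2 versions (hypothetical helper).
        try:
            return robj.rx2[name]
        except ValueError:  # rpy2 2.9.4
            return robj[index]

    # e.g.: pvalue = rx2_get(r_anova, "Pr(>Chisq)", 7)[1]
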
@@ -214,7 +288,7 @@ class Rlme4(object):

          The most famous use case is differential deformation. The idea
          is that you cannot tell what the difference in deformation
-         from channel to reservoir, because you never measure the
+         from channel to reservoir is, because you never measure the
          same object in the reservoir and the channel. You usually just
          have two distributions. Comparing distributions is possible
          via bootstrapping. And then, instead of running the lme4
@@ -288,34 +362,6 @@ class Rlme4(object):
          else:
              return False

-     def parse_result(self, result):
-         resd = result.split("OUTPUT")
-         ret_dict = {}
-         for item in resd:
-             string = item.split("#*#")[0]
-             key, value = string.split(":", 1)
-             key = key.strip()
-             value = value.strip().replace("\n\n", "\n")
-
-             if key == "fixed effects repetitions":
-                 rows = value.split("\n")[1:]
-                 reps = []
-                 for row in rows:
-                     reps.append([float(vv) for vv in row.split()[1:]])
-                 value = np.array(reps).transpose()
-             elif key == "model converged":
-                 value = value == "TRUE"
-             elif value == "NA":
-                 value = np.nan
-             else:
-                 try:
-                     value = float(value)
-                 except ValueError:
-                     pass
-
-             ret_dict[key] = value
-         return ret_dict
-
      def set_options(self, model=None, feature=None):
          """Set analysis options"""
          if model is not None:
@@ -326,16 +372,6 @@ class Rlme4(object):
              self.feature = feature


- def arr2str(a):
-     """Convert an array to a string"""
-     if isinstance(a.dtype.type, np.integer):
-         return ",".join(str(dd) for dd in a.tolist())
-     elif a.dtype.type == np.str_:
-         return ",".join(f"'{dd}'" for dd in a.tolist())
-     else:
-         return ",".join(f"{dd:.16g}" for dd in a.tolist())
-
-
  def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
      """Compute the bootstrapped distributions for two arrays.
@@ -345,7 +381,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
          Input data
      bs_iter: int
          Number of bootstrapping iterations to perform
-         (output size).
+         (outtput size).
      rs: int
          Random state seed for random number generator
@@ -360,7 +396,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):

      Notes
      -----
-     From a programmatic point of view, it would have been better
+     From a programmatical point of view, it would have been better
      to implement this method for just one input array (because of
      redundant code). However, due to historical reasons (testing
      and comparability to Shape-Out 1), bootstrapping is done
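
The Notes section above describes bootstrapping the median for each input array. A standalone sketch of that idea (simplified; not the dclab implementation, which handles two arrays with a shared random state):

    import numpy as np

    def bootstrap_medians(a, bs_iter=1000, rs=117):
        # Resample `a` with replacement bs_iter times and record each
        # resample's median; the result approximates the sampling
        # distribution of the median.
        rng = np.random.RandomState(rs)
        a = np.asarray(a)
        return np.array([
            np.median(rng.choice(a, size=len(a), replace=True))
            for _ in range(bs_iter)
        ])
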
dclab/rtdc_dataset/check.py CHANGED
@@ -8,7 +8,6 @@ import numpy as np

  from .copier import is_properly_compressed
  from .core import RTDCBase
- from .fmt_hdf5 import RTDC_HDF5
  from .fmt_hierarchy import RTDC_Hierarchy
  from .load import load_file

@@ -193,13 +192,14 @@ class IntegrityChecker(object):
          else:
              with warnings.catch_warnings(record=True) as ws:
                  warnings.simplefilter("always")
-                 self.ds = load_file(path_or_ds, enable_basins=False)
+                 self.ds = load_file(path_or_ds)
                  for ww in ws:
                      self.warn_cues.append(ICue(
                          msg=f"{ww.category.__name__}: {ww.message}",
                          level="alert",
                          category="warning"))
              self.finally_close = True
+         np.max(self.ds["index"])

      def __enter__(self):
          return self
@@ -260,24 +260,13 @@ class IntegrityChecker(object):
                              level="alert",
                              category="basin data",
                          ))
-                 else:
-                     if "basin_events" not in self.ds.h5file:
+                 for feat in bn["features"]:
+                     if feat not in self.ds.h5file[bpaths[0]]:
                          cues.append(
-                             ICue(msg="Missing internal basin group "
-                                      "'basin_events', although an internal "
-                                      "basin is defined",
+                             ICue(msg=f"Missing internal basin feature {feat}",
                                   level="violation",
                                   category="basin data",
                                   ))
-                     else:
-                         for feat in bn["features"]:
-                             if feat not in self.ds.h5file["basin_events"]:
-                                 cues.append(
-                                     ICue(msg=f"Missing internal basin "
-                                              f"feature {feat}",
-                                          level="violation",
-                                          category="basin data",
-                                          ))
          return cues

      def check_compression(self, **kwargs):
@@ -330,32 +319,8 @@ class IntegrityChecker(object):
                          data=data))
          return cues

-     def check_empty(self, **kwargs):
-         """The dataset should contain events"""
-         cues = []
-         lends = len(self.ds)
-         if lends == 0:
-             cues.append(ICue(
-                 msg="The dataset does not contain any events",
-                 level="alert",
-                 category="feature data"))
-         return cues
-
-     def check_external_links(self, **kwargs):
-         """An HDF5 dataset should not contain external links"""
-         cues = []
-         if isinstance(self.ds, RTDC_HDF5):
-             has_external, h5object = hdf5_has_external(self.ds.h5file)
-             if has_external:
-                 cues.append(ICue(
-                     msg=f"The HDF5 file contains at least one external "
-                         f"link: '{h5object}'",
-                     level="violation",
-                     category="format HDF5"))
-         return cues
-
      def check_feat_index(self, **kwargs):
-         """The index of the dataset should be monotonous"""
+         """Up until"""
          cues = []
          lends = len(self.ds)
          if "index" in self.ds:
@@ -867,36 +832,3 @@ def check_dataset(path_or_ds):
          elif cue.level == "violation":
              viol.append(cue.msg)
      return sorted(viol), sorted(aler), sorted(info)
-
-
- def hdf5_has_external(h5):
-     """Check recursively, whether an h5py object contains external data
-
-     External data includes binary data in external files, virtual
-     datasets, and external links.
-
-     Returns a tuple of either
-
-     - `(True, path_ext)` if the object contains external data
-     - `(False, None)` if this is not the case
-
-     where `path_ext` is the path to the group or dataset in `h5`.
-
-     .. versionadded:: 0.62.0
-
-     """
-     for key in h5:
-         obj = h5[key]
-         if (obj.file != h5.file  # not in same file
-                 or (isinstance(obj, h5py.Dataset)
-                     and (obj.is_virtual  # virtual dataset
-                          or obj.external))):  # external dataset
-             # These are external data
-             return True, f"{h5.name}/{key}".replace("//", "/")
-         elif isinstance(obj, h5py.Group):
-             # Perform recursive check for external data
-             has_ext, path_ext = hdf5_has_external(obj)
-             if has_ext:
-                 return True, path_ext
-     else:
-         return False, None
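
The check_dataset() function whose tail is shown above returns three sorted lists of messages. A usage sketch (module path assumed from the file this diff touches; the file name is a placeholder):

    from dclab.rtdc_dataset.check import check_dataset  # path assumed

    violations, alerts, info = check_dataset("measurement.rtdc")
    for msg in violations:
        print("VIOLATION:", msg)
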
dclab/rtdc_dataset/copier.py CHANGED
@@ -1,7 +1,6 @@
  """Helper methods for copying .rtdc data"""
  from __future__ import annotations

- import json
  import re
  from typing import List, Literal

@@ -11,10 +10,8 @@ import hdf5plugin
  import numpy as np

  from ..definitions import feature_exists, scalar_feature_exists
- from ..util import hashobj

- from .fmt_hdf5 import DEFECTIVE_FEATURES, RTDC_HDF5
- from .writer import RTDCWriter
+ from .fmt_hdf5 import DEFECTIVE_FEATURES


  def rtdc_copy(src_h5file: h5py.Group,
@@ -47,7 +44,8 @@ def rtdc_copy(src_h5file: h5py.Group,
          Add this prefix to the name of the logs and tables in `dst_h5file`.
      """
      # metadata
-     dst_h5file.attrs.update(src_h5file.attrs)
+     for akey in src_h5file.attrs:
+         dst_h5file.attrs[akey] = src_h5file.attrs[akey]

      # events in source file
      if "events" in src_h5file:
@@ -59,6 +57,19 @@ def rtdc_copy(src_h5file: h5py.Group,
          events_src += list(src_h5file["basin_events"].keys())
      events_src = sorted(set(events_src))

+     # basins
+     if include_basins and "basins" in src_h5file:
+         dst_h5file.require_group("basins")
+         for b_key in src_h5file["basins"]:
+             if b_key in dst_h5file["basins"]:
+                 # This basin already exists.
+                 continue
+             h5ds_copy(src_loc=src_h5file["basins"],
+                       src_name=b_key,
+                       dst_loc=dst_h5file["basins"],
+                       dst_name=b_key,
+                       recursive=False)
+
      # logs
      if include_logs and "logs" in src_h5file:
          dst_h5file.require_group("logs")
@@ -83,12 +94,11 @@ def rtdc_copy(src_h5file: h5py.Group,
          #           dst_loc=dst_h5file["tables"],
          #           dst_name=meta_prefix + tkey,
          #           recursive=False)
-         copy_table = dst_h5file["tables"].create_dataset(
+         dst_h5file["tables"].create_dataset(
              name=tkey,
              data=src_h5file["tables"][tkey][:],
              fletcher32=True,
              **hdf5plugin.Zstd(clevel=5))
-         copy_table.attrs.update(src_h5file["tables"][tkey].attrs)

      # events
      if isinstance(features, list):
@@ -120,12 +130,6 @@ def rtdc_copy(src_h5file: h5py.Group,
          if feat in feature_iter:
              feature_iter.remove(feat)

-     # copy basin definitions
-     if include_basins and "basins" in src_h5file:
-         basin_definition_copy(src_h5file=src_h5file,
-                               dst_h5file=dst_h5file,
-                               features_iter=feature_iter)
-
      if feature_iter:
          dst_h5file.require_group("events")
          for feat in feature_iter:
@@ -166,65 +170,6 @@ def rtdc_copy(src_h5file: h5py.Group,
          )


- def basin_definition_copy(src_h5file, dst_h5file, features_iter):
-     """Copy basin definitions `src_h5file["basins"]` to the new file
-
-     Normally, we would just use :func:`h5ds_copy` to copy basins from
-     one dataset to another. However, if we are e.g. only copying scalar
-     features, and there are non-scalar features in the internal basin,
-     then we must rewrite the basin definition of the internal basin.
-
-     The `features_iter` list of features defines which features are
-     relevant for the internal basin.
-     """
-     dst_h5file.require_group("basins")
-     # Load the basin information
-     basin_dicts = RTDC_HDF5.basin_get_dicts_from_h5file(src_h5file)
-     for bn in basin_dicts:
-         b_key = bn["key"]
-
-         if b_key in dst_h5file["basins"]:
-             # already stored therein
-             continue
-
-         # sanity check
-         if b_key not in src_h5file["basins"]:
-             raise ValueError(
-                 f"Failed to parse basin information correctly. Source file "
-                 f"{src_h5file} does not contain basin {b_key} which I got "
-                 f"from `RTDC_HDF5.basin_get_dicts_from_h5file`.")
-
-         if bn["type"] == "internal":
-             # Make sure we define the internal features selected
-             feat_used = [f for f in bn["features"] if f in features_iter]
-             if len(feat_used) == 0:
-                 # We don't have any internal features, don't write anything
-                 continue
-             elif feat_used != bn["features"]:
-                 bn["features"] = feat_used
-                 rewrite = True
-             else:
-                 rewrite = False
-         else:
-             # We do not have an internal basin, just copy everything
-             rewrite = False
-
-         if rewrite:
-             # Convert edited `bn` to JSON and write feature data
-             b_lines = json.dumps(bn, indent=2).split("\n")
-             key = hashobj(b_lines)
-             if key not in dst_h5file["basins"]:
-                 with RTDCWriter(dst_h5file) as hw:
-                     hw.write_text(dst_h5file["basins"], key, b_lines)
-         else:
-             # copy only
-             h5ds_copy(src_loc=src_h5file["basins"],
-                       src_name=b_key,
-                       dst_loc=dst_h5file["basins"],
-                       dst_name=b_key,
-                       recursive=False)
-
-
  def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
                ensure_compression=True, recursive=True):
      """Copy an HDF5 Dataset from one group to another
@@ -312,7 +257,8 @@ def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
          for chunk in src.iter_chunks():
              dst[chunk] = src[chunk]
          # Also write all the attributes
-         dst.attrs.update(src.attrs)
+         for key in src.attrs:
+             dst.attrs[key] = src.attrs[key]
      else:
          # Copy the Dataset to the destination as-is.
          h5py.h5o.copy(src_loc=src_loc.id,
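
For reference, a usage sketch for rtdc_copy() as defined in this file, assuming the keyword defaults and import path (file names are placeholders):

    import h5py

    from dclab.rtdc_dataset.copier import rtdc_copy  # path assumed

    with h5py.File("in.rtdc", "r") as src, \
            h5py.File("out.rtdc", "w") as dst:
        rtdc_copy(src_h5file=src, dst_h5file=dst)
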
dclab/rtdc_dataset/core.py CHANGED
@@ -1,7 +1,6 @@
  """RT-DC dataset core classes and methods"""
  import abc
  import hashlib
- import json
  import os.path
  import pathlib
  import traceback
@@ -16,7 +15,6 @@ from .. import definitions as dfn
  from .. import downsampling
  from ..polygon_filter import PolygonFilter
  from .. import kde_methods
- from ..util import hashobj

  from .feat_anc_core import AncillaryFeature, FEATURES_RAPID
  from . import feat_basin
@@ -827,20 +825,14 @@ class RTDCBase(abc.ABC):
          # Sort basins according to priority
          bdicts_srt = sorted(self.basins_get_dicts(),
                              key=feat_basin.basin_priority_sorted_key)
-         # complement basin "key"s (we do the same in writer)
-         for bdict in bdicts_srt:
-             if "key" not in bdict:
-                 b_dat = json.dumps(bdict, indent=2, sort_keys=True).split("\n")
-                 bdict["key"] = hashobj(b_dat)
-
-         bd_keys = [bd["key"] for bd in bdicts_srt]
+         bd_keys = [bd["key"] for bd in bdicts_srt if "key" in bd]
          bd_keys += self._basins_ignored
          for bdict in bdicts_srt:
              if bdict["format"] not in bc:
                  warnings.warn(f"Encountered unsupported basin "
                                f"format '{bdict['format']}'!")
                  continue
-             if bdict["key"] in self._basins_ignored:
+             if "key" in bdict and bdict["key"] in self._basins_ignored:
                  warnings.warn(
                      f"Encountered cyclic basin dependency '{bdict['key']}'",
                      feat_basin.CyclicBasinDependencyFoundWarning)
@@ -861,8 +853,6 @@ class RTDCBase(abc.ABC):
                  "measurement_identifier": self.get_measurement_identifier(),
                  # allow to ignore basins
                  "ignored_basins": bd_keys,
-                 # basin key
-                 "key": bdict["key"],
              }

              # Check whether this basin is supported and exists