dclab 0.62.10__cp39-cp39-macosx_11_0_arm64.whl → 2.18.0__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the changes between package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of dclab might be problematic.

dclab/lme4/wrapr.py CHANGED
@@ -1,19 +1,18 @@
  """R lme4 wrapper"""
- import logging
  import numbers
- import pathlib
- import tempfile
+ import warnings

- import importlib_resources
  import numpy as np

  from .. import definitions as dfn
  from ..rtdc_dataset.core import RTDCBase

+ from .rlibs import rpy2
  from . import rsetup


- logger = logging.getLogger(__name__)
+ class Lme4InstallWarning(UserWarning):
+     pass


  class Rlme4(object):
@@ -39,12 +38,19 @@ class Rlme4(object):
          #: list of [RTDCBase, column, repetition, chip_region]
          self.data = []

+         #: model function
+         self.r_func_model = "feature ~ group + (1 + group | repetition)"
+         #: null model function
+         self.r_func_nullmodel = "feature ~ (1 + group | repetition)"
+
          self.set_options(model=model, feature=feature)

          # Make sure that lme4 is available
          if not rsetup.has_lme4():
-             logger.info("Installing lme4, this may take a while!")
-             rsetup.require_lme4()
+             warnings.warn("Installing lme4, this may take a while!",
+                           Lme4InstallWarning)
+             rsetup.install_lme4()
+             rsetup.import_lme4()

      def add_dataset(self, ds, group, repetition):
          """Add a dataset to the analysis list
@@ -61,8 +67,8 @@ class Rlme4(object):

          Notes
          -----
-         - For each repetition, there must be a "treatment" (``1``) and a
-           "control" (``0``) group.
+         - For each repetition, there must be a "treatment" and a
+           "control" ``group``.
          - If you would like to perform a differential feature analysis,
            then you need to pass at least a reservoir and a channel
            dataset (with same parameters for `group` and `repetition`).
@@ -96,10 +102,10 @@ class Rlme4(object):
          The response variable is modeled using two linear mixed effect
          models:

-         - model: "feature ~ group + (1 + group | repetition)"
-           (random intercept + random slope model)
-         - the null model: "feature ~ (1 + group | repetition)"
-           (without the fixed effect introduced by the "treatment" group).
+         - model :const:`Rlme4.r_func_model` (random intercept +
+           random slope model)
+         - the null model :const:`Rlme4.r_func_nullmodel` (without
+           the fixed effect introduced by the "treatment" group).

          Both models are compared in R using "anova" (from the
          R-package "stats" :cite:`Everitt1992`) which performs a
@@ -127,16 +133,16 @@ class Rlme4(object):
          results: dict
              Dictionary with the results of the fitting process:

-             - "anova p-value": Anova likelihood ratio test (significance)
+             - "anova p-value": Anova likelyhood ratio test (significance)
              - "feature": name of the feature used for the analysis
                ``self.feature``
              - "fixed effects intercept": Mean of ``self.feature`` for all
                controls; In the case of the "glmer+loglink" model, the intercept
-               is already back transformed from log space.
+               is already backtransformed from log space.
              - "fixed effects treatment": The fixed effect size between the mean
                of the controls and the mean of the treatments relative to
                "fixed effects intercept"; In the case of the "glmer+loglink"
-               model, the fixed effect is already back transformed from log
+               model, the fixed effect is already backtransformed from log
                space.
              - "fixed effects repetitions": The effects (intercept and
                treatment) for each repetition. The first axis defines
@@ -153,10 +159,11 @@ class Rlme4(object):
              - "model": model name used for the analysis ``self.model``
              - "model converged": boolean indicating whether the model
                converged
-             - "r model summary": Summary of the model
-             - "r model coefficients": Model coefficient table
-             - "r script": the R script used
-             - "r output": full output of the R script
+             - "r anova": Anova model (exposed from R)
+             - "r model summary": Summary of the model (exposed from R)
+             - "r model coefficients": Model coefficient table (exposed from R)
+             - "r stderr": errors and warnings from R
+             - "r stdout": standard output from R
          """
          self.set_options(model=model, feature=feature)
          self.check_data()
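
For orientation, a typical call sequence against this class, reconstructed from the docstrings in this diff (file names are placeholders; the constructor keywords are inferred from the ``set_options(model=..., feature=...)`` call in ``__init__``):

    import dclab
    from dclab.lme4.wrapr import Rlme4  # path assumed from diff

    rlme4 = Rlme4(model="lmer", feature="deform")
    # one control/treatment pair per repetition (paths are placeholders)
    rlme4.add_dataset(dclab.new_dataset("rep1_ctl.rtdc"),
                      group="control", repetition=1)
    rlme4.add_dataset(dclab.new_dataset("rep1_trt.rtdc"),
                      group="treatment", repetition=1)
    result = rlme4.fit()
    print(result["anova p-value"], result["model converged"])
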
@@ -175,38 +182,105 @@ class Rlme4(object):
              groups.append(dd[1])
              repetitions.append(dd[2])

-         # concatenate and populate arrays for R
-         features_c = np.concatenate(features)
-         groups_c = np.zeros(len(features_c), dtype=str)
-         repetitions_c = np.zeros(len(features_c), dtype=int)
-         pos = 0
-         for ii in range(len(features)):
-             size = len(features[ii])
-             groups_c[pos:pos+size] = groups[ii][0]
-             repetitions_c[pos:pos+size] = repetitions[ii]
-             pos += size
-
-         # Run R with the given template script
-         rscript = importlib_resources.read_text("dclab.lme4",
-                                                 "lme4_template.R")
-         _, script_path = tempfile.mkstemp(prefix="dclab_lme4_", suffix=".R",
-                                           text=True)
-         script_path = pathlib.Path(script_path)
-         rscript = rscript.replace("<MODEL_NAME>", self.model)
-         rscript = rscript.replace("<FEATURES>", arr2str(features_c))
-         rscript = rscript.replace("<REPETITIONS>", arr2str(repetitions_c))
-         rscript = rscript.replace("<GROUPS>", arr2str(groups_c))
-         script_path.write_text(rscript, encoding="utf-8")
-
-         result = rsetup.run_command((rsetup.get_r_script_path(), script_path))
-
-         ret_dict = self.parse_result(result)
-         ret_dict["is differential"] = self.is_differential()
-         ret_dict["feature"] = self.feature
-         ret_dict["r script"] = rscript
-         ret_dict["r output"] = result
-         assert ret_dict["model"] == self.model
-
+         # Fire up R
+         with rsetup.AutoRConsole() as ac:
+             r = rpy2.robjects.r
+
+             # Load lme4
+             rpy2.robjects.packages.importr("lme4")
+
+             # Concatenate huge arrays for R
+             r_features = rpy2.robjects.FloatVector(np.concatenate(features))
+             _groups = []
+             _repets = []
+             for ii in range(len(features)):
+                 _groups.append(np.repeat(groups[ii], len(features[ii])))
+                 _repets.append(np.repeat(repetitions[ii], len(features[ii])))
+             r_groups = rpy2.robjects.StrVector(np.concatenate(_groups))
+             r_repetitions = rpy2.robjects.IntVector(np.concatenate(_repets))
+
+             # Register groups and repetitions
+             rpy2.robjects.globalenv["feature"] = r_features
+             rpy2.robjects.globalenv["group"] = r_groups
+             rpy2.robjects.globalenv["repetition"] = r_repetitions
+
+             # Create a dataframe which contains all the data
+             r_data = r["data.frame"](r_features, r_groups, r_repetitions)
+
+             # Random intercept and random slope model
+             if self.model == 'glmer+loglink':
+                 r_model = r["glmer"](self.r_func_model, r_data,
+                                      family=r["Gamma"](link='log'))
+                 r_nullmodel = r["glmer"](self.r_func_nullmodel, r_data,
+                                          family=r["Gamma"](link='log'))
+             else:  # lmer
+                 r_model = r["lmer"](self.r_func_model, r_data)
+                 r_nullmodel = r["lmer"](self.r_func_nullmodel, r_data)
+
+             # Anova analysis (increase verbosity by making models global)
+             # Using anova is a very conservative way of determining
+             # p values.
+             rpy2.robjects.globalenv["Model"] = r_model
+             rpy2.robjects.globalenv["NullModel"] = r_nullmodel
+             r_anova = r("anova(Model, NullModel)")
+             try:
+                 pvalue = r_anova.rx2["Pr(>Chisq)"][1]
+             except ValueError:  # rpy2 2.9.4
+                 pvalue = r_anova[7][1]
+             r_model_summary = r["summary"](r_model)
+             r_model_coefficients = r["coef"](r_model)
+             try:
+                 fe_reps = np.array(r_model_coefficients.rx2["repetition"])
+             except ValueError:  # rpy2 2.9.4
+                 fe_reps = np.concatenate((
+                     np.array(r_model_coefficients[0][0]).reshape(1, -1),
+                     np.array(r_model_coefficients[0][1]).reshape(1, -1)),
+                     axis=0)
+
+             r_effects = r["data.frame"](r["coef"](r_model_summary))
+             try:
+                 fe_icept = r_effects.rx2["Estimate"][0]
+                 fe_treat = r_effects.rx2["Estimate"][1]
+             except ValueError:  # rpy2 2.9.4
+                 fe_icept = r_effects[0][0]
+                 fe_treat = r_effects[0][1]
+             if self.model == "glmer+loglink":
+                 # transform back from log
+                 fe_treat = np.exp(fe_icept + fe_treat) - np.exp(fe_icept)
+                 fe_icept = np.exp(fe_icept)
+                 fe_reps[:, 1] = np.exp(fe_reps[:, 0] + fe_reps[:, 1]) \
+                     - np.exp(fe_reps[:, 0])
+                 fe_reps[:, 0] = np.exp(fe_reps[:, 0])
+
+             # convergence
+             try:
+                 lme4l = r_model_summary.rx2["optinfo"].rx2["conv"].rx2["lme4"]
+             except ValueError:  # rpy2 2.9.4
+                 lme4l = r_model_summary[17][3][1]
+
+             if lme4l and "code" in lme4l.names:
+                 try:
+                     conv_code = lme4l.rx2["code"]
+                 except ValueError:  # rpy2 2.9.4
+                     conv_code = lme4l[0]
+             else:
+                 conv_code = 0
+
+             ret_dict = {
+                 "anova p-value": pvalue,
+                 "feature": self.feature,
+                 "fixed effects intercept": fe_icept,
+                 "fixed effects treatment": fe_treat,  # aka "fixed effect"
+                 "fixed effects repetitions": fe_reps,
+                 "is differential": self.is_differential(),
+                 "model": self.model,
+                 "model converged": conv_code == 0,
+                 "r anova": r_anova,
+                 "r model summary": r_model_summary,
+                 "r model coefficients": r_model_coefficients,
+                 "r stderr": ac.get_warnerrors(),
+                 "r stdout": ac.get_prints(),
+             }
          return ret_dict

      def get_differential_dataset(self):
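
The repeated try/except ValueError blocks in the new fit() above exist only to support both modern rpy2 (named ``.rx2`` access) and rpy2 2.9.4 (positional access). If that pattern keeps growing, it could be factored into a small helper; a hypothetical sketch, not part of this diff:

    def rx2_get(robj, name, index):
        # Fetch a named component of an R object via rpy2, falling back
        # to positional access for old rpy2 versions (hypothetical helper).
        try:
            return robj.rx2[name]
        except ValueError:  # rpy2 2.9.4
            return robj[index]

    # e.g.: pvalue = rx2_get(r_anova, "Pr(>Chisq)", 7)[1]
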
@@ -214,7 +288,7 @@ class Rlme4(object):

          The most famous use case is differential deformation. The idea
          is that you cannot tell what the difference in deformation
-         from channel to reservoir, because you never measure the
+         from channel to reservoir is, because you never measure the
          same object in the reservoir and the channel. You usually just
          have two distributions. Comparing distributions is possible
          via bootstrapping. And then, instead of running the lme4
@@ -288,34 +362,6 @@ class Rlme4(object):
          else:
              return False

-     def parse_result(self, result):
-         resd = result.split("OUTPUT")
-         ret_dict = {}
-         for item in resd:
-             string = item.split("#*#")[0]
-             key, value = string.split(":", 1)
-             key = key.strip()
-             value = value.strip().replace("\n\n", "\n")
-
-             if key == "fixed effects repetitions":
-                 rows = value.split("\n")[1:]
-                 reps = []
-                 for row in rows:
-                     reps.append([float(vv) for vv in row.split()[1:]])
-                 value = np.array(reps).transpose()
-             elif key == "model converged":
-                 value = value == "TRUE"
-             elif value == "NA":
-                 value = np.nan
-             else:
-                 try:
-                     value = float(value)
-                 except ValueError:
-                     pass
-
-             ret_dict[key] = value
-         return ret_dict
-
      def set_options(self, model=None, feature=None):
          """Set analysis options"""
          if model is not None:
@@ -326,16 +372,6 @@ class Rlme4(object):
              self.feature = feature


- def arr2str(a):
-     """Convert an array to a string"""
-     if isinstance(a.dtype.type, np.integer):
-         return ",".join(str(dd) for dd in a.tolist())
-     elif a.dtype.type == np.str_:
-         return ",".join(f"'{dd}'" for dd in a.tolist())
-     else:
-         return ",".join(f"{dd:.16g}" for dd in a.tolist())
-
-
  def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
      """Compute the bootstrapped distributions for two arrays.
@@ -345,7 +381,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
          Input data
      bs_iter: int
          Number of bootstrapping iterations to perform
-         (output size).
+         (outtput size).
      rs: int
          Random state seed for random number generator
@@ -360,7 +396,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):

      Notes
      -----
-     From a programmatic point of view, it would have been better
+     From a programmatical point of view, it would have been better
      to implement this method for just one input array (because of
      redundant code). However, due to historical reasons (testing
      and comparability to Shape-Out 1), bootstrapping is done
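
The Notes section above describes bootstrapping the median for each input array. A standalone sketch of that idea (simplified; not the dclab implementation, which handles two arrays with a shared random state):

    import numpy as np

    def bootstrap_medians(a, bs_iter=1000, rs=117):
        # Resample `a` with replacement bs_iter times and record each
        # resample's median; the result approximates the sampling
        # distribution of the median.
        rng = np.random.RandomState(rs)
        a = np.asarray(a)
        return np.array([
            np.median(rng.choice(a, size=len(a), replace=True))
            for _ in range(bs_iter)
        ])
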
dclab/rtdc_dataset/check.py CHANGED
@@ -8,7 +8,6 @@ import numpy as np

  from .copier import is_properly_compressed
  from .core import RTDCBase
- from .fmt_hdf5 import RTDC_HDF5
  from .fmt_hierarchy import RTDC_Hierarchy
  from .load import load_file

@@ -193,13 +192,14 @@ class IntegrityChecker(object):
          else:
              with warnings.catch_warnings(record=True) as ws:
                  warnings.simplefilter("always")
-                 self.ds = load_file(path_or_ds, enable_basins=False)
+                 self.ds = load_file(path_or_ds)
                  for ww in ws:
                      self.warn_cues.append(ICue(
                          msg=f"{ww.category.__name__}: {ww.message}",
                          level="alert",
                          category="warning"))
              self.finally_close = True
+         np.max(self.ds["index"])

      def __enter__(self):
          return self
@@ -260,24 +260,13 @@ class IntegrityChecker(object):
                              level="alert",
                              category="basin data",
                          ))
-                 else:
-                     if "basin_events" not in self.ds.h5file:
+                 for feat in bn["features"]:
+                     if feat not in self.ds.h5file[bpaths[0]]:
                          cues.append(
-                             ICue(msg="Missing internal basin group "
-                                      "'basin_events', although an internal "
-                                      "basin is defined",
+                             ICue(msg=f"Missing internal basin feature {feat}",
                                   level="violation",
                                   category="basin data",
                                   ))
-                     else:
-                         for feat in bn["features"]:
-                             if feat not in self.ds.h5file["basin_events"]:
-                                 cues.append(
-                                     ICue(msg=f"Missing internal basin "
-                                              f"feature {feat}",
-                                          level="violation",
-                                          category="basin data",
-                                          ))
          return cues

      def check_compression(self, **kwargs):
@@ -330,32 +319,8 @@ class IntegrityChecker(object):
                          data=data))
          return cues

-     def check_empty(self, **kwargs):
-         """The dataset should contain events"""
-         cues = []
-         lends = len(self.ds)
-         if lends == 0:
-             cues.append(ICue(
-                 msg="The dataset does not contain any events",
-                 level="alert",
-                 category="feature data"))
-         return cues
-
-     def check_external_links(self, **kwargs):
-         """An HDF5 dataset should not contain external links"""
-         cues = []
-         if isinstance(self.ds, RTDC_HDF5):
-             has_external, h5object = hdf5_has_external(self.ds.h5file)
-             if has_external:
-                 cues.append(ICue(
-                     msg=f"The HDF5 file contains at least one external "
-                         f"link: '{h5object}'",
-                     level="violation",
-                     category="format HDF5"))
-         return cues
-
      def check_feat_index(self, **kwargs):
-         """The index of the dataset should be monotonous"""
+         """Up until"""
          cues = []
          lends = len(self.ds)
          if "index" in self.ds:
@@ -867,36 +832,3 @@ def check_dataset(path_or_ds):
          elif cue.level == "violation":
              viol.append(cue.msg)
      return sorted(viol), sorted(aler), sorted(info)
-
-
- def hdf5_has_external(h5):
-     """Check recursively, whether an h5py object contains external data
-
-     External data includes binary data in external files, virtual
-     datasets, and external links.
-
-     Returns a tuple of either
-
-     - `(True, path_ext)` if the object contains external data
-     - `(False, None)` if this is not the case
-
-     where `path_ext` is the path to the group or dataset in `h5`.
-
-     .. versionadded:: 0.62.0
-
-     """
-     for key in h5:
-         obj = h5[key]
-         if (obj.file != h5.file  # not in same file
-                 or (isinstance(obj, h5py.Dataset)
-                     and (obj.is_virtual  # virtual dataset
-                          or obj.external))):  # external dataset
-             # These are external data
-             return True, f"{h5.name}/{key}".replace("//", "/")
-         elif isinstance(obj, h5py.Group):
-             # Perform recursive check for external data
-             has_ext, path_ext = hdf5_has_external(obj)
-             if has_ext:
-                 return True, path_ext
-     else:
-         return False, None
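
The check_dataset() function whose tail is shown above returns three sorted lists of messages. A usage sketch (module path assumed from the file this diff touches; the file name is a placeholder):

    from dclab.rtdc_dataset.check import check_dataset  # path assumed

    violations, alerts, info = check_dataset("measurement.rtdc")
    for msg in violations:
        print("VIOLATION:", msg)
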
dclab/rtdc_dataset/copier.py CHANGED
@@ -1,7 +1,6 @@
  """Helper methods for copying .rtdc data"""
  from __future__ import annotations

- import json
  import re
  from typing import List, Literal

@@ -11,10 +10,8 @@ import hdf5plugin
  import numpy as np

  from ..definitions import feature_exists, scalar_feature_exists
- from ..util import hashobj

- from .fmt_hdf5 import DEFECTIVE_FEATURES, RTDC_HDF5
- from .writer import RTDCWriter
+ from .fmt_hdf5 import DEFECTIVE_FEATURES


  def rtdc_copy(src_h5file: h5py.Group,
@@ -47,7 +44,8 @@ def rtdc_copy(src_h5file: h5py.Group,
          Add this prefix to the name of the logs and tables in `dst_h5file`.
      """
      # metadata
-     dst_h5file.attrs.update(src_h5file.attrs)
+     for akey in src_h5file.attrs:
+         dst_h5file.attrs[akey] = src_h5file.attrs[akey]

      # events in source file
      if "events" in src_h5file:
@@ -59,6 +57,19 @@ def rtdc_copy(src_h5file: h5py.Group,
          events_src += list(src_h5file["basin_events"].keys())
      events_src = sorted(set(events_src))

+     # basins
+     if include_basins and "basins" in src_h5file:
+         dst_h5file.require_group("basins")
+         for b_key in src_h5file["basins"]:
+             if b_key in dst_h5file["basins"]:
+                 # This basin already exists.
+                 continue
+             h5ds_copy(src_loc=src_h5file["basins"],
+                       src_name=b_key,
+                       dst_loc=dst_h5file["basins"],
+                       dst_name=b_key,
+                       recursive=False)
+
      # logs
      if include_logs and "logs" in src_h5file:
          dst_h5file.require_group("logs")
@@ -83,12 +94,11 @@ def rtdc_copy(src_h5file: h5py.Group,
          #           dst_loc=dst_h5file["tables"],
          #           dst_name=meta_prefix + tkey,
          #           recursive=False)
-         copy_table = dst_h5file["tables"].create_dataset(
+         dst_h5file["tables"].create_dataset(
              name=tkey,
              data=src_h5file["tables"][tkey][:],
              fletcher32=True,
              **hdf5plugin.Zstd(clevel=5))
-         copy_table.attrs.update(src_h5file["tables"][tkey].attrs)

      # events
      if isinstance(features, list):
@@ -120,12 +130,6 @@ def rtdc_copy(src_h5file: h5py.Group,
          if feat in feature_iter:
              feature_iter.remove(feat)

-     # copy basin definitions
-     if include_basins and "basins" in src_h5file:
-         basin_definition_copy(src_h5file=src_h5file,
-                               dst_h5file=dst_h5file,
-                               features_iter=feature_iter)
-
      if feature_iter:
          dst_h5file.require_group("events")
          for feat in feature_iter:
@@ -166,65 +170,6 @@ def rtdc_copy(src_h5file: h5py.Group,
          )


- def basin_definition_copy(src_h5file, dst_h5file, features_iter):
-     """Copy basin definitions `src_h5file["basins"]` to the new file
-
-     Normally, we would just use :func:`h5ds_copy` to copy basins from
-     one dataset to another. However, if we are e.g. only copying scalar
-     features, and there are non-scalar features in the internal basin,
-     then we must rewrite the basin definition of the internal basin.
-
-     The `features_iter` list of features defines which features are
-     relevant for the internal basin.
-     """
-     dst_h5file.require_group("basins")
-     # Load the basin information
-     basin_dicts = RTDC_HDF5.basin_get_dicts_from_h5file(src_h5file)
-     for bn in basin_dicts:
-         b_key = bn["key"]
-
-         if b_key in dst_h5file["basins"]:
-             # already stored therein
-             continue
-
-         # sanity check
-         if b_key not in src_h5file["basins"]:
-             raise ValueError(
-                 f"Failed to parse basin information correctly. Source file "
-                 f"{src_h5file} does not contain basin {b_key} which I got "
-                 f"from `RTDC_HDF5.basin_get_dicts_from_h5file`.")
-
-         if bn["type"] == "internal":
-             # Make sure we define the internal features selected
-             feat_used = [f for f in bn["features"] if f in features_iter]
-             if len(feat_used) == 0:
-                 # We don't have any internal features, don't write anything
-                 continue
-             elif feat_used != bn["features"]:
-                 bn["features"] = feat_used
-                 rewrite = True
-             else:
-                 rewrite = False
-         else:
-             # We do not have an internal basin, just copy everything
-             rewrite = False
-
-         if rewrite:
-             # Convert edited `bn` to JSON and write feature data
-             b_lines = json.dumps(bn, indent=2).split("\n")
-             key = hashobj(b_lines)
-             if key not in dst_h5file["basins"]:
-                 with RTDCWriter(dst_h5file) as hw:
-                     hw.write_text(dst_h5file["basins"], key, b_lines)
-         else:
-             # copy only
-             h5ds_copy(src_loc=src_h5file["basins"],
-                       src_name=b_key,
-                       dst_loc=dst_h5file["basins"],
-                       dst_name=b_key,
-                       recursive=False)
-
-
  def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
                ensure_compression=True, recursive=True):
      """Copy an HDF5 Dataset from one group to another
@@ -312,7 +257,8 @@ def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
          for chunk in src.iter_chunks():
              dst[chunk] = src[chunk]
          # Also write all the attributes
-         dst.attrs.update(src.attrs)
+         for key in src.attrs:
+             dst.attrs[key] = src.attrs[key]
      else:
          # Copy the Dataset to the destination as-is.
          h5py.h5o.copy(src_loc=src_loc.id,
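
For reference, a usage sketch for rtdc_copy() as defined in this file, assuming the keyword defaults and import path (file names are placeholders):

    import h5py

    from dclab.rtdc_dataset.copier import rtdc_copy  # path assumed

    with h5py.File("in.rtdc", "r") as src, \
            h5py.File("out.rtdc", "w") as dst:
        rtdc_copy(src_h5file=src, dst_h5file=dst)
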
dclab/rtdc_dataset/core.py CHANGED
@@ -1,7 +1,6 @@
  """RT-DC dataset core classes and methods"""
  import abc
  import hashlib
- import json
  import os.path
  import pathlib
  import traceback
@@ -16,7 +15,6 @@ from .. import definitions as dfn
  from .. import downsampling
  from ..polygon_filter import PolygonFilter
  from .. import kde_methods
- from ..util import hashobj

  from .feat_anc_core import AncillaryFeature, FEATURES_RAPID
  from . import feat_basin
@@ -827,20 +825,14 @@ class RTDCBase(abc.ABC):
          # Sort basins according to priority
          bdicts_srt = sorted(self.basins_get_dicts(),
                              key=feat_basin.basin_priority_sorted_key)
-         # complement basin "key"s (we do the same in writer)
-         for bdict in bdicts_srt:
-             if "key" not in bdict:
-                 b_dat = json.dumps(bdict, indent=2, sort_keys=True).split("\n")
-                 bdict["key"] = hashobj(b_dat)
-
-         bd_keys = [bd["key"] for bd in bdicts_srt]
+         bd_keys = [bd["key"] for bd in bdicts_srt if "key" in bd]
          bd_keys += self._basins_ignored
          for bdict in bdicts_srt:
              if bdict["format"] not in bc:
                  warnings.warn(f"Encountered unsupported basin "
                                f"format '{bdict['format']}'!")
                  continue
-             if bdict["key"] in self._basins_ignored:
+             if "key" in bdict and bdict["key"] in self._basins_ignored:
                  warnings.warn(
                      f"Encountered cyclic basin dependency '{bdict['key']}'",
                      feat_basin.CyclicBasinDependencyFoundWarning)
@@ -861,8 +853,6 @@ class RTDCBase(abc.ABC):
                  "measurement_identifier": self.get_measurement_identifier(),
                  # allow to ignore basins
                  "ignored_basins": bd_keys,
-                 # basin key
-                 "key": bdict["key"],
              }

              # Check whether this basin is supported and exists