dclab 0.62.7__cp38-cp38-macosx_11_0_arm64.whl → 2.18.0__cp38-cp38-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dclab might be problematic.

dclab/lme4/wrapr.py CHANGED
@@ -1,19 +1,18 @@
  """R lme4 wrapper"""
- import logging
  import numbers
- import pathlib
- import tempfile
+ import warnings

- import importlib_resources
  import numpy as np

  from .. import definitions as dfn
  from ..rtdc_dataset.core import RTDCBase

+ from .rlibs import rpy2
  from . import rsetup


- logger = logging.getLogger(__name__)
+ class Lme4InstallWarning(UserWarning):
+     pass


  class Rlme4(object):
@@ -39,12 +38,19 @@ class Rlme4(object):
          #: list of [RTDCBase, column, repetition, chip_region]
          self.data = []

+         #: model function
+         self.r_func_model = "feature ~ group + (1 + group | repetition)"
+         #: null model function
+         self.r_func_nullmodel = "feature ~ (1 + group | repetition)"
+
          self.set_options(model=model, feature=feature)

          # Make sure that lme4 is available
          if not rsetup.has_lme4():
-             logger.info("Installing lme4, this may take a while!")
-             rsetup.require_lme4()
+             warnings.warn("Installing lme4, this may take a while!",
+                           Lme4InstallWarning)
+             rsetup.install_lme4()
+             rsetup.import_lme4()

      def add_dataset(self, ds, group, repetition):
          """Add a dataset to the analysis list
@@ -61,8 +67,8 @@ class Rlme4(object):

          Notes
          -----
-         - For each repetition, there must be a "treatment" (``1``) and a
-           "control" (``0``) group.
+         - For each repetition, there must be a "treatment" and a
+           "control" ``group``.
          - If you would like to perform a differential feature analysis,
            then you need to pass at least a reservoir and a channel
            dataset (with same parameters for `group` and `repetition`).
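
As context for this docstring change, a minimal usage sketch of the class (dataset paths, group labels, and the "deform" feature are illustrative; a working R installation with lme4 is assumed):

    import dclab
    from dclab import lme4

    rlme4 = lme4.Rlme4(model="lmer", feature="deform")
    # One "control" and one "treatment" dataset per repetition;
    # the paths here are hypothetical.
    for ii, (path_ctl, path_trt) in enumerate([
            ("rep1_control.rtdc", "rep1_treatment.rtdc"),
            ("rep2_control.rtdc", "rep2_treatment.rtdc"),
            ("rep3_control.rtdc", "rep3_treatment.rtdc")], start=1):
        rlme4.add_dataset(dclab.new_dataset(path_ctl),
                          group="control", repetition=ii)
        rlme4.add_dataset(dclab.new_dataset(path_trt),
                          group="treatment", repetition=ii)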
@@ -96,10 +102,10 @@ class Rlme4(object):
          The response variable is modeled using two linear mixed effect
          models:

-         - model: "feature ~ group + (1 + group | repetition)"
-           (random intercept + random slope model)
-         - the null model: "feature ~ (1 + group | repetition)"
-           (without the fixed effect introduced by the "treatment" group).
+         - model :const:`Rlme4.r_func_model` (random intercept +
+           random slope model)
+         - the null model :const:`Rlme4.r_func_nullmodel` (without
+           the fixed effect introduced by the "treatment" group).

          Both models are compared in R using "anova" (from the
          R-package "stats" :cite:`Everitt1992`) which performs a
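
The "anova" comparison referenced here is a likelihood-ratio test. In standard notation (a sketch, not taken from the dclab sources):

    \lambda = -2\left[\ln L(\text{null model}) - \ln L(\text{model})\right]
        \;\sim\; \chi^2_k

where k is the number of parameters dropped in the null model (here k = 1, the fixed "group" effect); the reported "anova p-value" is the upper-tail probability of \lambda under that \chi^2 distribution.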
@@ -127,16 +133,16 @@ class Rlme4(object):
          results: dict
              Dictionary with the results of the fitting process:

-             - "anova p-value": Anova likelihood ratio test (significance)
+             - "anova p-value": Anova likelyhood ratio test (significance)
              - "feature": name of the feature used for the analysis
                ``self.feature``
              - "fixed effects intercept": Mean of ``self.feature`` for all
                controls; In the case of the "glmer+loglink" model, the intercept
-               is already back transformed from log space.
+               is already backtransformed from log space.
              - "fixed effects treatment": The fixed effect size between the mean
                of the controls and the mean of the treatments relative to
                "fixed effects intercept"; In the case of the "glmer+loglink"
-               model, the fixed effect is already back transformed from log
+               model, the fixed effect is already backtransformed from log
                space.
              - "fixed effects repetitions": The effects (intercept and
                treatment) for each repetition. The first axis defines
@@ -153,10 +159,11 @@ class Rlme4(object):
              - "model": model name used for the analysis ``self.model``
              - "model converged": boolean indicating whether the model
                converged
-             - "r model summary": Summary of the model
-             - "r model coefficients": Model coefficient table
-             - "r script": the R script used
-             - "r output": full output of the R script
+             - "r anova": Anova model (exposed from R)
+             - "r model summary": Summary of the model (exposed from R)
+             - "r model coefficients": Model coefficient table (exposed from R)
+             - "r stderr": errors and warnings from R
+             - "r stdout": standard output from R
          """
          self.set_options(model=model, feature=feature)
          self.check_data()
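
Continuing the usage sketch from above, the documented result keys can be consumed like this (illustrative only):

    result = rlme4.fit()
    if not result["model converged"]:
        print("warning: lme4 reported a convergence problem")
    print("p-value:", result["anova p-value"])
    print("control mean:", result["fixed effects intercept"])
    print("effect size:", result["fixed effects treatment"])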
@@ -175,38 +182,105 @@ class Rlme4(object):
              groups.append(dd[1])
              repetitions.append(dd[2])

-         # concatenate and populate arrays for R
-         features_c = np.concatenate(features)
-         groups_c = np.zeros(len(features_c), dtype=str)
-         repetitions_c = np.zeros(len(features_c), dtype=int)
-         pos = 0
-         for ii in range(len(features)):
-             size = len(features[ii])
-             groups_c[pos:pos+size] = groups[ii][0]
-             repetitions_c[pos:pos+size] = repetitions[ii]
-             pos += size
-
-         # Run R with the given template script
-         rscript = importlib_resources.read_text("dclab.lme4",
-                                                 "lme4_template.R")
-         _, script_path = tempfile.mkstemp(prefix="dclab_lme4_", suffix=".R",
-                                           text=True)
-         script_path = pathlib.Path(script_path)
-         rscript = rscript.replace("<MODEL_NAME>", self.model)
-         rscript = rscript.replace("<FEATURES>", arr2str(features_c))
-         rscript = rscript.replace("<REPETITIONS>", arr2str(repetitions_c))
-         rscript = rscript.replace("<GROUPS>", arr2str(groups_c))
-         script_path.write_text(rscript, encoding="utf-8")
-
-         result = rsetup.run_command((rsetup.get_r_script_path(), script_path))
-
-         ret_dict = self.parse_result(result)
-         ret_dict["is differential"] = self.is_differential()
-         ret_dict["feature"] = self.feature
-         ret_dict["r script"] = rscript
-         ret_dict["r output"] = result
-         assert ret_dict["model"] == self.model
-
+         # Fire up R
+         with rsetup.AutoRConsole() as ac:
+             r = rpy2.robjects.r
+
+             # Load lme4
+             rpy2.robjects.packages.importr("lme4")
+
+             # Concatenate huge arrays for R
+             r_features = rpy2.robjects.FloatVector(np.concatenate(features))
+             _groups = []
+             _repets = []
+             for ii in range(len(features)):
+                 _groups.append(np.repeat(groups[ii], len(features[ii])))
+                 _repets.append(np.repeat(repetitions[ii], len(features[ii])))
+             r_groups = rpy2.robjects.StrVector(np.concatenate(_groups))
+             r_repetitions = rpy2.robjects.IntVector(np.concatenate(_repets))
+
+             # Register groups and repetitions
+             rpy2.robjects.globalenv["feature"] = r_features
+             rpy2.robjects.globalenv["group"] = r_groups
+             rpy2.robjects.globalenv["repetition"] = r_repetitions
+
+             # Create a dataframe which contains all the data
+             r_data = r["data.frame"](r_features, r_groups, r_repetitions)
+
+             # Random intercept and random slope model
+             if self.model == 'glmer+loglink':
+                 r_model = r["glmer"](self.r_func_model, r_data,
+                                      family=r["Gamma"](link='log'))
+                 r_nullmodel = r["glmer"](self.r_func_nullmodel, r_data,
+                                          family=r["Gamma"](link='log'))
+             else:  # lmer
+                 r_model = r["lmer"](self.r_func_model, r_data)
+                 r_nullmodel = r["lmer"](self.r_func_nullmodel, r_data)
+
+             # Anova analysis (increase verbosity by making models global)
+             # Using anova is a very conservative way of determining
+             # p values.
+             rpy2.robjects.globalenv["Model"] = r_model
+             rpy2.robjects.globalenv["NullModel"] = r_nullmodel
+             r_anova = r("anova(Model, NullModel)")
+             try:
+                 pvalue = r_anova.rx2["Pr(>Chisq)"][1]
+             except ValueError:  # rpy2 2.9.4
+                 pvalue = r_anova[7][1]
+             r_model_summary = r["summary"](r_model)
+             r_model_coefficients = r["coef"](r_model)
+             try:
+                 fe_reps = np.array(r_model_coefficients.rx2["repetition"])
+             except ValueError:  # rpy2 2.9.4
+                 fe_reps = np.concatenate((
+                     np.array(r_model_coefficients[0][0]).reshape(1, -1),
+                     np.array(r_model_coefficients[0][1]).reshape(1, -1)),
+                     axis=0)
+
+             r_effects = r["data.frame"](r["coef"](r_model_summary))
+             try:
+                 fe_icept = r_effects.rx2["Estimate"][0]
+                 fe_treat = r_effects.rx2["Estimate"][1]
+             except ValueError:  # rpy2 2.9.4
+                 fe_icept = r_effects[0][0]
+                 fe_treat = r_effects[0][1]
+             if self.model == "glmer+loglink":
+                 # transform back from log
+                 fe_treat = np.exp(fe_icept + fe_treat) - np.exp(fe_icept)
+                 fe_icept = np.exp(fe_icept)
+                 fe_reps[:, 1] = np.exp(fe_reps[:, 0] + fe_reps[:, 1]) \
+                     - np.exp(fe_reps[:, 0])
+                 fe_reps[:, 0] = np.exp(fe_reps[:, 0])
+
+             # convergence
+             try:
+                 lme4l = r_model_summary.rx2["optinfo"].rx2["conv"].rx2["lme4"]
+             except ValueError:  # rpy2 2.9.4
+                 lme4l = r_model_summary[17][3][1]
+
+             if lme4l and "code" in lme4l.names:
+                 try:
+                     conv_code = lme4l.rx2["code"]
+                 except ValueError:  # rpy2 2.9.4
+                     conv_code = lme4l[0]
+             else:
+                 conv_code = 0
+
+             ret_dict = {
+                 "anova p-value": pvalue,
+                 "feature": self.feature,
+                 "fixed effects intercept": fe_icept,
+                 "fixed effects treatment": fe_treat,  # aka "fixed effect"
+                 "fixed effects repetitions": fe_reps,
+                 "is differential": self.is_differential(),
+                 "model": self.model,
+                 "model converged": conv_code == 0,
+                 "r anova": r_anova,
+                 "r model summary": r_model_summary,
+                 "r model coefficients": r_model_coefficients,
+                 "r stderr": ac.get_warnerrors(),
+                 "r stdout": ac.get_prints(),
+             }
          return ret_dict

      def get_differential_dataset(self):
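
The log-link back-transform in the added code can be verified with a small numeric sketch (made-up estimates, not real fit output):

    import numpy as np

    b0 = np.log(0.08)  # intercept in log space (control)
    b1 = 0.25          # treatment effect in log space

    intercept = np.exp(b0)                 # 0.08, back in linear space
    effect = np.exp(b0 + b1) - np.exp(b0)  # approx. 0.0227
    # `effect` is the treatment mean minus the control mean after
    # undoing the log link, matching the fe_treat computation above.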
@@ -214,7 +288,7 @@ class Rlme4(object):

          The most famous use case is differential deformation. The idea
          is that you cannot tell what the difference in deformation
-         from channel to reservoir, because you never measure the
+         from channel to reservoir is, because you never measure the
          same object in the reservoir and the channel. You usually just
          have two distributions. Comparing distributions is possible
          via bootstrapping. And then, instead of running the lme4
@@ -288,34 +362,6 @@ class Rlme4(object):
          else:
              return False

-     def parse_result(self, result):
-         resd = result.split("OUTPUT")
-         ret_dict = {}
-         for item in resd:
-             string = item.split("#*#")[0]
-             key, value = string.split(":", 1)
-             key = key.strip()
-             value = value.strip().replace("\n\n", "\n")
-
-             if key == "fixed effects repetitions":
-                 rows = value.split("\n")[1:]
-                 reps = []
-                 for row in rows:
-                     reps.append([float(vv) for vv in row.split()[1:]])
-                 value = np.array(reps).transpose()
-             elif key == "model converged":
-                 value = value == "TRUE"
-             elif value == "NA":
-                 value = np.nan
-             else:
-                 try:
-                     value = float(value)
-                 except ValueError:
-                     pass
-
-             ret_dict[key] = value
-         return ret_dict
-
      def set_options(self, model=None, feature=None):
          """Set analysis options"""
          if model is not None:
@@ -326,16 +372,6 @@ class Rlme4(object):
              self.feature = feature


- def arr2str(a):
-     """Convert an array to a string"""
-     if isinstance(a.dtype.type, np.integer):
-         return ",".join(str(dd) for dd in a.tolist())
-     elif a.dtype.type == np.str_:
-         return ",".join(f"'{dd}'" for dd in a.tolist())
-     else:
-         return ",".join(f"{dd:.16g}" for dd in a.tolist())
-
-
  def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
      """Compute the bootstrapped distributions for two arrays.

@@ -345,7 +381,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
          Input data
      bs_iter: int
          Number of bootstrapping iterations to perform
-         (output size).
+         (outtput size).
      rs: int
          Random state seed for random number generator

@@ -360,7 +396,7 @@ def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):

      Notes
      -----
-     From a programmatic point of view, it would have been better
+     From a programmatical point of view, it would have been better
      to implement this method for just one input array (because of
      redundant code). However, due to historical reasons (testing
      and comparability to Shape-Out 1), bootstrapping is done
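
A minimal NumPy sketch of the bootstrapped-median idea for a single array (not the dclab implementation; seeding and resampling details may differ):

    import numpy as np

    def bootstrap_medians(a, bs_iter=1000, rs=117):
        """Return ``bs_iter`` medians of resampled versions of ``a``"""
        rng = np.random.RandomState(rs)  # legacy seeding for reproducibility
        a = np.asarray(a)
        medians = np.zeros(bs_iter)
        for ii in range(bs_iter):
            # draw len(a) samples with replacement and record the median
            medians[ii] = np.median(rng.choice(a, size=len(a), replace=True))
        return medians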
dclab/rtdc_dataset/check.py CHANGED
@@ -8,7 +8,6 @@ import numpy as np

  from .copier import is_properly_compressed
  from .core import RTDCBase
- from .fmt_hdf5 import RTDC_HDF5
  from .fmt_hierarchy import RTDC_Hierarchy
  from .load import load_file

@@ -193,13 +192,14 @@ class IntegrityChecker(object):
          else:
              with warnings.catch_warnings(record=True) as ws:
                  warnings.simplefilter("always")
-                 self.ds = load_file(path_or_ds, enable_basins=False)
+                 self.ds = load_file(path_or_ds)
                  for ww in ws:
                      self.warn_cues.append(ICue(
                          msg=f"{ww.category.__name__}: {ww.message}",
                          level="alert",
                          category="warning"))
              self.finally_close = True
+         np.max(self.ds["index"])

      def __enter__(self):
          return self
@@ -260,24 +260,13 @@ class IntegrityChecker(object):
                          level="alert",
                          category="basin data",
                      ))
-                 else:
-                     if "basin_events" not in self.ds.h5file:
+                 for feat in bn["features"]:
+                     if feat not in self.ds.h5file[bpaths[0]]:
                          cues.append(
-                             ICue(msg="Missing internal basin group "
-                                      "'basin_events', although an internal "
-                                      "basin is defined",
+                             ICue(msg=f"Missing internal basin feature {feat}",
                                  level="violation",
                                  category="basin data",
                              ))
-                     else:
-                         for feat in bn["features"]:
-                             if feat not in self.ds.h5file["basin_events"]:
-                                 cues.append(
-                                     ICue(msg=f"Missing internal basin "
-                                              f"feature {feat}",
-                                          level="violation",
-                                          category="basin data",
-                                          ))
          return cues

      def check_compression(self, **kwargs):
@@ -330,32 +319,8 @@ class IntegrityChecker(object):
                                      data=data))
          return cues

-     def check_empty(self, **kwargs):
-         """The dataset should contain events"""
-         cues = []
-         lends = len(self.ds)
-         if lends == 0:
-             cues.append(ICue(
-                 msg="The dataset does not contain any events",
-                 level="alert",
-                 category="feature data"))
-         return cues
-
-     def check_external_links(self, **kwargs):
-         """An HDF5 dataset should not contain external links"""
-         cues = []
-         if isinstance(self.ds, RTDC_HDF5):
-             has_external, h5object = hdf5_has_external(self.ds.h5file)
-             if has_external:
-                 cues.append(ICue(
-                     msg=f"The HDF5 file contains at least one external "
-                         f"link: '{h5object}'",
-                     level="violation",
-                     category="format HDF5"))
-         return cues
-
      def check_feat_index(self, **kwargs):
-         """The index of the dataset should be monotonous"""
+         """Up until"""
          cues = []
          lends = len(self.ds)
          if "index" in self.ds:
@@ -867,36 +832,3 @@ def check_dataset(path_or_ds):
          elif cue.level == "violation":
              viol.append(cue.msg)
      return sorted(viol), sorted(aler), sorted(info)
-
-
- def hdf5_has_external(h5):
-     """Check recursively, whether an h5py object contains external data
-
-     External data includes binary data in external files, virtual
-     datasets, and external links.
-
-     Returns a tuple of either
-
-     - `(True, path_ext)` if the object contains external data
-     - `(False, None)` if this is not the case
-
-     where `path_ext` is the path to the group or dataset in `h5`.
-
-     .. versionadded:: 0.62.0
-
-     """
-     for key in h5:
-         obj = h5[key]
-         if (obj.file != h5.file  # not in same file
-                 or (isinstance(obj, h5py.Dataset)
-                     and (obj.is_virtual  # virtual dataset
-                          or obj.external))):  # external dataset
-             # These are external data
-             return True, f"{h5.name}/{key}".replace("//", "/")
-         elif isinstance(obj, h5py.Group):
-             # Perform recursive check for external data
-             has_ext, path_ext = hdf5_has_external(obj)
-             if has_ext:
-                 return True, path_ext
-     else:
-         return False, None
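
For orientation, `check_dataset` is the high-level entry point around `IntegrityChecker`; a short usage sketch (the file path is hypothetical):

    from dclab.rtdc_dataset.check import check_dataset

    violations, alerts, info = check_dataset("measurement.rtdc")
    for msg in violations:
        print("violation:", msg)
    for msg in alerts:
        print("alert:", msg)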
dclab/rtdc_dataset/copier.py CHANGED
@@ -1,7 +1,6 @@
  """Helper methods for copying .rtdc data"""
  from __future__ import annotations

- import json
  import re
  from typing import List, Literal

@@ -11,10 +10,8 @@ import hdf5plugin
  import numpy as np

  from ..definitions import feature_exists, scalar_feature_exists
- from ..util import hashobj

- from .fmt_hdf5 import DEFECTIVE_FEATURES, RTDC_HDF5
- from .writer import RTDCWriter
+ from .fmt_hdf5 import DEFECTIVE_FEATURES


  def rtdc_copy(src_h5file: h5py.Group,
@@ -60,6 +57,19 @@ def rtdc_copy(src_h5file: h5py.Group,
          events_src += list(src_h5file["basin_events"].keys())
      events_src = sorted(set(events_src))

+     # basins
+     if include_basins and "basins" in src_h5file:
+         dst_h5file.require_group("basins")
+         for b_key in src_h5file["basins"]:
+             if b_key in dst_h5file["basins"]:
+                 # This basin already exists.
+                 continue
+             h5ds_copy(src_loc=src_h5file["basins"],
+                       src_name=b_key,
+                       dst_loc=dst_h5file["basins"],
+                       dst_name=b_key,
+                       recursive=False)
+
      # logs
      if include_logs and "logs" in src_h5file:
          dst_h5file.require_group("logs")
120
130
  if feat in feature_iter:
121
131
  feature_iter.remove(feat)
122
132
 
123
- # copy basin definitions
124
- if include_basins and "basins" in src_h5file:
125
- basin_definition_copy(src_h5file=src_h5file,
126
- dst_h5file=dst_h5file,
127
- features_iter=feature_iter)
128
-
129
133
  if feature_iter:
130
134
  dst_h5file.require_group("events")
131
135
  for feat in feature_iter:
@@ -166,56 +170,6 @@ def rtdc_copy(src_h5file: h5py.Group,
      )


- def basin_definition_copy(src_h5file, dst_h5file, features_iter):
-     """Copy basin definitions `src_h5file["basins"]` to the new file
-
-     Normally, we would just use :func:`h5ds_copy` to copy basins from
-     one dataset to another. However, if we are e.g. only copying scalar
-     features, and there are non-scalar features in the internal basin,
-     then we must rewrite the basin definition of the internal basin.
-
-     The `features_iter` list of features defines which features are
-     relevant for the internal basin.
-     """
-     dst_h5file.require_group("basins")
-     for b_key in src_h5file["basins"]:
-         if b_key in dst_h5file["basins"]:
-             # This basin already exists.
-             continue
-         # Load the basin information
-         basin_dicts = RTDC_HDF5.basin_get_dicts_from_h5file(src_h5file)
-         for bn in basin_dicts:
-             if bn["type"] == "internal":
-                 # Make sure we define the internal features selected
-                 feat_used = [f for f in bn["features"] if f in features_iter]
-                 if len(feat_used) == 0:
-                     # We don't have any internal features, don't write anything
-                     continue
-                 elif feat_used != bn["features"]:
-                     bn["features"] = feat_used
-                     rewrite = True
-                 else:
-                     rewrite = False
-             else:
-                 # We do not have an internal basin, just copy everything
-                 rewrite = False
-
-             if rewrite:
-                 # Convert edited `bn` to JSON and write feature data
-                 b_lines = json.dumps(bn, indent=2).split("\n")
-                 key = hashobj(b_lines)
-                 if key not in dst_h5file["basins"]:
-                     with RTDCWriter(dst_h5file) as hw:
-                         hw.write_text(dst_h5file["basins"], key, b_lines)
-             else:
-                 # copy only
-                 h5ds_copy(src_loc=src_h5file["basins"],
-                           src_name=b_key,
-                           dst_loc=dst_h5file["basins"],
-                           dst_name=b_key,
-                           recursive=False)
-
-
  def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
                ensure_compression=True, recursive=True):
      """Copy an HDF5 Dataset from one group to another
dclab/rtdc_dataset/feat_basin.py CHANGED
@@ -14,12 +14,6 @@ import weakref

  import numpy as np

- from ..util import copy_if_needed
-
-
- class BasinFeatureMissingWarning(UserWarning):
-     """Used when a badin feature is defined but not stored"""
-

  class CyclicBasinDependencyFoundWarning(UserWarning):
      """Used when a basin is defined in one of its sub-basins"""
@@ -411,7 +405,7 @@ class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
          self._cache = None
          self.is_scalar = bool(len(self.feat_obj.shape) == 1)

-     def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
+     def __array__(self, dtype=None, copy=False, *args, **kwargs):
          if self._cache is None and self.is_scalar:
              self._cache = self.feat_obj[:][self.basinmap]
          else:
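
The removed `copy_if_needed` is presumably dclab's NumPy 1/2 compatibility constant; the widely used shim looks like this (a sketch, not the actual `dclab.util` code):

    import numpy as np

    # NumPy 2.x: copy=None in np.array() means "copy only if needed",
    # while copy=False raises if a copy cannot be avoided.
    # NumPy 1.x has no copy=None; copy=False gives the old lazy semantics.
    if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
        copy_if_needed = None
    else:
        copy_if_needed = False

Hard-coding `copy=False`, as the new version does, therefore changes behavior under NumPy 2, where `np.array(..., copy=False)` raises once a copy is actually required.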
@@ -502,19 +496,6 @@ class InternalH5DatasetBasin(Basin):
          if self._features is None:
              raise ValueError("You must specify features when defining "
                               "internal basins.")
-         # Redefine the features if necessary
-         h5root = self._basinmap_referrer().h5file
-         available_features = []
-         for feat in self._features:
-             if self.location in h5root and feat in h5root[self.location]:
-                 available_features.append(feat)
-             else:
-                 warnings.warn(
-                     f"Feature '{feat}' is defined as an internal basin, "
-                     f"but it cannot be found in '{self.location}'.",
-                     BasinFeatureMissingWarning)
-         self._features.clear()
-         self._features += available_features

      def _load_dataset(self, location, **kwargs):
          from .fmt_dict import RTDC_Dict
@@ -526,7 +507,8 @@ class InternalH5DatasetBasin(Basin):
          return RTDC_Dict(ds_dict)

      def is_available(self):
-         return bool(self._features)
+         h5root = self._basinmap_referrer().h5file
+         return self.location in h5root

      def verify_basin(self, *args, **kwargs):
          """It's not necessary to verify internal basins"""
dclab/rtdc_dataset/fmt_hdf5/base.py CHANGED
@@ -175,15 +175,10 @@ class RTDC_HDF5(RTDCBase):

      def basins_get_dicts(self):
          """Return list of dicts for all basins defined in `self.h5file`"""
-         return self.basin_get_dicts_from_h5file(self.h5file)
-
-     @staticmethod
-     def basin_get_dicts_from_h5file(h5file):
-         """Return list of dicts for all basins defined in `h5file`"""
          basins = []
          # Do not sort anything here, sorting is done in `RTDCBase`.
-         for bk in h5file.get("basins", []):
-             bdat = list(h5file["basins"][bk])
+         for bk in self.h5file.get("basins", []):
+             bdat = list(self.h5file["basins"][bk])
              if isinstance(bdat[0], bytes):
                  bdat = [bi.decode("utf") for bi in bdat]
              bdict = json.loads(" ".join(bdat))
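
The storage layout implied by this method (each basin stored as lines of JSON under the "basins" group) can be inspected with plain h5py; the path below is hypothetical:

    import json
    import h5py

    with h5py.File("measurement.rtdc", "r") as h5:
        for bk in h5.get("basins", []):
            bdat = list(h5["basins"][bk])
            if isinstance(bdat[0], bytes):
                bdat = [bi.decode("utf") for bi in bdat]
            bdict = json.loads(" ".join(bdat))
            print(bk, bdict["type"], bdict.get("features"))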
dclab/rtdc_dataset/fmt_hdf5/events.py CHANGED
@@ -7,7 +7,7 @@ import numbers
  import numpy as np

  from ... import definitions as dfn
- from ...util import copy_if_needed
+

  from . import feat_defect

@@ -140,7 +140,7 @@ class H5MaskEvent:
          self.identifier = (self.h5dataset.file.filename, self.h5dataset.name)
          self.dtype = np.dtype(bool)

-     def __array__(self, dtype=np.bool_, copy=copy_if_needed, *args, **kwargs):
+     def __array__(self, dtype=np.bool_, copy=False, *args, **kwargs):
          if dtype is not np.uint8:
              warnings.warn("Please avoid calling the `__array__` method of the "
                            "`H5MaskEvent`. It may consume a lot of memory.",

@@ -180,7 +180,7 @@ class H5ScalarEvent(np.lib.mixins.NDArrayOperatorsMixin):
          # attrs
          self._ufunc_attrs = dict(self.h5ds.attrs)

-     def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
+     def __array__(self, dtype=None, copy=False, *args, **kwargs):
          if self._array is None:
              self._array = np.asarray(self.h5ds, *args, **kwargs)
          return np.array(self._array, dtype=dtype, copy=copy)
dclab/rtdc_dataset/fmt_hierarchy/base.py CHANGED
@@ -60,6 +60,7 @@ class RTDC_Hierarchy(RTDCBase):

          self._events = {}

+         #: hierarchy parent
          self.hparent = hparent

          self.config = self._create_config()  # init config