dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (137)
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
@@ -0,0 +1,550 @@
1
+ """
2
+ With basins, you can create analysis pipelines that result in output files
3
+ which, when opened in dclab, can access features stored in the input file
4
+ (without having to write those features to the output file).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import abc
9
+ import numbers
10
+ import threading
11
+ from typing import Dict, List, Literal
12
+ import uuid
13
+ import warnings
14
+ import weakref
15
+
16
+ import numpy as np
17
+
18
+ from ..util import copy_if_needed
19
+
20
+
21
class BasinFeatureMissingWarning(UserWarning):
    """Used when a basin feature is defined but not stored"""
23
+
24
+
25
class CyclicBasinDependencyFoundWarning(UserWarning):
    """Warning issued when a basin is defined in one of its sub-basins"""
27
+
28
+
29
class BasinmapFeatureMissingError(KeyError):
    """Raised when one of the `basinmap` features is not defined"""
32
+
33
+
34
class BasinNotAvailableError(BaseException):
    """Used to identify situations where the basin data is not available

    NOTE(review): this derives from ``BaseException`` rather than
    ``Exception`` — presumably so broad ``except Exception`` handlers
    do not silently swallow it; confirm before changing the base class.
    """
    pass
37
+
38
+
39
class BasinAvailabilityChecker(threading.Thread):
    """Daemon thread that performs a basin availability check

    Running :func:`Basin.is_available` in the background keeps slow
    (e.g. remote) basins from blocking the caller.
    """

    def __init__(self, basin, *args, **kwargs):
        # daemon=True: do not keep the interpreter alive for this check
        super().__init__(*args, daemon=True, **kwargs)
        self.basin = basin

    def run(self):
        # The call populates any availability caches on the basin side;
        # the return value is intentionally discarded here.
        self.basin.is_available()
48
+
49
+
50
class Basin(abc.ABC):
    """A basin represents data from an external source

    The external data must be a valid RT-DC dataset, subclasses
    should ensure that the corresponding API is available.
    """
    # Registry mapping basin format names to identifier-getter
    # callables — presumably populated by format implementations
    # elsewhere in the package; TODO confirm.
    id_getters = {}

    def __init__(self,
                 location: str,
                 name: str = None,
                 description: str = None,
                 features: List[str] = None,
                 measurement_identifier: str = None,
                 mapping: Literal["same",
                                  "basinmap0",
                                  "basinmap1",
                                  "basinmap2",
                                  "basinmap3",
                                  "basinmap4",
                                  "basinmap5",
                                  "basinmap6",
                                  "basinmap7",
                                  "basinmap8",
                                  "basinmap9",
                                  ] = "same",
                 mapping_referrer: Dict = None,
                 ignored_basins: List[str] = None,
                 key: str = None,
                 **kwargs):
        """

        Parameters
        ----------
        location: str
            Location of the basin, this can be a path or a URL, depending
            on the implementation of the subclass
        name: str
            Human-readable name of the basin
        description: str
            Lengthy description of the basin
        features: list of str
            List of features this basin provides; this list is enforced,
            even if the basin actually contains more features.
        measurement_identifier: str
            A measurement identifier against which to check the basin.
            If this is set to None (default), there is no certainty
            that the downstream dataset is from the same measurement.
        mapping: str
            Which type of mapping to use. This can be either "same"
            when the event list of the basin is identical to that
            of the dataset defining the basin, or one of the "basinmap"
            features (e.g. "basinmap1") in cases where the dataset consists
            of a subset of the events of the basin dataset. In the latter
            case, the feature defined by `mapping` must be present in the
            dataset and consist of integer-valued indices (starting at 0)
            for the basin dataset.
        mapping_referrer: dict-like
            Dict-like object from which "basinmap" features can be obtained
            in situations where `mapping != "same"`. This can be a simple
            dictionary of numpy arrays or e.g. an instance of
            :class:`.RTDCBase`.
        ignored_basins: list of str
            List of basins to ignore in subsequent basin instantiations
        key: str
            Unique key to identify this basin; normally computed from
            a JSON dump of the basin definition. A random string is used
            if None is specified.
        kwargs:
            Additional keyword arguments passed to the `load_dataset`
            method of the `Basin` subclass.

        .. versionchanged:: 0.58.0

            Added the `mapping` keyword argument to support basins
            with a superset of events.
        """
        #: location of the basin (e.g. path or URL)
        self.location = location
        #: user-defined name of the basin
        self.name = name
        #: lengthy description of the basin
        self.description = description
        # defining key of the basin (random UUID if not provided)
        self.key = key or str(uuid.uuid4())
        # features this basin provides (None means "determine lazily")
        self._features = features
        #: measurement identifier of the referencing dataset
        self.measurement_identifier = measurement_identifier
        # set to True once `verify_basin` has confirmed the identifier
        self._measurement_identifier_verified = False
        #: ignored basins
        self.ignored_basins = ignored_basins or []
        #: additional keyword arguments passed to the basin
        self.kwargs = kwargs
        #: Event mapping strategy. If this is "same", it means that the
        #: referring dataset and the basin dataset have identical event
        #: indices. If `mapping` is e.g. `basinmap1` then the mapping of the
        #: indices from the basin to the referring dataset is defined in
        #: `self.basinmap` (copied during initialization of this class from
        #: the array in the key `basinmap1` from the dict-like object
        #: `mapping_referrer`).
        self.mapping = mapping or "same"
        self._basinmap = None  # see `basinmap` property
        # Create a weakref to the original referrer: If it is an instance
        # of RTDCBase, then garbage collection can clean up properly and
        # the basin instance has no reason to exist without the referrer.
        if self.mapping != "same":
            self._basinmap_referrer = weakref.ref(mapping_referrer)
        else:
            self._basinmap_referrer = None
        # lazily-opened dataset handle (see `ds` property)
        self._ds = None
        # perform availability check in separate thread
        self._av_check_lock = threading.Lock()
        self._av_check = BasinAvailabilityChecker(self)
        self._av_check.start()
165
+
166
+ def __repr__(self):
167
+ options = [
168
+ self.name,
169
+ f"mapped {self.mapping}" if self.mapping != "same" else "",
170
+ f"features {self._features}" if self.features else "full-featured",
171
+ f"location {self.location}",
172
+ ]
173
+ opt_str = ", ".join([o for o in options if o])
174
+
175
+ return f"<{self.__class__.__name__} ({opt_str}) at {hex(id(self))}>"
176
+
177
+ def _assert_measurement_identifier(self):
178
+ """Make sure the basin matches the measurement identifier
179
+ """
180
+ if not self.verify_basin(run_identifier=True):
181
+ raise KeyError(f"Measurement identifier of basin {self.ds} "
182
+ f"({self.get_measurement_identifier()}) does "
183
+ f"not match {self.measurement_identifier}!")
184
+
185
    @property
    def basinmap(self):
        """Contains the indexing array in case of a mapped basin

        Returns None when ``self.mapping == "same"``. The array is
        computed once and cached in ``self._basinmap``.
        """
        if self._basinmap is None:
            if self.mapping != "same":
                try:
                    # NOTE(review): if the weakly-referenced referrer has
                    # been garbage-collected, `self._basinmap_referrer()`
                    # returns None and this raises TypeError instead of
                    # the error below — confirm whether that can happen.
                    basinmap = self._basinmap_referrer()[self.mapping]
                except (KeyError, RecursionError):
                    raise BasinmapFeatureMissingError(
                        f"Could not find the feature '{self.mapping}' in the "
                        f"dataset or any of its basins. This suggests that "
                        f"this feature was never saved anywhere. Please check "
                        f"the input files.")
                #: `basinmap` is an integer array that maps the events from the
                #: basin to the events of the referring dataset.
                self._basinmap = np.array(basinmap,
                                          dtype=np.uint64,
                                          copy=True)
            else:
                # identical mapping: no indexing array required
                self._basinmap = None
        return self._basinmap
206
+
207
    @property
    @abc.abstractmethod
    def basin_format(self):
        """Basin format (:class:`.RTDCBase` subclass), e.g. "hdf5" or "s3"
        """
        # to be implemented in subclasses (short format string)
213
+
214
    @property
    @abc.abstractmethod
    def basin_type(self):
        """Storage type to use (e.g. "file" or "remote")"""
        # to be implemented in subclasses (short storage-type string)
219
+
220
+ @property
221
+ def ds(self):
222
+ """The :class:`.RTDCBase` instance represented by the basin"""
223
+ if self._ds is None:
224
+ if not self.is_available():
225
+ raise BasinNotAvailableError(f"Basin {self} is not available!")
226
+ self._ds = self.load_dataset(self.location, **self.kwargs)
227
+ self._ds.ignore_basins(self.ignored_basins)
228
+ return self._ds
229
+
230
+ @property
231
+ def features(self):
232
+ """Features made available by the basin
233
+
234
+ .. versionchanged: 0.56.0
235
+
236
+ Return nested basin features
237
+ """
238
+ if self._features is None:
239
+ if self.is_available():
240
+ # If features are not specified already, either by previous
241
+ # call to this method or during initialization from basin
242
+ # definition, then make the innate and *all* the basin
243
+ # features available.
244
+ self._features = sorted(set(self.ds.features_innate
245
+ + self.ds.features_basin))
246
+ else:
247
+ self._features = []
248
+ return self._features
249
+
250
+ def as_dict(self):
251
+ """Return basin kwargs for :func:`RTDCWriter.store_basin`
252
+
253
+ Note that each subclass of :class:`.RTDCBase` has its own
254
+ implementation of :func:`.RTDCBase.basins_get_dicts` which
255
+ returns a list of basin dictionaries that are used to
256
+ instantiate the basins in :func:`RTDCBase.basins_enable`.
257
+ This method here is only intended for usage with
258
+ :func:`RTDCWriter.store_basin`.
259
+ """
260
+ return {
261
+ "basin_name": self.name,
262
+ "basin_type": self.basin_type,
263
+ "basin_format": self.basin_format,
264
+ "basin_locs": [self.location],
265
+ "basin_descr": self.description,
266
+ "basin_feats": self.features,
267
+ "basin_map": self.basinmap,
268
+ }
269
+
270
+ def close(self):
271
+ """Close any open file handles or connections"""
272
+ if self._ds is not None:
273
+ self._ds.close()
274
+ self._av_check.join(0.5)
275
+
276
+ def get_feature_data(self, feat):
277
+ """Return an object representing feature data of the basin"""
278
+ self._assert_measurement_identifier()
279
+ return self.ds[feat]
280
+
281
    def get_measurement_identifier(self):
        """Return the identifier of the basin dataset

        Delegates to the wrapped dataset's
        ``get_measurement_identifier`` (opens the dataset if needed).
        """
        return self.ds.get_measurement_identifier()
284
+
285
    @abc.abstractmethod
    def is_available(self):
        """Return True if the basin is available"""
        # to be implemented in subclasses; should be safe to call from
        # a background thread (see BasinAvailabilityChecker)
288
+
289
    @abc.abstractmethod
    def _load_dataset(self, location, **kwargs):
        """Subclasses should return an instance of :class:`.RTDCBase`"""
        # to be implemented in subclasses; called by `load_dataset`
292
+
293
+ def load_dataset(self, location, **kwargs):
294
+ """Return an instance of :class:`.RTDCBase` for this basin
295
+
296
+ If the basin mapping (`self.mapping`) is not the same as
297
+ the referencing dataset
298
+ """
299
+ ds = self._load_dataset(location, **kwargs)
300
+ if self.mapping != "same":
301
+ # The array `self.basinmap` may contain duplicate elements,
302
+ # which is why we cannot use hierarchy children to access the
303
+ # data (sometimes the data must be blown-up rather than gated).
304
+ ds_bn = BasinProxy(ds=ds, basinmap=self.basinmap)
305
+ else:
306
+ ds_bn = ds
307
+ return ds_bn
308
+
309
    def verify_basin(self, run_identifier=True, availability=True):
        """Check whether the basin is usable

        Parameters
        ----------
        run_identifier: bool
            Whether to check the basin's measurement identifier
            against ``self.measurement_identifier``
        availability: bool
            Whether to check basin availability; passing False is
            deprecated

        Returns
        -------
        valid: bool
            True if all requested checks passed; identifier
            verification is cached in
            ``self._measurement_identifier_verified``
        """
        if not availability:
            warnings.warn("The keyword argument 'availability' is "
                          "deprecated, because it can lead to long waiting "
                          "times with many unavailable basins.",
                          DeprecationWarning)
        if availability:
            check_avail = self.is_available()
        else:
            check_avail = True

        # Only check for run identifier if requested and if the availability
        # check did not fail.
        if run_identifier and check_avail:
            if not self._measurement_identifier_verified:
                if self.measurement_identifier is None:
                    # No measurement identifier was presented by the
                    # referencing dataset. Don't perform any checks.
                    self._measurement_identifier_verified = True
                else:
                    if self.mapping == "same":
                        # When we have identical mapping, then the measurement
                        # identifier has to match exactly.
                        verifier = str.__eq__
                    else:
                        # When we have non-identical mapping (e.g. exported
                        # data), then the measurement identifier has to
                        # partially match.
                        verifier = str.startswith
                    # i.e. self.measurement_identifier.startswith(basin id)
                    # or exact equality, depending on the mapping
                    self._measurement_identifier_verified = verifier(
                        self.measurement_identifier,
                        self.get_measurement_identifier()
                    )
            check_rid = self._measurement_identifier_verified
        else:
            check_rid = True

        return check_rid and check_avail
347
+
348
+
349
class BasinProxy:
    def __init__(self, ds, basinmap):
        """Proxy for accessing data in basin datasets

        The idea of a basin proxy is to give access to the data of an
        :class:`.RTDCBase` that is mapped, i.e. the indices defined for
        the basin do not coincide with the indices in the downstream
        dataset.

        This class achieves two things:
        1. Subset indexing: For every event in the downstream dataset, there
           is *only* one corresponding event in the basin dataset. This
           could also be achieved via hierarchy children
           (:class:`RTDCHierarchy`).
        2. Blown indexing: Two different events in the downstream dataset
           can refer to one event in the basin dataset. I.e. the basin
           dataset contains fewer events than the downstream dataset,
           because e.g. it is a raw image recording series that has been
           processed and multiple events were found in one frame.

        Parameters
        ----------
        ds: RTDCBase
            the basin dataset
        basinmap: np.ndarray
            1D integer indexing array that maps the events of the basin
            dataset to the downstream dataset
        """
        self.ds = ds
        # expose the dataset's basin-dict getter directly on the proxy
        self.basins_get_dicts = ds.basins_get_dicts
        self.basinmap = basinmap
        # cache of wrapped BasinProxyFeature objects, keyed by feature name
        self._features = {}
381
+
382
    def __contains__(self, item):
        """Pass membership testing through to the wrapped dataset"""
        return item in self.ds
384
+
385
+ def __getattr__(self, item):
386
+ if item in [
387
+ "basins",
388
+ "close",
389
+ "features",
390
+ "features_ancillary",
391
+ "features_basin",
392
+ "features_innate",
393
+ "features_loaded",
394
+ "features_local",
395
+ "features_scalar",
396
+ "get_measurement_identifier",
397
+ "ignore_basins",
398
+ ]:
399
+ return getattr(self.ds, item)
400
+ else:
401
+ raise AttributeError(
402
+ f"BasinProxy does not implement {item}")
403
+
404
+ def __getitem__(self, feat):
405
+ if feat not in self._features:
406
+ feat_obj = BasinProxyFeature(feat_obj=self.ds[feat],
407
+ basinmap=self.basinmap)
408
+ self._features[feat] = feat_obj
409
+ return self._features[feat]
410
+
411
    def __len__(self):
        """Number of events in the downstream (mapped) dataset"""
        return len(self.basinmap)
413
+
414
+
415
class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
    def __init__(self, feat_obj, basinmap):
        """Wrap around a feature object, mapping it upon data access"""
        self.feat_obj = feat_obj
        self.basinmap = basinmap
        # cache for fully-mapped scalar data (populated in __array__)
        self._cache = None
        # scalar features are 1D (events only); images/masks etc. are not
        self.is_scalar = bool(len(self.feat_obj.shape) == 1)
422
+
423
+ def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
424
+ if self._cache is None and self.is_scalar:
425
+ self._cache = self.feat_obj[:][self.basinmap]
426
+ else:
427
+ # This is dangerous territory in terms of memory usage
428
+ out_arr = np.empty((len(self.basinmap),) + self.feat_obj.shape[1:],
429
+ dtype=dtype or self.feat_obj.dtype,
430
+ *args, **kwargs)
431
+ for ii, idx in enumerate(self.basinmap):
432
+ out_arr[ii] = self.feat_obj[idx]
433
+ return out_arr
434
+ return np.array(self._cache, copy=copy)
435
+
436
+ def __getattr__(self, item):
437
+ if item in [
438
+ "dtype",
439
+ "shape",
440
+ "size",
441
+ ]:
442
+ return getattr(self.feat_obj, item)
443
+ else:
444
+ raise AttributeError(
445
+ f"BasinProxyFeature does not implement {item}")
446
+
447
    def __getitem__(self, index):
        """Return mapped feature data for `index`

        Single integer indices are resolved directly through
        `basinmap` (cheap); non-scalar features are assembled
        event-by-event; scalar slice/fancy indexing goes through the
        cached full array (see ``__array__``).
        """
        if self._cache is None and isinstance(index, numbers.Integral):
            # single index, cheap operation
            return self.feat_obj[self.basinmap[index]]
        elif not self.is_scalar:
            # image, mask, etc
            if isinstance(index, slice) and index == slice(None):
                # full slice: use the mapping array as-is
                indices = self.basinmap
            else:
                indices = self.basinmap[index]
            out_arr = np.empty((len(indices),) + self.feat_obj.shape[1:],
                               dtype=self.feat_obj.dtype)
            for ii, idx in enumerate(indices):
                out_arr[ii] = self.feat_obj[idx]
            return out_arr
        else:
            # sets the cache if not already set
            return self.__array__()[index]
465
+
466
    def __len__(self):
        """Number of events after mapping (downstream dataset length)"""
        return len(self.basinmap)
468
+
469
+
470
def basin_priority_sorted_key(bdict: Dict):
    """Yield a sorting value for a given basin that can be used with `sorted`

    Basins are normally stored in random order in a dataset. This
    function brings them into a deterministic order, prioritizing:

    - type: "internal" over "file" over "remote"
    - format: "h5dataset" over "hdf5" over "http" over "s3" over "dcor"
    - mapping: "same" over anything else

    Unknown types/formats sort last ("z").
    """
    type_rank = {
        "internal": "a",
        "file": "b",
        "remote": "c",
    }
    format_rank = {
        "h5dataset": "a",
        "hdf5": "b",
        "http": "c",
        "s3": "d",
        "dcor": "e",
    }
    key_type = type_rank.get(bdict.get("type"), "z")
    key_format = format_rank.get(bdict.get("format"), "z")
    # old basin dicts may not contain a "mapping" key
    mapping = bdict.get("mapping", "same")
    key_map = "a" if mapping == "same" else mapping
    return "".join((key_type, key_format, key_map))
498
+
499
+
500
class InternalH5DatasetBasin(Basin):
    # basin identification for the registry (see get_basin_classes)
    basin_format = "h5dataset"
    basin_type = "internal"

    def __init__(self, *args, **kwargs):
        """Basin backed by a group inside the referrer's own HDF5 file

        Requires a non-"same" `mapping` and an explicit `features`
        list; features that cannot be found in the HDF5 group at
        `self.location` are dropped with a warning.
        """
        super(InternalH5DatasetBasin, self).__init__(*args, **kwargs)
        if self.mapping == "same":
            raise ValueError(
                "'internal' basins must be instantiated with `mapping`. "
                "If you are not doing that, then you probably don't need "
                "them.")
        if self._features is None:
            raise ValueError("You must specify features when defining "
                             "internal basins.")
        # Redefine the features if necessary
        h5root = self._basinmap_referrer().h5file
        available_features = []
        for feat in self._features:
            if self.location in h5root and feat in h5root[self.location]:
                available_features.append(feat)
            else:
                warnings.warn(
                    f"Feature '{feat}' is defined as an internal basin, "
                    f"but it cannot be found in '{self.location}'.",
                    BasinFeatureMissingWarning)
        # mutate the existing list in-place (keeps external references)
        self._features.clear()
        self._features += available_features
527
+
528
+ def _load_dataset(self, location, **kwargs):
529
+ from .fmt_dict import RTDC_Dict
530
+ # get the h5file object
531
+ h5root = self._basinmap_referrer().h5file
532
+ ds_dict = {}
533
+ for feat in self.features:
534
+ ds_dict[feat] = h5root[self.location][feat]
535
+ return RTDC_Dict(ds_dict)
536
+
537
    def is_available(self):
        """Return True as long as the basin defines any features

        The feature list is pruned during ``__init__``, so an empty
        list means nothing usable is stored internally.
        """
        return bool(self._features)
539
+
540
    def verify_basin(self, *args, **kwargs):
        """It's not necessary to verify internal basins

        The data live in the referrer's own file, so identifier and
        availability checks are always considered passed.
        """
        return True
543
+
544
+
545
def get_basin_classes():
    """Return a mapping of basin format string to :class:`Basin` subclass

    Only direct subclasses of :class:`Basin` that define a
    `basin_format` attribute are included.
    """
    return {b_cls.basin_format: b_cls
            for b_cls in Basin.__subclasses__()
            if hasattr(b_cls, "basin_format")}
@@ -0,0 +1,102 @@
1
+ """
2
+ .. versionadded:: 0.33.0
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import Optional
7
+
8
+ import numpy as np
9
+
10
+ from ..definitions import feat_logic
11
+
12
+ from .core import RTDCBase
13
+ from .fmt_hierarchy import RTDC_Hierarchy, map_indices_child2root
14
+
15
+
16
+ _registered_temporary_features = []
17
+
18
+
19
def deregister_all():
    """Deregister all temporary features

    Iterates over a snapshot of the registry, since
    :func:`deregister_temporary_feature` mutates it.
    """
    for feat in tuple(_registered_temporary_features):
        deregister_temporary_feature(feat)
23
+
24
+
25
def deregister_temporary_feature(feature: str):
    """Convenience function for deregistering a temporary feature

    This method is mostly used during testing. It does not
    remove the actual feature data from any dataset; the data
    will stay in memory but is not accessible anymore through
    the public methods of the :class:`RTDCBase` user interface.
    Unregistered feature names are silently ignored.
    """
    try:
        _registered_temporary_features.remove(feature)
    except ValueError:
        # feature was not registered — nothing to do
        return
    feat_logic.feature_deregister(feature)
36
+
37
+
38
def register_temporary_feature(feature: str,
                               label: Optional[str] = None,
                               is_scalar: bool = True):
    """Register a new temporary feature

    Temporary features are custom features that can be defined ad hoc
    by the user. Temporary features are helpful when the integral
    features are not enough, e.g. for prototyping, testing, or
    collating with other data. Temporary features allow you to
    leverage the full functionality of :class:`RTDCBase` with
    your custom features (no need to go for a custom `pandas.Dataframe`).

    Parameters
    ----------
    feature: str
        Feature name; allowed characters are lower-case letters,
        digits, and underscores
    label: str
        Feature label used e.g. for plotting
    is_scalar: bool
        Whether or not the feature is a scalar feature
    """
    # Register with the global feature registry first; the name is only
    # recorded locally if registration did not raise.
    feat_logic.feature_register(feature, label, is_scalar)
    _registered_temporary_features.append(feature)
62
+
63
+
64
def set_temporary_feature(rtdc_ds: RTDCBase,
                          feature: str,
                          data: np.ndarray):
    """Set temporary feature data for a dataset

    Parameters
    ----------
    rtdc_ds: dclab.RTDCBase
        Dataset for which to set the feature. Note that the
        length of the feature `data` must match the number of events
        in `rtdc_ds`. If the dataset is a hierarchy child, the data will also
        be set in the parent dataset, but only for those events that are part
        of the child. For all events in the parent dataset that are not part
        of the child dataset, the temporary feature is set to np.nan.
    feature: str
        Feature name
    data: np.ndarray
        The data

    Raises
    ------
    ValueError
        If `feature` has not been registered or if `data` does not
        match the length of `rtdc_ds`
    """
    if not feat_logic.feature_exists(feature):
        raise ValueError(
            f"Temporary feature '{feature}' has not been registered!")
    if len(data) != len(rtdc_ds):
        raise ValueError(f"The temporary feature {feature} must have same "
                         f"length as the dataset. Expected length "
                         f"{len(rtdc_ds)}, got length {len(data)}!")
    if isinstance(rtdc_ds, RTDC_Hierarchy):
        # Hierarchy child: write the data into the root parent at the
        # mapped indices and fill unmapped parent events with NaN.
        root_ids = map_indices_child2root(rtdc_ds, np.arange(len(rtdc_ds)))
        root_parent = rtdc_ds.get_root_parent()
        root_feat_data = np.empty((len(root_parent)))
        root_feat_data[:] = np.nan
        root_feat_data[root_ids] = data
        # recurse: the root parent is not a hierarchy child
        set_temporary_feature(root_parent, feature, root_feat_data)
        # rejuvenate: presumably re-applies the hierarchy so the child
        # picks up the new parent feature — TODO confirm
        rtdc_ds.rejuvenate()
    else:
        feat_logic.check_feature_shape(feature, data)
        # expose a read-only view so callers cannot mutate stored data
        data_ro = data.view()
        data_ro.setflags(write=False)
        rtdc_ds._usertemp[feature] = data_ro