dclab 0.67.0__cp314-cp314t-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314t-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""Computation of ancillary features
|
|
2
|
+
|
|
3
|
+
Ancillary features are computed on-the-fly in dclab if the
|
|
4
|
+
required data are available. The features are registered here
|
|
5
|
+
and are computed when `RTDCBase.__getitem__` is called with
|
|
6
|
+
the respective feature name. When `RTDCBase.__contains__` is
|
|
7
|
+
called with the feature name, then the feature is not yet
|
|
8
|
+
computed, but the prerequisites are evaluated:
|
|
9
|
+
|
|
10
|
+
.. ipython::
|
|
11
|
+
:okwarning:
|
|
12
|
+
|
|
13
|
+
In [1]: import dclab
|
|
14
|
+
|
|
15
|
+
In [2]: ds = dclab.new_dataset("data/example.rtdc")
|
|
16
|
+
|
|
17
|
+
In [3]: ds.config["calculation"]["emodulus lut"] = "LE-2D-FEM-19"
|
|
18
|
+
|
|
19
|
+
In [4]: ds.config["calculation"]["emodulus medium"] = "CellCarrier"
|
|
20
|
+
|
|
21
|
+
In [5]: ds.config["calculation"]["emodulus temperature"] = 23.0
|
|
22
|
+
|
|
23
|
+
In [6]: ds.config["calculation"]["emodulus viscosity model"] = \
|
|
24
|
+
'buyukurganci-2022'
|
|
25
|
+
|
|
26
|
+
In [7]: "emodulus" in ds # nothing is computed yet
|
|
27
|
+
|
|
28
|
+
In [8]: ds["emodulus"] # now data are computed and cached
|
|
29
|
+
|
|
30
|
+
Once the data has been computed, `RTDCBase` caches it in
|
|
31
|
+
the `_ancillaries` property dict together with a hash
|
|
32
|
+
that is computed with `AncillaryFeature.hash`. The hash
|
|
33
|
+
is computed from the feature data `req_features` and the
|
|
34
|
+
configuration metadata `req_config`.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
import hashlib
|
|
38
|
+
import warnings
|
|
39
|
+
|
|
40
|
+
import numpy as np
|
|
41
|
+
|
|
42
|
+
from ...util import obj2bytes
|
|
43
|
+
from ... import definitions as dfn
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BadFeatureSizeWarning(UserWarning):
    """Issued when computed feature data do not match the event count"""


class AncillaryFeature:
    """Recipe for a feature that is computed from existing dataset data

    Every instance appends itself to the class-level lists
    `features` and `feature_names` when it is created.
    """
    #: All ancillary features registered
    features = []
    #: All feature names registered
    feature_names = []

    def __init__(self, feature_name, method, req_config=None,
                 req_features=None, req_func=lambda x: True, priority=0,
                 data=None, identifier=None):
        """A data feature that is computed from existing data

        Parameters
        ----------
        feature_name: str
            The name of the ancillary feature, e.g. "emodulus".
        method: callable
            The method that computes the feature. This method
            takes an instance of `RTDCBase` as argument.
        req_config: list
            Required configuration parameters to compute the feature,
            e.g. ["calculation", ["emodulus lut", "emodulus viscosity"]]
        req_features: list
            Required existing features in the dataset,
            e.g. ["area_cvx", "deform"]
        req_func: callable
            A function that takes an instance of `RTDCBase` as an
            argument and checks whether any other necessary criteria
            are met. By default, this is a lambda function that returns
            True. The function should return False if the necessary
            criteria are not met. This function may also return a
            non-boolean object instead of True, if the criteria are
            subject to change. In this case, the return value is
            converted with `obj2bytes` and included in the hash that
            identifies the cached ancillary feature (see `hash`).

            .. versionchanged:: 0.27.0
                Support non-boolean return values for caching purposes.

        priority: int
            The priority of the feature; if there are multiple
            AncillaryFeature defined for the same feature_name,
            then the priority of the features defines which feature
            returns True in `self.is_available`. A higher value
            means a higher priority.
        data: object or BaseModel
            Any other data relevant for the feature (e.g. the ML
            model for computing 'ml_score_xxx' features)
        identifier: None or str
            A unique identifier (e.g. MD5 hash) of the ancillary
            feature. For PluginFeatures or ML features, this should
            be computed at least from the input file and the feature
            name.

        Notes
        -----
        `req_config` and `req_features` are used to test whether the
        feature can be computed in `self.is_available`.
        """
        # Use fresh lists so that instances never share a mutable default
        if req_features is None:
            req_features = []
        if req_config is None:
            req_config = []
        self.feature_name = feature_name
        self.method = method
        self.req_config = req_config
        self.req_features = req_features
        self.req_func = req_func
        self.priority = priority
        self.data = data
        self.identifier = identifier

        # register this feature
        AncillaryFeature.features.append(self)
        AncillaryFeature.feature_names.append(feature_name)

    def __repr__(self):
        repre = " ".join([
            f"<{self.__class__.__name__}",
            f"'{self.feature_name}'",
            f"(id {self.identifier[:5]}...)" if self.identifier else "(no ID)",
            f"with priority {self.priority}",
            f"at {hex(id(self))}>",
        ])
        return repre

    @staticmethod
    def available_features(rtdc_ds):
        """Determine available features for an RT-DC dataset

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The dataset to check availability for

        Returns
        -------
        features: dict
            Dictionary with feature names as keys and instances
            of `AncillaryFeature` as values.
        """
        # TODO: This is quite slow.
        cols = {}
        for inst in AncillaryFeature.features:
            if inst.is_available(rtdc_ds):
                cols[inst.feature_name] = inst
        return cols

    @staticmethod
    def get_instances(feature_name):
        """Return all instances that compute `feature_name`"""
        feats = []
        for ft in AncillaryFeature.features:
            if ft.feature_name == feature_name:
                feats.append(ft)
        return feats

    @staticmethod
    def check_data_size(rtdc_ds, data_dict):
        """Check the feature data is the correct size. If it isn't, resize it.

        Data with too few entries are converted to a float array and
        padded with NaN along the first axis. Data with too many
        entries are truncated along the first axis. In both cases a
        `BadFeatureSizeWarning` is issued. Afterwards, numpy arrays
        (also those that are items of a list) are set read-only.

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The dataset from which the features are computed
        data_dict: dict
            Dictionary with `AncillaryFeature.feature_name` as keys and the
            computed data features (to be resized) as values.

        Returns
        -------
        data_dict: dict
            Dictionary with `feature_name` as keys and the correctly resized
            data features as values.
        """
        size = len(rtdc_ds)
        for key in data_dict:
            dsize = size - len(data_dict[key])
            if dsize > 0:
                msg = "Growing feature {} in {} by {} to match event number!"
                warnings.warn(msg.format(key, rtdc_ds, abs(dsize)),
                              BadFeatureSizeWarning)
                short = np.array(data_dict[key], dtype=float)
                # Pad along the first axis only, so that the per-event
                # shape of non-scalar data is kept.
                padded = np.full((size,) + short.shape[1:], np.nan,
                                 dtype=float)
                padded[:short.shape[0]] = short
                data_dict[key] = padded
            elif dsize < 0:
                msg = "Shrinking feature {} in {} by {} to match event number!"
                warnings.warn(msg.format(key, rtdc_ds, abs(dsize)),
                              BadFeatureSizeWarning)
                # Slicing works for lists and for arrays that do not own
                # their data; an in-place `ndarray.resize` does not.
                trimmed = data_dict[key][:size]
                if isinstance(trimmed, np.ndarray):
                    # Copy so the result is independent of the input array
                    trimmed = trimmed.copy()
                data_dict[key] = trimmed
            if isinstance(data_dict[key], np.ndarray):
                data_dict[key].setflags(write=False)
            elif isinstance(data_dict[key], list):
                for item in data_dict[key]:
                    if isinstance(item, np.ndarray):
                        item.setflags(write=False)
        return data_dict

    def compute(self, rtdc_ds):
        """Compute the feature with self.method. All ancillary features that
        share the same method will also be populated automatically.

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The dataset to compute the feature for

        Returns
        -------
        data_dict: dict
            Dictionary with `AncillaryFeature.feature_name` as keys and the
            computed data features (read-only) as values.

        Raises
        ------
        KeyError
            If `self.method` returned a dictionary that does not
            contain `self.feature_name`.
        """
        data_dict = self.method(rtdc_ds)
        if not isinstance(data_dict, dict):
            # a method may return the bare data of its single feature
            data_dict = {self.feature_name: data_dict}
        data_dict = AncillaryFeature.check_data_size(rtdc_ds, data_dict)
        if self.feature_name not in data_dict:
            raise KeyError(
                f"I expected the feature '{self.feature_name}' to be a key "
                + f"in the dictionary returned by {self}. But I found only "
                + f"the following: {sorted(data_dict.keys())}")
        for key in data_dict:
            dfn.check_feature_shape(key, data_dict[key])
        return data_dict

    def hash(self, rtdc_ds):
        """Used for identifying an ancillary computation

        The required features, the used configuration keys/values, and
        the return value of the requirement function are hashed.

        Returns
        -------
        hexdigest: str
            MD5 hex digest of the hashed data
        """
        hasher = hashlib.md5()
        # data columns
        for col in self.req_features:
            hasher.update(obj2bytes(rtdc_ds[col]))
        # config keys
        for sec, keys in self.req_config:
            for key in keys:
                val = rtdc_ds.config[sec][key]
                data = "{}:{}={}".format(sec, key, val)
                hasher.update(obj2bytes(data))
        # custom requirement function hash
        reqret = self.req_func(rtdc_ds)
        if not isinstance(reqret, bool):
            # add to hash if not a boolean
            hasher.update(obj2bytes(reqret))
        return hasher.hexdigest()

    def is_available(self, rtdc_ds, verbose=False):
        """Check whether the feature is available

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The dataset to check availability for
        verbose: bool
            If True, print which configuration section or key
            is missing.

        Returns
        -------
        available: bool
            `True`, if feature can be computed with `compute`

        Notes
        -----
        This method returns `False` for a feature if there
        is a feature defined with the same name but with
        higher priority (even if the feature would be
        available otherwise).
        """
        # Check config keys
        for section, keys in self.req_config:
            if section not in rtdc_ds.config:
                if verbose:
                    print("{} not in config".format(section))
                return False
            for key in keys:
                if key not in rtdc_ds.config[section]:
                    if verbose:
                        print("{} not in config['{}']".format(key,
                                                              section))
                    return False
        # Check features
        for col in self.req_features:
            if col not in rtdc_ds:
                return False
        # Check priorities of other features
        for of in AncillaryFeature.features:
            if of == self or of.feature_name != self.feature_name:
                # nothing to compare or other feature
                continue
            if of.priority > self.priority and of.is_available(rtdc_ds):
                # higher priority is available, thus
                # this feature is not available
                return False
        # Check user-defined function
        if not self.req_func(rtdc_ds):
            return False
        return True
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MachineLearningFeature:
    """Placeholder for the machine-learning feature class

    Instantiation always raises `NotImplementedError`.
    """
    def __init__(self, *args, **kwargs):
        reason = ("`MachineLearningFeature` has been stripped from dclab "
                  "since version 0.61.0.")
        raise NotImplementedError(reason)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_ml_feature(*args, **kwargs):
    """Placeholder that always raises `NotImplementedError`

    All positional and keyword arguments are ignored.
    """
    reason = ("`load_ml_feature` has been stripped from dclab "
              "since version 0.61.0.")
    raise NotImplementedError(reason)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def load_modc(*args, **kwargs):
    """Placeholder that always raises `NotImplementedError`

    All positional and keyword arguments are ignored.
    """
    reason = ("`load_modc` has been stripped from dclab "
              "since version 0.61.0.")
    raise NotImplementedError(reason)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def remove_all_ml_features():
    """Do nothing except issue a `DeprecationWarning`

    The warning is attributed to the caller of this function
    (``stacklevel=2``), so that users see which line of their own
    code still calls it.
    """
    warnings.warn("The `remove_all_ml_features` does nothing since it was "
                  "stripped from dclab version 0.61.0.",
                  DeprecationWarning,
                  stacklevel=2)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def save_modc(*args, **kwargs):
    """Placeholder that always raises `NotImplementedError`

    All positional and keyword arguments are ignored.
    """
    reason = ("`save_modc` has been stripped from dclab "
              "since version 0.61.0.")
    raise NotImplementedError(reason)
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""
|
|
2
|
+
.. versionadded:: 0.34.0
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import hashlib
|
|
7
|
+
import importlib
|
|
8
|
+
import pathlib
|
|
9
|
+
import sys
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
from ...definitions import feat_logic
|
|
13
|
+
from ..feat_anc_core import AncillaryFeature
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PluginImportError(BaseException):
    """Raised when a plugin feature script cannot be found or imported

    Note that this derives from `BaseException`, so it is not caught
    by ``except Exception`` handlers.
    """
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PlugInFeature(AncillaryFeature):
    def __init__(self,
                 feature_name: str,
                 info: dict,
                 plugin_path: Optional[str | pathlib.Path] = None):
        """A user-defined plugin feature

        Parameters
        ----------
        feature_name: str
            name of a feature that matches that defined in `info`
        info: dict
            Full plugin recipe (for all features) as given in the
            `info` dictionary in the plugin file.
            At least the following keys must be specified:

            - "method": callable function computing the plugin feature
              values (takes an :class:`dclab.rtdc_dataset.core.RTDCBase`
              as argument)
            - "feature names": list of plugin feature names provided
              by the plugin

            The following features are optional:

            - "description": short (one-line) description of the plugin
            - "long description": long description of the plugin
            - "feature labels": feature labels used e.g. for plotting
            - "feature shapes": list of tuples for each feature
              indicating the shape (this is required only for
              non-scalar features; for scalar features simply set
              this to ``None`` or ``(1,)``).
            - "scalar feature": list of boolean values indicating
              whether the features are scalar
            - "config required": configuration keys required to
              compute the plugin features (see the `req_config`
              parameter for :class:`.AncillaryFeature`)
            - "features required": list of feature names required to
              compute the plugin features (see the `req_features`
              parameter for :class:`.AncillaryFeature`)
            - "method check required": additional method that checks
              whether the features can be computed (see the `req_func`
              parameter for :class:`.AncillaryFeature`)
            - "version": version of this plugin (please use
              semantic versioning)

        plugin_path: str or pathlib.Path, optional
            path which was used to load the `PlugInFeature` with
            :func:`load_plugin_feature`.

        Raises
        ------
        ValueError
            If `info` does not pass the sanity checks (see
            `_sanity_check_original_info`).

        Notes
        -----
        `PlugInFeature` inherits from :class:`AncillaryFeature
        <dclab.rtdc_dataset.feat_anc_core.ancillary_feature.AncillaryFeature>`.
        Please read the advanced section on `PluginFeatures` in the dclab docs.
        """
        if plugin_path is not None:
            plugin_path = pathlib.Path(plugin_path)

        #: Plugin feature name
        self.feature_name = feature_name
        #: Path to the original plugin file
        self.plugin_path = plugin_path

        # perform sanity checks
        self._sanity_check_original_info(info)
        # keep this for tests
        self._original_info = info

        # populate `info` dictionary with missing values
        #: Dictionary containing all information relevant for
        #: this particular plugin feature instance
        self.plugin_feature_info = self._process_plugin_info(info)

        # register this plugin feature in definitions
        # This must be executed before initializing the super class
        # (AncillaryFeature). If we don't do this, then `remove_plugin_feature`
        # may end up removing innate features e.g., "deform".
        feat_logic.feature_register(
            name=self.feature_name,
            label=self.plugin_feature_info["feature label"],
            is_scalar=self.plugin_feature_info["scalar feature"]
        )

        # Instantiate the super class
        super(PlugInFeature, self).__init__(
            feature_name=self.plugin_feature_info["feature name"],
            method=self.plugin_feature_info["method"],
            req_config=self.plugin_feature_info["config required"],
            req_features=self.plugin_feature_info["features required"],
            req_func=self.plugin_feature_info["method check required"],
            identifier=self.plugin_feature_info["identifier"],
        )

    @staticmethod
    def _method_fingerprint(method) -> bytes:
        """Return bytes that identify the implementation of `method`

        For callables that expose `__code__` (plain functions, lambdas,
        bound methods), this is the raw bytecode `__code__.co_code`.
        For other callables (e.g. `functools.partial` objects or
        instances defining `__call__`), the wrapped function is looked
        up. If no code object can be found (e.g. builtins), the
        qualified name of the callable is used instead.
        """
        candidates = (
            method,
            getattr(method, "func", None),  # functools.partial
            getattr(type(method), "__call__", None),  # callable instance
        )
        for cand in candidates:
            code = getattr(cand, "__code__", None)
            if code is not None:
                return code.co_code
        name = getattr(method, "__qualname__", None)
        if name is None:
            name = type(method).__qualname__
        module = getattr(method, "__module__", None) or ""
        return f"{module}.{name}".encode("utf-8")

    def _process_plugin_info(self, original_info: dict) -> dict:
        """Return dictionary with all relevant info for this instance

        The returned dictionary contains the entries of `original_info`
        that apply to `self.feature_name`, with defaults filled in for
        missing optional keys, and an "identifier" (MD5 hex digest).
        """
        fidx = original_info["feature names"].index(self.feature_name)

        # determine feature label
        if ("feature labels" in original_info
                and original_info["feature labels"][fidx]):
            label = original_info["feature labels"][fidx]
        else:
            label = f"Plugin feature {self.feature_name}"

        # determine whether we have a scalar feature
        if "scalar feature" in original_info:
            is_scalar = original_info["scalar feature"][fidx]
        else:
            is_scalar = True  # default

        if is_scalar:
            event_shape = (1,)
        else:
            if "feature shapes" in original_info:
                event_shape = original_info["feature shapes"][fidx]
            else:
                event_shape = None

        # We assume that the script does not import any other custom
        # Python scripts.
        md5hasher = hashlib.md5()
        if isinstance(self.plugin_path, pathlib.Path):
            md5hasher.update(self.plugin_path.read_bytes())
        else:
            # Not every callable has a `__code__` attribute
            md5hasher.update(
                self._method_fingerprint(original_info["method"]))
        md5hasher.update(self.feature_name.encode("utf-8"))
        md5hasher.update(original_info.get("version", "").encode("utf-8"))
        for feat in original_info.get("features required", []):
            md5hasher.update(feat.encode("utf-8"))
        identifier = md5hasher.hexdigest()

        feature_info = {
            "method": original_info["method"],
            "description": original_info.get(
                "description", "No description provided"),
            "long description": original_info.get(
                "long description", "No long description provided."),
            "feature name": self.feature_name,
            "feature label": label,
            "feature shape": event_shape,
            "features required": original_info.get("features required", []),
            "config required": original_info.get("config required", []),
            "method check required": original_info.get(
                "method check required", lambda x: True),
            "scalar feature": is_scalar,
            # allow comparisons with distutil.version.LooseVersion
            "version": original_info.get("version", "0.0.0-unknown"),
            "plugin path": self.plugin_path,
            "identifier": identifier,
        }

        return feature_info

    def _sanity_check_original_info(self, original_info: dict):
        """Various checks on the `original_info` attribute dict

        Raises
        ------
        ValueError
            If the parameter `original_info` is not a dict.
            If `original_info["feature names"]` is not a list.
            If the `self.feature_name` is not in
            `original_info["feature names"]`.
            If the `method` provided in parameter `original_info`
            is not callable.
        """
        if not isinstance(original_info, dict):
            raise ValueError(
                "PlugInFeature parameter for `info` must be a dict, instead "
                f"a '{type(original_info)}' was given.")

        if not isinstance(original_info["feature names"], list):
            raise ValueError("'feature names' must be a list, "
                             f"got '{type(original_info['feature names'])}'")

        if self.feature_name not in original_info["feature names"]:
            raise ValueError(
                f"The feature name '{self.feature_name}' is not defined in "
                + "the `info` dict of the plugin feature"
                + (f" at {self.plugin_path}" if self.plugin_path else "")
                + f". Defined names are '{original_info['feature names']}'.")

        if not callable(original_info["method"]):
            raise ValueError(
                "The `method` you have provided in the parameter `info` is "
                f"not callable ('{original_info['method']}' is not "
                "a function).")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def import_plugin_feature_script(
|
|
210
|
+
plugin_path: str | pathlib.Path) -> dict:
|
|
211
|
+
"""Import the user-defined recipe and return the info dictionary
|
|
212
|
+
|
|
213
|
+
Parameters
|
|
214
|
+
----------
|
|
215
|
+
plugin_path: str or Path
|
|
216
|
+
pathname to a valid dclab plugin script
|
|
217
|
+
|
|
218
|
+
Returns
|
|
219
|
+
-------
|
|
220
|
+
info: dict
|
|
221
|
+
Dictionary with the information required to instantiate
|
|
222
|
+
one (or multiple) :class:`PlugInFeature`.
|
|
223
|
+
|
|
224
|
+
Raises
|
|
225
|
+
------
|
|
226
|
+
PluginImportError
|
|
227
|
+
If the plugin can not be found
|
|
228
|
+
|
|
229
|
+
Notes
|
|
230
|
+
-----
|
|
231
|
+
One recipe may define multiple plugin features.
|
|
232
|
+
"""
|
|
233
|
+
path = pathlib.Path(plugin_path)
|
|
234
|
+
if not path.exists():
|
|
235
|
+
raise PluginImportError("The plugin could be not be found at "
|
|
236
|
+
f"'{plugin_path}'!")
|
|
237
|
+
try:
|
|
238
|
+
# insert the plugin directory to sys.path so we can import it
|
|
239
|
+
sys.path.insert(-1, str(path.parent))
|
|
240
|
+
sys.dont_write_bytecode = True
|
|
241
|
+
plugin = importlib.import_module(path.stem)
|
|
242
|
+
except BaseException as e:
|
|
243
|
+
raise PluginImportError(
|
|
244
|
+
f"The plugin {plugin_path} could not be loaded!") from e
|
|
245
|
+
finally:
|
|
246
|
+
# undo our path insertion
|
|
247
|
+
sys.path.pop(0)
|
|
248
|
+
sys.dont_write_bytecode = False
|
|
249
|
+
|
|
250
|
+
return plugin.info
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def load_plugin_feature(
        plugin_path: str | pathlib.Path) -> List[PlugInFeature]:
    """Find and load PlugInFeature(s) from a user-defined recipe

    The recipe is imported with :func:`import_plugin_feature_script`
    and one :class:`PlugInFeature` is created for each entry of its
    "feature names" list.

    Parameters
    ----------
    plugin_path: str or Path
        pathname to a valid dclab plugin Python script

    Returns
    -------
    plugin_list: list of PlugInFeature
        list of PlugInFeature instances loaded from `plugin_path`

    Raises
    ------
    ValueError
        If the script dictionary "feature names" are not a list

    Notes
    -----
    One recipe may define multiple plugin features.

    See Also
    --------
    import_plugin_feature_script: function that imports the plugin script
    PlugInFeature: class handling the plugin feature information
    dclab.rtdc_dataset.feat_temp.register_temporary_feature: alternative
        method for creating user-defined features
    """
    recipe = import_plugin_feature_script(plugin_path)
    names = recipe["feature names"]
    if not isinstance(names, list):
        raise ValueError(
            "'feature names' must be a list of strings.")

    return [PlugInFeature(name, recipe, plugin_path) for name in names]
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def remove_all_plugin_features():
    """Convenience function for removing all `PlugInFeature` instances

    The registered features are visited in reverse registration
    order; instances that are not a `PlugInFeature` are left alone.

    See Also
    --------
    remove_plugin_feature: remove a single `PlugInFeature` instance
    """
    # Work on a snapshot, because `remove_plugin_feature` modifies
    # the list of registered features.
    newest_first = PlugInFeature.features[::-1]
    for candidate in newest_first:
        if not isinstance(candidate, PlugInFeature):
            continue
        remove_plugin_feature(candidate)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def remove_plugin_feature(plugin_instance: PlugInFeature):
    """Convenience function for removing a `PlugInFeature` instance

    The feature name is removed from the registered feature names,
    the feature is deregistered via `feat_logic.feature_deregister`,
    and the instance is removed from the registered features.

    Parameters
    ----------
    plugin_instance: PlugInFeature
        The `PlugInFeature` instance to be removed from dclab

    Raises
    ------
    TypeError
        If the `plugin_instance` is not a `PlugInFeature` instance
    """
    if not isinstance(plugin_instance, PlugInFeature):
        raise TypeError(f"Type {type(plugin_instance)} should be an instance "
                        f"of PlugInFeature. '{plugin_instance}' was given.")

    name = plugin_instance.feature_name
    # This check is necessary for situations where the PlugInFeature fails
    # between updating the `dclab.dfn` file and initialising the
    # AncillaryFeature
    if name in PlugInFeature.feature_names:
        PlugInFeature.feature_names.remove(name)
    feat_logic.feature_deregister(name)
    PlugInFeature.features.remove(plugin_instance)
|