dclab 0.67.0__cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314t-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""
|
|
2
|
+
.. versionadded:: 0.33.0
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from ..definitions import feat_logic
|
|
11
|
+
|
|
12
|
+
from .core import RTDCBase
|
|
13
|
+
from .fmt_hierarchy import RTDC_Hierarchy, map_indices_child2root
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
_registered_temporary_features = []
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def deregister_all():
    """Remove every temporary feature that is currently registered.

    Convenience wrapper around :func:`deregister_temporary_feature`.
    """
    # Iterate over a snapshot, because deregistration mutates the
    # module-level registry while we loop.
    snapshot = list(_registered_temporary_features)
    for name in snapshot:
        deregister_temporary_feature(name)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def deregister_temporary_feature(feature: str):
    """Deregister a single temporary feature by name.

    Mostly used during testing. This does not delete any feature data
    that was already set on a dataset; the array stays in memory but is
    no longer reachable through the public :class:`RTDCBase` interface.
    Unknown feature names are silently ignored.
    """
    # Guard clause: nothing to do for features we never registered.
    if feature not in _registered_temporary_features:
        return
    _registered_temporary_features.remove(feature)
    feat_logic.feature_deregister(feature)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def register_temporary_feature(feature: str,
                               label: Optional[str] = None,
                               is_scalar: bool = True):
    """Register a new temporary feature with dclab.

    Temporary features are user-defined, ad-hoc features. They are
    useful when the built-in features do not suffice (prototyping,
    testing, collating with external data) and let you use the full
    :class:`RTDCBase` machinery with custom data instead of resorting
    to a separate `pandas.Dataframe`.

    Parameters
    ----------
    feature: str
        Feature name; allowed characters are lower-case letters,
        digits, and underscores
    label: str
        Feature label used e.g. for plotting
    is_scalar: bool
        Whether or not the feature is a scalar feature
    """
    # Register with the central feature logic first; only on success
    # record the name in the module-level registry.
    feat_logic.feature_register(feature, label, is_scalar)
    _registered_temporary_features.append(feature)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def set_temporary_feature(rtdc_ds: RTDCBase,
                          feature: str,
                          data: np.ndarray):
    """Attach temporary feature data to a dataset.

    Parameters
    ----------
    rtdc_ds: dclab.RTDCBase
        Dataset for which to set the feature. The length of `data`
        must match the number of events in `rtdc_ds`. For a hierarchy
        child, the data are propagated to the root parent as well;
        parent events that are not part of the child get np.nan.
    feature: str
        Feature name (must have been registered beforehand)
    data: np.ndarray
        The feature data

    Raises
    ------
    ValueError
        If the feature was not registered or the data length does not
        match the dataset.
    """
    if not feat_logic.feature_exists(feature):
        raise ValueError(
            f"Temporary feature '{feature}' has not been registered!")
    num_events = len(rtdc_ds)
    if len(data) != num_events:
        raise ValueError(f"The temporary feature {feature} must have same "
                         f"length as the dataset. Expected length "
                         f"{num_events}, got length {len(data)}!")
    if isinstance(rtdc_ds, RTDC_Hierarchy):
        # Map the child's event indices onto the root parent and build
        # a root-sized array that is nan everywhere except for events
        # present in the child.
        indices = map_indices_child2root(rtdc_ds, np.arange(num_events))
        root = rtdc_ds.get_root_parent()
        root_data = np.full(len(root), np.nan)
        root_data[indices] = data
        # Recurse: the root parent is not a hierarchy child, so this
        # takes the `else` branch below.
        set_temporary_feature(root, feature, root_data)
        rtdc_ds.rejuvenate()
    else:
        feat_logic.check_feature_shape(feature, data)
        # Store a read-only view so downstream code cannot silently
        # mutate the user's array through the dataset.
        readonly = data.view()
        readonly.setflags(write=False)
        rtdc_ds._usertemp[feature] = readonly
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""RT-DC dataset core classes and methods"""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from dclab import definitions as dfn
|
|
8
|
+
|
|
9
|
+
from .. import downsampling
|
|
10
|
+
from ..polygon_filter import PolygonFilter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NanWarning(UserWarning):
    """Warning issued when box-filtered feature data contain nan values

    Emitted in :func:`Filter.update` when a feature contains nan values
    and "remove invalid events" is disabled (nan events are still
    removed by the box filter).
    """
    pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Filter(object):
    def __init__(self, rtdc_ds):
        """Boolean filter arrays for RT-DC measurements

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The RT-DC dataset the filter applies to
        """
        # dictionary of boolean array for box filters
        self._box_filters = {}
        # dictionary of (hash, boolean array) for polygon filters
        self._poly_filters = {}
        # dictionary of all internal property filters
        # (keys "all", "box", "polygon", "invalid"; see _get_rw_array)
        self._array_props = {}
        # initialize important parameters
        self._init_rtdc_ds(rtdc_ds)
        # initialize properties
        self.reset()

    def __getitem__(self, key):
        """Return the writable box-filter array for a feature in `self.features`

        Raises
        ------
        KeyError
            If `key` is not an available scalar feature.
        """
        if key in self.features and dfn.scalar_feature_exists(key):
            if key not in self._box_filters:
                # Generate filters on-the-fly
                self._box_filters[key] = np.ones(self.size, dtype=bool)
        else:
            raise KeyError("Feature not available: '{}'".format(key))
        return self._box_filters[key]

    @property
    def all(self):
        """All filters combined (see :func:`Filter.update`)

        Use this property to filter the features of
        :class:`dclab.rtdc_dataset.RTDCBase` instances
        """
        return self._get_ro_array("all")

    @property
    def box(self):
        """All box filters"""
        return self._get_ro_array("box")

    @property
    def polygon(self):
        """Polygon filters"""
        return self._get_ro_array("polygon")

    @property
    def invalid(self):
        """Invalid (nan/inf) events"""
        return self._get_ro_array("invalid")

    def _get_ro_array(self, key):
        """Return a read-only view of the filter array for `key`"""
        view = self._get_rw_array(key).view()
        # prevent callers from mutating internal filter state
        view.flags.writeable = False
        return view

    def _get_rw_array(self, key):
        """Return (and lazily create, all-True) the writable array for `key`"""
        if key not in self._array_props:
            self._array_props[key] = np.ones(self.size, dtype=bool)
        return self._array_props[key]

    def _init_rtdc_ds(self, rtdc_ds):
        """Synchronize feature names, size, and stale filters with `rtdc_ds`"""
        #: Available feature names
        self.features = rtdc_ds.features_scalar
        # A Filter instance is bound to one dataset size; resizing is
        # not supported because all internal arrays are sized once.
        if hasattr(self, "size") and self.size != len(rtdc_ds):
            raise ValueError("Change of RTDCBase size not supported!")
        self.size = len(rtdc_ds)
        # determine box filters that have been removed
        for key in list(self._box_filters.keys()):
            if key not in self.features:
                self._box_filters.pop(key)
        # determine polygon filters that have been removed
        for pf_id in list(self._poly_filters.keys()):
            pf = PolygonFilter.get_instance_from_id(pf_id)
            if (pf_id in rtdc_ds.config["filtering"]["polygon filters"]
                    and pf.axes[0] in self.features
                    and pf.axes[1] in self.features):
                pass
            else:
                # filter has been removed
                self._poly_filters.pop(pf_id)

    def reset(self):
        """Reset all filters"""
        self._box_filters.clear()
        self._poly_filters.clear()
        self._array_props.clear()
        #: 1D boolean array for manually excluding events; `False` values
        #: are excluded.
        self.manual = np.ones(self.size, dtype=bool)
        # old filter configuration of `rtdc_ds`
        self._old_config = {}

    def update(self, rtdc_ds, force=None):
        """Update the filters according to `rtdc_ds.config["filtering"]`

        Parameters
        ----------
        rtdc_ds: dclab.rtdc_dataset.core.RTDCBase
            The measurement to which the filter is applied
        force : list
            A list of feature names that must be refiltered with
            min/max values.

        Notes
        -----
        This function is called when
        :func:`ds.apply_filter <dclab.rtdc_dataset.RTDCBase.apply_filter>`
        is called.
        """
        if force is None:
            force = []
        # re-initialize important parameters
        self._init_rtdc_ds(rtdc_ds)

        # These lists may help us become very fast in the future
        newkeys = []
        oldvals = []
        newvals = []

        cfg_cur = rtdc_ds.config["filtering"]
        cfg_old = self._old_config

        # Determine which data was updated (diff against the config
        # snapshot stored at the end of the previous update)
        for skey in list(cfg_cur.keys()):
            if cfg_cur[skey] != cfg_old.get(skey, None):
                newkeys.append(skey)
                oldvals.append(cfg_old.get(skey, None))
                newvals.append(cfg_cur[skey])

        # 1. Invalid filters
        arr_invalid = self._get_rw_array("invalid")
        arr_invalid[:] = True
        if cfg_cur["remove invalid events"]:
            for feat in self.features:
                data = rtdc_ds[feat]
                invalid = np.isinf(data) | np.isnan(data)
                arr_invalid &= ~invalid

        # 2. Filter all feature min/max values.
        feat2filter = []
        for k in newkeys:
            # k[:-4] because we want to crop " min" and " max"
            if (dfn.scalar_feature_exists(k[:-4])
                    and (k.endswith(" min") or k.endswith(" max"))):
                feat2filter.append(k[:-4])

        for f in force:
            # add forced features
            if dfn.scalar_feature_exists(f):
                feat2filter.append(f)
            else:
                # Make sure the feature name is valid.
                raise ValueError("Unknown scalar feature name '{}'!".format(f))

        feat2filter = np.unique(feat2filter)

        for feat in feat2filter:
            fstart = feat + " min"
            fend = feat + " max"
            # equal min/max means "no box filter for this feature"
            must_be_filtered = (fstart in cfg_cur
                                and fend in cfg_cur
                                and cfg_cur[fstart] != cfg_cur[fend])
            if ((fstart in cfg_cur and fend not in cfg_cur)
                    or (fstart not in cfg_cur and fend in cfg_cur)):
                # User is responsible for setting min and max values!
                raise ValueError("Box filter: Please make sure that both "
                                 "'{}' and '{}' are set!".format(fstart, fend))
            if feat in self.features:
                # Get the current feature filter
                feat_filt = self[feat]
                feat_filt[:] = True
                # If min and max exist and if they are not identical:
                if must_be_filtered:
                    ivalstart = cfg_cur[fstart]
                    ivalend = cfg_cur[fend]
                    if ivalstart > ivalend:
                        msg = "inverting filter: {} > {}".format(fstart, fend)
                        warnings.warn(msg)
                        ivalstart, ivalend = ivalend, ivalstart
                    data = rtdc_ds[feat]
                    # treat nan-values in a special way
                    disnan = np.isnan(data)
                    if np.sum(disnan):
                        # this avoids RuntimeWarnings (invalid value
                        # encountered due to nan-values)
                        feat_filt[disnan] = False
                        idx = ~disnan
                        if not cfg_cur["remove invalid events"]:
                            msg = "Feature '{}' contains ".format(feat) \
                                  + "nan-values! Box filters remove those."
                            warnings.warn(msg, NanWarning)
                    else:
                        idx = slice(0, self.size)  # place-holder for [:]
                    feat_filt[idx] &= ivalstart <= data[idx]
                    feat_filt[idx] &= data[idx] <= ivalend
            elif must_be_filtered:
                warnings.warn("Dataset '{}' does ".format(rtdc_ds.identifier)
                              + "not contain the feature '{}'! ".format(feat)
                              + "A box filter has been ignored.")
        # store box filters (AND of all per-feature box filters)
        arr_box = self._get_rw_array("box")
        arr_box[:] = True
        for feat in self._box_filters:
            arr_box &= self._box_filters[feat]

        # 3. Filter with polygon filters
        # check if something has changed (filter hash comparison)
        # perform polygon filtering
        for pf_id in cfg_cur["polygon filters"]:
            pf = PolygonFilter.get_instance_from_id(pf_id)
            if (pf_id not in self._poly_filters
                    or pf.hash != self._poly_filters[pf_id][0]):
                datax = rtdc_ds[pf.axes[0]]
                datay = rtdc_ds[pf.axes[1]]
                self._poly_filters[pf_id] = (pf.hash, pf.filter(datax, datay))
        # store polygon filters (AND of all polygon filter arrays)
        arr_polygon = self._get_rw_array("polygon")
        arr_polygon[:] = True
        for pf_id in self._poly_filters:
            arr_polygon &= self._poly_filters[pf_id][1]

        # 4. Finally combine all filters and apply "limit events"
        # get a list of all filters
        arr_all = self._get_rw_array("all")
        if cfg_cur["enable filters"]:
            arr_all[:] = arr_box & arr_invalid & arr_polygon & self.manual

            # Filter with configuration keyword argument "limit events".
            # This additional step limits the total number of events in
            # self.all.
            if cfg_cur["limit events"] > 0:
                limit = cfg_cur["limit events"]
                sub = arr_all[arr_all]
                _, idx = downsampling.downsample_rand(sub,
                                                      samples=limit,
                                                      ret_idx=True)
                sub[~idx] = False
                arr_all[arr_all] = sub
        else:
            arr_all[:] = True

        # Actual filtering is then done during plotting
        self._old_config = rtdc_ds.config.copy()["filtering"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""DCOR-med access token (SSL certificate + CKAN token)"""
|
|
2
|
+
import pathlib
|
|
3
|
+
import ssl
|
|
4
|
+
import tempfile
|
|
5
|
+
import zipfile
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_api_key(access_token_path, password):
    """Extract the API key / API token from an encrypted DCOR access token

    The access token is a (password-protected) zip archive containing
    the file ``api_key.txt``. The password may be given as `str` or
    `bytes`. Returns the API key as a stripped string.
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as arc:
        raw = arc.read("api_key.txt", pwd=pwd)
    return raw.decode().strip()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_certificate(access_token_path, password):
    """Extract the certificate bundle from an encrypted DCOR access token

    The access token is a (password-protected) zip archive containing
    the file ``server.cert``. The password may be given as `str` or
    `bytes`. Returns the raw certificate data as `bytes`.
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as arc:
        return arc.read("server.cert", pwd=pwd)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_hostname(access_token_path, password):
    """Extract the hostname from an encrypted DCOR access token

    The server certificate bundled in the access token is decoded and
    the "commonName" entry of its subject is returned.

    Raises
    ------
    KeyError
        If the certificate subject has no "commonName" entry.
    """
    cert_data = get_certificate(access_token_path, password)
    with tempfile.TemporaryDirectory(prefix="dcoraid_access_token_") as td:
        cert_file = pathlib.Path(td) / "server.cert"
        cert_file.write_bytes(cert_data)
        # NOTE: `ssl._ssl._test_decode_cert` is a private CPython helper
        # (kept here deliberately to avoid extra dependencies such as
        # `cryptography`); it expects a file path on disk, hence the
        # temporary directory round-trip.
        cert_dict = ssl._ssl._test_decode_cert(str(cert_file))
        # locate the common name in the certificate subject
        for ((key, value),) in cert_dict["subject"]:
            if key == "commonName":
                return value.strip()
    raise KeyError("Could not extract hostname from certificate!")
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from ...http_utils import REQUESTS_AVAILABLE # noqa: F401
|
|
5
|
+
from ...http_utils import requests, session_cache
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DCORAccessError(BaseException):
    """Raised when a DCOR API query cannot be completed

    NOTE(review): this derives from ``BaseException`` rather than
    ``Exception``, so a plain ``except Exception`` will *not* catch it.
    Presumably intentional (callers must handle it explicitly) —
    confirm before changing the base class.
    """
    pass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class APIHandler:
    """Handles the DCOR api with caching for simple queries"""
    #: These are cached to minimize network usage
    #: Note that we are not caching basins, since they may contain
    #: expiring URLs.
    cache_queries = ["metadata", "size", "feature_list", "valid"]
    #: DCOR API Keys/Tokens in the current session
    #: (class-level on purpose: shared across all APIHandler instances)
    api_keys = []

    def __init__(self, url, api_key="", cert_path=None, dcserv_api_version=2):
        """
        Parameters
        ----------
        url: str
            URL to DCOR API
        api_key: str
            DCOR API token
        cert_path: pathlib.Path
            the path to the server's CA bundle; by default this
            will use the default certificates (which depends on
            from where you obtained certifi/requests)
        dcserv_api_version: int
            version of the ckanext-dc_serve "dcserv" API to request
        """
        #: DCOR API URL
        self.url = url
        #: keyword argument to :func:`requests.request`
        #: (a CA bundle path, or True for default verification)
        self.verify = cert_path or True
        #: DCOR API Token
        self.api_key = api_key
        #: ckanext-dc_serve dcserv API version
        self.dcserv_api_version = dcserv_api_version
        #: create a session (reused per-URL via session_cache)
        self.session = session_cache.get_session(url)
        # per-instance cache for queries listed in `cache_queries`
        self._cache = {}

    @classmethod
    def add_api_key(cls, api_key):
        """Add an API Key/Token to the base class

        When accessing the DCOR API, all available API Keys/Tokens are
        used to access a resource (trial and error).
        """
        # empty/whitespace-only keys are ignored; duplicates are not added
        if api_key.strip() and api_key not in APIHandler.api_keys:
            APIHandler.api_keys.append(api_key)

    def _get(self,
             query: str,
             feat: str = None,
             trace: str = None,
             event: str = None,
             api_key: str = "",
             timeout: float = None,
             retries: int = 5):
        """Fetch information via the DCOR API

        Performs the HTTP GET (with retries) and returns the decoded
        JSON response without inspecting its "success" flag.

        Parameters
        ----------
        query: str
            API route
        feat: str
            DEPRECATED (use basins instead), adds f"&feature={feat}" to query
        trace: str
            DEPRECATED (use basins instead), adds f"&trace={trace}" to query
        event: str
            DEPRECATED (use basins instead), adds f"&event={event}" to query
        api_key: str
            DCOR API token to use
        timeout: float
            Request timeout
        retries: int
            Number of retries to fetch the request. For every retry, the
            timeout is increased by two seconds.

        Raises
        ------
        DCORAccessError
            If all `retries` attempts failed.
        """
        if timeout is None:
            timeout = 1
        # "version=2" introduced in dclab 0.54.3
        # (supported since ckanext.dc_serve 0.13.2)
        qstr = f"&version={self.dcserv_api_version}&query={query}"
        if feat is not None:
            qstr += f"&feature={feat}"
        if trace is not None:
            qstr += f"&trace={trace}"
        if event is not None:
            qstr += f"&event={event}"
        apicall = self.url + qstr
        fail_reasons = []
        for ii in range(retries):
            try:
                # try-except both requests and json conversion
                req = self.session.get(apicall,
                                       headers={"Authorization": api_key},
                                       verify=self.verify,
                                       # timeout grows with each retry
                                       timeout=timeout + ii * 2,
                                       )
                jreq = req.json()
            except requests.urllib3.exceptions.ConnectionError:  # requests
                fail_reasons.append("connection problem")
                continue
            except (requests.urllib3.exceptions.ReadTimeoutError,
                    requests.exceptions.ConnectTimeout):  # requests
                fail_reasons.append("timeout")
            except json.decoder.JSONDecodeError:  # json
                fail_reasons.append("invalid json")
                time.sleep(1)  # wait a bit, maybe the server is overloaded
                continue
            else:
                # success: leave the retry loop
                break
        else:
            # all retries exhausted without a successful response
            raise DCORAccessError(f"Could not complete query '{apicall}'. "
                                  f"I retried {retries} times. "
                                  f"Messages: {fail_reasons}")
        return jreq

    def get(self,
            query: str,
            feat: str = None,
            trace: str = None,
            event: str = None,
            timeout: float = None,
            retries: int = 5,
            ):
        """Fetch information from DCOR

        Cached wrapper around :func:`APIHandler._get` that tries the
        instance API key first and then every key registered via
        :func:`add_api_key` until one succeeds.

        Parameters
        ----------
        query: str
            API route
        feat: str
            DEPRECATED (use basins instead), adds f"&feature={feat}" to query
        trace: str
            DEPRECATED (use basins instead), adds f"&trace={trace}" to query
        event: str
            DEPRECATED (use basins instead), adds f"&event={event}" to query
        timeout: float
            Request timeout
        retries: int
            Number of retries to fetch the request. For every retry, the
            timeout is increased by two seconds.

        Raises
        ------
        DCORAccessError
            If no API key grants access to `query`.
        """
        if query in APIHandler.cache_queries and query in self._cache:
            result = self._cache[query]
        else:
            # default error in case the key list below is empty
            req = {"error": {"message": "No access to API (api key?)"}}
            for api_key in [self.api_key] + APIHandler.api_keys:
                req = self._get(query=query,
                                feat=feat,
                                trace=trace,
                                event=event,
                                api_key=api_key,
                                timeout=timeout,
                                retries=retries,
                                )
                if req["success"]:
                    self.api_key = api_key  # remember working key
                    break
            else:
                raise DCORAccessError(
                    f"Cannot access {query}: {req['error']['message']}")
            result = req["result"]
            if query in APIHandler.cache_queries:
                self._cache[query] = result
        return result
|