dclab 0.62.11__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +23 -0
- dclab/_version.py +16 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +183 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cp313-win_amd64.pyd +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +256 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde_contours.py +222 -0
- dclab/kde_methods.py +303 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +1001 -0
- dclab/rtdc_dataset/export.py +737 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +550 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +320 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +72 -0
- dclab/rtdc_dataset/writer.py +985 -0
- dclab/statistics.py +203 -0
- dclab/util.py +156 -0
- dclab/warn.py +15 -0
- dclab-0.62.11.dist-info/LICENSE +343 -0
- dclab-0.62.11.dist-info/METADATA +146 -0
- dclab-0.62.11.dist-info/RECORD +137 -0
- dclab-0.62.11.dist-info/WHEEL +5 -0
- dclab-0.62.11.dist-info/entry_points.txt +8 -0
- dclab-0.62.11.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""RT-DC dataset core classes and methods"""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from dclab import definitions as dfn
|
|
8
|
+
|
|
9
|
+
from .. import downsampling
|
|
10
|
+
from ..polygon_filter import PolygonFilter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NanWarning(UserWarning):
    """Warning category for features that contain nan values

    Issued by :func:`Filter.update` when a box filter is applied to a
    feature with nan values while "remove invalid events" is disabled.
    """
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Filter(object):
    def __init__(self, rtdc_ds):
        """Boolean filter arrays for RT-DC measurements

        All filter arrays have the length of the dataset; `True` means
        that the event is kept, `False` means that it is excluded.

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The RT-DC dataset the filter applies to
        """
        # dictionary of boolean array for box filters
        self._box_filters = {}
        # dictionary of (hash, boolean array) for polygon filters
        self._poly_filters = {}
        # dictionary of all internal property filters
        # (keys used in this class: "all", "box", "invalid", "polygon")
        self._array_props = {}
        # initialize important parameters
        self._init_rtdc_ds(rtdc_ds)
        # initialize properties
        self.reset()

    def __getitem__(self, key):
        """Return the filter for a feature in `self.features`

        The box filter array is created on first access (all `True`).

        Raises
        ------
        KeyError
            If `key` is not in `self.features` or is not a known scalar
            feature according to `dfn.scalar_feature_exists`.
        """
        if key in self.features and dfn.scalar_feature_exists(key):
            if key not in self._box_filters:
                # Generate filters on-the-fly
                self._box_filters[key] = np.ones(self.size, dtype=bool)
        else:
            raise KeyError("Feature not available: '{}'".format(key))
        return self._box_filters[key]

    @property
    def all(self):
        """All filters combined (see :func:`Filter.update`)

        Use this property to filter the features of
        :class:`dclab.rtdc_dataset.RTDCBase` instances

        The returned array is a read-only view.
        """
        return self._get_ro_array("all")

    @property
    def box(self):
        """All box filters (read-only view)"""
        return self._get_ro_array("box")

    @property
    def polygon(self):
        """Polygon filters (read-only view)"""
        return self._get_ro_array("polygon")

    @property
    def invalid(self):
        """Invalid (nan/inf) events (read-only view)"""
        return self._get_ro_array("invalid")

    def _get_ro_array(self, key):
        """Return a non-writeable view of the internal array `key`"""
        view = self._get_rw_array(key).view()
        # callers must not modify the combined arrays directly
        view.flags.writeable = False
        return view

    def _get_rw_array(self, key):
        """Return the writeable internal array `key`

        The array is created on first access with all values `True`
        and length `self.size`.
        """
        if key not in self._array_props:
            self._array_props[key] = np.ones(self.size, dtype=bool)
        return self._array_props[key]

    def _init_rtdc_ds(self, rtdc_ds):
        """Synchronize feature names, size, and cached filters with `rtdc_ds`

        Cached box filters for features that are no longer available
        and cached polygon filters that are no longer referenced in
        `rtdc_ds.config["filtering"]["polygon filters"]` (or whose axes
        are no longer available) are dropped.

        Raises
        ------
        ValueError
            If the length of `rtdc_ds` differs from a previously stored
            `self.size`.
        """
        #: Available feature names
        self.features = rtdc_ds.features_scalar
        # `size` does not exist yet when called from `__init__`
        if hasattr(self, "size") and self.size != len(rtdc_ds):
            raise ValueError("Change of RTDCBase size not supported!")
        self.size = len(rtdc_ds)
        # determine box filters that have been removed
        # (iterate over a copy of the keys, because we pop while iterating)
        for key in list(self._box_filters.keys()):
            if key not in self.features:
                self._box_filters.pop(key)
        # determine polygon filters that have been removed
        for pf_id in list(self._poly_filters.keys()):
            pf = PolygonFilter.get_instance_from_id(pf_id)
            if (pf_id in rtdc_ds.config["filtering"]["polygon filters"]
                    and pf.axes[0] in self.features
                    and pf.axes[1] in self.features):
                pass
            else:
                # filter has been removed
                self._poly_filters.pop(pf_id)

    def reset(self):
        """Reset all filters

        Clears all cached box, polygon, and combined filter arrays,
        re-creates `self.manual` with all events included, and forgets
        the previously applied filter configuration.
        """
        self._box_filters.clear()
        self._poly_filters.clear()
        self._array_props.clear()
        #: 1D boolean array for manually excluding events; `False` values
        #: are excluded.
        self.manual = np.ones(self.size, dtype=bool)
        # old filter configuration of `rtdc_ds`
        self._old_config = {}

    def update(self, rtdc_ds, force=None):
        """Update the filters according to `rtdc_ds.config["filtering"]`

        The update is performed in four steps:

        1. invalid (nan/inf) events, if "remove invalid events" is set
        2. box filters (" min"/" max" configuration keys) for features
           whose configuration changed or that are listed in `force`
        3. polygon filters that are new or whose hash changed
        4. combination of all filters (including `self.manual`) and
           application of "limit events", if "enable filters" is set;
           otherwise all events are included

        Parameters
        ----------
        rtdc_ds: dclab.rtdc_dataset.core.RTDCBase
            The measurement to which the filter is applied
        force : list
            A list of feature names that must be refiltered with
            min/max values.

        Raises
        ------
        ValueError
            If `force` contains an unknown scalar feature name, if only
            one of the " min"/" max" keys is set for a feature, or if
            the size of `rtdc_ds` changed (see `_init_rtdc_ds`).

        Notes
        -----
        This function is called when
        :func:`ds.apply_filter <dclab.rtdc_dataset.RTDCBase.apply_filter>`
        is called.
        """
        if force is None:
            force = []
        # re-initialize important parameters
        self._init_rtdc_ds(rtdc_ds)

        # These lists may help us become very fast in the future
        # (currently only `newkeys` is consumed below)
        newkeys = []
        oldvals = []
        newvals = []

        cfg_cur = rtdc_ds.config["filtering"]
        cfg_old = self._old_config

        # Determine which data was updated
        for skey in list(cfg_cur.keys()):
            if cfg_cur[skey] != cfg_old.get(skey, None):
                newkeys.append(skey)
                oldvals.append(cfg_old.get(skey, None))
                newvals.append(cfg_cur[skey])

        # 1. Invalid filters
        arr_invalid = self._get_rw_array("invalid")
        arr_invalid[:] = True
        if cfg_cur["remove invalid events"]:
            # an event is excluded if any scalar feature is nan or inf
            for feat in self.features:
                data = rtdc_ds[feat]
                invalid = np.isinf(data) | np.isnan(data)
                arr_invalid &= ~invalid

        # 2. Filter all feature min/max values.
        feat2filter = []
        for k in newkeys:
            # k[:-4] because we want to crop " min" and " max"
            if (dfn.scalar_feature_exists(k[:-4])
                    and (k.endswith(" min") or k.endswith(" max"))):
                feat2filter.append(k[:-4])

        for f in force:
            # add forced features
            if dfn.scalar_feature_exists(f):
                feat2filter.append(f)
            else:
                # Make sure the feature name is valid.
                raise ValueError("Unknown scalar feature name '{}'!".format(f))

        # remove duplicates (a feature may appear via " min", " max"
        # and `force`)
        feat2filter = np.unique(feat2filter)

        for feat in feat2filter:
            fstart = feat + " min"
            fend = feat + " max"
            # identical min and max values mean "no box filter"
            must_be_filtered = (fstart in cfg_cur
                                and fend in cfg_cur
                                and cfg_cur[fstart] != cfg_cur[fend])
            if ((fstart in cfg_cur and fend not in cfg_cur)
                    or (fstart not in cfg_cur and fend in cfg_cur)):
                # User is responsible for setting min and max values!
                raise ValueError("Box filter: Please make sure that both "
                                 "'{}' and '{}' are set!".format(fstart, fend))
            if feat in self.features:
                # Get the current feature filter
                feat_filt = self[feat]
                feat_filt[:] = True
                # If min and max exist and if they are not identical:
                if must_be_filtered:
                    ivalstart = cfg_cur[fstart]
                    ivalend = cfg_cur[fend]
                    if ivalstart > ivalend:
                        # swap the limits instead of producing an empty set
                        msg = "inverting filter: {} > {}".format(fstart, fend)
                        warnings.warn(msg)
                        ivalstart, ivalend = ivalend, ivalstart
                    data = rtdc_ds[feat]
                    # treat nan-values in a special way
                    disnan = np.isnan(data)
                    if np.sum(disnan):
                        # this avoids RuntimeWarnings (invalid value
                        # encountered due to nan-values)
                        feat_filt[disnan] = False
                        idx = ~disnan
                        if not cfg_cur["remove invalid events"]:
                            msg = "Feature '{}' contains ".format(feat) \
                                + "nan-values! Box filters remove those."
                            warnings.warn(msg, NanWarning)
                    else:
                        idx = slice(0, self.size)  # place-holder for [:]
                    # both limits are inclusive
                    feat_filt[idx] &= ivalstart <= data[idx]
                    feat_filt[idx] &= data[idx] <= ivalend
            elif must_be_filtered:
                warnings.warn("Dataset '{}' does ".format(rtdc_ds.identifier)
                              + "not contain the feature '{}'! ".format(feat)
                              + "A box filter has been ignored.")
        # store box filters
        arr_box = self._get_rw_array("box")
        arr_box[:] = True
        for feat in self._box_filters:
            arr_box &= self._box_filters[feat]

        # 3. Filter with polygon filters
        # check if something has changed
        # perform polygon filtering
        for pf_id in cfg_cur["polygon filters"]:
            pf = PolygonFilter.get_instance_from_id(pf_id)
            # only recompute if the filter is new or its hash changed
            if (pf_id not in self._poly_filters
                    or pf.hash != self._poly_filters[pf_id][0]):
                datax = rtdc_ds[pf.axes[0]]
                datay = rtdc_ds[pf.axes[1]]
                self._poly_filters[pf_id] = (pf.hash, pf.filter(datax, datay))
        # store polygon filters
        arr_polygon = self._get_rw_array("polygon")
        arr_polygon[:] = True
        for pf_id in self._poly_filters:
            arr_polygon &= self._poly_filters[pf_id][1]

        # 4. Finally combine all filters and apply "limit events"
        # get a list of all filters
        arr_all = self._get_rw_array("all")
        if cfg_cur["enable filters"]:
            arr_all[:] = arr_box & arr_invalid & arr_polygon & self.manual

            # Filter with configuration keyword argument "limit events".
            # This additional step limits the total number of events in
            # self.all.
            if cfg_cur["limit events"] > 0:
                limit = cfg_cur["limit events"]
                # `sub` is a copy holding only the currently included events
                sub = arr_all[arr_all]
                # NOTE(review): `idx` is used as a boolean mask over `sub`
                # below; confirm against `downsampling.downsample_rand`.
                _, idx = downsampling.downsample_rand(sub,
                                                      samples=limit,
                                                      ret_idx=True)
                sub[~idx] = False
                # write the downsampled selection back into `arr_all`
                arr_all[arr_all] = sub
        else:
            arr_all[:] = True

        # Actual filtering is then done during plotting
        # remember the applied configuration for the next change detection
        self._old_config = rtdc_ds.config.copy()["filtering"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""DCOR-med access token (SSL certificate + CKAN token)"""
|
|
2
|
+
import pathlib
|
|
3
|
+
import ssl
|
|
4
|
+
import tempfile
|
|
5
|
+
import zipfile
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_api_key(access_token_path, password):
    """Return the API key / API token stored in a DCOR access token

    The access token is a zip archive; the key is read from its member
    "api_key.txt", decoded, and stripped of surrounding whitespace.

    Parameters
    ----------
    access_token_path: str or pathlib.Path
        Path to the (encrypted) DCOR access token file
    password: str or bytes
        Password of the archive; a `str` is encoded as UTF-8

    Returns
    -------
    api_key: str
        The API key / API token
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as archive:
        raw_key = archive.read("api_key.txt", pwd=pwd)
        return raw_key.decode().strip()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_certificate(access_token_path, password):
    """Return the certificate bundle stored in a DCOR access token

    The access token is a zip archive; the certificate is the raw
    content of its member "server.cert".

    Parameters
    ----------
    access_token_path: str or pathlib.Path
        Path to the (encrypted) DCOR access token file
    password: str or bytes
        Password of the archive; a `str` is encoded as UTF-8

    Returns
    -------
    cert_data: bytes
        Content of the certificate file
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as archive:
        return archive.read("server.cert", pwd=pwd)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_hostname(access_token_path, password):
    """Return the hostname stored in a DCOR access token

    The hostname is the "commonName" entry of the subject of the
    certificate contained in the access token.

    Parameters
    ----------
    access_token_path: str or pathlib.Path
        Path to the (encrypted) DCOR access token file
    password: str or bytes
        Password of the archive

    Returns
    -------
    hostname: str
        The common name of the certificate, stripped of whitespace

    Raises
    ------
    KeyError
        If the certificate subject has no "commonName" entry
    """
    certificate = get_certificate(access_token_path, password)
    with tempfile.TemporaryDirectory(prefix="dcoraid_access_token_") as tmp:
        # The decoder below only accepts a file path, so the certificate
        # has to be written to disk first.
        cert_file = pathlib.Path(tmp) / "server.cert"
        cert_file.write_bytes(certificate)
        # NOTE: `_test_decode_cert` is a private function of the built-in
        # ssl module. It is used deliberately to avoid an additional
        # dependency, but it may break in a future Python release.
        decoded = ssl._ssl._test_decode_cert(str(cert_file))
    # Each subject entry is expected to hold exactly one (key, value) pair.
    for ((name, content),) in decoded["subject"]:
        if name == "commonName":
            return content.strip()
    raise KeyError("Could not extract hostname from certificate!")
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from ...http_utils import REQUESTS_AVAILABLE # noqa: F401
|
|
5
|
+
from ...http_utils import requests, session_cache
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DCORAccessError(BaseException):
    """Raised when a query to the DCOR API could not be completed

    NOTE(review): this derives from `BaseException`, so a plain
    `except Exception` does not catch it -- confirm that this is intended.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class APIHandler:
    """Handles the DCOR api with caching for simple queries"""
    #: queries whose results are cached (per instance) to minimize
    #: network usage
    cache_queries = ["metadata", "size", "feature_list", "valid"]
    #: DCOR API Keys/Tokens in the current session (shared by all instances)
    api_keys = []

    def __init__(self, url, api_key="", cert_path=None, dcserv_api_version=2):
        """

        Parameters
        ----------
        url: str
            URL to DCOR API
        api_key: str
            DCOR API token
        cert_path: pathlib.Path
            the path to the server's CA bundle; by default this
            will use the default certificates (which depends on
            from where you obtained certifi/requests)
        dcserv_api_version: int
            version of the ckanext-dc_serve dcserv API that is sent
            with every query
        """
        #: DCOR API URL
        self.url = url
        #: keyword argument to :func:`requests.request`
        #: (`True` if no certificate path is given)
        self.verify = cert_path or True
        #: DCOR API Token
        self.api_key = api_key
        #: ckanext-dc_serve dcserv API version
        self.dcserv_api_version = dcserv_api_version
        #: session used for all requests
        self.session = session_cache.get_session(url)
        # results of the queries listed in `cache_queries`
        self._cache = {}

    @classmethod
    def add_api_key(cls, api_key):
        """Add an API Key/Token to the base class

        When accessing the DCOR API, all available API Keys/Tokens are
        used to access a resource (trial and error).

        Empty (whitespace-only) and already known keys are ignored.
        """
        if not api_key.strip():
            return
        if api_key not in APIHandler.api_keys:
            APIHandler.api_keys.append(api_key)

    def _get(self, query, feat=None, trace=None, event=None, api_key="",
             retries=13):
        """Perform a single API query and return the decoded JSON response

        Parameters
        ----------
        query: str
            Name of the dcserv query
        feat, trace, event:
            Optional query arguments; `None` values are omitted
        api_key: str
            Sent as the "Authorization" header
        retries: int
            Maximum number of attempts

        Raises
        ------
        DCORAccessError
            If none of the `retries` attempts succeeded
        """
        # "version=2" introduced in dclab 0.54.3
        # (supported since ckanext.dc_serve 0.13.2)
        qstr = f"&version={self.dcserv_api_version}&query={query}"
        optional_args = (("feature", feat), ("trace", trace), ("event", event))
        for arg_name, arg_value in optional_args:
            if arg_value is not None:
                qstr += f"&{arg_name}={arg_value}"
        apicall = self.url + qstr
        fail_reasons = []
        for _ in range(retries):
            try:
                # try-except both requests and json conversion
                response = self.session.get(
                    apicall,
                    headers={"Authorization": api_key},
                    verify=self.verify,
                    timeout=1,
                )
                return response.json()
            except requests.urllib3.exceptions.ConnectionError:  # requests
                fail_reasons.append("connection problem")
            except (requests.urllib3.exceptions.ReadTimeoutError,
                    requests.exceptions.ConnectTimeout):  # requests
                fail_reasons.append("timeout")
            except json.decoder.JSONDecodeError:  # json
                fail_reasons.append("invalid json")
                time.sleep(1)  # wait a bit, maybe the server is overloaded
        raise DCORAccessError(f"Could not complete query '{apicall}'. "
                              f"I retried {retries} times. "
                              f"Messages: {fail_reasons}")

    def get(self, query, feat=None, trace=None, event=None):
        """Return the result of an API query, using the cache if possible

        The instance's own API key is tried first, followed by all keys
        registered via :func:`APIHandler.add_api_key`. The first key
        that works is remembered in `self.api_key`.

        Raises
        ------
        DCORAccessError
            If the query did not succeed with any of the API keys
        """
        cacheable = query in APIHandler.cache_queries
        if cacheable and query in self._cache:
            return self._cache[query]

        # fallback message in case no API key could be tried
        req = {"error": {"message": "No access to API (api key?)"}}
        candidates = [self.api_key] + APIHandler.api_keys
        succeeded = False
        for api_key in candidates:
            req = self._get(query, feat, trace, event, api_key)
            if req["success"]:
                self.api_key = api_key  # remember working key
                succeeded = True
                break
        if not succeeded:
            raise DCORAccessError(
                f"Cannot access {query}: {req['error']['message']}")

        result = req["result"]
        if cacheable:
            self._cache[query] = result
        return result
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""DCOR client interface"""
|
|
2
|
+
import pathlib
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from ...util import hashobj
|
|
6
|
+
|
|
7
|
+
from ..config import Configuration
|
|
8
|
+
from ..core import RTDCBase
|
|
9
|
+
|
|
10
|
+
from . import api
|
|
11
|
+
from .logs import DCORLogs
|
|
12
|
+
from .tables import DCORTables
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
#: Append directories here where dclab should look for certificate bundles
#: for a specific host. The directory should contain files named after the
#: hostname, e.g. "dcor.mpl.mpg.de.cert". The directories are searched in
#: list order and the first existing file is used.
DCOR_CERTS_SEARCH_PATHS = []

#: Regular expression for matching a DCOR resource URL. The scheme and the
#: host/API part are both optional; the resource identifier (lower-case
#: hexadecimal, 8-4-4-4-12 characters) is mandatory and must end the string.
REGEXP_DCOR_URL = re.compile(
    r"^(https?:\/\/)?"  # scheme
    r"([a-z0-9-\.]*\/?api\/3\/action\/dcserv\?id=)?"  # host with API
    r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$")  # id
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RTDC_DCOR(RTDCBase):
    def __init__(self, url, host="dcor.mpl.mpg.de", api_key="",
                 use_ssl=None, cert_path=None, dcserv_api_version=2,
                 *args, **kwargs):
        """Wrap around the DCOR API

        Parameters
        ----------
        url: str
            Full URL or resource identifier; valid values are

            - `<https://dcor.mpl.mpg.de/api/3/action/dcserv?id=
              b1404eb5-f661-4920-be79-5ff4e85915d5>`_
            - dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f
              661-4920-be79-5ff4e85915d5
            - b1404eb5-f661-4920-be79-5ff4e85915d5
        host: str
            The default host machine used if the host is not given in `url`
        api_key: str
            API key to access private resources
        use_ssl: bool
            Set this to False to disable SSL (should only be used for
            testing). Defaults to None (does not force SSL if the URL
            starts with "http://").
        cert_path: pathlib.Path
            The (optional) path to a server CA bundle; this should only
            be necessary for DCOR instances in the intranet with a custom
            CA or for certificate pinning.
        dcserv_api_version: int
            Version of the dcserv API to use. In version 0.13.2 of
            ckanext-dc_serve, version 2 was introduced which entails
            serving an S3-basin-only dataset.
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: str
            Full URL to the DCOR resource
        """
        if not api.REQUESTS_AVAILABLE:
            raise ModuleNotFoundError(
                "Package `requests` required for DCOR format!")

        super().__init__(*args, **kwargs)

        self._hash = None
        self.path = RTDC_DCOR.get_full_url(url, use_ssl, host)

        # Fall back to a host-specific or the default certificate bundle
        if cert_path is None:
            hostname = get_host_from_url(self.path)
            cert_path = get_server_cert_path(hostname)

        self.api = api.APIHandler(
            url=self.path,
            api_key=api_key,
            cert_path=cert_path,
            dcserv_api_version=dcserv_api_version,
        )

        # Parse configuration
        metadata = self.api.get(query="metadata")
        self.config = Configuration(cfg=metadata)

        # Lazy logs
        self.logs = DCORLogs(self.api)

        # Lazy tables
        self.tables = DCORTables(self.api)

        # Get size (ask the server only if the metadata do not contain it)
        event_count = self.config["experiment"].get("event count")
        if event_count is None:
            event_count = int(self.api.get(query="size"))
        self._size = event_count

        experiment = self.config["experiment"]
        self.title = f"{experiment['sample']} - M{experiment['run index']}"

    def __len__(self):
        """Return the number of events in the dataset"""
        return self._size

    @property
    def hash(self):
        """Hash value based on the full URL of the resource (`self.path`)"""
        if self._hash is None:
            self._hash = hashobj([self.path])
        return self._hash

    @staticmethod
    def get_full_url(url, use_ssl, host=None):
        """Return the full URL to a DCOR resource

        Parameters
        ----------
        url: str
            Full URL or resource identifier; valid values are

            - https://dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-
              df12-4299-aa2e-089e390aafd5
            - dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-df12-
              4299-aa2e-089e390aafd5
            - caab96f6-df12-4299-aa2e-089e390aafd5
        use_ssl: bool or None
            Set this to False to disable SSL (should only be used for
            testing). Defaults to None (does not force SSL if the URL
            starts with "http://").
        host: str
            Use this host if it is not specified in `url`

        Returns
        -------
        new_url: str
            URL of the form "<scheme>://<netloc>/<api_path>"
        """
        if use_ssl is None:
            # respect an explicit "http://", otherwise default to https
            scheme = "http" if url.startswith("http://") else "https"
        else:
            scheme = "https" if use_ssl else "http"

        # strip the scheme from the URL
        base = url.split("://", 1)[1] if "://" in url else url

        # determine the api_path and the netloc
        if "/" in base:
            netloc, api_path = base.split("/", 1)
        else:
            # only a resource identifier was given; default to `host`
            netloc = None
            api_path = "api/3/action/dcserv?id=" + base

        # remove https from host string (user convenience)
        if host is not None:
            host = host.split("://")[-1]

        if netloc is None:
            netloc = host
        return f"{scheme}://{netloc}/{api_path}"

    def basins_get_dicts(self):
        """Return list of dicts for all basins returned by the DCOR API

        An empty list is returned if the 'basins' query fails.
        """
        try:
            return self.api.get(query="basins")
        except api.DCORAccessError:
            # TODO: Do not catch this exception when all DCOR instances
            #       implement the 'basins' query.
            # This means that the server does not implement the 'basins' query.
            return []
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_host_from_url(url):
    """Extract the hostname from a URL

    Parameters
    ----------
    url: str
        URL with or without a scheme, e.g.
        "https://dcor.mpl.mpg.de/api/3/action/dcserv?id=..." or
        "dcor.mpl.mpg.de/api/3/action/dcserv?id=..."

    Returns
    -------
    host: str
        Everything between the scheme separator "://" (if present) and
        the next "/"; a port, if given, is part of the returned string.
    """
    parts = url.split("://")
    # Without a scheme there is only one part; use it as-is instead of
    # failing with an IndexError.
    remainder = parts[1] if len(parts) > 1 else parts[0]
    return remainder.split("/")[0]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_server_cert_path(host):
    """Return server certificate bundle for DCOR `host`

    The directories in `DCOR_CERTS_SEARCH_PATHS` are searched in order
    for a file named "<host>.cert". If no such file exists, the default
    certificate bundle of `requests` is returned.
    """
    for directory in DCOR_CERTS_SEARCH_PATHS:
        candidate = pathlib.Path(directory) / f"{host}.cert"
        if candidate.exists():
            return candidate
    # use default certificate bundle
    return api.requests.certs.where()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def is_dcor_url(string):
    """Check whether `string` is a DCOR resource URL or identifier

    Returns `False` for non-string input; otherwise the result of
    matching the whitespace-stripped string against `REGEXP_DCOR_URL`
    (a match object, or `None` if there is no match).
    """
    if isinstance(string, str):
        return REGEXP_DCOR_URL.match(string.strip())
    return False
|