dclab 0.67.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142) hide show
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,102 @@
1
+ """
2
+ .. versionadded:: 0.33.0
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import Optional
7
+
8
+ import numpy as np
9
+
10
+ from ..definitions import feat_logic
11
+
12
+ from .core import RTDCBase
13
+ from .fmt_hierarchy import RTDC_Hierarchy, map_indices_child2root
14
+
15
+
16
+ _registered_temporary_features = []
17
+
18
+
19
def deregister_all():
    """Deregister every temporary feature that is currently registered

    Each name in the module-level registry is passed to
    :func:`deregister_temporary_feature`.
    """
    # Work on a snapshot, because deregistering mutates the registry list.
    for name in tuple(_registered_temporary_features):
        deregister_temporary_feature(name)
23
+
24
+
25
def deregister_temporary_feature(feature: str):
    """Deregister a single temporary feature

    Mostly useful for testing. The feature data itself is not removed
    from any dataset: it stays in memory, but can no longer be reached
    through the public methods of the :class:`RTDCBase` user interface.

    Parameters
    ----------
    feature: str
        Name of the temporary feature; names that were never registered
        as temporary features are silently ignored.
    """
    if feature not in _registered_temporary_features:
        # nothing to do for unknown (or already removed) features
        return
    _registered_temporary_features.remove(feature)
    feat_logic.feature_deregister(feature)
36
+
37
+
38
def register_temporary_feature(
        feature: str,
        label: Optional[str] = None,
        is_scalar: bool = True):
    """Register a new temporary feature

    Temporary features are custom, ad hoc features defined by the user.
    They come in handy whenever the built-in features do not suffice,
    e.g. for prototyping, testing, or collating with other data, and
    they let you use the full functionality of :class:`RTDCBase` with
    your own data (no need for a custom `pandas.DataFrame`).

    Parameters
    ----------
    feature: str
        Feature name; allowed characters are lower-case letters,
        digits, and underscores
    label: str
        Feature label used e.g. for plotting
    is_scalar: bool
        Whether or not the feature is a scalar feature
    """
    # Register first: the name is only remembered as a temporary
    # feature if the registration itself did not raise.
    feat_logic.feature_register(feature, label, is_scalar)
    _registered_temporary_features.append(feature)
62
+
63
+
64
def set_temporary_feature(
        rtdc_ds: RTDCBase,
        feature: str,
        data: np.ndarray):
    """Set temporary feature data for a dataset

    Parameters
    ----------
    rtdc_ds: dclab.RTDCBase
        Dataset for which to set the feature. The length of `data`
        must match the number of events in `rtdc_ds`. If the dataset
        is a hierarchy child, the data are set in the root parent
        dataset instead: events that are part of the child receive
        the corresponding values from `data`, all other events of
        the root parent are set to np.nan.
    feature: str
        Feature name
    data: np.ndarray
        The data

    Raises
    ------
    ValueError
        If `feature` has not been registered or if the length of
        `data` does not match the length of `rtdc_ds`.
    """
    if not feat_logic.feature_exists(feature):
        raise ValueError(
            f"Temporary feature '{feature}' has not been registered!")

    n_events = len(rtdc_ds)
    if len(data) != n_events:
        raise ValueError(f"The temporary feature {feature} must have same "
                         f"length as the dataset. Expected length "
                         f"{n_events}, got length {len(data)}!")

    if isinstance(rtdc_ds, RTDC_Hierarchy):
        # Map every child event to its index in the root dataset and
        # scatter the child data into a nan-initialized root-sized array.
        root_indices = map_indices_child2root(rtdc_ds, np.arange(n_events))
        root = rtdc_ds.get_root_parent()
        root_data = np.full(len(root), np.nan)
        root_data[root_indices] = data
        set_temporary_feature(root, feature, root_data)
        rtdc_ds.rejuvenate()
        return

    feat_logic.check_feature_shape(feature, data)
    # Store a read-only view; the caller's array itself is left untouched.
    readonly = data.view()
    readonly.flags.writeable = False
    rtdc_ds._usertemp[feature] = readonly
@@ -0,0 +1,263 @@
1
+ """RT-DC dataset core classes and methods"""
2
+
3
+ import warnings
4
+
5
+ import numpy as np
6
+
7
+ from dclab import definitions as dfn
8
+
9
+ from .. import downsampling
10
+ from ..polygon_filter import PolygonFilter
11
+
12
+
13
class NanWarning(UserWarning):
    """Warning category used when box-filtered feature data contain nan"""
15
+
16
+
17
class Filter:
    """Boolean filter arrays for RT-DC measurements"""

    def __init__(self, rtdc_ds):
        """Set up empty filters for a dataset

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The RT-DC dataset the filter applies to
        """
        # feature name -> boolean array (box filters)
        self._box_filters = {}
        # polygon filter id -> (hash, boolean array)
        self._poly_filters = {}
        # name ("all", "box", "polygon", "invalid") -> boolean array
        self._array_props = {}
        # sets `self.features` and `self.size`
        self._init_rtdc_ds(rtdc_ds)
        # creates `self.manual` and clears everything else
        self.reset()

    def __getitem__(self, key):
        """Return the filter for a feature in `self.features`"""
        if key not in self.features or not dfn.scalar_feature_exists(key):
            raise KeyError(f"Feature not available: '{key}'")
        box_filter = self._box_filters.get(key)
        if box_filter is None:
            # box filters are created lazily on first access
            box_filter = np.ones(self.size, dtype=bool)
            self._box_filters[key] = box_filter
        return box_filter

    @property
    def all(self):
        """All filters combined (see :func:`Filter.update`)

        Use this property to filter the features of
        :class:`dclab.rtdc_dataset.RTDCBase` instances
        """
        return self._get_ro_array("all")

    @property
    def box(self):
        """All box filters"""
        return self._get_ro_array("box")

    @property
    def polygon(self):
        """Polygon filters"""
        return self._get_ro_array("polygon")

    @property
    def invalid(self):
        """Invalid (nan/inf) events"""
        return self._get_ro_array("invalid")

    def _get_ro_array(self, key):
        """Return a read-only view of the internal array named `key`"""
        readonly = self._get_rw_array(key).view()
        readonly.setflags(write=False)
        return readonly

    def _get_rw_array(self, key):
        """Return the writable internal array `key` (created on demand)"""
        array = self._array_props.get(key)
        if array is None:
            array = np.ones(self.size, dtype=bool)
            self._array_props[key] = array
        return array

    def _init_rtdc_ds(self, rtdc_ds):
        """Synchronize features and size with `rtdc_ds`

        Box and polygon filters that refer to features no longer
        available (or to polygon filters no longer listed in the
        dataset configuration) are dropped.

        Raises
        ------
        ValueError
            If the number of events in `rtdc_ds` differs from the
            size this filter was initialized with.
        """
        #: Available feature names
        self.features = rtdc_ds.features_scalar
        n_events = len(rtdc_ds)
        if hasattr(self, "size") and self.size != n_events:
            raise ValueError("Change of RTDCBase size not supported!")
        self.size = n_events
        # drop box filters whose feature has been removed
        stale_features = [feat for feat in self._box_filters
                          if feat not in self.features]
        for feat in stale_features:
            del self._box_filters[feat]
        # drop polygon filters that have been removed
        for pf_id in tuple(self._poly_filters):
            pf = PolygonFilter.get_instance_from_id(pf_id)
            still_valid = (
                pf_id in rtdc_ds.config["filtering"]["polygon filters"]
                and pf.axes[0] in self.features
                and pf.axes[1] in self.features)
            if not still_valid:
                del self._poly_filters[pf_id]

    def reset(self):
        """Reset all filters"""
        for store in (self._box_filters,
                      self._poly_filters,
                      self._array_props):
            store.clear()
        #: 1D boolean array for manually excluding events; `False` values
        #: are excluded.
        self.manual = np.ones(self.size, dtype=bool)
        # filter configuration of `rtdc_ds` seen during the last update
        self._old_config = {}

    def update(self, rtdc_ds, force=None):
        """Update the filters according to `rtdc_ds.config["filtering"]`

        Parameters
        ----------
        rtdc_ds: dclab.rtdc_dataset.core.RTDCBase
            The measurement to which the filter is applied
        force : list
            A list of feature names that must be refiltered with
            min/max values.

        Raises
        ------
        ValueError
            If `force` contains an unknown scalar feature name or if
            only one of the "min"/"max" keys of a box filter is set.

        Notes
        -----
        This function is called when
        :func:`ds.apply_filter <dclab.rtdc_dataset.RTDCBase.apply_filter>`
        is called.
        """
        forced = [] if force is None else force
        # re-initialize important parameters
        self._init_rtdc_ds(rtdc_ds)

        cfg_cur = rtdc_ds.config["filtering"]
        cfg_old = self._old_config

        # configuration keys that changed since the previous update
        changed_keys = [ckey for ckey in list(cfg_cur.keys())
                        if cfg_cur[ckey] != cfg_old.get(ckey, None)]

        # 1. Invalid filters
        arr_invalid = self._get_rw_array("invalid")
        arr_invalid[:] = True
        if cfg_cur["remove invalid events"]:
            for feat in self.features:
                # an event is valid only if it is neither nan nor inf
                arr_invalid &= np.isfinite(rtdc_ds[feat])

        # 2. Filter all feature min/max values.
        feat2filter = []
        for ckey in changed_keys:
            # strip the trailing " min" / " max" to get the feature name
            candidate = ckey[:-4]
            if (dfn.scalar_feature_exists(candidate)
                    and ckey.endswith((" min", " max"))):
                feat2filter.append(candidate)

        for feat in forced:
            # forced features must be valid scalar feature names
            if not dfn.scalar_feature_exists(feat):
                raise ValueError(f"Unknown scalar feature name '{feat}'!")
            feat2filter.append(feat)

        feat2filter = np.unique(feat2filter)

        for feat in feat2filter:
            fstart = feat + " min"
            fend = feat + " max"
            has_start = fstart in cfg_cur
            has_end = fend in cfg_cur
            if has_start != has_end:
                # User is responsible for setting min and max values!
                raise ValueError("Box filter: Please make sure that both "
                                 f"'{fstart}' and '{fend}' are set!")
            # identical min and max values mean "no filtering"
            must_be_filtered = (has_start
                                and has_end
                                and cfg_cur[fstart] != cfg_cur[fend])
            if feat not in self.features:
                if must_be_filtered:
                    warnings.warn(
                        f"Dataset '{rtdc_ds.identifier}' does "
                        + f"not contain the feature '{feat}'! "
                        + "A box filter has been ignored.")
                continue

            # Get the current feature filter and start from scratch
            feat_filt = self[feat]
            feat_filt[:] = True
            if not must_be_filtered:
                continue

            lower = cfg_cur[fstart]
            upper = cfg_cur[fend]
            if lower > upper:
                warnings.warn(f"inverting filter: {fstart} > {fend}")
                lower, upper = upper, lower
            data = rtdc_ds[feat]
            # nan-values are excluded explicitly; this also avoids
            # RuntimeWarnings (invalid value encountered) below
            is_nan = np.isnan(data)
            if np.any(is_nan):
                feat_filt[is_nan] = False
                idx = ~is_nan
                if not cfg_cur["remove invalid events"]:
                    warnings.warn(
                        f"Feature '{feat}' contains "
                        + "nan-values! Box filters remove those.",
                        NanWarning)
            else:
                idx = slice(0, self.size)  # place-holder for [:]
            feat_filt[idx] &= lower <= data[idx]
            feat_filt[idx] &= data[idx] <= upper

        # store box filters
        arr_box = self._get_rw_array("box")
        arr_box[:] = True
        for box_filter in self._box_filters.values():
            arr_box &= box_filter

        # 3. Filter with polygon filters
        for pf_id in cfg_cur["polygon filters"]:
            pf = PolygonFilter.get_instance_from_id(pf_id)
            cached = self._poly_filters.get(pf_id)
            # recompute only for new filters or when the hash changed
            if cached is None or pf.hash != cached[0]:
                datax = rtdc_ds[pf.axes[0]]
                datay = rtdc_ds[pf.axes[1]]
                self._poly_filters[pf_id] = (pf.hash, pf.filter(datax, datay))
        # store polygon filters
        arr_polygon = self._get_rw_array("polygon")
        arr_polygon[:] = True
        for _, poly_mask in self._poly_filters.values():
            arr_polygon &= poly_mask

        # 4. Finally combine all filters and apply "limit events"
        arr_all = self._get_rw_array("all")
        if not cfg_cur["enable filters"]:
            arr_all[:] = True
        else:
            arr_all[:] = arr_box & arr_invalid & arr_polygon & self.manual

            # The configuration key "limit events" additionally limits
            # the total number of events in `self.all`.
            if cfg_cur["limit events"] > 0:
                limit = cfg_cur["limit events"]
                sub = arr_all[arr_all]
                _, idx = downsampling.downsample_rand(sub,
                                                      samples=limit,
                                                      ret_idx=True)
                sub[~idx] = False
                arr_all[arr_all] = sub

        # Actual filtering is then done during plotting
        self._old_config = rtdc_ds.config.copy()["filtering"]
@@ -0,0 +1,7 @@
1
+ # flake8: noqa: F401
2
+ """DCOR client interface"""
3
+ from .api import REQUESTS_AVAILABLE
4
+ from .base import (
5
+ DCOR_CERTS_SEARCH_PATHS, RTDC_DCOR, get_server_cert_path, is_dcor_url
6
+ )
7
+ from .basin import DCORBasin
@@ -0,0 +1,52 @@
1
+ """DCOR-med access token (SSL certificate + CKAN token)"""
2
+ import pathlib
3
+ import ssl
4
+ import tempfile
5
+ import zipfile
6
+
7
+
8
def get_api_key(access_token_path, password):
    """Extract the API key / API token from an encrypted DCOR access token

    The access token is a zip archive; the key is read from its member
    "api_key.txt" and returned with surrounding whitespace removed.
    A `str` password is encoded as UTF-8 before it is used.
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as archive:
        raw_key = archive.read("api_key.txt", pwd=pwd)
    return raw_key.decode().strip()
15
+
16
+
17
def get_certificate(access_token_path, password):
    """Extract the certificate bundle from an encrypted DCOR access token

    The access token is a zip archive; the raw bytes of its member
    "server.cert" are returned. A `str` password is encoded as UTF-8
    before it is used.
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as archive:
        return archive.read("server.cert", pwd=pwd)
24
+
25
+
26
def get_hostname(access_token_path, password):
    """Extract the hostname from an encrypted DCOR access token

    The hostname is the "commonName" entry of the subject of the
    certificate stored in the access token.

    Raises
    ------
    KeyError
        If the certificate subject has no "commonName" entry.
    """
    certificate = get_certificate(access_token_path, password)
    with tempfile.TemporaryDirectory(prefix="dcoraid_access_token_") as tdir:
        cert_path = pathlib.Path(tdir) / "server.cert"
        cert_path.write_bytes(certificate)
        # NOTE: `_test_decode_cert` is a private function of the built-in
        # ssl module. It was chosen deliberately to avoid additional
        # dependencies, accepting that it may break in the future; the
        # original author suggested asking for this functionality to be
        # implemented in the requests library instead.
        cert_dict = ssl._ssl._test_decode_cert(str(cert_path))
    # look up the common name in the certificate subject
    for ((key, value),) in cert_dict["subject"]:
        if key == "commonName":
            return value.strip()
    raise KeyError("Could not extract hostname from certificate!")
@@ -0,0 +1,173 @@
1
+ import json
2
+ import time
3
+
4
+ from ...http_utils import REQUESTS_AVAILABLE # noqa: F401
5
+ from ...http_utils import requests, session_cache
6
+
7
+
8
class DCORAccessError(BaseException):
    """Raised when a DCOR API query cannot be completed or is denied"""
    # NOTE(review): this derives from BaseException, so a plain
    # `except Exception` does not catch it - confirm this is intended.
10
+
11
+
12
class APIHandler:
    """Handles the DCOR api with caching for simple queries"""
    #: These are cached to minimize network usage
    #: Note that we are not caching basins, since they may contain
    #: expiring URLs.
    cache_queries = ["metadata", "size", "feature_list", "valid"]
    #: DCOR API Keys/Tokens in the current session
    api_keys = []

    def __init__(self, url, api_key="", cert_path=None, dcserv_api_version=2):
        """

        Parameters
        ----------
        url: str
            URL to DCOR API
        api_key: str
            DCOR API token
        cert_path: pathlib.Path
            the path to the server's CA bundle; by default this
            will use the default certificates (which depends on
            from where you obtained certifi/requests)
        dcserv_api_version: int
            ckanext-dc_serve dcserv API version; sent as the
            `version` parameter of every query
        """
        #: DCOR API URL
        self.url = url
        #: keyword argument to :func:`requests.request`
        self.verify = cert_path or True
        #: DCOR API Token
        self.api_key = api_key
        #: ckanext-dc_serve dcserv API version
        self.dcserv_api_version = dcserv_api_version
        #: create a session
        self.session = session_cache.get_session(url)
        # query name -> cached result (only for `cache_queries`)
        self._cache = {}

    @classmethod
    def add_api_key(cls, api_key):
        """Add an API Key/Token to the base class

        When accessing the DCOR API, all available API Keys/Tokens are
        used to access a resource (trial and error).

        Leading and trailing whitespace is removed before the token is
        stored, so that whitespace-padded duplicates of the same token
        are not added twice. Empty tokens are ignored.
        """
        api_key = api_key.strip()
        # The list is deliberately shared via the base class.
        if api_key and api_key not in APIHandler.api_keys:
            APIHandler.api_keys.append(api_key)

    def _get(self,
             query: str,
             feat: str = None,
             trace: str = None,
             event: str = None,
             api_key: str = "",
             timeout: float = None,
             retries: int = 5):
        """Fetch information via the DCOR API

        Parameters
        ----------
        query: str
            API route
        feat: str
            DEPRECATED (use basins instead), adds f"&feature={feat}" to query
        trace: str
            DEPRECATED (use basins instead), adds f"&trace={trace}" to query
        event: str
            DEPRECATED (use basins instead), adds f"&event={event}" to query
        api_key: str
            DCOR API token to use
        timeout: float
            Request timeout; defaults to one second
        retries: int
            Number of retries to fetch the request. For every retry, the
            timeout is increased by two seconds.

        Returns
        -------
        jreq: dict
            The decoded JSON response

        Raises
        ------
        DCORAccessError
            If no attempt yielded a decodable JSON response
        """
        if timeout is None:
            timeout = 1
        # "version=2" introduced in dclab 0.54.3
        # (supported since ckanext.dc_serve 0.13.2)
        qstr = f"&version={self.dcserv_api_version}&query={query}"
        for name, value in (("feature", feat),
                            ("trace", trace),
                            ("event", event)):
            if value is not None:
                qstr += f"&{name}={value}"
        apicall = self.url + qstr
        fail_reasons = []
        # NOTE(review): the first two handlers reference urllib3 exception
        # classes; requests may wrap transport errors in
        # `requests.exceptions.*` - confirm the intended errors are caught.
        for attempt in range(retries):
            try:
                # try-except both requests and json conversion
                req = self.session.get(apicall,
                                       headers={"Authorization": api_key},
                                       verify=self.verify,
                                       timeout=timeout + attempt * 2,
                                       )
                jreq = req.json()
            except requests.urllib3.exceptions.ConnectionError:  # requests
                fail_reasons.append("connection problem")
            except (requests.urllib3.exceptions.ReadTimeoutError,
                    requests.exceptions.ConnectTimeout):  # requests
                fail_reasons.append("timeout")
            except json.decoder.JSONDecodeError:  # json
                fail_reasons.append("invalid json")
                time.sleep(1)  # wait a bit, maybe the server is overloaded
            else:
                break
        else:
            raise DCORAccessError(f"Could not complete query '{apicall}'. "
                                  f"I retried {retries} times. "
                                  f"Messages: {fail_reasons}")
        return jreq

    def get(self,
            query: str,
            feat: str = None,
            trace: str = None,
            event: str = None,
            timeout: float = None,
            retries: int = 5,
            ):
        """Fetch information from DCOR

        The instance's own API token is tried first, followed by all
        tokens registered via :func:`APIHandler.add_api_key`; every
        distinct token is tried only once. The first token that works
        is remembered in `self.api_key`. Results of the queries listed
        in `cache_queries` are cached per instance.

        Parameters
        ----------
        query: str
            API route
        feat: str
            DEPRECATED (use basins instead), adds f"&feature={feat}" to query
        trace: str
            DEPRECATED (use basins instead), adds f"&trace={trace}" to query
        event: str
            DEPRECATED (use basins instead), adds f"&event={event}" to query
        timeout: float
            Request timeout
        retries: int
            Number of retries to fetch the request. For every retry, the
            timeout is increased by two seconds.

        Returns
        -------
        result
            The "result" entry of the API response

        Raises
        ------
        DCORAccessError
            If none of the available API tokens grants access
        """
        cacheable = query in APIHandler.cache_queries
        if cacheable and query in self._cache:
            return self._cache[query]

        req = {"error": {"message": "No access to API (api key?)"}}
        # Drop duplicate tokens (order preserved) so that the same
        # token is not sent to the server more than once.
        candidates = list(dict.fromkeys([self.api_key] + APIHandler.api_keys))
        for api_key in candidates:
            req = self._get(query=query,
                            feat=feat,
                            trace=trace,
                            event=event,
                            api_key=api_key,
                            timeout=timeout,
                            retries=retries,
                            )
            if req["success"]:
                self.api_key = api_key  # remember working key
                break
        else:
            raise DCORAccessError(
                f"Cannot access {query}: {req['error']['message']}")
        result = req["result"]
        if cacheable:
            self._cache[query] = result
        return result