dclab 0.67.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142) hide show
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,242 @@
1
+ """Volume computation based on contour revolution"""
2
+ import numpy as np
3
+
4
+
5
def get_volume(cont, pos_x, pos_y, pix, fix_orientation=False):
    """Calculate the volume of a polygon revolved around an axis

    The volume estimation assumes rotational symmetry.

    Parameters
    ----------
    cont: ndarray or list of ndarrays of shape (N,2)
        A 2D array that holds the contour of an event [px]
        e.g. obtained using `mm.contour` where  `mm` is an instance
        of `RTDCBase`. The first and second columns of `cont`
        correspond to the x- and y-coordinates of the contour.
    pos_x: float or ndarray of length N
        The x coordinate(s) of the centroid of the event(s) [µm]
        e.g. obtained using `mm.pos_x`
    pos_y: float or ndarray of length N
        The y coordinate(s) of the centroid of the event(s) [µm]
        e.g. obtained using `mm.pos_y`
    pix: float
        The detector pixel size in µm.
        e.g. obtained using: `mm.config["imaging"]["pixel size"]`
    fix_orientation: bool
        If set to True, make sure that the orientation of the
        contour is counter-clockwise in the r-z plane
        (see :func:`vol_revolve`). This is False by default, because
        (1) Shape-In always stores the contours in the correct
        orientation and (2) there may be events with high porosity
        where "fixing" the orientation makes things worse and a
        negative volume is returned.

    Returns
    -------
    volume: float or ndarray
        volume in um^3; events whose contour has fewer than four
        points (or for which no contour was given) yield `np.nan`

    Raises
    ------
    ValueError
        If `pos_x` and `pos_y` differ in size or if several positions
        but at most one contour are given.

    Notes
    -----
    The computation of the volume is based on a full rotation of the
    upper and the lower halves of the contour from which the
    average is then used.

    The volume is computed radially from the center position
    given by (`pos_x`, `pos_y`). For sufficiently smooth contours,
    such as densely sampled ellipses, the center position does not
    play an important role. For contours that are given on a coarse
    grid, as is the case for RT-DC, the center position must be
    given.

    References
    ----------
    - https://de.wikipedia.org/wiki/Kegelstumpf#Formeln
    - Yields identical results to the `Matlab script by Geoff Olynyk
      <https://de.mathworks.com/matlabcentral/fileexchange/36525-volrevolve>`_
    """
    if np.isscalar(pos_x):
        cont = [cont]
        ret_list = False
    else:
        ret_list = True

    # Convert input to 1D arrays
    pos_x = np.atleast_1d(pos_x)
    pos_y = np.atleast_1d(pos_y)

    if pos_x.size != pos_y.size:
        raise ValueError("Size of `pos_x` and `pos_y` must match!")

    if pos_x.size > 1 and len(cont) <= 1:
        raise ValueError("Number of given contours too small!")

    # results are stored in a separate array initialized with nans
    v_avg = np.full(pos_x.shape, np.nan, dtype=np.float64)

    # v_avg has the shape of `pos_x`. We are iterating over the smallest
    # length for `cont` and `pos_x`.
    for ii in range(min(len(cont), pos_x.shape[0])):
        cc = cont[ii]
        # If the contour has less than 4 pixels, the computation will fail.
        # In that case, the value np.nan is already assigned.
        if cc.shape[0] < 4:
            continue

        # Center contour coordinates with given centroid and switch to
        # r and z to follow the notation of vol_revolve (in RT-DC the
        # axis of rotation is x, but for vol_revolve we need the axis
        # vertically).
        contour_r = cc[:, 1] - pos_y[ii] / pix
        contour_z = cc[:, 0] - pos_x[ii] / pix
        if fix_orientation:
            # Make sure the contour is counter-clockwise
            contour_r, contour_z = counter_clockwise(contour_r, contour_z)

        # NOTE: `contour_z` (not the raw x-coordinates) must be used
        # below. `counter_clockwise` may have reversed both arrays and
        # the radii would otherwise be paired with the wrong heights.

        # Compute right volume: points at negative r-values (r<0) are
        # shifted up to r=0, directly onto the z-axis.
        contour_right = np.where(contour_r < 0, 0, contour_r)
        vol_right = vol_revolve(contour_right, contour_z, pix)

        # Compute left volume: points at positive r-values (r>0) are
        # shifted down to r=0. The remaining values are negative, but
        # vol_revolve needs positive values, so we flip the sign...
        contour_left = -np.where(contour_r > 0, 0, contour_r)
        # ... but in doing so, we have switched to clockwise rotation
        # and we need to pass the arrays in reverse order.
        vol_left = vol_revolve(contour_left[::-1], contour_z[::-1], pix)

        # Compute the average
        v_avg[ii] = (vol_right + vol_left) / 2

    if not ret_list:
        # Do not return a list if the input contour was not in a list
        v_avg = v_avg[0]

    return v_avg
126
+
127
+
128
def counter_clockwise(cx, cy):
    """Return the contour coordinates in counter-clockwise order

    Parameters
    ----------
    cx, cy: 1d ndarrays
        The x- and y-coordinates of the contour

    Returns
    -------
    cx_cc, cy_cc:
        The x- and y-coordinates of the contour in
        counter-clockwise orientation. The inputs are returned
        unchanged if they already are counter-clockwise, otherwise
        reversed views of the inputs are returned.

    Notes
    -----
    The contour must be centered around (0, 0).
    """
    # Unwrapped polar angle of every contour point as seen from (0, 0)
    phi = np.unwrap(np.arctan2(cy, cx))
    # An angle that decreases on average means clockwise ordering
    runs_clockwise = np.average(np.diff(phi)) < 0
    if not runs_clockwise:
        return cx, cy
    return cx[::-1], cy[::-1]
153
+
154
+
155
def vol_revolve(r, z, point_scale=1.):
    r"""Calculate the volume of a polygon revolved around the Z-axis

    This implementation yields the same results as the volRevolve
    Matlab function by Geoff Olynyk (from 2012-05-03)
    https://de.mathworks.com/matlabcentral/fileexchange/36525-volrevolve.

    The difference here is that the volume is computed using (a much
    more approachable) implementation using the volume of a truncated
    cone (https://de.wikipedia.org/wiki/Kegelstumpf).

    .. math::

        V = \frac{h \cdot \pi}{3} \cdot (R^2 + R \cdot r + r^2)

    Where :math:`h` is the height of the cone and :math:`r` and
    `R` are the smaller and larger radii of the truncated cone.

    Each line segment of the contour resembles one truncated cone. If
    the z-step is positive (counter-clockwise contour), then the
    truncated cone volume is added to the total volume. If the z-step
    is negative (e.g. inclusion), then the truncated cone volume is
    removed from the total volume.

    .. versionchanged:: 0.37.0

        The volume in previous versions was overestimated by on average
        2µm³.

    Parameters
    ----------
    r: 1d np.ndarray
        radial coordinates (perpendicular to the z axis)
    z: 1d np.ndarray
        coordinate along the axis of rotation
    point_scale: float
        point size in your preferred units; The volume is multiplied
        by a factor of `point_scale**3`.

    Notes
    -----
    The coordinates must be given in counter-clockwise order,
    otherwise the volume will be negative.
    """
    r = np.array(r).flatten()
    z = np.array(z).flatten()

    # sanity checks
    assert len(r) == len(z)
    assert len(r) >= 3
    assert len(r.shape) == len(z.shape) == 1
    assert np.all(r >= 0)

    # An open contour is closed by repeating its first point at the end.
    if not (r[-1] == r[0] and z[-1] == z[0]):
        r = np.append(r, r[0])
        z = np.append(z, z[0])

    # Radius at the beginning of each line segment
    radius_start = r[:-1]
    # Radius difference (R - r) and height (h) of each line segment
    delta_r = np.diff(r)
    delta_z = np.diff(z)

    # Expanding the doc string formula with delta_r = R - r and
    # delta_z = h yields three terms (as opposed to four terms in
    # Olynyk's script). Each resembles an area slice that is
    # multiplied by the z-distance.
    area_terms = (3 * radius_start**2
                  + 3 * radius_start*delta_r
                  + delta_r**2)

    # The formula is symmetric in r and R, so the sign of delta_r does
    # not matter. The contour is assumed to be counter-clockwise, hence
    # a positive delta_z adds volume and a negative delta_z removes
    # volume. Since delta_z enters the formula only once, its sign can
    # be used as it is.
    cones = np.pi / 3 * delta_z * np.abs(area_terms)
    return np.sum(cones) * point_scale ** 3
dclab/http_utils.py ADDED
@@ -0,0 +1,322 @@
1
+ import functools
2
+ import io
3
+ import os
4
+ import re
5
+ import socket
6
+ from unittest import mock
7
+ from urllib.parse import urlparse
8
+ import warnings
9
+
10
+ import numpy as np
11
+
12
+
13
+ try:
14
+ import requests
15
+ except ModuleNotFoundError:
16
+ requests = mock.Mock()
17
+ REQUESTS_AVAILABLE = False
18
+ else:
19
+ REQUESTS_AVAILABLE = True
20
+
21
+
22
#: Regular expression for matching a regular HTTP URL. The scheme is
#: mandatory, the host part only accepts lowercase letters, digits,
#: hyphens and dots, and at least one character must follow the slash
#: after the host.
REGEXP_HTTP_URL = re.compile(
    r"^(https?:\/\/)"  # protocol (http or https; not optional)
    r"([a-z0-9-\.]*)(\:[0-9]*)?\/"  # host and optional :port
    r".+"  # path
)
28
+
29
+
30
class ETagNotInResponseHeaderWarning(UserWarning):
    """Used for cases where the requests.Response does not contain an ETag

    Issued by `HTTPFile._parse_header` when the "etag" response header
    is missing or, after stripping quotes, shorter than five characters.
    """
32
+
33
+
34
class ConnectionTimeoutWarning(UserWarning):
    """Used when a connection fails or times out

    Issued by `ResoluteRequestsSession.get` for every attempt that
    raised a connection or timeout error and is about to be retried.
    """
36
+
37
+
38
class HTTPFile(io.IOBase):
    def __init__(self, url, chunk_size=2**18, keep_chunks=200):
        """Chunk-cached access to a URL supporting range requests

        Range requests (https://en.wikipedia.org/wiki/Byte_serving)
        allow clients to access specific parts of a file via HTTP
        without downloading the entire file.

        This class creates a file-like object from a URL that can
        then be passed on to e.g. h5py for reading. In addition, this
        class keeps a chunk cache of the URL, making it (A) fast to
        access frequently used parts of the file and (B) fast to slice
        through large files since the ratio of data downloaded versus
        (time-consuming) HTTP requests is very large.

        Parameters
        ----------
        url: str
            Path to the URL
        chunk_size: int
            Download chunk size. The entire file is split up into
            equally-sized (and thus indexable) chunks.
        keep_chunks: int
            Number of downloaded chunks to keep in memory. For a
            `chunk_size` of 2**18 bytes, a `keep_chunks` of 200
            implies a chunk cache size of 50 MiB.
        """
        self.url = url
        self._chunk_size = chunk_size
        self._keep_chunks = keep_chunks
        self.session = session_cache.get_session(url)
        # length and etag are determined lazily (see `_parse_header`)
        self._len = None
        self._etag = None
        # current position of the file pointer
        self._pos = 0
        # maps chunk index to the downloaded bytes of that chunk
        self.cache = {}

    def _parse_header(self):
        """parse the header sent by the server, populates length and etag

        Raises
        ------
        ValueError
            If the server does not reply with status code 200.
        """
        if self._len is None:
            # Do not use `self.session.head`, because it might return a
            # wrong content-length for pre-signed S3 URLS.
            resp = self.session.get(self.url, stream=True, timeout=0.5)
            try:
                if resp.status_code != 200:
                    raise ValueError(
                        f"Server replied with status code {resp.status_code} "
                        f"{resp.reason} for '{self.url}'")
                self._len = int(resp.headers["content-length"])
                # Try to determine the etag of the file.
                etag = resp.headers.get("etag", "").strip("'").strip('"')
                if len(etag) < 5:
                    etag = None
                    warnings.warn(f"Got empty ETag header for {self.url}",
                                  ETagNotInResponseHeaderWarning)
                self._etag = etag
            finally:
                # Only the headers are of interest. The body of the
                # streamed response is never read, so the response must
                # be closed explicitly to release the connection.
                resp.close()

    @property
    def etag(self):
        """Unique identifier for this resource (version) by the web server"""
        self._parse_header()
        return self._etag

    @property
    def length(self):
        """Size of the resource in bytes as reported by the web server"""
        self._parse_header()
        return self._len

    @property
    def max_cache_size(self):
        """The maximum cache size allowed by `chunk_size` and `keep_chunks`"""
        return self._chunk_size * self._keep_chunks

    def close(self):
        """Close the file

        This closes the requests session and then calls `close` on
        the super class.
        """
        self.session.close()
        super(HTTPFile, self).close()

    def download_range(self, start, stop):
        """Download bytes given by the range (`start`, `stop`)

        `stop` is not inclusive (In the HTTP range request it normally is).
        """
        resp = self.session.get(self.url,
                                headers={"Range": f"bytes={start}-{stop-1}"}
                                )
        return resp.content

    def get_cache_chunk(self, index):
        """Return the cache chunk defined by `index`

        If the chunk is not in `self.cache`, it is downloaded. When the
        cache grows beyond `keep_chunks` entries, the oldest chunk
        (except for the first chunk of the file) is discarded.
        """
        try:
            return self.cache[index]
        except KeyError:
            pass
        start = index*self._chunk_size
        stop = min(start + self._chunk_size, self.length)
        chunk = self.download_range(start, stop)
        self.cache[index] = chunk
        if len(self.cache) > self._keep_chunks:
            # always keep the first chunk
            evict = next((kk for kk in self.cache if kk != 0), None)
            if evict is not None:
                self.cache.pop(evict)
        # Return the local reference: for very small `keep_chunks` the
        # chunk that was just downloaded may already have been evicted.
        return chunk

    def read(self, size=-1, /):
        """Cache-supported read operation (file object)

        Parameters
        ----------
        size: int
            Number of bytes to read; a negative value or None reads
            everything up to the end of the file.

        Returns
        -------
        data: bytes
            The requested data; fewer than `size` bytes (possibly none)
            are returned when the end of the file is reached.
        """
        if size is None or size < 0:
            stop = self.length
        else:
            # never read (or move the file pointer) beyond the end
            stop = min(self._pos + size, self.length)
        if stop <= self._pos:
            return b""
        data = self.read_range_cached(self._pos, stop)
        self._pos = stop
        return data

    def read_range_cached(self, start, stop):
        """Concatenate the requested bytes from the cached chunks

        This calls `get_cache_chunk` and thus downloads cache
        chunks when necessary. `stop` is not inclusive.
        """
        csize = self._chunk_size
        parts = []
        pos = start
        # A chunk is only requested if bytes are still missing. This
        # avoids downloading a superfluous chunk when `stop` coincides
        # with a chunk boundary.
        while pos < stop:
            index, offset = divmod(pos, csize)
            chunk = self.get_cache_chunk(int(index))
            # read up to the end of this chunk or up to `stop`
            take = min(csize - offset, stop - pos)
            parts.append(chunk[offset:offset + take])
            pos += take
        return b"".join(parts)

    def seek(self, offset, whence=os.SEEK_SET):
        """Seek to a position (file object)

        Returns
        -------
        pos: int
            The new absolute position

        Raises
        ------
        ValueError
            If `whence` is not one of `os.SEEK_SET`, `os.SEEK_CUR`,
            or `os.SEEK_END`.
        """
        if whence == os.SEEK_SET:
            self._pos = offset
        elif whence == os.SEEK_CUR:
            self._pos += offset
        elif whence == os.SEEK_END:
            self._pos = self.length + offset
        else:
            raise ValueError(f"Invalid value for `whence`: {whence!r}")
        return self._pos

    def seekable(self):
        """The HTTP file is seekable"""
        return True

    def tell(self):
        """Tell the position (file object)"""
        return self._pos
198
+
199
+
200
class ResoluteRequestsSessionCache:
    def __init__(self):
        """A multiprocessing-safe cache for requests session objects

        This class implements `__getstate__` and `__setstate__`
        methods that discard all sessions, so that when used in a
        multiprocessing context, sessions are never mirrored to the
        subprocesses. Each subprocess creates its own sessions.

        Note that only :class:`ResoluteRequestsSession` objects are used,
        which is ideal for the use-case of unstable internet connections.
        """
        #: This dictionary holds all sessions in use by the current process.
        #: Sessions are stored with the host name / netloc as the key.
        self.sessions = {}

    def __getstate__(self):
        """Return an empty state, so sessions are not pickled

        An empty dictionary (instead of None) is returned on purpose:
        pickle only calls `__setstate__` during unpickling when the
        state is not None, and `__setstate__` is required to
        recreate the `sessions` dictionary.
        """
        return {}

    def __setstate__(self, state):
        """Start with an empty session cache (see `__getstate__`)"""
        self.sessions = {}

    def get_session(self, url: str):
        """Return a requests session for the specified URL

        For each hostname, a different session is returned,
        but for identical hostnames, cached sessions are used.
        """
        # The netloc (host name including an optional port) is the key
        key = urlparse(url).netloc
        try:
            return self.sessions[key]
        except KeyError:
            session = self.sessions[key] = ResoluteRequestsSession()
            return session
235
+
236
+
237
class ResoluteRequestsSession(requests.Session):
    """A session with built-in retry for `get`"""
    def get(self, *args, **kwargs):
        """Perform a GET request, retrying on connection problems

        A `timeout` of 0.5 seconds is used unless specified otherwise.
        The request is attempted up to 100 times; every attempt that
        fails with a connection or timeout error issues a
        :class:`ConnectionTimeoutWarning`. If no attempt succeeds,
        `requests.exceptions.ReadTimeout` is raised.
        """
        if "timeout" not in kwargs:
            kwargs["timeout"] = 0.5
        attempts_left = 100
        while attempts_left > 0:
            attempts_left -= 1
            try:
                return super().get(*args, **kwargs)
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.ConnectTimeout,
                    requests.urllib3.exceptions.ConnectionError,
                    requests.urllib3.exceptions.ReadTimeoutError) as e:
                warnings.warn(f"Encountered {e} for {args} {kwargs}",
                              ConnectionTimeoutWarning)
        # all attempts failed
        raise requests.exceptions.ReadTimeout(
            f"Resolute session failed for {args} and {kwargs}!")
259
+
260
+
261
def is_url_available(url: str, ret_reason=False):
    """Check whether a URL is available

    Parameters
    ----------
    url: str
        full URL to the object
    ret_reason: bool
        whether to return reason for unavailability

    Returns
    -------
    available: bool
        whether the URL is available
    reason: str
        reason for the URL not being available if `available` is False;
        one of "invalid" (not an HTTP URL or invalid port),
        "no connection" (host could not be reached), "oserror"
        (the request failed), or the lower-case reason phrase sent
        by the server; "none" if the URL is available. Only returned
        if `ret_reason` is True.
    """
    avail = False
    reason = "none"
    port = None
    if is_http_url(url):
        urlp = urlparse(url)
        try:
            # default to https if no scheme or port is specified
            port = urlp.port or (80 if urlp.scheme == "http" else 443)
        except ValueError:
            # `urlp.port` raises ValueError for out-of-range port numbers
            port = None

    if port is None:
        reason = "invalid"
    else:
        # Try to connect to the host. The socket is only a cheap probe
        # and is closed before the actual HTTP request is made.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1)
            try:
                # Use `hostname`, not `netloc`, because `netloc` contains
                # the port number which we do not want here.
                s.connect((urlp.hostname, port))
            except OSError:
                # this includes `socket.gaierror`
                connected = False
            else:
                connected = True

        if not connected:
            reason = "no connection"
        else:
            # Try to access the url
            try:
                ses = session_cache.get_session(url)
                req = ses.get(url, stream=True, timeout=1)
            except OSError:
                reason = "oserror"
            else:
                try:
                    avail = req.ok
                    if not avail:
                        reason = req.reason.lower()
                finally:
                    # The body of the streamed response is never read,
                    # so release the connection explicitly.
                    req.close()

    if ret_reason:
        return avail, reason
    else:
        return avail
310
+
311
+
312
@functools.lru_cache()
def is_http_url(string):
    """Check whether `string` is a valid URL using regexp

    Returns False if `string` is not a `str`; otherwise the result
    of matching the stripped string against `REGEXP_HTTP_URL` is
    returned (a match object, or None if there is no match).
    """
    if isinstance(string, str):
        return REGEXP_HTTP_URL.match(string.strip())
    return False
319
+
320
+
321
#: cache of requests sessions for current process; used by
#: `HTTPFile` and `is_url_available` to obtain one session per netloc
session_cache = ResoluteRequestsSessionCache()