dclab-0.67.0-cp314-cp314t-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of dclab has been flagged as potentially problematic.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314t-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,353 @@
+ """Helper methods for copying .rtdc data"""
+ from __future__ import annotations
+
+ import json
+ import re
+ from typing import List, Literal
+
+ import h5py
+ import h5py.h5o
+ import hdf5plugin
+ import numpy as np
+
+ from ..definitions import feature_exists, scalar_feature_exists
+ from ..util import hashobj
+
+ from .fmt_hdf5 import DEFECTIVE_FEATURES, RTDC_HDF5
+ from .writer import RTDCWriter
+
+
+ def rtdc_copy(src_h5file: h5py.Group,
+               dst_h5file: h5py.Group,
+               features: List[str] | Literal['all', 'scalar', 'none'] = "all",
+               include_basins: bool = True,
+               include_logs: bool = True,
+               include_tables: bool = True,
+               meta_prefix: str = ""):
+     """Create a compressed copy of an RT-DC file
+
+     Parameters
+     ----------
+     src_h5file: h5py.Group
+         Input HDF5 file
+     dst_h5file: h5py.Group
+         Output HDF5 file
+     features: list of strings or one of ['all', 'scalar', 'none']
+         If this is a list then it specifies the features that are copied from
+         `src_h5file` to `dst_h5file`. Alternatively, you may specify 'all'
+         (copy all features), 'scalar' (copy only scalar features), or 'none'
+         (don't copy any features).
+     include_basins: bool
+         Copy the basin information from `src_h5file` to `dst_h5file`.
+     include_logs: bool
+         Copy the logs from `src_h5file` to `dst_h5file`.
+     include_tables: bool
+         Copy the tables from `src_h5file` to `dst_h5file`.
+     meta_prefix: str
+         Add this prefix to the name of the logs and tables in `dst_h5file`.
+     """
+     # metadata
+     dst_h5file.attrs.update(src_h5file.attrs)
+
+     # events in source file
+     if "events" in src_h5file:
+         events_src = list(src_h5file["events"].keys())
+     else:
+         events_src = []
+
+     if include_basins and "basin_events" in src_h5file:
+         events_src += list(src_h5file["basin_events"].keys())
+         events_src = sorted(set(events_src))
+
+     # logs
+     if include_logs and "logs" in src_h5file:
+         dst_h5file.require_group("logs")
+         for l_key in src_h5file["logs"]:
+             h5ds_copy(src_loc=src_h5file["logs"],
+                       src_name=l_key,
+                       dst_loc=dst_h5file["logs"],
+                       dst_name=meta_prefix + l_key,
+                       recursive=False)
+
+     # tables
+     if include_tables and "tables" in src_h5file:
+         dst_h5file.require_group("tables")
+         for tkey in src_h5file["tables"]:
+             # There appears to be a problem with h5copy in some rare
+             # situations, so we do not use h5copy, but read and write
+             # the table data directly.
+             # https://github.com/HDFGroup/hdf5/issues/3214
+             # The following caused a Segmentation fault:
+             # h5ds_copy(src_loc=src_h5file["tables"],
+             #           src_name=tkey,
+             #           dst_loc=dst_h5file["tables"],
+             #           dst_name=meta_prefix + tkey,
+             #           recursive=False)
+             copy_table = dst_h5file["tables"].create_dataset(
+                 name=tkey,
+                 data=src_h5file["tables"][tkey][:],
+                 fletcher32=True,
+                 **hdf5plugin.Zstd(clevel=5))
+             copy_table.attrs.update(src_h5file["tables"][tkey].attrs)
+
+     # events
+     if isinstance(features, list):
+         feature_iter = features
+     elif features == "all":
+         feature_iter = events_src
+     elif features == "scalar":
+         feature_iter = [feat for feat in events_src
+                         if feature_exists(feat, scalar_only=True)]
+     elif features == "none":
+         feature_iter = []
+     else:
+         raise ValueError(f"`features` must be either a list of feature names "
+                          f"or one of 'all', 'scalar' or 'none', got "
+                          f"'{features}'")
+
+     # Additional check for basin features.
+     bn_regexp = re.compile("^basinmap[0-9]*$")  # future-proof regexp
+     src_basin_feats = [f for f in events_src if bn_regexp.match(f)]
+     if include_basins:
+         # Make sure all 'basinmap?' features are included in the output file.
+         for feat in src_basin_feats:
+             if feat not in feature_iter:
+                 feature_iter.append(feat)
+     else:
+         # We do not need the basinmap features, because basins are
+         # stripped from the output file.
+         for feat in src_basin_feats:
+             if feat in feature_iter:
+                 feature_iter.remove(feat)
+
+     # copy basin definitions
+     if include_basins and "basins" in src_h5file:
+         basin_definition_copy(src_h5file=src_h5file,
+                               dst_h5file=dst_h5file,
+                               features_iter=feature_iter)
+
+     if feature_iter:
+         dst_h5file.require_group("events")
+         for feat in feature_iter:
+             if not feature_exists(feat):
+                 continue
+             elif feat in src_h5file["events"]:
+                 # Skip all defective features. These are features that
+                 # are known to be invalid (e.g. ancillary features that
+                 # were computed falsely) and must be recomputed by dclab.
+                 if feat in DEFECTIVE_FEATURES:
+                     defective = DEFECTIVE_FEATURES[feat](src_h5file)
+                     if defective:
+                         continue
+
+                 dst = h5ds_copy(src_loc=src_h5file["events"],
+                                 src_name=feat,
+                                 dst_loc=dst_h5file["events"],
+                                 recursive=True)
+                 if scalar_feature_exists(feat):
+                     # complement min/max values for all scalar features
+                     for ufunc, attr in [(np.nanmin, "min"),
+                                         (np.nanmax, "max"),
+                                         (np.nanmean, "mean"),
+                                         ]:
+                         if attr not in dst.attrs:
+                             dst.attrs[attr] = ufunc(dst)
+
+             elif (include_basins
+                   and "basin_events" in src_h5file
+                   and feat in src_h5file["basin_events"]):
+                 # Also copy internal basins which should have been defined
+                 # in the "basin_events" group.
+                 if feat in src_h5file["basin_events"]:
+                     h5ds_copy(src_loc=src_h5file["basin_events"],
+                               src_name=feat,
+                               dst_loc=dst_h5file.require_group("basin_events"),
+                               dst_name=feat
+                               )
+
+
+ def basin_definition_copy(src_h5file, dst_h5file, features_iter):
+     """Copy basin definitions `src_h5file["basins"]` to the new file
+
+     Normally, we would just use :func:`h5ds_copy` to copy basins from
+     one dataset to another. However, if we are e.g. only copying scalar
+     features, and there are non-scalar features in the internal basin,
+     then we must rewrite the basin definition of the internal basin.
+
+     The `features_iter` list of features defines which features are
+     relevant for the internal basin.
+     """
+     dst_h5file.require_group("basins")
+     # Load the basin information
+     basin_dicts = RTDC_HDF5.basin_get_dicts_from_h5file(src_h5file)
+     for bn in basin_dicts:
+         b_key = bn["key"]
+
+         if b_key in dst_h5file["basins"]:
+             # already stored therein
+             continue
+
+         # sanity check
+         if b_key not in src_h5file["basins"]:
+             raise ValueError(
+                 f"Failed to parse basin information correctly. Source file "
+                 f"{src_h5file} does not contain basin {b_key} which I got "
+                 f"from `RTDC_HDF5.basin_get_dicts_from_h5file`.")
+
+         if bn["type"] == "internal":
+             # Make sure we define the internal features selected
+             feat_used = [f for f in bn["features"] if f in features_iter]
+             if len(feat_used) == 0:
+                 # We don't have any internal features, don't write anything
+                 continue
+             elif feat_used != bn["features"]:
+                 bn["features"] = feat_used
+                 rewrite = True
+             else:
+                 rewrite = False
+         else:
+             # We do not have an internal basin, just copy everything
+             rewrite = False
+
+         if rewrite:
+             # Convert edited `bn` to JSON and write feature data
+             b_lines = json.dumps(bn, indent=2).split("\n")
+             key = hashobj(b_lines)
+             if key not in dst_h5file["basins"]:
+                 with RTDCWriter(dst_h5file) as hw:
+                     hw.write_text(dst_h5file["basins"], key, b_lines)
+         else:
+             # copy only
+             h5ds_copy(src_loc=src_h5file["basins"],
+                       src_name=b_key,
+                       dst_loc=dst_h5file["basins"],
+                       dst_name=b_key,
+                       recursive=False)
+
+
+ def h5ds_copy(src_loc, src_name, dst_loc, dst_name=None,
+               ensure_compression=True, recursive=True):
+     """Copy an HDF5 Dataset from one group to another
+
+     Parameters
+     ----------
+     src_loc: h5py.H5Group
+         The source location
+     src_name: str
+         Name of the dataset in `src_loc`
+     dst_loc: h5py.H5Group
+         The destination location
+     dst_name: str
+         The name of the destination dataset, defaults to `src_name`
+     ensure_compression: bool
+         Whether to make sure that the data are compressed,
+         If disabled, then all data from the source will be
+         just copied and not compressed.
+     recursive: bool
+         Whether to recurse into HDF5 Groups (this is required e.g.
+         for copying the "trace" feature)
+
+     Returns
+     -------
+     dst: h5py.Dataset
+         The dataset `dst_loc[dst_name]`
+
+     Raises
+     ------
+     ValueError:
+         If the named source is not a h5py.Dataset
+     """
+     compression_kwargs = hdf5plugin.Zstd(clevel=5)
+     dst_name = dst_name or src_name
+     src = src_loc[src_name]
+     if isinstance(src, h5py.Dataset):
+         if ensure_compression and not is_properly_compressed(src):
+             # Chunk size larger than dataset size is not allowed
+             # in h5py's `make_new_dset`.
+             if src.shape[0] == 0:
+                 # Ignore empty datasets (This sometimes happens with logs).
+                 return
+             elif src.chunks and src.chunks[0] > src.shape[0]:
+                 # The chunks in the input file are larger than the dataset
+                 # shape. So we set the chunks to the shape. Here, we only
+                 # check for the first axis (event count for feature data),
+                 # because if the chunks vary in any other dimension then
+                 # there is something fundamentally wrong with the input
+                 # dataset (which we don't want to endorse, and where there
+                 # could potentially be a lot of data put into ram).
+                 chunks = list(src.chunks)
+                 chunks[0] = src.shape[0]
+                 chunks = tuple(chunks)
+             else:
+                 # original chunk size is fine
+                 chunks = src.chunks
+             # Variable length strings, compression, and fletcher32 are not
+             # a good combination. If we encounter any logs, then we have
+             # to write them with fixed-length strings.
+             # https://forum.hdfgroup.org/t/fletcher32-filter-on-variable-
+             # length-string-datasets-not-suitable-for-filters/9038/4
+             if src.dtype.kind == "O":
+                 # We are looking at logs with variable length strings.
+                 max_length = max([len(ii) for ii in src] + [100])
+                 dtype = f"S{max_length}"
+                 convert_to_s_fixed = True
+             else:
+                 dtype = src.dtype
+                 convert_to_s_fixed = False
+
+             # Manually create a compressed version of the dataset.
+             dst = dst_loc.create_dataset(name=dst_name,
+                                          shape=src.shape,
+                                          dtype=dtype,
+                                          chunks=chunks,
+                                          fletcher32=True,
+                                          **compression_kwargs
+                                          )
+             if convert_to_s_fixed:
+                 # We are looking at old variable-length log strings.
+                 dst[:] = src[:].astype(dtype)
+             elif chunks is None:
+                 dst[:] = src[:]
+             else:
+                 for chunk in src.iter_chunks():
+                     dst[chunk] = src[chunk]
+             # Also write all the attributes
+             dst.attrs.update(src.attrs)
+         else:
+             # Copy the Dataset to the destination as-is.
+             h5py.h5o.copy(src_loc=src_loc.id,
+                           src_name=src_name.encode(),
+                           dst_loc=dst_loc.id,
+                           dst_name=dst_name.encode(),
+                           )
+     elif recursive and isinstance(src, h5py.Group):
+         dst_rec = dst_loc.require_group(dst_name)
+         for key in src:
+             h5ds_copy(src_loc=src,
+                       src_name=key,
+                       dst_loc=dst_rec,
+                       ensure_compression=ensure_compression,
+                       recursive=recursive)
+     else:
+         raise ValueError(f"The object {src_name} in {src.file} is not "
+                          f"a dataset!")
+     return dst_loc[dst_name]
+
+
+ def is_properly_compressed(h5obj):
+     """Check whether an HDF5 object is properly compressed
+
+     The compression check only returns True if the input file was
+     compressed with the Zstandard compression using compression
+     level 5 or higher.
+     """
+     # Since version 0.43.0, we use Zstandard compression
+     # which does not show up in the `compression`
+     # attribute of `obj`.
+     create_plist = h5obj.id.get_create_plist()
+     filter_args = create_plist.get_filter_by_id(32015)
+     if filter_args is not None and filter_args[1][0] >= 5:
+         properly_compressed = True
+     else:
+         properly_compressed = False
+     return properly_compressed
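
Since dclab/rtdc_dataset/copier.py is new in this release, a minimal usage sketch of its main entry point, rtdc_copy, may help. The input and output file names below are hypothetical; the module path, function names, and parameters are taken from the diff above.

    import h5py

    from dclab.rtdc_dataset.copier import rtdc_copy

    # Hypothetical paths, for illustration only
    with h5py.File("input.rtdc", "r") as src, \
            h5py.File("compressed_copy.rtdc", "w") as dst:
        # Create a Zstd-compressed copy that keeps only scalar features;
        # basins, logs, and tables are included by default.
        rtdc_copy(src_h5file=src,
                  dst_h5file=dst,
                  features="scalar")

Individual datasets or groups can also be copied with h5ds_copy, which enforces Zstandard compression (clevel 5) unless ensure_compression=False, and which raises ValueError if the named source is neither a dataset nor, with recursive=True, a group.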