dclab-0.67.0-cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314t-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
dclab/rtdc_dataset/export.py
@@ -0,0 +1,867 @@
+ """Export RT-DC measurement data"""
+ from __future__ import annotations
+
+ import codecs
+ import json
+ import pathlib
+ import time
+ from typing import Dict, List
+ import uuid
+ import warnings
+
+ import h5py
+ import hdf5plugin
+
+ try:
+     import av
+ except ModuleNotFoundError:
+     PYAV_AVAILABLE = False
+ else:
+     PYAV_AVAILABLE = True
+
+ try:
+     import fcswrite
+ except ModuleNotFoundError:
+     FCSWRITE_AVAILABLE = False
+ else:
+     FCSWRITE_AVAILABLE = True
+
+ import numpy as np
+
+ from .. import definitions as dfn
+ from .._version import version, version_tuple
+
+ from .feat_basin import get_basin_classes
+ from .writer import RTDCWriter
+
+
+ class LimitingExportSizeWarning(UserWarning):
+     pass
+
+
+ class ContourNotExportedWarning(UserWarning):
+     pass
+
+
+ class Export(object):
+     def __init__(self, rtdc_ds):
+         """Export functionalities for RT-DC datasets"""
+         self.rtdc_ds = rtdc_ds
+
+     def avi(self,
+             path: str | pathlib.Path,
+             filtered: bool = True,
+             override: bool = False,
+             pixel_format: str = "yuv420p",
+             codec: str = "rawvideo",
+             codec_options: dict[str, str] = None,
+             progress_callback: callable = None,
+             ):
+         """Export filtered event images to a video file
+
+         Parameters
+         ----------
+         path: str
+             Path to a video file. The container format (.avi, .mkv, ...)
+             is deduced from the file suffix.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         pixel_format: str
+             Which pixel format to give to ffmpeg.
+         codec: str
+             Codec name; e.g. "rawvideo" or "libx264"
+         codec_options:
+             Additional arguments to give to the codec via ffmpeg,
+             e.g. `{'preset': 'slow', 'crf': '0'}` for the "libx264" codec.
+         progress_callback: callable
+             Function that takes at least two arguments: a float between 0
+             and 1 for monitoring progress and a string describing what is
+             being done.
+
+         Notes
+         -----
+         Raises OSError if the current dataset does not contain image data.
+         """
+         if not PYAV_AVAILABLE:
+             raise ModuleNotFoundError(
+                 "Package `av` required for avi export!")
+         path = pathlib.Path(path)
+         if len(path.suffix) != 4:
+             path = path.with_suffix(".avi")
+         ds = self.rtdc_ds
+         # Check whether the file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Start exporting
+         if "image" in ds:
+             # Open video for writing
+             with av.open(path, mode="w") as container:
+                 stream = container.add_stream(codec_name=codec,
+                                               rate=25)
+                 stream.pix_fmt = pixel_format
+                 stream.height = ds["image"].shape[1]
+                 stream.width = ds["image"].shape[2]
+                 if codec_options:
+                     stream.codec_context.options = codec_options
+
+                 # write the filtered frames to the video file
+                 for evid in np.arange(len(ds)):
+
+                     if progress_callback is not None and evid % 10_000 == 0:
+                         progress_callback(evid / len(ds), "exporting video")
+
+                     # skip frames that were filtered out
+                     if filtered and not ds.filter.all[evid]:
+                         continue
+                     image = ds["image"][evid]
+                     # Convert to RGB
+                     image = image.reshape(image.shape[0], image.shape[1], 1)
+                     image = np.repeat(image, 3, axis=2)
+
+                     av_frame = av.VideoFrame.from_ndarray(image,
+                                                           format="rgb24")
+
+                     for packet in stream.encode(av_frame):
+                         container.mux(packet)
+
+                 # flush any frames still buffered in the encoder
+                 for packet in stream.encode():
+                     container.mux(packet)
+
+             if progress_callback is not None:
+                 progress_callback(1.0, "video export complete")
+
+         else:
+             msg = "No image data to export: dataset {}!".format(ds.title)
+             raise OSError(msg)
+
+     def fcs(self,
+             path: pathlib.Path | str,
+             features: list[str],
+             meta_data: dict = None,
+             filtered: bool = True,
+             override: bool = False,
+             progress_callback: callable = None,
+             ):
+         """Export the data of an RT-DC dataset to an .fcs file
+
+         Parameters
+         ----------
+         path: str
+             Path to an .fcs file. The ending .fcs is added automatically.
+         features: list of str
+             The features in the resulting .fcs file. These are strings
+             that are defined by `dclab.definitions.scalar_feature_exists`,
+             e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
+         meta_data: dict
+             User-defined, optional key-value pairs that are stored
+             in the primary TEXT segment of the FCS file; the version
+             of dclab is stored there by default.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         progress_callback: callable
+             Function that takes at least two arguments: a float between 0
+             and 1 for monitoring progress and a string describing what is
+             being done.
+
+         Notes
+         -----
+         Due to incompatibility with the .fcs file format, events with
+         NaN-valued features are not exported.
+         """
+         if meta_data is None:
+             meta_data = {}
+         if not FCSWRITE_AVAILABLE:
+             raise ModuleNotFoundError(
+                 "Package `fcswrite` required for fcs export!")
+
+         ds = self.rtdc_ds
+
+         path = pathlib.Path(path)
+         # Make sure that path ends with .fcs
+         if path.suffix != ".fcs":
+             path = path.with_name(path.name + ".fcs")
+         # Check whether the file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Check that features are valid
+         features = sorted(set(features))
+         for c in features:
+             if c not in ds.features_scalar:
+                 msg = "Invalid feature name: {}".format(c)
+                 raise ValueError(msg)
+
+         # Collect the header
+         chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]
+
+         if progress_callback is not None:
+             progress_callback(0.0, "collecting data")
+
+         # Collect the data
+         if filtered:
+             data = [ds[c][ds.filter.all] for c in features]
+         else:
+             data = [ds[c] for c in features]
+
+         if progress_callback is not None:
+             progress_callback(0.5, "exporting data")
+
+         data = np.array(data).transpose()
+         meta_data["dclab version"] = version
+         fcswrite.write_fcs(filename=str(path),
+                            chn_names=chn_names,
+                            data=data,
+                            text_kw_pr=meta_data,
+                            )
+
+         if progress_callback is not None:
+             progress_callback(1.0, "export complete")
+
+     def hdf5(self,
+              path: str | pathlib.Path,
+              features: List[str] = None,
+              filtered: bool = True,
+              logs: bool = False,
+              tables: bool = False,
+              basins: bool = False,
+              allow_contour: bool = False,
+              meta_prefix: str = "src_",
+              override: bool = False,
+              compression_kwargs: Dict = None,
+              compression: str = "deprecated",
+              skip_checks: bool = False,
+              progress_callback: callable = None,
+              ):
+         """Export the data of the current instance to an HDF5 file
+
+         Parameters
+         ----------
+         path: str
+             Path to an .rtdc file. The ending .rtdc is added
+             automatically.
+         features: list of str
+             The features in the resulting .rtdc file. These are strings
+             that are defined by `dclab.definitions.feature_exists`, e.g.
+             "area_cvx", "deform", "frame", "fl1_max", "image".
+             Defaults to `self.rtdc_ds.features_innate`.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         logs: bool
+             Whether to store the logs of the original file, prefixed
+             with `meta_prefix`, in the output file.
+         tables: bool
+             Whether to store the tables of the original file, prefixed
+             with `meta_prefix`, in the output file.
+         basins: bool
+             Whether to export basins. If filtering is disabled, basins
+             are copied directly to the output file. If filtering is
+             enabled, mapped basins are exported.
+         allow_contour: bool
+             Whether to allow exporting the "contour" feature. Writing this
+             feature to an HDF5 file is extremely inefficient, because it
+             cannot be represented by an ND array and thus each contour
+             must be stored in a separate dataset within a group. The
+             contour can easily be computed from the mask, so actually
+             storing the contour should be avoided. If "contour" is in
+             `features`, it will only be written to the output file if
+             `allow_contour=True`.
+         meta_prefix: str
+             Prefix for log and table names in the exported file
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         compression_kwargs: dict
+             Dictionary with the keys "compression" and "compression_opts"
+             which are passed to :func:`h5py.File.create_dataset`. The
+             default is Zstandard compression with compression level 5,
+             `hdf5plugin.Zstd(clevel=5)`.
+         compression: str or None
+             Compression method used for data storage;
+             one of [None, "lzf", "gzip", "szip"].
+
+             .. deprecated:: 0.43.0
+                 Use `compression_kwargs` instead.
+         skip_checks: bool
+             Disable checking whether all features have the same length.
+         progress_callback: callable
+             Function that takes at least two arguments: a float between 0
+             and 1 for monitoring progress and a string describing what is
+             being done.
+
+         .. versionchanged:: 0.58.0
+
+             The ``basins`` keyword argument was added, and it is now
+             possible to pass an empty list to ``features``. This
+             combination results in a very small file consisting of
+             metadata and a mapped basin referring to the original dataset.
+         """
+         if compression != "deprecated":
+             warnings.warn("The `compression` kwarg is deprecated in favor "
+                           "of `compression_kwargs`!",
+                           DeprecationWarning)
+             if compression_kwargs is not None:
+                 raise ValueError("You may not specify `compression` and "
+                                  "`compression_kwargs` at the same time!")
+             # be backwards-compatible
+             compression_kwargs = {"compression": compression}
+         if compression_kwargs is None:
+             compression_kwargs = hdf5plugin.Zstd(clevel=5)
+         path = pathlib.Path(path)
+         # Make sure that path ends with .rtdc
+         if path.suffix not in [".rtdc", ".rtdc~"]:
+             path = path.parent / (path.name + ".rtdc")
+         # Check whether the file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(path)
+                           + "Please use the `override=True` option.")
+         elif path.exists():
+             path.unlink()
+
+         # make sure the parent directory exists
+         path.parent.mkdir(parents=True, exist_ok=True)
+
+         # for convenience
+         ds = self.rtdc_ds
+
+         # remove contour information from user-specified features
+         if "contour" in (features or []) and not allow_contour:
+             features = list(features)
+             features.remove("contour")
+             warnings.warn(
+                 "Feature 'contour' not exported to output file, because "
+                 "`allow_contour` is `False`. If you really need the "
+                 "'contour' feature in the output file (unlikely, unless "
+                 "you are venturing outside the DC Cosmos), you must set "
+                 "`allow_contour=True`. Otherwise, you can safely ignore "
+                 "this warning or silence it by not providing 'contour' in "
+                 "`features`.",
+                 ContourNotExportedWarning)
+
+         if features is None:
+             features = ds.features_innate
+             # silently remove contour information
+             if "contour" in features and not allow_contour:
+                 features.remove("contour")
+
+         # decide which metadata to export
+         meta = {}
+         # only cfg metadata (no analysis metadata)
+         for sec in dfn.CFG_METADATA:
+             if sec in ds.config:
+                 meta[sec] = ds.config[sec].copy()
+         # add user-defined metadata
+         if "user" in ds.config:
+             meta["user"] = ds.config["user"].copy()
+         if filtered:
+             # Define a new measurement identifier, so that we are not
+             # running into any problems with basins being defined for
+             # filtered data.
+             ds_run_id = ds.get_measurement_identifier()
+             random_ap = f"dclab-{str(uuid.uuid4())[:7]}"
+             meta["experiment"]["run identifier"] = f"{ds_run_id}_{random_ap}"
+
+         if filtered:
+             filter_arr = ds.filter.all
+         else:
+             filter_arr = None
+
+         features = sorted(set(features))
+         if not skip_checks and features:
+             # check that all features have the same length and use the
+             # smallest common length
+             lengths = []
+             for feat in features:
+                 if feat == "trace":
+                     for tr in list(ds["trace"].keys()):
+                         lengths.append(len(ds["trace"][tr]))
+                 else:
+                     lengths.append(len(ds[feat]))
+             l_min = np.min(lengths)
+             l_max = np.max(lengths)
+             if l_min != l_max:
+                 if filter_arr is None:
+                     # we are forced to do filtering
+                     filter_arr = np.ones(len(ds), dtype=bool)
+                 else:
+                     # have to create a copy, because rtdc_ds.filter.all
+                     # is read-only!
+                     filter_arr = np.copy(filter_arr)
+                 filter_arr[l_min:] = False
+                 warnings.warn(
+                     "Not all features have the same length! Limiting "
+                     + f"output event count to {l_min} (max {l_max}) "
+                     + f"in '{path}'.",
+                     LimitingExportSizeWarning)
+
+         # Perform actual export
+         with RTDCWriter(path,
+                         mode="append",
+                         compression_kwargs=compression_kwargs) as hw:
+             if progress_callback is not None:
+                 progress_callback(0.0, "writing metadata")
+             # write metadata
+             hw.store_metadata(meta)
+
+             # write export log
+             hw.store_log(time.strftime("dclab-export_%Y-%m-%d_%H.%M.%S"),
+                          json.dumps(
+                              {"dclab version": version_tuple,
+                               "kwargs": {
+                                   "features": features,
+                                   "filtered": filtered,
+                                   "logs": logs,
+                                   "tables": tables,
+                                   "basins": basins,
+                                   "meta_prefix": meta_prefix,
+                                   "skip_checks": skip_checks,
+                               }},
+                              indent=2,
+                              sort_keys=True).split("\n"))
+
+             if logs:
+                 # write logs
+                 for log in ds.logs:
+                     hw.store_log(f"{meta_prefix}{log}",
+                                  ds.logs[log])
+
+             if tables:
+                 # write tables
+                 for tab in ds.tables:
+                     hw.store_table(f"{meta_prefix}{tab}",
+                                    ds.tables[tab])
+
+             # write each feature individually
+             for ii, feat in enumerate(features):
+                 if progress_callback is not None:
+                     progress_callback(ii / len(features),
+                                       f"exporting {feat}")
+
+                 if (filter_arr is None or
+                         # This does not work for the .tdms file format
+                         # (and probably also not for DCOR).
+                         (np.all(filter_arr) and ds.format == "hdf5")):
+                     # We do not have to filter and can be fast
+                     if dfn.scalar_feature_exists(feat):
+                         shape = (1,)
+                     elif feat in ["image", "image_bg", "mask", "trace"]:
+                         # known shape
+                         shape = None
+                     else:
+                         shape = np.array(ds[feat][0]).shape
+                     hw.store_feature(feat=feat,
+                                      data=ds[feat],
+                                      shape=shape)
+                 else:
+                     # We have to filter and will be slower
+                     store_filtered_feature(rtdc_writer=hw,
+                                            feat=feat,
+                                            data=ds[feat],
+                                            filtarr=filter_arr)
+
+             if basins:
+                 if progress_callback:
+                     progress_callback(1 - 1 / (len(features) or 1),
+                                       "writing basins")
+                 # We have to store basins. There are three options:
+                 # - filtering disabled: just copy basins
+                 # - filtering enabled
+                 #   - basins with "same" mapping: create new mapping
+                 #   - mapped basins: correct nested mapping
+                 # In addition to the basins that we copy from the
+                 # original dataset, we also create a new basin that
+                 # refers to the original dataset itself.
+                 basin_list = [bn.as_dict() for bn in ds.basins]
+                 # In addition to the upstream basins, also store a
+                 # reference to the original file from which the export
+                 # was done. Get the identifier of the current dataset
+                 # for the new basins.
+                 basin_id = ds.get_measurement_identifier()
+                 if ds.format in get_basin_classes():
+                     # The dataset has a format that matches a basin
+                     # format directly.
+                     basin_is_local = ds.format == "hdf5"
+                     basin_locs = [ds.path]
+                     if basin_is_local:
+                         # So the user can put them into the same directory.
+                         basin_locs.append(ds.path.name)
+                     basin_list.append({
+                         "basin_name": "Exported data",
+                         "basin_type": "file" if basin_is_local else "remote",
+                         "basin_format": ds.format,
+                         "basin_locs": basin_locs,
+                         "basin_descr": f"Exported with dclab {version}",
+                         "basin_id": basin_id,
+                     })
+                 elif (ds.format == "hierarchy"
+                         and ds.get_root_parent().format
+                         in get_basin_classes()):
+                     # avoid circular imports
+                     from .fmt_hierarchy import map_indices_child2root
+                     # The dataset is a hierarchy child, and it is derived
+                     # from a dataset that has a matching basin format.
+                     # We have to add the indices of the root parent, which
+                     # identify the child, to the basin dictionary. Note
+                     # that additional basin filtering is applied below
+                     # this case for all basins.
+                     # For the sake of clarity this is written as a
+                     # separate case, even if that means duplicating code
+                     # from the previous case.
+                     ds_root = ds.get_root_parent()
+                     basin_is_local = ds_root.format == "hdf5"
+                     basin_locs = [ds_root.path]
+                     if basin_is_local:
+                         # So the user can put them into the same directory.
+                         basin_locs.append(ds_root.path.name)
+                     basin_list.append({
+                         "basin_name": "Exported data (hierarchy)",
+                         "basin_type": "file" if basin_is_local
+                                       else "remote",
+                         "basin_format": ds_root.format,
+                         "basin_locs": basin_locs,
+                         "basin_descr": f"Exported with dclab {version} "
+                                        "from a hierarchy dataset",
+                         # This is where this basin differs from the basin
+                         # definition in the previous case.
+                         "basin_map": map_indices_child2root(
+                             child=ds,
+                             child_indices=np.arange(len(ds))
+                         ),
+                         "basin_id": basin_id,
+                     })
+
+                 for bn_dict in basin_list:
+                     if bn_dict.get("basin_type") == "internal":
+                         # Internal basins are only valid for the files
+                         # they were defined in. Since we are exporting,
+                         # it does not make sense to store these basins
+                         # in the output file.
+                         continue
+                     elif bn_dict.get("perishable"):
+                         # Perishable basins require secret keys or
+                         # complicated logic to refresh them. We do not
+                         # store them in the output file.
+                         continue
+                     basinmap_orig = bn_dict.get("basin_map")
+                     if not filtered:
+                         # filtering disabled: just copy basins
+                         pass
+                     elif basinmap_orig is None:
+                         # basins with "same" mapping: create new mapping
+                         bn_dict["basin_map"] = np.where(filter_arr)[0]
+                     else:
+                         # mapped basins: correct nested mapping
+                         bn_dict["basin_map"] = basinmap_orig[filter_arr]
+
+                     # Do not verify basins, it takes too long.
+                     hw.store_basin(**bn_dict, verify=False)
+         if progress_callback is not None:
+             progress_callback(1.0, "export complete")
+
+     def tsv(self,
+             path: pathlib.Path | str,
+             features: list[str],
+             meta_data: dict = None,
+             filtered: bool = True,
+             override: bool = False,
+             progress_callback: callable = None,
+             ):
+         """Export the data of the current instance to a .tsv file
+
+         Parameters
+         ----------
+         path: str
+             Path to a .tsv file. The ending .tsv is added automatically.
+         features: list of str
+             The features in the resulting .tsv file. These are strings
+             that are defined by `dclab.definitions.scalar_feature_exists`,
+             e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
+         meta_data: dict
+             User-defined, optional key-value pairs that are stored at the
+             beginning of the tsv file, one key-value pair per line, each
+             line starting with a hash. The version of dclab is stored
+             there by default.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         progress_callback: callable
+             Function that takes at least two arguments: a float between 0
+             and 1 for monitoring progress and a string describing what is
+             being done.
+         """
+         if meta_data is None:
+             meta_data = {}
+         features = [c.lower() for c in features]
+         features = sorted(set(features))
+         path = pathlib.Path(path)
+         ds = self.rtdc_ds
+         # Make sure that path ends with .tsv
+         if path.suffix != ".tsv":
+             path = path.with_name(path.name + ".tsv")
+         # Check whether the file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Check that features exist
+         for c in features:
+             if c not in ds.features_scalar:
+                 raise ValueError("Invalid feature name {}".format(c))
+         meta_data["dclab version"] = version
+
+         if progress_callback is not None:
+             progress_callback(0.0, "writing metadata")
+
+         # Write BOM header
+         with path.open("wb") as fd:
+             fd.write(codecs.BOM_UTF8)
+         # Open file
+         with path.open("a", encoding="utf-8") as fd:
+             # write meta data
+             for key in sorted(meta_data.keys()):
+                 fd.write(f"# {key}: {meta_data[key]}\n")
+             fd.write("#\n")
+             fd.write("# Original dataset configuration:\n")
+             cfg = self.rtdc_ds.config.as_dict()
+             for sec in sorted(cfg.keys()):
+                 for key in sorted(cfg[sec].keys()):
+                     fd.write(f"# dc:{sec}:{key} = {cfg[sec][key]}\n")
+             fd.write("#\n")
+             # write header
+             header1 = "\t".join([c for c in features])
+             fd.write("# "+header1+"\n")
+             labels = [dfn.get_feature_label(c, rtdc_ds=ds)
+                       for c in features]
+             header2 = "\t".join(labels)
+             fd.write("# "+header2+"\n")
+
+         with path.open("ab") as fd:
+             if progress_callback is not None:
+                 progress_callback(0.1, "collecting data")
+
+             # collect data
+             if filtered:
+                 data = [ds[c][ds.filter.all] for c in features]
+             else:
+                 data = [ds[c] for c in features]
+
+             if progress_callback is not None:
+                 progress_callback(0.5, "writing data")
+
+             np.savetxt(fd,
+                        np.array(data).transpose(),
+                        fmt=str("%.10e"),
+                        delimiter="\t")
+
+         if progress_callback is not None:
+             progress_callback(1.0, "export complete")
+
+
+ def yield_filtered_array_stacks(data, indices):
+     """Generator returning chunks of the filtered feature data
+
+     Parameters
+     ----------
+     data: np.ndarray or h5py.Dataset
+         The full, unfiltered input feature data. Must implement
+         the `shape` and `dtype` properties. If it implements the
+         `__array__` method, fast slicing is used.
+     indices: np.ndarray or list
+         The indices (integer values) for `data` (first axis), indicating
+         which elements should be returned by this generator.
+
+     Notes
+     -----
+     This function works with any feature dimension (e.g. it
+     works for image (2D) data and for trace data (1D)). It
+     is just important that `data` is indexable using integers
+     and that the events in `data` all have the same shape.
+     The dtype of the returned chunks is determined by the first
+     item in `data`.
+
+     This function works with sliceable (e.g. np.ndarray) and
+     non-sliceable (e.g. tdms-format-based images) input data. If the
+     input data is sliceable (which is determined by the availability
+     of the `__array__` method), fast numpy slicing is used. If the
+     input data does not support slicing (`__array__` not defined),
+     a slow iteration over `indices` is done.
+
+     In the slow iteration case, the returned array data are overridden
+     in-place. If you need to retain a copy of the `yield`ed chunks,
+     apply `np.array(.., copy=True)` to the returned chunks.
+     """
+     chunk_shape = RTDCWriter.get_best_nd_chunks(item_shape=data.shape[1:],
+                                                 item_dtype=data.dtype)
+     chunk_size = chunk_shape[0]
+
+     if hasattr(data, "__array__"):
+         # We have an array-like object and can do slicing with the
+         # indexing array. This speeds up chunk creation for e.g. the
+         # HDF5 file format where all data are present in an array-like
+         # fashion.
+         indices = np.array(indices)
+         stop = 0
+         for kk in range(len(indices) // chunk_size):
+             start = chunk_size * kk
+             stop = chunk_size * (kk + 1)
+             yield data[indices[start:stop]]
+         if stop < len(indices):
+             yield data[indices[stop:]]
+     else:
+         # assemble filtered image stacks
+         chunk = np.zeros(chunk_shape, dtype=data.dtype)
+         jj = 0
+         for ii in indices:
+             chunk[jj] = data[ii]
+             if (jj + 1) % chunk_size == 0:
+                 jj = 0
+                 yield chunk
+             else:
+                 jj += 1
+         # yield remainder
+         if jj:
+             yield chunk[:jj]
+
+
+ def store_filtered_feature(rtdc_writer, feat, data, filtarr):
+     """Append filtered feature data to an HDF5 file
+
+     Parameters
+     ----------
+     rtdc_writer: dclab.rtdc_dataset.writer.RTDCWriter
+         an open writer object
+     feat: str
+         feature name
+     data: object or list or np.ndarray or dict
+         feature data
+     filtarr: boolean np.ndarray
+         filtering array (same as RTDCBase.filter.all)
+
+     Notes
+     -----
+     This code is somewhat redundant to the code of RTDCWriter.
+     """
+     indices = np.where(filtarr)[0]
+     if indices.size == 0:
+         warnings.warn(f"No data to export to '{rtdc_writer.path}'")
+         return
+
+     hw = rtdc_writer
+     if not hw.mode == "append":
+         raise ValueError("The `rtdc_writer` object must be created with "
+                          + f"`mode='append'`, got '{hw.mode}' for '{hw}'!")
+     # event-wise, because
+     # - tdms-based datasets don't allow indexing with numpy
+     # - there might be memory issues
+     if feat == "contour":
+         for ii in indices:
+             hw.store_feature("contour", data[ii])
+     elif feat in ["mask", "image", "image_bg"]:
+         # assemble filtered image stacks
+         for imstack in yield_filtered_array_stacks(data, indices):
+             hw.store_feature(feat, imstack)
+     elif feat == "trace":
+         # assemble filtered trace stacks
+         for tr in data.keys():
+             for trstack in yield_filtered_array_stacks(data[tr], indices):
+                 hw.store_feature("trace", {tr: trstack})
+     elif dfn.scalar_feature_exists(feat):
+         hw.store_feature(feat, data[filtarr])
+     else:
+         # Special case of plugin or temporary features.
+         shape = data[0].shape
+         for dstack in yield_filtered_array_stacks(data, indices):
+             hw.store_feature(feat, dstack, shape=shape)
+
+
+ def hdf5_append(h5obj, rtdc_ds, feat, compression, filtarr=None,
+                 time_offset=0):
+     """Append feature data to an HDF5 file
+
+     Parameters
+     ----------
+     h5obj: h5py.File
+         Opened HDF5 file
+     rtdc_ds: dclab.rtdc_dataset.RTDCBase
+         Instance from which to obtain the data
+     feat: str
+         Valid feature name in `rtdc_ds`
+     compression: str or None
+         Compression method for "contour", "image", and "trace" data
+         as well as logs; one of [None, "lzf", "gzip", "szip"].
+     filtarr: None or 1d boolean np.ndarray
+         Optional boolean array used for filtering. If set to
+         `None`, all events are saved.
+     time_offset: float
+         Do not use! Please use `dclab.cli.task_join.join` instead.
+
+     Notes
+     -----
+     Please update the "experiment::event count" attribute manually.
+     You may use
+     :func:`dclab.rtdc_dataset.writer.RTDCWriter.rectify_metadata`
+     for that or use the `RTDCWriter` context manager, where it is
+     run automatically during `__exit__`.
+     """
+     # optional array for filtering events
+     if filtarr is None:
+         filtarr = np.ones(len(rtdc_ds), dtype=bool)
+         no_filter = True
+     else:
+         no_filter = False
+
+     warnings.warn("`hdf5_append` is deprecated; please use "
+                   "the dclab.RTDCWriter context manager or the "
+                   "export.store_filtered_feature function.",
+                   DeprecationWarning)
+
+     if time_offset != 0:
+         raise ValueError("Setting `time_offset` is not supported anymore! "
+                          "Please use `dclab.cli.task_join.join` instead.")
+
+     # writer instance
+     hw = RTDCWriter(h5obj, mode="append", compression=compression)
+     if no_filter:
+         hw.store_feature(feat, rtdc_ds[feat])
+     else:
+         store_filtered_feature(rtdc_writer=hw,
+                                feat=feat,
+                                data=rtdc_ds[feat],
+                                filtarr=filtarr)
+
+
+ def hdf5_autocomplete_config(path_or_h5obj):
+     """Autocomplete the configuration of an RT-DC measurement
+
+     The following configuration keys are updated:
+
+     - experiment:event count
+     - fluorescence:samples per event
+     - imaging:roi size x (if image or mask is given)
+     - imaging:roi size y (if image or mask is given)
+
+     The following configuration keys are added if not present:
+
+     - fluorescence:channel count
+
+     Parameters
+     ----------
+     path_or_h5obj: pathlib.Path or str or h5py.File
+         Path to or opened RT-DC measurement
+     """
+     warnings.warn("`hdf5_autocomplete_config` is deprecated; please use "
+                   "the dclab.RTDCWriter context manager or the "
+                   "dclab.RTDCWriter.rectify_metadata function.",
+                   DeprecationWarning)
+     if not isinstance(path_or_h5obj, h5py.File):
+         close = True
+     else:
+         close = False
+
+     hw = RTDCWriter(path_or_h5obj, mode="append")
+     hw.rectify_metadata()
+
+     if close:
+         # a path was given, so close the handle opened by the writer
+         hw.h5file.close()
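
For orientation, a minimal usage sketch of the video export above. It is reached through a dataset's `export` attribute and needs the optional `av` package (see `PYAV_AVAILABLE`); the input path is a placeholder and the codec options follow the docstring example:

import dclab

ds = dclab.new_dataset("input.rtdc")  # placeholder path
# the container format (.mkv) is deduced from the suffix
ds.export.avi("events.mkv",
              filtered=True,
              codec="libx264",
              codec_options={"preset": "slow", "crf": "0"},
              override=True)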
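The scalar exports (`tsv`, `fcs`) accept the same path/features/override arguments; `fcs` additionally requires the optional `fcswrite` package (see `FCSWRITE_AVAILABLE`). A sketch with placeholder file names and a hypothetical metadata key:

import dclab

ds = dclab.new_dataset("input.rtdc")  # placeholder path

# .tsv: "# key: value" metadata lines, a UTF-8 BOM, then the data table
ds.export.tsv("events.tsv",
              features=["area_um", "deform"],
              meta_data={"operator": "guest"},  # hypothetical key
              filtered=False,
              override=True)

# .fcs: metadata ends up in the primary TEXT segment
ds.export.fcs("events.fcs",
              features=["area_um", "deform"],
              filtered=False,
              override=True)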
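A sketch of the central `hdf5` export with filtering and basins enabled, assuming an existing file `input.rtdc` and illustrative deformation limits; the `on_progress` function matches the `progress_callback` signature documented above:

import dclab

ds = dclab.new_dataset("input.rtdc")  # placeholder path
ds.config["filtering"]["deform min"] = 0.0
ds.config["filtering"]["deform max"] = 0.1
ds.apply_filter()

def on_progress(frac, msg):
    print(f"{frac:.0%}: {msg}")

# passing an empty `features` list with basins=True would instead yield
# a small metadata-only file with a mapped basin (see the version note)
ds.export.hdf5("output.rtdc",
               features=["area_um", "deform", "image"],
               filtered=True,
               basins=True,
               progress_callback=on_progress)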
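To illustrate the chunking behavior of `yield_filtered_array_stacks` in its array-like (fast-slicing) branch, a toy example with made-up shapes:

import numpy as np
from dclab.rtdc_dataset.export import yield_filtered_array_stacks

data = np.random.rand(300, 64, 64)  # e.g. 300 event images
indices = np.arange(0, 300, 3)      # keep every third event

total = 0
for chunk in yield_filtered_array_stacks(data, indices):
    # chunk shape is (chunk_size, 64, 64); the last chunk may be shorter
    total += len(chunk)
assert total == len(indices)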
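And a sketch of `store_filtered_feature`, which requires a writer opened with `mode="append"` (enforced above); the file name, metadata, and sample data are made up:

import numpy as np
import dclab
from dclab.rtdc_dataset.export import store_filtered_feature

filtarr = np.zeros(1000, dtype=bool)
filtarr[::2] = True  # keep every second event

with dclab.RTDCWriter("subset.rtdc", mode="append") as hw:
    hw.store_metadata({"experiment": {"sample": "sketch", "run index": 1}})
    store_filtered_feature(rtdc_writer=hw,
                           feat="deform",
                           data=np.random.rand(1000),
                           filtarr=filtarr)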