dclab-0.62.11-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dclab might be problematic.

Files changed (137)
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
dclab/rtdc_dataset/export.py
@@ -0,0 +1,737 @@
+ """Export RT-DC measurement data"""
+ from __future__ import annotations
+
+ import codecs
+ import json
+ import pathlib
+ import time
+ from typing import Dict, List
+ import uuid
+ import warnings
+
+ import h5py
+ import hdf5plugin
+
+ try:
+     import imageio
+ except ModuleNotFoundError:
+     IMAGEIO_AVAILABLE = False
+ else:
+     IMAGEIO_AVAILABLE = True
+
+ try:
+     import fcswrite
+ except ModuleNotFoundError:
+     FCSWRITE_AVAILABLE = False
+ else:
+     FCSWRITE_AVAILABLE = True
+
+ import numpy as np
+
+ from .. import definitions as dfn
+ from .._version import version, version_tuple
+
+ from .feat_basin import get_basin_classes
+ from .writer import RTDCWriter
+
+
+ class LimitingExportSizeWarning(UserWarning):
+     pass
+
+
+ class Export(object):
+     def __init__(self, rtdc_ds):
+         """Export functionalities for RT-DC datasets"""
+         self.rtdc_ds = rtdc_ds
+
+     def avi(self, path, filtered=True, override=False):
+         """Exports filtered event images to an avi file
+
+         Parameters
+         ----------
+         path: str
+             Path to a .avi file. The ending .avi is added automatically.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+
+         Notes
+         -----
+         Raises OSError if the current dataset does not contain image data
+         """
+         if not IMAGEIO_AVAILABLE:
+             raise ModuleNotFoundError(
+                 "Package `imageio` required for avi export!")
+         path = pathlib.Path(path)
+         ds = self.rtdc_ds
+         # Make sure that path ends with .avi
+         if path.suffix != ".avi":
+             path = path.with_name(path.name + ".avi")
+         # Check if file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Start exporting
+         if "image" in ds:
+             # Open video for writing
+             vout = imageio.get_writer(uri=path,
+                                       format="FFMPEG",
+                                       fps=25,
+                                       codec="rawvideo",
+                                       pixelformat="yuv420p",
+                                       macro_block_size=None,
+                                       ffmpeg_log_level="error")
+             # write the filtered frames to avi file
+             for evid in np.arange(len(ds)):
+                 # skip frames that were filtered out
+                 if filtered and not ds.filter.all[evid]:
+                     continue
+                 image = ds["image"][evid]
+                 # Convert image to RGB
+                 image = image.reshape(image.shape[0], image.shape[1], 1)
+                 image = np.repeat(image, 3, axis=2)
+                 vout.append_data(image)
+         else:
+             msg = "No image data to export: dataset {}!".format(ds.title)
+             raise OSError(msg)
+
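
For orientation, a minimal sketch of how this export is reached through the public API; it assumes a dataset opened with `dclab.new_dataset` and uses placeholder file names and an illustrative filter threshold:

    import dclab

    ds = dclab.new_dataset("measurement.rtdc")
    ds.config["filtering"]["deform max"] = 0.1  # illustrative box filter
    ds.apply_filter()
    # only events that passed the filter end up in the movie;
    # requires the optional `imageio` package with FFMPEG support
    ds.export.avi("movie.avi", filtered=True, override=True)
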
+     def fcs(self, path, features, meta_data=None, filtered=True,
+             override=False):
+         """Export the data of an RT-DC dataset to an .fcs file
+
+         Parameters
+         ----------
+         path: str
+             Path to an .fcs file. The ending .fcs is added automatically.
+         features: list of str
+             The features in the resulting .fcs file. These are strings
+             that are defined by `dclab.definitions.scalar_feature_exists`,
+             e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
+         meta_data: dict
+             User-defined, optional key-value pairs that are stored
+             in the primary TEXT segment of the FCS file; the version
+             of dclab is stored there by default
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+
+         Notes
+         -----
+         Due to incompatibility with the .fcs file format, all events with
+         NaN-valued features are not exported.
+         """
+         if meta_data is None:
+             meta_data = {}
+         if not FCSWRITE_AVAILABLE:
+             raise ModuleNotFoundError(
+                 "Package `fcswrite` required for fcs export!")
+
+         ds = self.rtdc_ds
+
+         path = pathlib.Path(path)
+         # Make sure that path ends with .fcs
+         if path.suffix != ".fcs":
+             path = path.with_name(path.name + ".fcs")
+         # Check if file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Check that features are valid
+         features = sorted(set(features))
+         for c in features:
+             if c not in ds.features_scalar:
+                 msg = "Invalid feature name: {}".format(c)
+                 raise ValueError(msg)
+
+         # Collect the header
+         chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]
+
+         # Collect the data
+         if filtered:
+             data = [ds[c][ds.filter.all] for c in features]
+         else:
+             data = [ds[c] for c in features]
+
+         data = np.array(data).transpose()
+         meta_data["dclab version"] = version
+         fcswrite.write_fcs(filename=str(path),
+                            chn_names=chn_names,
+                            data=data,
+                            text_kw_pr=meta_data,
+                            )
+
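
A corresponding sketch for the FCS path (feature names as documented above; the output name and metadata are placeholders):

    # requires the optional `fcswrite` package
    ds.export.fcs("scalars.fcs",
                  features=["area_um", "deform", "fl1_max"],
                  meta_data={"operator": "EW"},
                  filtered=True,
                  override=True)
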
+     def hdf5(self,
+              path: str | pathlib.Path,
+              features: List[str] = None,
+              filtered: bool = True,
+              logs: bool = False,
+              tables: bool = False,
+              basins: bool = False,
+              meta_prefix: str = "src_",
+              override: bool = False,
+              compression_kwargs: Dict = None,
+              compression: str = "deprecated",
+              skip_checks: bool = False):
+         """Export the data of the current instance to an HDF5 file
+
+         Parameters
+         ----------
+         path: str
+             Path to an .rtdc file. The ending .rtdc is added
+             automatically.
+         features: list of str
+             The features in the resulting .rtdc file. These are strings
+             that are defined by `dclab.definitions.feature_exists`, e.g.
+             "area_cvx", "deform", "frame", "fl1_max", "image".
+             Defaults to `self.rtdc_ds.features_innate`.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         logs: bool
+             Whether to store the logs of the original file prefixed with
+             `source_` to the output file.
+         tables: bool
+             Whether to store the tables of the original file prefixed with
+             `source_` to the output file.
+         basins: bool
+             Whether to export basins. If filtering is disabled, basins
+             are copied directly to the output file. If filtering is enabled,
+             then mapped basins are exported.
+         meta_prefix: str
+             Prefix for log and table names in the exported file
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         compression_kwargs: dict
+             Dictionary with the keys "compression" and "compression_opts"
+             which are passed to :func:`h5py.H5File.create_dataset`. The
+             default is Zstandard compression with the lowest compression
+             level `hdf5plugin.Zstd(clevel=1)`.
+         compression: str or None
+             Compression method used for data storage;
+             one of [None, "lzf", "gzip", "szip"].
+
+             .. deprecated:: 0.43.0
+                 Use `compression_kwargs` instead.
+         skip_checks: bool
+             Disable checking whether all features have the same length.
+
+
+         .. versionchanged:: 0.58.0
+
+             The ``basins`` keyword argument was added, and it is now possible
+             to pass an empty list to ``features``. This combination results
+             in a very small file consisting of metadata and a mapped basin
+             referring to the original dataset.
+         """
+         if compression != "deprecated":
+             warnings.warn("The `compression` kwarg is deprecated in favor of "
+                           "`compression_kwargs`!",
+                           DeprecationWarning)
+             if compression_kwargs is not None:
+                 raise ValueError("You may not specify `compression` and "
+                                  "`compression_kwargs` at the same time!")
+             # be backwards-compatible
+             compression_kwargs = {"compression": compression}
+         if compression_kwargs is None:
+             compression_kwargs = hdf5plugin.Zstd(clevel=1)
+         path = pathlib.Path(path)
+         # Make sure that path ends with .rtdc
+         if path.suffix not in [".rtdc", ".rtdc~"]:
+             path = path.parent / (path.name + ".rtdc")
+         # Check if file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(path)
+                           + "Please use the `override=True` option.")
+         elif path.exists():
+             path.unlink()
+
+         # make sure the parent directory exists
+         path.parent.mkdir(parents=True, exist_ok=True)
+
+         # for convenience
+         ds = self.rtdc_ds
+
+         if features is None:
+             features = ds.features_innate
+
+         # decide which metadata to export
+         meta = {}
+         # only cfg metadata (no analysis metadata)
+         for sec in dfn.CFG_METADATA:
+             if sec in ds.config:
+                 meta[sec] = ds.config[sec].copy()
+         # add user-defined metadata
+         if "user" in ds.config:
+             meta["user"] = ds.config["user"].copy()
+         if filtered:
+             # Define a new measurement identifier, so that we are not running
+             # into any problems with basins being defined for filtered data.
+             ds_run_id = ds.get_measurement_identifier()
+             random_ap = str(uuid.uuid4())[:4]
+             meta["experiment"]["run identifier"] = f"{ds_run_id}-{random_ap}"
+
+         if filtered:
+             filter_arr = ds.filter.all
+         else:
+             filter_arr = None
+
+         features = sorted(set(features))
+         if not skip_checks and features:
+             # check that all features have same length and use the smallest
+             # common length
+             lengths = []
+             for feat in features:
+                 if feat == "trace":
+                     for tr in list(ds["trace"].keys()):
+                         lengths.append(len(ds["trace"][tr]))
+                 else:
+                     lengths.append(len(ds[feat]))
+             l_min = np.min(lengths)
+             l_max = np.max(lengths)
+             if l_min != l_max:
+                 if filter_arr is None:
+                     # we are forced to do filtering
+                     filter_arr = np.ones(len(ds), dtype=bool)
+                 else:
+                     # have to create a copy, because rtdc_ds.filter.all is ro!
+                     filter_arr = np.copy(filter_arr)
+                 filter_arr[l_min:] = False
+                 warnings.warn(
+                     "Not all features have the same length! Limiting output "
+                     + f"event count to {l_min} (max {l_max}) in '{path}'.",
+                     LimitingExportSizeWarning)
+
+         # Perform actual export
+         with RTDCWriter(path,
+                         mode="append",
+                         compression_kwargs=compression_kwargs) as hw:
+             # write meta data
+             hw.store_metadata(meta)
+
+             # write export log
+             hw.store_log(time.strftime("dclab-export_%Y-%m-%d_%H.%M.%S"),
+                          json.dumps(
+                              {"dclab version": version_tuple,
+                               "kwargs": {
+                                   "features": features,
+                                   "filtered": filtered,
+                                   "logs": logs,
+                                   "tables": tables,
+                                   "basins": basins,
+                                   "meta_prefix": meta_prefix,
+                                   "skip_checks": skip_checks
+                               }
+                               },
+                              indent=2,
+                              sort_keys=True,
+                          ).split("\n"))
+
+             if logs:
+                 # write logs
+                 for log in ds.logs:
+                     hw.store_log(f"{meta_prefix}{log}",
+                                  ds.logs[log])
+
+             if tables:
+                 # write tables
+                 for tab in ds.tables:
+                     hw.store_table(f"{meta_prefix}{tab}",
+                                    ds.tables[tab])
+
+             # write each feature individually
+             for feat in features:
+                 if (filter_arr is None or
+                         # This does not work for the .tdms file format
+                         # (and probably also not for DCOR).
+                         (np.all(filter_arr) and ds.format == "hdf5")):
+                     # We do not have to filter and can be fast
+                     if dfn.scalar_feature_exists(feat):
+                         shape = (1,)
+                     elif feat in ["image", "image_bg", "mask", "trace"]:
+                         # known shape
+                         shape = None
+                     else:
+                         shape = np.array(ds[feat][0]).shape
+                     hw.store_feature(feat=feat,
+                                      data=ds[feat],
+                                      shape=shape)
+                 else:
+                     # We have to filter and will be slower
+                     store_filtered_feature(rtdc_writer=hw,
+                                            feat=feat,
+                                            data=ds[feat],
+                                            filtarr=filter_arr)
+
+             if basins:
+                 # We have to store basins. There are three options:
+                 # - filtering disabled: just copy basins
+                 # - filtering enabled
+                 #   - basins with "same" mapping: create new mapping
+                 #   - mapped basins: correct nested mapping
+                 # In addition to the basins that we copy from the
+                 # original dataset, we also create a new basin that
+                 # refers to the original dataset itself.
+                 basin_list = [bn.as_dict() for bn in ds.basins]
+                 # In addition to the upstream basins, also store a reference
+                 # to the original file from which the export was done.
+                 if ds.format in get_basin_classes():
+                     # The dataset has a format that matches a basin format
+                     # directly.
+                     basin_is_local = ds.format == "hdf5"
+                     basin_locs = [ds.path]
+                     if basin_is_local:
+                         # So the user can put them into the same directory.
+                         basin_locs.append(ds.path.name)
+                     basin_list.append({
+                         "basin_name": "Exported data",
+                         "basin_type": "file" if basin_is_local else "remote",
+                         "basin_format": ds.format,
+                         "basin_locs": basin_locs,
+                         "basin_descr": f"Exported with dclab {version}",
+                     })
+                 elif (ds.format == "hierarchy"
+                       and ds.get_root_parent().format in get_basin_classes()):
+                     # avoid circular imports
+                     from .fmt_hierarchy import map_indices_child2root
+                     # The dataset is a hierarchy child, and it is derived
+                     # from a dataset that has a matching basin format.
+                     # We have to add the indices of the root parent, which
+                     # identify the child, to the basin dictionary. Note
+                     # that additional basin filtering is applied below
+                     # this case for all basins.
+                     # For the sake of clarity I wrote this as a separate case,
+                     # even if that means duplicating code from the previous
+                     # case.
+                     ds_root = ds.get_root_parent()
+                     basin_is_local = ds_root.format == "hdf5"
+                     basin_locs = [ds_root.path]
+                     if basin_is_local:
+                         # So the user can put them into the same directory.
+                         basin_locs.append(ds_root.path.name)
+                     basin_list.append({
+                         "basin_name": "Exported data (hierarchy)",
+                         "basin_type": "file" if basin_is_local else "remote",
+                         "basin_format": ds_root.format,
+                         "basin_locs": basin_locs,
+                         "basin_descr": f"Exported with dclab {version} from a "
+                                        f"hierarchy dataset",
+                         # This is where this basin differs from the basin
+                         # definition in the previous case.
+                         "basin_map": map_indices_child2root(
+                             child=ds,
+                             child_indices=np.arange(len(ds))
+                         ),
+                     })
+
+                 for bn_dict in basin_list:
+                     if bn_dict.get("basin_type") == "internal":
+                         # Internal basins are only valid for files they were
+                         # defined in. Since we are exporting, it does not
+                         # make sense to store these basins in the output file.
+                         continue
+                     basinmap_orig = bn_dict.get("basin_map")
+                     if not filtered:
+                         # filtering disabled: just copy basins
+                         pass
+                     elif basinmap_orig is None:
+                         # basins with "same" mapping: create new mapping
+                         bn_dict["basin_map"] = np.where(filter_arr)[0]
+                     else:
+                         # mapped basins: correct nested mapping
+                         bn_dict["basin_map"] = basinmap_orig[filter_arr]
+
+                     # Do not verify basins, it takes too long.
+                     hw.store_basin(**bn_dict, verify=False)
+
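
A minimal sketch of the HDF5 export described above. Passing an empty feature list together with basins=True yields the small metadata-plus-basin file mentioned in the docstring; the compression setting shown is the documented default:

    import hdf5plugin

    ds.export.hdf5("subset.rtdc",
                   features=[],
                   filtered=True,
                   basins=True,
                   override=True,
                   compression_kwargs=hdf5plugin.Zstd(clevel=1))
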
+     def tsv(self, path, features, meta_data=None, filtered=True,
+             override=False):
+         """Export the data of the current instance to a .tsv file
+
+         Parameters
+         ----------
+         path: str
+             Path to a .tsv file. The ending .tsv is added automatically.
+         features: list of str
+             The features in the resulting .tsv file. These are strings
+             that are defined by `dclab.definitions.scalar_feature_exists`,
+             e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
+         meta_data: dict
+             User-defined, optional key-value pairs that are stored
+             at the beginning of the tsv file - one key-value pair is
+             stored per line which starts with a hash. The version of
+             dclab is stored there by default.
+         filtered: bool
+             If set to `True`, only the filtered data
+             (index in ds.filter.all) are used.
+         override: bool
+             If set to `True`, an existing file ``path`` will be overridden.
+             If set to `False`, raises `OSError` if ``path`` exists.
+         """
+         if meta_data is None:
+             meta_data = {}
+         features = [c.lower() for c in features]
+         features = sorted(set(features))
+         path = pathlib.Path(path)
+         ds = self.rtdc_ds
+         # Make sure that path ends with .tsv
+         if path.suffix != ".tsv":
+             path = path.with_name(path.name + ".tsv")
+         # Check if file already exists
+         if not override and path.exists():
+             raise OSError("File already exists: {}\n".format(
+                 str(path).encode("ascii", "ignore")) +
+                 "Please use the `override=True` option.")
+         # Check that features exist
+         for c in features:
+             if c not in ds.features_scalar:
+                 raise ValueError("Invalid feature name {}".format(c))
+         meta_data["dclab version"] = version
+         # Write BOM header
+         with path.open("wb") as fd:
+             fd.write(codecs.BOM_UTF8)
+         # Open file
+         with path.open("a", encoding="utf-8") as fd:
+             # write meta data
+             for key in sorted(meta_data.keys()):
+                 fd.write(f"# {key}: {meta_data[key]}\n")
+             fd.write("#\n")
+             fd.write("# Original dataset configuration:\n")
+             cfg = self.rtdc_ds.config.as_dict()
+             for sec in sorted(cfg.keys()):
+                 for key in sorted(cfg[sec].keys()):
+                     fd.write(f"# dc:{sec}:{key} = {cfg[sec][key]}\n")
+             fd.write("#\n")
+             # write header
+             header1 = "\t".join([c for c in features])
+             fd.write("# "+header1+"\n")
+             labels = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]
+             header2 = "\t".join(labels)
+             fd.write("# "+header2+"\n")
+
+         with path.open("ab") as fd:
+             # write data
+             if filtered:
+                 data = [ds[c][ds.filter.all] for c in features]
+             else:
+                 data = [ds[c] for c in features]
+
+             np.savetxt(fd,
+                        np.array(data).transpose(),
+                        fmt=str("%.10e"),
+                        delimiter="\t")
+
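
Since every metadata and header line written above starts with a hash, the exported file can be read back with standard tools; a sketch (columns follow the sorted feature list):

    import numpy as np

    arr = np.loadtxt("data.tsv", comments="#", delimiter="\t")
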
+
+
+ def yield_filtered_array_stacks(data, indices):
+     """Generator returning chunks with the filtered feature data
+
+     Parameters
+     ----------
+     data: np.ndarray or h5py.Dataset
+         The full, unfiltered input feature data. Must implement
+         the `shape` and `dtype` properties. If it implements the
+         `__array__` method, fast slicing is used.
+     indices: np.ndarray or list
+         The indices (integer values) for `data` (first axis), indicating
+         which elements should be returned by this generator.
+
+     Notes
+     -----
+     This method works with any feature dimension (e.g. it
+     works for image (2D) data and for trace data (1D)). It
+     is just important that `data` is indexable using integers
+     and that the events in `data` all have the same shape.
+     The dtype of the returned chunks is determined by the first
+     item in `data`.
+
+     This method works with sliceable (e.g. np.ndarray) and
+     non-sliceable (e.g. tdms-format-based images) input data. If the
+     input data is sliceable (which is determined by the availability
+     of the `__array__` method), then fast numpy slicing is used. If the
+     input data does not support slicing (`__array__` not defined), then
+     a slow iteration over `indices` is done.
+
+     In the slow iteration case, the returned array data are overwritten
+     in-place. If you need to retain a copy of the `yield`ed chunks,
+     apply `np.array(.., copy=True)` to the returned chunks.
+     """
+     chunk_shape = RTDCWriter.get_best_nd_chunks(item_shape=data.shape[1:],
+                                                 item_dtype=data.dtype)
+     chunk_size = chunk_shape[0]
+
+     if hasattr(data, "__array__"):
+         # We have an array-like object and can do slicing with the indexing
+         # array. This speeds up chunk creation for e.g. the HDF5 file format
+         # where all data are present in an array-like fashion.
+         indices = np.array(indices)
+         stop = 0
+         for kk in range(len(indices) // chunk_size):
+             start = chunk_size * kk
+             stop = chunk_size * (kk + 1)
+             yield data[indices[start:stop]]
+         if stop < len(indices):
+             yield data[indices[stop:]]
+     else:
+         # assemble filtered image stacks
+         chunk = np.zeros(chunk_shape, dtype=data.dtype)
+         jj = 0
+         for ii in indices:
+             chunk[jj] = data[ii]
+             if (jj + 1) % chunk_size == 0:
+                 jj = 0
+                 yield chunk
+             else:
+                 jj += 1
+         # yield remainder
+         if jj:
+             yield chunk[:jj]
+
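
A small sketch of the generator contract described in the notes above, using synthetic array data; the explicit copies guard against the in-place chunk reuse of the slow path:

    data = np.random.rand(100, 32, 32)   # 100 events of shape 32x32
    indices = np.arange(0, 100, 2)       # keep every second event
    stacks = [np.array(chunk, copy=True)
              for chunk in yield_filtered_array_stacks(data, indices)]
    assert sum(len(s) for s in stacks) == len(indices)
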
+
+
+ def store_filtered_feature(rtdc_writer, feat, data, filtarr):
+     """Append filtered feature data to an HDF5 file
+
+     Parameters
+     ----------
+     rtdc_writer: dclab.rtdc_dataset.writer.RTDCWriter
+         an open writer object
+     feat: str
+         feature name
+     data: object or list or np.ndarray or dict
+         feature data
+     filtarr: boolean np.ndarray
+         filtering array (same as RTDCBase.filter.all)
+
+     Notes
+     -----
+     This code is somewhat redundant to the code of RTDCWriter.
+     """
+     indices = np.where(filtarr)[0]
+     if indices.size == 0:
+         warnings.warn(f"No data to export to '{rtdc_writer.path}'")
+         return
+
+     hw = rtdc_writer
+     if not hw.mode == "append":
+         raise ValueError("The `rtdc_writer` object must be created with "
+                          + f"`mode='append'`, got '{hw.mode}' for '{hw}'!")
+     # event-wise, because
+     # - tdms-based datasets don't allow indexing with numpy
+     # - there might be memory issues
+     if feat == "contour":
+         for ii in indices:
+             hw.store_feature("contour", data[ii])
+     elif feat in ["mask", "image", "image_bg"]:
+         # assemble filtered image stacks
+         for imstack in yield_filtered_array_stacks(data, indices):
+             hw.store_feature(feat, imstack)
+     elif feat == "trace":
+         # assemble filtered trace stacks
+         for tr in data.keys():
+             for trstack in yield_filtered_array_stacks(data[tr], indices):
+                 hw.store_feature("trace", {tr: trstack})
+     elif dfn.scalar_feature_exists(feat):
+         hw.store_feature(feat, data[filtarr])
+     else:
+         # Special case of plugin or temporary features.
+         shape = data[0].shape
+         for dstack in yield_filtered_array_stacks(data, indices):
+             hw.store_feature(feat, dstack, shape=shape)
+
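
A sketch of driving this helper directly, mirroring what `Export.hdf5` does in its filtered branch (the writer must be opened in append mode, as the check above enforces):

    keep = ds.filter.all  # boolean array, one entry per event
    with RTDCWriter("filtered.rtdc", mode="append") as hw:
        store_filtered_feature(rtdc_writer=hw,
                               feat="deform",
                               data=ds["deform"],
                               filtarr=keep)
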
+
+
+ def hdf5_append(h5obj, rtdc_ds, feat, compression, filtarr=None,
+                 time_offset=0):
+     """Append feature data to an HDF5 file
+
+     Parameters
+     ----------
+     h5obj: h5py.File
+         Opened HDF5 file
+     rtdc_ds: dclab.rtdc_dataset.RTDCBase
+         Instance from which to obtain the data
+     feat: str
+         Valid feature name in `rtdc_ds`
+     compression: str or None
+         Compression method for "contour", "image", and "trace" data
+         as well as logs; one of [None, "lzf", "gzip", "szip"].
+     filtarr: None or 1d boolean np.ndarray
+         Optional boolean array used for filtering. If set to
+         `None`, all events are saved.
+     time_offset: float
+         Do not use! Please use `dclab.cli.task_join.join` instead.
+
+     Notes
+     -----
+     Please update the "experiment::event count" attribute manually.
+     You may use
+     :func:`dclab.rtdc_dataset.writer.RTDCWriter.rectify_metadata`
+     for that or use the `RTDCWriter` context manager where it is
+     automatically run during `__exit__`.
+     """
+     # optional array for filtering events
+     if filtarr is None:
+         filtarr = np.ones(len(rtdc_ds), dtype=bool)
+         no_filter = True
+     else:
+         no_filter = False
+
+     warnings.warn("`hdf5_append` is deprecated; please use "
+                   "the dclab.RTDCWriter context manager or the "
+                   "export.store_filtered_feature function.",
+                   DeprecationWarning)
+
+     if time_offset != 0:
+         raise ValueError("Setting `time_offset` not supported anymore! "
+                          "Please use `dclab.cli.task_join.join` instead.")
+
+     # writer instance
+     hw = RTDCWriter(h5obj, mode="append", compression=compression)
+     if no_filter:
+         hw.store_feature(feat, rtdc_ds[feat])
+     else:
+         store_filtered_feature(rtdc_writer=hw,
+                                feat=feat,
+                                data=rtdc_ds[feat],
+                                filtarr=filtarr)
+
+
+ def hdf5_autocomplete_config(path_or_h5obj):
+     """Autocomplete the configuration of the RTDC-measurement
+
+     The following configuration keys are updated:
+
+     - experiment:event count
+     - fluorescence:samples per event
+     - imaging: roi size x (if image or mask is given)
+     - imaging: roi size y (if image or mask is given)
+
+     The following configuration keys are added if not present:
+
+     - fluorescence:channel count
+
+     Parameters
+     ----------
+     path_or_h5obj: pathlib.Path or str or h5py.File
+         Path to or opened RT-DC measurement
+     """
+     warnings.warn("`hdf5_autocomplete_config` is deprecated; please use "
+                   "the dclab.RTDCWriter context manager or the "
+                   "dclab.RTDCWriter.rectify_metadata function.",
+                   DeprecationWarning)
+     if not isinstance(path_or_h5obj, h5py.File):
+         close = True
+     else:
+         close = False
+
+     hw = RTDCWriter(path_or_h5obj, mode="append")
+     hw.rectify_metadata()
+
+     if close:
+         path_or_h5obj.close()
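
As the deprecation warnings in the two helpers above suggest, the supported route is the `RTDCWriter` context manager, which rectifies metadata (e.g. "experiment:event count") automatically on `__exit__`; a sketch:

    with RTDCWriter("out.rtdc", mode="append") as hw:
        hw.store_feature("deform", ds["deform"])
    # metadata is rectified when the context manager exits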