dclab-0.67.0-cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
dclab/rtdc_dataset/check.py
@@ -0,0 +1,902 @@
+ """Check RT-DC datasets for completeness"""
+ import copy
+ import functools
+ import warnings
+
+ import h5py
+ import numpy as np
+
+ from .copier import is_properly_compressed
+ from .core import RTDCBase
+ from .fmt_hdf5 import RTDC_HDF5
+ from .fmt_hierarchy import RTDC_Hierarchy
+ from .load import load_file
+
+ from .. import definitions as dfn
+
+ #: These sections should be fully present, except for the
+ #: keys in :data:`OPTIONAL_KEYS`.
+ DESIRABLE_SECTIONS = {
+     "experiment",
+     "imaging",
+     "setup",
+ }
+
+ #: log names that end with these strings are not checked
+ IGNORED_LOG_NAMES = {
+     "_para.ini",
+     "_image.ini",
+     "FG_Config.mcf",
+     "parameters.txt",
+     "_SoftwareSettings.ini",
+     "dckit-history",
+ }
+
+ #: keys that must be present for every measurement
+ IMPORTANT_KEYS = {
+     "experiment": [
+         "date",
+         "event count",
+         "run index",
+         "sample",
+         "time"],
+     "imaging": [
+         "flash device",
+         "flash duration",
+         "frame rate",
+         "pixel size",
+         "roi position x",
+         "roi position y",
+         "roi size x",
+         "roi size y"],
+     "setup": [
+         "channel width",
+         "chip region",
+         "flow rate",
+         "medium"],
+ }
+
+ #: keys that must be present for fluorescence measurements
+ IMPORTANT_KEYS_FL = {
+     "fluorescence": [
+         "bit depth",
+         "channel count",
+         "channels installed",
+         "laser count",
+         "lasers installed",
+         "sample rate",
+         "samples per event",
+         "signal max",
+         "signal min",
+         "trace median"],
+ }
+
+ #: maximum line length in log files
+ LOG_MAX_LINE_LENGTH = 100
+
+ #: keys that are optional
+ OPTIONAL_KEYS = {
+     "experiment": [
+         "run identifier",
+         "timestamp",
+     ],
+     "fluorescence": [
+         "baseline 1 offset",
+         "baseline 2 offset",
+         "baseline 3 offset",
+         # name, lambda, power have their own special tests
+         "channel 1 name",
+         "channel 2 name",
+         "channel 3 name",
+         "laser 1 lambda",
+         "laser 2 lambda",
+         "laser 3 lambda",
+         "laser 1 power",
+         "laser 2 power",
+         "laser 3 power",
+     ],
+     "setup": [
+         "temperature",
+         "chip identifier",
+     ],
+ }
+
+ #: valid metadata choices
+ #: .. versionchanged:: 0.29.1
+ #:    medium not restricted to certain set of choices anymore
+ VALID_CHOICES = {}
+
+
+ @functools.total_ordering
+ class ICue(object):
+     def __init__(self, msg, level, category, data=None, identifier=None,
+                  cfg_section=None, cfg_key=None, cfg_choices=None):
+         """Integrity cue"""
+         #: human-readable message
+         self.msg = msg
+         #: severity level ("violation", "alert", or "info")
+         self.level = level
+         #: machine-readable data associated with the check
+         self.data = data
+         #: fail category
+         self.category = category
+         #: identifier e.g. for UI manipulation in DCKit
+         self.identifier = identifier
+         #: section (only for categories "missing metadata", "bad metadata")
+         self.cfg_section = cfg_section
+         #: key (only for categories "missing metadata", "bad metadata");
+         #: can be omitted e.g. to communicate that the entire section is
+         #: missing
+         self.cfg_key = cfg_key
+         #: allowed choices for the specific [section]: key combination
+         #: (only for categories "missing metadata", "bad metadata")
+         self.cfg_choices = cfg_choices
+         if self.cfg_choices is None:
+             if (cfg_section in VALID_CHOICES
+                     and cfg_key in VALID_CHOICES[cfg_section]):
+                 self.cfg_choices = VALID_CHOICES[cfg_section][cfg_key]
+
+     def __eq__(self, other):
+         leveld = {"info": 0,
+                   "violation": 1,
+                   "alert": 2,
+                   }
+         return ((leveld[self.level], self.cfg_section or "",
+                  self.cfg_key or "", self.category, self.msg) ==
+                 (leveld[other.level], other.cfg_section or "",
+                  other.cfg_key or "", other.category, other.msg))
+
+     def __lt__(self, other):
+         leveld = {"info": 0,
+                   "violation": 1,
+                   "alert": 2, }
+         return ((leveld[self.level], self.cfg_section or "",
+                  self.cfg_key or "", self.category, self.msg) <
+                 (leveld[other.level], other.cfg_section or "",
+                  other.cfg_key or "", other.category, other.msg))
+
+     def __repr__(self):
+         return f"<ICue: '{self.msg}' at 0x{hex(id(self))}>"
+
+     @staticmethod
+     def get_level_summary(cues):
+         """For a list of ICue, return the abundance of all levels"""
+         levels = {"info": 0,
+                   "alert": 0,
+                   "violation": 0}
+         for cue in cues:
+             levels[cue.level] += 1
+         return levels
+
+
+ class IntegrityChecker(object):
+     def __init__(self, path_or_ds):
+         """Check the integrity of a dataset
+
+         The argument must be either a path to an .rtdc or .tdms file
+         or an instance of `RTDCBase`. If a path is given, then all
+         warnings (e.g. UnknownConfigurationKeyWarning) are catched
+         and added to the cue list.
+
+         Usage:
+
+         .. code:: python
+
+             ic = IntegrityChecker("/path/to/data.rtdc")
+             cues = ic.check()
+
+         """
+         self.warn_cues = []
+         if isinstance(path_or_ds, RTDCBase):
+             self.ds = path_or_ds
+             self.finally_close = False
+         else:
+             with warnings.catch_warnings(record=True) as ws:
+                 warnings.simplefilter("always")
+                 self.ds = load_file(path_or_ds, enable_basins=False)
+             for ww in ws:
+                 self.warn_cues.append(ICue(
+                     msg=f"{ww.category.__name__}: {ww.message}",
+                     level="alert",
+                     category="warning"))
+             self.finally_close = True
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, *args):
+         # close the file
+         if self.finally_close and hasattr(self.ds, "__exit__"):
+             self.ds.__exit__(*args)
+
+     @property
+     def has_fluorescence(self):
+         if ("fluorescence" in self.ds
+                 or "fl1_max" in self.ds
+                 or "fl2_max" in self.ds
+                 or "fl3_max" in self.ds):
+             fl = True
+         else:
+             fl = False
+         return fl
+
+     def check(self, **kwargs):
+         """Run all checks
+
+         This method calls all class methods that start with `check_`.
+         `kwargs` are passed to all methods. Possible options are:
+
+         - "expand_section" `bool`: add a cue for every missing
+           metadata key if a section is missing
+         """
+         cues = []
+         funcs = IntegrityChecker.__dict__
+         if not len(self.ds) == np.sum(self.ds.filter.all):
+             raise NotImplementedError(
+                 "Integrity checks for datasets with active event filters "
+                 "are not supported!")
+         elif self.ds.__class__ == RTDC_Hierarchy:
+             raise NotImplementedError(
+                 "Integrity checks for 'RTDC_Hierarchy' instances are "
+                 "not supported!")
+         for ff in sorted(funcs.keys()):
+             if ff.startswith("check_fl_") and not self.has_fluorescence:
+                 # skip
+                 continue
+             elif ff.startswith("check_"):
+                 cues += funcs[ff](self, **kwargs)
+         return sorted(self.warn_cues + cues)
+
+     def check_basin_features_internal(self, **kwargs):
+         """Check whether internal basin features are properly defined"""
+         cues = []
+         basins = self.ds.basins_get_dicts()
+         for bn in basins:
+             if bn["type"] == "internal":
+                 bpaths = bn["paths"]
+                 if bpaths != ["basin_events"]:
+                     cues.append(
+                         ICue(msg=f"Uncommon internal basin path: {bpaths}",
+                              level="alert",
+                              category="basin data",
+                              ))
+                 else:
+                     if "basin_events" not in self.ds.h5file:
+                         cues.append(
+                             ICue(msg="Missing internal basin group "
+                                      "'basin_events', although an internal "
+                                      "basin is defined",
+                                  level="violation",
+                                  category="basin data",
+                                  ))
+                     else:
+                         for feat in bn["features"]:
+                             if feat not in self.ds.h5file["basin_events"]:
+                                 cues.append(
+                                     ICue(msg=f"Missing internal basin "
+                                              f"feature {feat}",
+                                          level="violation",
+                                          category="basin data",
+                                          ))
+         return cues
+
+     def check_compression(self, **kwargs):
+         cues = []
+         if self.ds.format == "tdms":
+             compression = "None"
+             data = {"compressed": 0,
+                     "total": 1,
+                     "uncompressed": 0,
+                     }
+         elif self.ds.format == "hdf5":
+             def iter_count_compression(h5):
+                 comp_i = 0
+                 noco_i = 0
+                 for key in h5:
+                     obj = h5[key]
+                     if isinstance(obj, h5py.Dataset):
+                         if is_properly_compressed(obj):
+                             # data are compressed with at least level 5
+                             comp_i += 1
+                         else:
+                             # no compression
+                             noco_i += 1
+                     elif isinstance(obj, h5py.Group):
+                         coi, noi = iter_count_compression(obj)
+                         comp_i += coi
+                         noco_i += noi
+                     else:
+                         raise ValueError(f"Unknown object: {obj}")
+                 return comp_i, noco_i
+
+             comp, noco = iter_count_compression(self.ds.h5file)
+             if noco == 0:
+                 compression = "All"
+             elif comp == 0:
+                 compression = "None"
+             else:
+                 compression = f"Partial ({comp} of {noco+comp})"
+             data = {"compressed": comp,
+                     "total": noco + comp,
+                     "uncompressed": noco,
+                     }
+         else:
+             compression = "Unknown"
+             data = None
+         cues.append(ICue(
+             msg=f"Compression: {compression}",
+             level="info",
+             category="general",
+             data=data))
+         return cues
+
+     def check_empty(self, **kwargs):
+         """The dataset should contain events"""
+         cues = []
+         lends = len(self.ds)
+         if lends == 0:
+             cues.append(ICue(
+                 msg="The dataset does not contain any events",
+                 level="alert",
+                 category="feature data"))
+         return cues
+
+     def check_external_links(self, **kwargs):
+         """An HDF5 dataset should not contain external links"""
+         cues = []
+         if isinstance(self.ds, RTDC_HDF5):
+             has_external, h5object = hdf5_has_external(self.ds.h5file)
+             if has_external:
+                 cues.append(ICue(
+                     msg=f"The HDF5 file contains at least one external "
+                         f"link: '{h5object}'",
+                     level="violation",
+                     category="format HDF5"))
+         return cues
+
+     def check_feat_index(self, **kwargs):
+         """The index of the dataset should be monotonous"""
+         cues = []
+         lends = len(self.ds)
+         if "index" in self.ds:
+             if not np.all(self.ds["index"] == np.arange(1, lends + 1)):
+                 cues.append(ICue(
+                     msg="The index feature is not enumerated correctly",
+                     level="violation",
+                     category="feature data"))
+         return cues
+
+     def check_feature_size(self, **kwargs):
+         cues = []
+         lends = len(self.ds)
+         for feat in self.ds.features_innate:
+             if feat == "trace":
+                 for tr in list(self.ds["trace"].keys()):
+                     if len(self.ds["trace"][tr]) != lends:
+                         cues.append(ICue(
+                             msg=f"Features: wrong event count: 'trace/{tr}' "
+                                 + f"({len(self.ds['trace'][tr])} of {lends})",
+                             level="violation",
+                             category="feature size"))
+             else:
+                 if len(self.ds[feat]) != lends:
+                     cues.append(ICue(
+                         msg=f"Features: wrong event count: '{feat}' "
+                             + f"({len(self.ds[feat])} of {lends})",
+                         level="violation",
+                         category="feature size"))
+         return cues
+
+     def check_features_unknown_hdf5(self, **kwargs):
+         """Check for features that are not defined in dclab
+
+         The idea here is to make sure that third-party software is made
+         aware of the fact that it is storing unknown features in an HDF5
+         file. If this is the case, then this will e.g. raise an error in
+         DCOR-Aid, such that people don't lose new features upon upload.
+         """
+         # Ignored unknown features are features that are actually known
+         # but omitting them is not a big deal.
+         ignore_unknown_features = [
+             "def",  # An old Shape-In version stored "def" instead of "deform"
+         ]
+         cues = []
+         if self.ds.format == "hdf5":
+             for feat in self.ds.h5file["events"]:
+                 if not dfn.feature_exists(feat):
+                     if feat in ignore_unknown_features:
+                         continue
+                     cues.append(ICue(
+                         msg=f"Features: Unknown key '{feat}'",
+                         level="violation",
+                         category="feature unknown"))
+         return cues
+
+     def check_fl_metadata_channel_names(self, **kwargs):
+         cues = []
+         for ii in range(1, 4):
+             chn = f"channel {ii} name"
+             fli = f"fl{ii}_max"
+             if (fli in self.ds
+                     and chn not in self.ds.config["fluorescence"]):
+                 # Channel names must be defined when there is
+                 # a corresponding fluorescence signal.
+                 cues.append(ICue(
+                     msg=f"Metadata: Missing key [fluorescence] '{chn}'",
+                     level="alert",
+                     category="metadata missing",
+                     cfg_section="fluorescence",
+                     cfg_key=chn))
+             elif (fli not in self.ds
+                     and chn in self.ds.config["fluorescence"]):
+                 # Channel names must not be defined when there is
+                 # no corresponding fluorescence signal.
+                 cues.append(ICue(
+                     msg=f"Metadata: Unused key defined [fluorescence] '{chn}'",
+                     level="alert",
+                     category="metadata invalid",
+                     cfg_section="fluorescence",
+                     cfg_key=chn))
+         return cues
+
+     def check_fl_num_channels(self, **kwargs):
+         cues = []
+         # check for number of channels
+         if "channel count" in self.ds.config["fluorescence"]:
+             chc1 = self.ds.config["fluorescence"]["channel count"]
+             chc2 = 0
+             for ii in range(1, 4):
+                 chn = f"channel {ii} name"
+                 ecn = f"fl{ii}_max"
+                 if (chn in self.ds.config["fluorescence"] and
+                         ecn in self.ds._events):
+                     chc2 += 1
+             if chc1 != chc2:
+                 cues.append(ICue(
+                     msg="Metadata: fluorescence channel count inconsistent",
+                     level="violation",
+                     category="metadata wrong",
+                     cfg_section="fluorescence",
+                     cfg_key="channel count"))
+         return cues
+
+     def check_fl_num_lasers(self, **kwargs):
+         cues = []
+         # check for number of lasers
+         if "laser count" in self.ds.config["fluorescence"]:
+             lsc1 = self.ds.config["fluorescence"]["laser count"]
+             lsc2 = 0
+             for ii in range(1, 4):
+                 kl = f"laser {ii} lambda"
+                 kp = f"laser {ii} power"
+                 if (kl in self.ds.config["fluorescence"] and
+                         kp in self.ds.config["fluorescence"] and
+                         self.ds.config["fluorescence"][kp] != 0):
+                     lsc2 += 1
+             if lsc1 != lsc2:
+                 cues.append(ICue(
+                     msg="Metadata: fluorescence laser count inconsistent",
+                     level="violation",
+                     category="metadata wrong",
+                     cfg_section="fluorescence",
+                     cfg_key="laser count"))
+         return cues
+
+     def check_fl_samples_per_event(self, **kwargs):
+         cues = []
+         # check for samples per event
+         if "samples per event" in self.ds.config["fluorescence"]:
+             spe = self.ds.config["fluorescence"]["samples per event"]
+             if "trace" in self.ds:
+                 for key in self.ds["trace"].keys():
+                     spek = self.ds["trace"][key][0].size
+                     if spek != spe:
+                         cues.append(ICue(
+                             msg="Metadata: wrong number of samples per "
+                                 + f"event: {key} (expected {spe}, got {spek})",
+                             level="violation",
+                             category="metadata wrong",
+                             cfg_section="fluorescence",
+                             cfg_key="samples per event"))
+         return cues
+
+     def check_fl_max_positive(self, **kwargs):
+         """Check if all fl?_max values are >0.1"""
+         cues = []
+         neg_feats = []
+         for fl in ['fl1_max', 'fl2_max', 'fl3_max']:
+             if fl in self.ds:
+                 if min(self.ds[fl]) <= 0.1:
+                     neg_feats.append(fl)
+         if neg_feats:
+             cues.append(ICue(
+                 msg=f"Negative value for feature(s): {', '.join(neg_feats)}",
+                 level="alert",
+                 category="feature data"))
+         return cues
+
+     def check_fl_max_ctc_positive(self, **kwargs):
+         """Check if all fl?_max_ctc values are > 0.1"""
+         cues = []
+         neg_feats = []
+         for fl in ['fl1_max_ctc', 'fl2_max_ctc', 'fl3_max_ctc']:
+             if fl in self.ds:
+                 if min(self.ds[fl]) <= 0.1:
+                     neg_feats.append(fl)
+         if neg_feats:
+             cues.append(ICue(
+                 msg=f"Negative value for feature(s): {', '.join(neg_feats)}",
+                 level="alert",
+                 category="feature data"))
+         return cues
+
+     def check_flow_rate(self, **kwargs):
+         """Make sure sheath and sample flow rates add up"""
+         cues = []
+         if ("setup" in self.ds.config
+                 and "flow rate" in self.ds.config["setup"]
+                 and "flow rate sample" in self.ds.config["setup"]
+                 and "flow rate sheath" in self.ds.config["setup"]):
+             frsum = self.ds.config["setup"]["flow rate"]
+             frsam = self.ds.config["setup"]["flow rate sample"]
+             frshe = self.ds.config["setup"]["flow rate sheath"]
+             if not np.allclose(frsum, frsam + frshe):
+                 for k in ["flow rate", "flow rate sheath", "flow rate sample"]:
+                     cues.append(ICue(
+                         msg="Metadata: Flow rates don't add up (sheath "
+                             + f"{frshe:g} + sample {frsam:g} "
+                             + f"!= channel {frsum:g})",
+                         level="alert",
+                         category="metadata wrong",
+                         cfg_section="setup",
+                         cfg_key=k))
+         return cues
+
+     def check_fmt_hdf5(self, **kwargs):
+         cues = []
+         # hdf5-based checks
+         if self.ds.format == "hdf5":
+             # check meta data of images
+             for feat in ["image", "image_bg", "mask"]:
+                 if feat in self.ds._events:
+                     imdat = self.ds[feat]
+                     for key, val in [['CLASS', b'IMAGE'],
+                                      ['IMAGE_VERSION', b'1.2'],
+                                      ['IMAGE_SUBCLASS', b'IMAGE_GRAYSCALE']]:
+                         if key not in imdat.attrs:
+                             cues.append(ICue(
+                                 msg=f"HDF5: '/{feat}': missing "
+                                     + f"attribute '{key}'",
+                                 level="alert",
+                                 category="format HDF5"))
+                         elif imdat.attrs.get_id(key).dtype.char != "S":
+                             cues.append(ICue(
+                                 msg=f"HDF5: '/{feat}': attribute '{key}' "
+                                     + "should be fixed-length ASCII string",
+                                 level="alert",
+                                 category="format HDF5"))
+                         elif imdat.attrs[key] != val:
+                             cues.append(ICue(
+                                 msg=f"HDF5: '/{feat}': attribute '{key}' "
+                                     + f"should have value '{val}'",
+                                 level="alert",
+                                 category="format HDF5"))
+             # check length of logs
+             with h5py.File(self.ds.path, mode="r") as h5:
+                 if "logs" in h5:
+                     logs = h5["logs"]
+                     for logname in logs.keys():
+                         # ignore tmds meta data log files
+                         lign = [logname.endswith(n) for n in IGNORED_LOG_NAMES]
+                         if sum(lign):
+                             continue
+                         log = logs[logname]
+                         for ii in range(len(log)):
+                             if len(log[ii]) > LOG_MAX_LINE_LENGTH:
+                                 cues.append(ICue(
+                                     msg=f"Logs: {logname} line {ii} "
+                                         + "exceeds maximum line length "
+                                         + f"{LOG_MAX_LINE_LENGTH}",
+                                     level="alert",
+                                     category="format HDF5"))
+         return cues
+
+     def check_info(self, **kwargs):
+         cues = [
+             ICue(
+                 msg=f"Fluorescence: {self.has_fluorescence}",
+                 level="info",
+                 category="general"),
+             ICue(
+                 msg=f"Data file format: {self.ds.format}",
+                 level="info",
+                 category="general")]
+         return cues
+
+     def check_metadata_bad(self, **kwargs):
+         cues = []
+         # check for ROI size
+         if ("imaging" in self.ds.config
+                 and "roi size x" in self.ds.config["imaging"]
+                 and "roi size y" in self.ds.config["imaging"]):
+             for ii, roi in enumerate(["roi size y", "roi size x"]):
+                 for feat in ["image", "image_bg", "mask"]:
+                     if feat in self.ds:
+                         soll = self.ds[feat].shape[ii+1]
+                         ist = self.ds.config["imaging"][roi]
+                         if soll != ist:
+                             cues.append(ICue(
+                                 msg="Metadata: Mismatch [imaging] "
+                                     + f"'{roi}' and feature {feat} "
+                                     + f"({ist} vs {soll})",
+                                 level="violation",
+                                 category="metadata wrong",
+                                 cfg_section="imaging",
+                                 cfg_key=roi))
+         return cues
+
+     def check_metadata_bad_greater_zero(self, **kwargs):
+         cues = []
+         # check for ROI size
+         for sec, key in [
+             ["imaging", "frame rate"],
+             ["imaging", "pixel size"],
+             ["setup", "channel width"],
+             ["setup", "flow rate"],
+         ]:
+             value = self.ds.config.get(sec, {}).get(key)
+             if value is not None and value <= 0:
+                 cues.append(ICue(
+                     msg=f"Metadata: Invalid value for [{sec}] '{key}': "
+                         + f"'{value}'!",
+                     level="violation",
+                     category="metadata wrong",
+                     cfg_section=sec,
+                     cfg_key=key))
+         return cues
+
+     def check_metadata_choices(self, **kwargs):
+         cues = []
+         for sec in VALID_CHOICES:
+             for key in VALID_CHOICES[sec]:
+                 if sec in self.ds.config and key in self.ds.config[sec]:
+                     val = self.ds.config[sec][key]
+                     if val not in VALID_CHOICES[sec][key]:
+                         cues.append(ICue(
+                             msg=f"Metadata: Invalid value [{sec}] {key}: "
+                                 + f"'{val}'",
+                             level="violation",
+                             category="metadata wrong",
+                             cfg_section=sec,
+                             cfg_key=key,
+                             cfg_choices=VALID_CHOICES[sec][key]))
+         return cues
+
+     def check_metadata_hdf5_type(self, **kwargs):
+         """This is a low-level HDF5 check"""
+         cues = []
+         if self.ds.format == "hdf5":
+             for entry in self.ds.h5file.attrs:
+                 if entry.count(":"):
+                     sec, key = entry.split(":")
+                     val_act = self.ds.h5file.attrs[entry]  # actual value
+                     if isinstance(val_act, bytes):
+                         val_act = val_act.decode("utf-8")
+                     # Check whether the config key exists
+                     if dfn.config_key_exists(sec, key):
+                         func = dfn.get_config_value_func(sec, key)
+                         val_exp = func(val_act)  # expected value
+                         if (isinstance(val_exp, (list, tuple, np.ndarray))
+                                 and np.allclose(val_exp, val_act)):
+                             continue
+                         elif val_exp == val_act:
+                             continue
+                         else:
+                             cues.append(ICue(
+                                 msg=f"Metadata: [{sec}]: '{key}' should be "
+                                     + f"'{val_exp}', but is '{val_act}'!",
+                                 level="alert",
+                                 category="metadata wrong",
+                                 cfg_section=sec,
+                                 cfg_key=key))
+         return cues
+
+     def check_metadata_online_filter_polygon_points_shape(self, **kwargs):
+         cues = []
+         if "online_filter" in self.ds.config:
+             for key in self.ds.config["online_filter"].keys():
+                 if key.endswith("polygon points"):
+                     points = self.ds.config["online_filter"][key]
+                     if points.shape[1] != 2 or points.shape[0] < 3:
+                         cues.append(ICue(
+                             msg="Metadata: Wrong shape [online_filter] "
+                                 + f"{key}: '{points.shape}'",
+                             level="violation",
+                             category="metadata wrong",
+                             cfg_section="online_filter",
+                             cfg_key=key))
+         return cues
+
+     def check_metadata_missing(self, expand_section=True, **kwargs):
+         cues = []
+         # These "must" be present:
+         important = copy.deepcopy(IMPORTANT_KEYS)
+         if self.has_fluorescence:
+             important.update(IMPORTANT_KEYS_FL)
+         # A list of sections we would like to investigate
+         secs_investiage = list(set(important.keys()) | set(DESIRABLE_SECTIONS))
+
+         for sec in secs_investiage:
+             if sec not in self.ds.config and not expand_section:
+                 cues.append(ICue(
+                     msg=f"Metadata: Missing section '{sec}'",
+                     level="violation" if sec in important else "alert",
+                     category="metadata missing",
+                     cfg_section=sec))
+             else:
+                 for key in dfn.config_keys[sec]:
+                     if key not in self.ds.config[sec]:
+                         if sec in OPTIONAL_KEYS and key in OPTIONAL_KEYS[sec]:
+                             # ignore this key
+                             continue
+                         elif sec in important and key in important[sec]:
+                             level = "violation"
+                         else:
+                             level = "alert"
+                         cues.append(ICue(
+                             msg=f"Metadata: Missing key [{sec}] '{key}'",
+                             level=level,
+                             category="metadata missing",
+                             cfg_section=sec,
+                             cfg_key=key))
+         # check for temperature
+         if "temp" in self.ds:
+             if "temperature" not in self.ds.config["setup"]:
+                 cues.append(ICue(
+                     msg="Metadata: Missing key [setup] 'temperature', "
+                         + "because the 'temp' feature is given",
+                     level="alert",
+                     category="metadata missing",
+                     cfg_section="setup",
+                     cfg_key="temperature"))
+         return cues
+
+     def check_ml_class(self, **kwargs):
+         """Try to comput ml_class feature and display error message"""
+         cues = []
+         if "ml_class" in self.ds:
+             try:
+                 self.ds["ml_class"]
+             except ValueError as e:
+                 cues.append(ICue(
+                     msg=e.args[0],
+                     level="violation",
+                     category="feature data"))
+         return cues
+
+     def check_shapein_issue3_bad_medium(self, **kwargs):
+         """Some versions of Shape-In stored wrong [setup]: medium
+
+         The problem only affects selection of "CellCarrier" which had
+         index 0 and as a result "CellCarrierB" was written to the file.
+         This means we only have to check for the Shape-In version and
+         whether the medium is 'CellCarrierB'. In DCKit, the user can
+         then manually edit the medium.
+
+         Affected Shape-In versions: >=2.2.1.0,<2.2.2.3
+         Also affected if bad config file was used: 2.2.2.4, 2.3.0.0
+
+         https://github.com/ZELLMECHANIK-DRESDEN/ShapeIn_Issues/issues/3
+         """
+         cues = []
+         medium = self.ds.config["setup"].get("medium", "")
+         si_ver = self.ds.config["setup"].get("software version", "")
+         si_ver = si_ver.strip("dev")  # for e.g. "2.2.1.0dev"
+         if (medium == "CellCarrierB"
+                 and si_ver in ["2.2.1.0", "2.2.2.0", "2.2.2.1", "2.2.2.2",
+                                "2.2.2.3",
+                                # The issue can still occur for these versions
+                                # of Shape-In if a bad configuration file
+                                # is used.
+                                "2.2.2.4", "2.3.0.0"]):
+             cues.append(ICue(
+                 msg="Metadata: Please verify that 'medium' is really "
+                     + "'CellCarrierB' (Shape-In issue #3)",
+                 level="alert",
+                 category="metadata wrong",
+                 identifier="Shape-In issue #3",
+                 cfg_section="setup",
+                 cfg_key="medium"))
+         return cues
+
+     def check_temperature_zero_zmd(self, **kwargs):
+         """If there is a loose cable, then temperature might be all-zero
+
+         https://github.com/DC-analysis/dclab/issues/183
+         """
+         cues = []
+         from_zmd = "ZMD" in self.ds.config["setup"].get("identifier", "")
+         if from_zmd and "temp" in self.ds:
+             temp = self.ds["temp"]
+             if np.allclose(temp[:10], 0) and np.allclose(temp, 0):
+                 cues.append(ICue(
+                     msg="Feature: The 'temp' feature is all-zero, check "
+                         "the cables of the temperature sensor!",
+                     level="violation",
+                     category="feature data"))
+         return cues
+
+     def sanity_check(self):
+         """Sanity check that tests whether the data can be accessed"""
+         cues = []
+         cues += self.check_feature_size()
+         cues += self.check_metadata_bad()
+         for cue in cues:
+             cue.msg = "Sanity check failed: " + cue.msg
+         return [ci for ci in cues if ci.level == "violation"]
+
+
+ def check_dataset(path_or_ds):
+     """Check whether a dataset is complete
+
+     Parameters
+     ----------
+     path_or_ds: str or pathlib.Path or RTDCBase
+         Full path to a dataset on disk or an instance of RTDCBase
+
+     Returns
+     -------
+     violations: list of str
+         Dataset format violations (hard)
+     alerts: list of str
+         Dataset format alerts (soft)
+     info: list of str
+         Dataset information
+     """
+     aler = []
+     info = []
+     viol = []
+     with IntegrityChecker(path_or_ds) as ic:
+         # perform all checks
+         icues = ic.check(expand_section=False)
+         for cue in icues:
+             if cue.level == "info":
+                 info.append(cue.msg)
+             elif cue.level == "alert":
+                 aler.append(cue.msg)
+             elif cue.level == "violation":
+                 viol.append(cue.msg)
+     return sorted(viol), sorted(aler), sorted(info)
+
+
+ def hdf5_has_external(h5):
+     """Check recursively, whether an h5py object contains external data
+
+     External data includes binary data in external files, virtual
+     datasets, and external links.
+
+     Returns a tuple of either
+
+     - `(True, path_ext)` if the object contains external data
+     - `(False, None)` if this is not the case
+
+     where `path_ext` is the path to the group or dataset in `h5`.
+
+     .. versionadded:: 0.62.0
+
+     """
+     for key in h5:
+         obj = h5[key]
+         if (obj.file != h5.file  # not in same file
+                 or (isinstance(obj, h5py.Dataset)
+                     and (obj.is_virtual  # virtual dataset
+                          or obj.external))):  # external dataset
+             # These are external data
+             return True, f"{h5.name}/{key}".replace("//", "/")
+         elif isinstance(obj, h5py.Group):
+             # Perform recursive check for external data
+             has_ext, path_ext = hdf5_has_external(obj)
+             if has_ext:
+                 return True, path_ext
+     else:
+         return False, None
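
For orientation, the check API added in this file can be driven as in the minimal sketch below. It only uses names defined in the diff above (check_dataset, IntegrityChecker, ICue.get_level_summary); the .rtdc path is a placeholder, and the import path simply mirrors the file location dclab/rtdc_dataset/check.py.

    from dclab.rtdc_dataset import check

    # High-level wrapper: three sorted lists of messages
    violations, alerts, info = check.check_dataset("/path/to/data.rtdc")
    print("violations:", violations)
    print("alerts:", alerts)
    print("info:", info)

    # Lower-level access to the individual ICue objects
    with check.IntegrityChecker("/path/to/data.rtdc") as ic:
        cues = ic.check(expand_section=False)
        print(check.ICue.get_level_summary(cues))

The CLI task dclab/cli/task_verify_dataset.py listed above appears to expose the same functionality from the command line via the package's entry points; the sketch sticks to the Python API shown in the diff.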