dclab-0.62.17-cp39-cp39-macosx_11_0_arm64.whl → dclab-0.67.3-cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. dclab/_version.py +16 -3
  2. dclab/cli/task_tdms2rtdc.py +1 -1
  3. dclab/cli/task_verify_dataset.py +3 -3
  4. dclab/definitions/__init__.py +1 -1
  5. dclab/definitions/feat_const.py +6 -4
  6. dclab/definitions/feat_logic.py +27 -28
  7. dclab/downsampling.cpython-39-darwin.so +0 -0
  8. dclab/downsampling.pyx +12 -7
  9. dclab/external/skimage/_find_contours_cy.cpython-39-darwin.so +0 -0
  10. dclab/external/skimage/_pnpoly.cpython-39-darwin.so +0 -0
  11. dclab/external/skimage/_shared/geometry.cpython-39-darwin.so +0 -0
  12. dclab/features/bright.py +11 -2
  13. dclab/features/bright_bc.py +13 -2
  14. dclab/features/bright_perc.py +10 -2
  15. dclab/features/contour.py +12 -7
  16. dclab/features/emodulus/__init__.py +33 -27
  17. dclab/features/emodulus/load.py +8 -6
  18. dclab/features/emodulus/pxcorr.py +33 -15
  19. dclab/features/emodulus/scale_linear.py +79 -52
  20. dclab/features/emodulus/viscosity.py +31 -19
  21. dclab/features/fl_crosstalk.py +19 -10
  22. dclab/features/inert_ratio.py +18 -11
  23. dclab/features/volume.py +24 -14
  24. dclab/http_utils.py +1 -1
  25. dclab/kde/base.py +238 -14
  26. dclab/kde/methods.py +33 -12
  27. dclab/rtdc_dataset/config.py +1 -1
  28. dclab/rtdc_dataset/core.py +22 -8
  29. dclab/rtdc_dataset/export.py +171 -34
  30. dclab/rtdc_dataset/feat_basin.py +250 -33
  31. dclab/rtdc_dataset/fmt_dcor/api.py +69 -7
  32. dclab/rtdc_dataset/fmt_dcor/base.py +103 -4
  33. dclab/rtdc_dataset/fmt_dcor/logs.py +1 -1
  34. dclab/rtdc_dataset/fmt_dcor/tables.py +1 -1
  35. dclab/rtdc_dataset/fmt_hdf5/events.py +20 -1
  36. dclab/rtdc_dataset/fmt_hierarchy/base.py +1 -1
  37. dclab/rtdc_dataset/fmt_s3.py +29 -10
  38. dclab/rtdc_dataset/fmt_tdms/event_trace.py +1 -1
  39. dclab/rtdc_dataset/fmt_tdms/naming.py +1 -1
  40. dclab/rtdc_dataset/writer.py +43 -11
  41. dclab/statistics.py +27 -4
  42. dclab/warn.py +1 -1
  43. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/METADATA +26 -4
  44. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/RECORD +48 -48
  45. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/WHEEL +1 -1
  46. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/entry_points.txt +0 -0
  47. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/licenses/LICENSE +0 -0
  48. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/top_level.txt +0 -0
--- a/dclab/rtdc_dataset/export.py
+++ b/dclab/rtdc_dataset/export.py
@@ -39,6 +39,10 @@ class LimitingExportSizeWarning(UserWarning):
     pass
 
 
+class ContourNotExportedWarning(UserWarning):
+    pass
+
+
 class Export(object):
     def __init__(self, rtdc_ds):
         """Export functionalities for RT-DC datasets"""
@@ -51,6 +55,7 @@ class Export(object):
             pixel_format: str = "yuv420p",
             codec: str = "rawvideo",
             codec_options: dict[str, str] = None,
+            progress_callback: callable = None,
             ):
         """Exports filtered event images to a video file
 
@@ -72,6 +77,10 @@ class Export(object):
         codec_options:
            Additional arguments to give to the codec using ffmpeg,
            e.g. `{'preset': 'slow', 'crf': '0'}` for "libx264" codec.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         Notes
         -----
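Note: the new `progress_callback` parameter follows the same contract in all four exporters (`avi`, `fcs`, `hdf5`, `tsv`): it is called with a float between 0 and 1 and a short status string. A minimal sketch of a compatible callback; the input file name is hypothetical:

    import dclab

    def print_progress(fraction, message):
        # fraction is in [0, 1]; message describes the current step
        print(f"[{fraction:7.2%}] {message}")

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    ds.export.avi("movie.avi", progress_callback=print_progress)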
@@ -103,6 +112,10 @@ class Export(object):
 
             # write the filtered frames to the video file
             for evid in np.arange(len(ds)):
+
+                if progress_callback is not None and evid % 10_000 == 0:
+                    progress_callback(evid / len(ds), "exporting video")
+
                 # skip frames that were filtered out
                 if filtered and not ds.filter.all[evid]:
                     continue
@@ -116,12 +129,22 @@ class Export(object):
 
                 for packet in stream.encode(av_frame):
                     container.mux(packet)
+
+            if progress_callback is not None:
+                progress_callback(1.0, "video export complete")
+
         else:
             msg = "No image data to export: dataset {} !".format(ds.title)
             raise OSError(msg)
 
-    def fcs(self, path, features, meta_data=None, filtered=True,
-            override=False):
+    def fcs(self,
+            path: pathlib.Path | str,
+            features: list[str],
+            meta_data: dict = None,
+            filtered: bool = True,
+            override: bool = False,
+            progress_callback: callable = None,
+            ):
         """Export the data of an RT-DC dataset to an .fcs file
 
         Parameters
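Note: the `fcs` signature is now fully annotated and gains `progress_callback`. A hedged usage sketch (file and feature names chosen for illustration; the optional `fcswrite` dependency must be installed):

    import dclab

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    ds.export.fcs("events.fcs",
                  features=["area_um", "deform"],
                  filtered=True,
                  progress_callback=lambda frac, msg: print(frac, msg))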
@@ -142,6 +165,10 @@ class Export(object):
         override: bool
             If set to `True`, an existing file ``path`` will be overridden.
             If set to `False`, raises `OSError` if ``path`` exists.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         Notes
         -----
@@ -175,12 +202,18 @@ class Export(object):
         # Collect the header
         chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]
 
+        if progress_callback is not None:
+            progress_callback(0.0, "collecting data")
+
         # Collect the data
         if filtered:
             data = [ds[c][ds.filter.all] for c in features]
         else:
             data = [ds[c] for c in features]
 
+        if progress_callback is not None:
+            progress_callback(0.5, "exporting data")
+
         data = np.array(data).transpose()
         meta_data["dclab version"] = version
         fcswrite.write_fcs(filename=str(path),
@@ -189,6 +222,9 @@ class Export(object):
                            text_kw_pr=meta_data,
                            )
 
+        if progress_callback is not None:
+            progress_callback(1.0, "export complete")
+
     def hdf5(self,
              path: str | pathlib.Path,
              features: List[str] = None,
@@ -196,11 +232,14 @@ class Export(object):
              logs: bool = False,
              tables: bool = False,
              basins: bool = False,
+             allow_contour: bool = False,
              meta_prefix: str = "src_",
              override: bool = False,
             compression_kwargs: Dict = None,
             compression: str = "deprecated",
-             skip_checks: bool = False):
+             skip_checks: bool = False,
+             progress_callback: callable = None,
+             ):
         """Export the data of the current instance to an HDF5 file
 
         Parameters
@@ -226,6 +265,14 @@ class Export(object):
            Whether to export basins. If filtering is disabled, basins
            are copied directly to the output file. If filtering is enabled,
            then mapped basins are exported.
+        allow_contour: bool
+            Whether to allow exporting the "contour" feature. Writing this
+            feature to an HDF5 file is extremely inefficient, because it
+            cannot be represented by an ND array and thus must be stored
+            in a group, each contour stored in a separate dataset. The
+            contour can easily be computed via the mask, so actually storing
+            the contour should be avoided. If "contour" is in `features`,
+            it will only be written to the output file if `allow_contour=True`.
         meta_prefix: str
            Prefix for log and table names in the exported file
         override: bool
@@ -234,8 +281,8 @@ class Export(object):
         compression_kwargs: dict
            Dictionary with the keys "compression" and "compression_opts"
            which are passed to :func:`h5py.H5File.create_dataset`. The
-           default is Zstandard compression with the lowest compression
-           level `hdf5plugin.Zstd(clevel=1)`.
+           default is Zstandard compression with the compression
+           level 5 `hdf5plugin.Zstd(clevel=5)`.
         compression: str or None
            Compression method used for data storage;
            one of [None, "lzf", "gzip", "szip"].
@@ -244,7 +291,10 @@ class Export(object):
            Use `compression_kwargs` instead.
         skip_checks: bool
            Disable checking whether all features have the same length.
-
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
 
         .. versionchanged:: 0.58.0
 
@@ -263,7 +313,7 @@ class Export(object):
             # be backwards-compatible
             compression_kwargs = {"compression": compression}
         if compression_kwargs is None:
-            compression_kwargs = hdf5plugin.Zstd(clevel=1)
+            compression_kwargs = hdf5plugin.Zstd(clevel=5)
         path = pathlib.Path(path)
         # Make sure that path ends with .rtdc
         if path.suffix not in [".rtdc", ".rtdc~"]:
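Note: the default compression changed from `hdf5plugin.Zstd(clevel=1)` to `hdf5plugin.Zstd(clevel=5)`. Callers who prefer the previous speed-over-size trade-off can pass the old setting explicitly; a sketch with hypothetical file names:

    import dclab
    import hdf5plugin

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file
    # restore the pre-0.67 default: fastest Zstandard level
    ds.export.hdf5("out.rtdc", compression_kwargs=hdf5plugin.Zstd(clevel=1))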
@@ -281,8 +331,25 @@ class Export(object):
         # for convenience
         ds = self.rtdc_ds
 
+        # remove contour information from user-specified features
+        if "contour" in (features or []) and not allow_contour:
+            features = list(features)
+            features.remove("contour")
+            warnings.warn(
+                "Feature 'contour' not exported to output file, because "
+                "`allow_contour` is `False`. If you really need the "
+                "'contour' feature in the output file (unlikely, unless you "
+                "are venturing outside the DC Cosmos), you must set "
+                "`allow_contour=True`. Otherwise, you can safely ignore "
+                "this warning or silence it by not providing 'contour' in "
+                "`features`.",
+                ContourNotExportedWarning)
+
         if features is None:
             features = ds.features_innate
+            # silently remove contour information
+            if "contour" in features and not allow_contour:
+                features.remove("contour")
 
         # decide which metadata to export
         meta = {}
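Note: with `allow_contour=False` (the default), requesting "contour" now triggers `ContourNotExportedWarning` instead of silently writing the inefficient per-event datasets. A sketch of both ways to handle this; file names are hypothetical:

    import warnings
    import dclab
    from dclab.rtdc_dataset.export import ContourNotExportedWarning

    ds = dclab.new_dataset("measurement.rtdc")  # hypothetical input file

    # Opt in explicitly if the contour feature is really required...
    ds.export.hdf5("with_contour.rtdc", features=["contour", "mask"],
                   allow_contour=True)

    # ...or export without it and silence the warning.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ContourNotExportedWarning)
        ds.export.hdf5("no_contour.rtdc", features=["contour", "mask"])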
@@ -297,8 +364,8 @@ class Export(object):
         # Define a new measurement identifier, so that we are not running
         # into any problems with basins being defined for filtered data.
         ds_run_id = ds.get_measurement_identifier()
-        random_ap = str(uuid.uuid4())[:4]
-        meta["experiment"]["run identifier"] = f"{ds_run_id}-{random_ap}"
+        random_ap = f"dclab-{str(uuid.uuid4())[:7]}"
+        meta["experiment"]["run identifier"] = f"{ds_run_id}_{random_ap}"
 
         if filtered:
             filter_arr = ds.filter.all
@@ -335,6 +402,8 @@ class Export(object):
         with RTDCWriter(path,
                         mode="append",
                         compression_kwargs=compression_kwargs) as hw:
+            if progress_callback is not None:
+                progress_callback(0.0, "writing metadata")
             # write meta data
             hw.store_metadata(meta)
 
@@ -369,7 +438,10 @@ class Export(object):
                                   ds.tables[tab])
 
             # write each feature individually
-            for feat in features:
+            for ii, feat in enumerate(features):
+                if progress_callback is not None:
+                    progress_callback(ii / len(features), f"exporting {feat}")
+
                 if (filter_arr is None or
                         # This does not work for the .tdms file format
                         # (and probably also not for DCOR).
@@ -393,6 +465,9 @@ class Export(object):
                                      filtarr=filter_arr)
 
             if basins:
+                if progress_callback:
+                    progress_callback(1 - 1 / (len(features) or 1),
+                                      "writing basins")
                 # We have to store basins. There are three options:
                 # - filtering disabled: just copy basins
                 # - filtering enabled
@@ -404,6 +479,8 @@ class Export(object):
                 basin_list = [bn.as_dict() for bn in ds.basins]
                 # In addition to the upstream basins, also store a reference
                 # to the original file from which the export was done.
+                # Get the identifier of the current dataset for the new basins.
+                basin_id = ds.get_measurement_identifier()
                 if ds.format in get_basin_classes():
                     # The dataset has a format that matches a basin format
                     # directly.
@@ -418,17 +495,13 @@ class Export(object):
                         "basin_format": ds.format,
                         "basin_locs": basin_locs,
                         "basin_descr": f"Exported with dclab {version}",
+                        "basin_id": basin_id,
                     })
                 elif (ds.format == "hierarchy"
                         and ds.get_root_parent().format in get_basin_classes()):
-                    # avoid circular imports
-                    from .fmt_hierarchy import map_indices_child2root
                     # The dataset is a hierarchy child, and it is derived
                     # from a dataset that has a matching basin format.
-                    # We have to add the indices of the root parent, which
-                    # identify the child, to the basin dictionary. Note
-                    # that additional basin filtering is applied below
-                    # this case for all basins.
+                    #
                     # For the sake of clarity I wrote this as a separate case,
                     # even if that means duplicating code from the previous
                     # case.
@@ -445,36 +518,83 @@ class Export(object):
                         "basin_locs": basin_locs,
                         "basin_descr": f"Exported with dclab {version} from a "
                                        f"hierarchy dataset",
-                        # This is where this basin differs from the basin
-                        # definition in the previous case.
-                        "basin_map": map_indices_child2root(
-                            child=ds,
-                            child_indices=np.arange(len(ds))
-                        ),
+                        # Here we do not yet treat the conversion from the
+                        # root dataset indices to the child indices,
+                        # because we will fill in the missing values below
+                        # in the basin mapping correction step.
+                        "basin_map": None,
+                        "basin_id": basin_id,
                     })
 
                 for bn_dict in basin_list:
-                    if bn_dict.get("basin_type") == "internal":
+                    if bn_dict.get("basin_format") not in get_basin_classes():
+                        # Whichever software stored this basin in the
+                        # original file, we do not support it or don't want
+                        # to break it.
+                        continue
+                    elif bn_dict.get("basin_type") == "internal":
                         # Internal basins are only valid for files they were
                         # defined in. Since we are exporting, it does not
                         # make sense to store these basins in the output file.
                         continue
+                    elif bn_dict.get("perishable"):
+                        # Perishable basins require secret keys or complicated
+                        # logic to execute in order to refresh them. We do not
+                        # store them in the output file.
+                        continue
+
+                    # Basin mapping correction: If we are filtering, or
+                    # if we are exporting from a hierarchy dataset, we have
+                    # to correct or add basin mapping arrays.
                     basinmap_orig = bn_dict.get("basin_map")
-                    if not filtered:
-                        # filtering disabled: just copy basins
-                        pass
-                    elif basinmap_orig is None:
-                        # basins with "same" mapping: create new mapping
-                        bn_dict["basin_map"] = np.where(filter_arr)[0]
+                    if ds.format == "hierarchy":
+                        # Hierarchy dataset
+                        # Compute mapping from hierarchy root.
+                        from .fmt_hierarchy import map_indices_child2root
+                        map_root = map_indices_child2root(
+                            child=ds,
+                            child_indices=np.arange(len(ds))
+                        )
+
+                        if not filtered and basinmap_orig is None:
+                            # We only have to consider the hierarchy.
+                            bn_dict["basin_map"] = map_root
+                        elif filtered and basinmap_orig is None:
+                            # Filtering must be taken into account.
+                            bn_dict["basin_map"] = map_root[filter_arr]
+                        else:
+                            # The source file has mapping defined which we
+                            # have to take into account.
+                            map_child = basinmap_orig[map_root]
+                            if filtered:
+                                # Subsetting additional filters
+                                bn_dict["basin_map"] = map_child[filter_arr]
+                            else:
+                                bn_dict["basin_map"] = map_child
                     else:
-                        # mapped basins: correct nested mapping
-                        bn_dict["basin_map"] = basinmap_orig[filter_arr]
+                        if not filtered:
+                            # filtering disabled: just copy basins
+                            pass
+                        elif filtered and basinmap_orig is None:
+                            # basins with mapping "same": create new mapping
+                            bn_dict["basin_map"] = np.where(filter_arr)[0]
+                        else:
+                            # filter the source mapping
+                            bn_dict["basin_map"] = basinmap_orig[filter_arr]
 
                     # Do not verify basins, it takes too long.
                     hw.store_basin(**bn_dict, verify=False)
+            if progress_callback is not None:
+                progress_callback(1.0, "export complete")
 
-    def tsv(self, path, features, meta_data=None, filtered=True,
-            override=False):
+    def tsv(self,
+            path: pathlib.Path | str,
+            features: list[str],
+            meta_data: dict = None,
+            filtered: bool = True,
+            override: bool = False,
+            progress_callback: callable = None,
+            ):
         """Export the data of the current instance to a .tsv file
 
         Parameters
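Note: the rewritten basin-mapping correction is plain index-array composition: `map_root` maps child rows to root rows, an existing `basinmap_orig` maps those to rows in the basin file, and `filter_arr` subsets the result. A self-contained numpy sketch with invented arrays:

    import numpy as np

    basinmap_orig = np.array([5, 7, 9, 11, 13])  # root rows -> basin-file rows
    map_root = np.array([0, 2, 4])               # child rows -> root rows
    filter_arr = np.array([True, False, True])   # filter defined on the child

    map_child = basinmap_orig[map_root]          # -> [5, 9, 13]
    basin_map = map_child[filter_arr]            # -> [5, 13]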
@@ -496,6 +616,10 @@ class Export(object):
         override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.
+        progress_callback: callable
+            Function that takes at least two arguments: float between 0 and
+            1 for monitoring progress and a string describing what is being
+            done.
         """
         if meta_data is None:
             meta_data = {}
@@ -516,6 +640,10 @@ class Export(object):
             if c not in ds.features_scalar:
                 raise ValueError("Invalid feature name {}".format(c))
         meta_data["dclab version"] = version
+
+        if progress_callback is not None:
+            progress_callback(0.0, "writing metadata")
+
         # Write BOM header
         with path.open("wb") as fd:
             fd.write(codecs.BOM_UTF8)
@@ -539,17 +667,26 @@ class Export(object):
             fd.write("# "+header2+"\n")
 
         with path.open("ab") as fd:
-            # write data
+            if progress_callback is not None:
+                progress_callback(0.1, "collecting data")
+
+            # collect data
             if filtered:
                 data = [ds[c][ds.filter.all] for c in features]
             else:
                 data = [ds[c] for c in features]
 
+            if progress_callback is not None:
+                progress_callback(0.5, "writing data")
+
             np.savetxt(fd,
                        np.array(data).transpose(),
                        fmt=str("%.10e"),
                        delimiter="\t")
 
+        if progress_callback is not None:
+            progress_callback(1.0, "export complete")
+
 
 def yield_filtered_array_stacks(data, indices):
     """Generator returning chunks with the filtered feature data