dcnum 0.23.2-py3-none-any.whl → 0.25.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


dcnum/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.23.2'
-__version_tuple__ = version_tuple = (0, 23, 2)
+__version__ = version = '0.25.1'
+__version_tuple__ = version_tuple = (0, 25, 1)
@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
             # If the writer_dq starts filling up, then this could lead to
             # an oom-kill signal. Stall for the writer to prevent this.
             if (ldq := len(self.writer_dq)) > 1000:
-                time.sleep(1)
-                ldq2 = len(self.writer_dq)
-                stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
-                time.sleep(stall_time)
+                stalled_sec = 0.
+                for ii in range(60):
+                    if len(self.writer_dq) > 200:
+                        time.sleep(.5)
+                        stalled_sec += .5
                 self.logger.warning(
-                    f"Stalled {stall_time + 1:.1f}s for slow writer "
+                    f"Stalled {stalled_sec:.1f}s due to slow writer "
                     f"({ldq} chunks queued)")
 
             unavailable_slots = 0
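
Note: the rewritten stall logic above replaces the one-shot extrapolation of the drain rate with a bounded polling loop, sleeping in 0.5 s steps for at most 60 iterations (30 s) until the writer deque drains below 200 queued chunks. A minimal standalone sketch of this backpressure pattern (the deque name and the 1000/200 thresholds mirror the hunk; the helper function itself is illustrative):

    import collections
    import time

    def stall_for_writer(writer_dq: collections.deque,
                         high_mark: int = 1000,
                         low_mark: int = 200,
                         max_iter: int = 60) -> float:
        """If the deque is above high_mark, poll until it drains
        below low_mark, sleeping at most max_iter * 0.5 seconds."""
        stalled_sec = 0.
        if len(writer_dq) > high_mark:
            for _ in range(max_iter):
                if len(writer_dq) <= low_mark:
                    break
                time.sleep(.5)
                stalled_sec += .5
        return stalled_sec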
@@ -1,8 +1,10 @@
 import abc
 import functools
 import inspect
+import logging
 import multiprocessing as mp
 import pathlib
+import time
 
 import h5py
 
@@ -41,8 +43,11 @@ class Background(abc.ABC):
         kwargs:
             Additional keyword arguments passed to the subclass.
         """
+        self.logger = logging.getLogger(
+            f"dcnum.feat.feat_background.{self.__class__.__name__}")
         # proper conversion to Path objects
         output_path = pathlib.Path(output_path)
+        self.output_path = output_path
         if isinstance(input_data, str):
             input_data = pathlib.Path(input_data)
         # kwargs checks
@@ -188,20 +193,30 @@ class Background(abc.ABC):
         return self.image_proc.value
 
     def process(self):
+        """Perform the background computation
+
+        This irreversibly removes/overrides any "image_bg" and
+        "bg_off" features defined in the output file `self.h5out`.
+        """
+        t0 = time.perf_counter()
         # Delete any old background data
-        for key in ["image_bg", "bg_off"]:
-            if key in self.h5out["events"]:
-                del self.h5out["events"][key]
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    del self.h5out[grp_key][ds_key]
         # Perform the actual background computation
        self.process_approach()
         bg_ppid = self.get_ppid()
         # Store pipeline information in the image_bg/bg_off feature
-        for key in ["image_bg", "bg_off"]:
-            if key in self.h5out["events"]:
-                self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
-                    bg_ppid
-                self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
-                    ppid.DCNUM_PPID_GENERATION
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid background"] = bg_ppid
+                    self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+        self.logger.info(
+            f"Background computation time: {time.perf_counter()-t0:.1f}s")
 
     @abc.abstractmethod
     def process_approach(self):
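
Note: `Background.process()` now clears and re-tags the "image_bg" and "bg_off" features in both the "events" group and the new "basin_events" group. A hedged sketch of how the stored pipeline attributes could be read back afterwards (the HDF5 layout is the one implied by the hunk; the helper is illustrative):

    import h5py

    def read_bg_ppid_attrs(path):
        """Collect the "dcnum ppid ..." attributes that
        Background.process() attaches to background features."""
        found = {}
        with h5py.File(path, "r") as h5:
            for grp_key in ["events", "basin_events"]:
                for ds_key in ["image_bg", "bg_off"]:
                    if grp_key in h5 and ds_key in h5[grp_key]:
                        attrs = h5[f"{grp_key}/{ds_key}"].attrs
                        found[f"{grp_key}/{ds_key}"] = (
                            attrs.get("dcnum ppid background"),
                            attrs.get("dcnum ppid generation"))
        return found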
@@ -1,4 +1,3 @@
-import logging
 import queue
 import time
 
@@ -9,14 +8,13 @@ from ...read import HDF5Data
 
 from .base import mp_spawn, Background
 
-logger = logging.getLogger(__name__)
-
 
 class BackgroundSparseMed(Background):
     def __init__(self, input_data, output_path, kernel_size=200,
                  split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                  offset_correction=True,
-                 compress=True, num_cpus=None):
+                 compress=True,
+                 num_cpus=None):
         """Sparse median background correction with cleansing
 
         In contrast to the rolling median background correction,
@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5 data compression filters). In
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
         num_cpus: int
             Number of CPUs to use for median computation. Defaults to
            `multiprocessing.cpu_count()`.
+
+        .. versionchanged:: 0.23.5
+
+            The background image data are stored as an internal
+            mapped basin to reduce the output file size.
         """
         super(BackgroundSparseMed, self).__init__(
             input_data=input_data,
@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
         )
 
         if kernel_size > len(self.input_data):
-            logger.warning(
+            self.logger.warning(
                 f"The kernel size {kernel_size} is too large for input data"
                 f"size {len(self.input_data)}. Setting it to input data size!")
             kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
         else:
             # compute time using frame rate (approximate)
             dur = self.image_count / fr * 1.5
-            logger.info(f"Approximating duration: {dur/60:.1f}min")
+            self.logger.info(
+                f"Approximating duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)
         if self.time is None:
             # No HDF5 file or no information therein; Make an educated guess.
             dur = self.image_count / 3600 * 1.5
-            logger.info(f"Guessing duration: {dur/60:.1f}min")
+            self.logger.info(f"Guessing duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)
 
@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5 data compression filters). In
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
             thresh = np.quantile(ref, self.frac_cleansing)
             used = ref <= thresh
             frac_remove = np.sum(~used) / used.size
-            logger.warning(
+            self.logger.warning(
                 f"{frac_remove_user:.1%} of the background images would "
                 f"be removed with the current settings, so we enforce "
                 f"`frac_cleansing`. To avoid this warning, try decreasing "
                 f"`thresh_cleansing` or `frac_cleansing`. The new "
                 f"threshold is {thresh_fact / thresh}.")
 
-            logger.info(f"Cleansed {frac_remove:.2%}")
+            self.logger.info(f"Cleansed {frac_remove:.2%}")
             step_times = self.step_times[used]
             bg_images = self.bg_images[used]
         else:
-            logger.info("Background series cleansing disabled")
+            self.logger.info("Background series cleansing disabled")
             step_times = self.step_times
             bg_images = self.bg_images
 
@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
         idx1 = None
         for ii in range(len(step_times)):
             t1 = step_times[ii]
-            idx1 = np.argmin(np.abs(self.time - t1 + self.split_time/2))
+            idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
             bg_idx[idx0:idx1] = ii
             idx0 = idx1
         if idx1 is not None:
             # Fill up remainder of index array with last entry
             bg_idx[idx1:] = ii
 
-        self.image_proc.value = 1
-
-        # Write background data
-        pos = 0
-        step = 1000
-        while pos < self.image_count:
-            stop = min(pos + step, self.image_count)
-            cur_slice = slice(pos, stop)
-            cur_bg_data = bg_images[bg_idx[cur_slice]]
-            self.writer.store_feature_chunk("image_bg", cur_bg_data)
-            if self.offset_correction:
+        # Store the background images as an internal mapped basin
+        self.writer.store_basin(
+            name="background images",
+            description=f"Pipeline identifier: {self.get_ppid()}",
+            mapping=bg_idx,
+            internal_data={"image_bg": bg_images}
+        )
+
+        # store the offset correction, if applicable
+        if self.offset_correction:
+            self.logger.info("Computing offset correction")
+            # compute the mean at the top of all background images
+            sh, sw = self.input_data.shape[1:]
+            roi_full = (slice(None), slice(0, 20), slice(0, sw))
+            bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+            pos = 0
+            step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                  feat_dtype=np.uint8)[0]
+            bg_off = np.zeros(self.image_count, dtype=float)
+            # For every chunk in the input image data, compute that
+            # value as well and store the resulting offset value.
+            # TODO: Could this be parallelized, or are we limited in reading?
+            while pos < self.image_count:
+                stop = min(pos + step, self.image_count)
                 # Record background offset correction "bg_off". We take a
                 # slice of 20px from the top of the image (there are normally
                 # no events here, only the channel walls are visible).
-                sh, sw = self.input_data.shape[1:]
-                roi_full = (slice(None), slice(0, 20), slice(0, sw))
+                cur_slice = slice(pos, stop)
+                # mean background brightness
+                val_bg = bg_data_mean[bg_idx[cur_slice]]
+                # mean image brightness
                 roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-                val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                 val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                 # background image = image_bg + bg_off
-                self.writer.store_feature_chunk("bg_off", val_dat - val_bg)
-            pos += step
+                bg_off[cur_slice] = val_dat - val_bg
+                # set progress
+                self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                pos = stop
+            # finally, store the background offset feature
+            self.writer.store_feature_chunk("bg_off", bg_off)
+
+        self.image_proc.value = 1
 
     def process_second(self,
                        ii: int,
@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):
 
         self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
 
-        self.image_proc.value = idx_stop / self.image_count
+        self.image_proc.value = idx_stop / (
+            # with offset correction, everything is slower
+            self.image_count * (1 + self.offset_correction))
 
 
 class WorkerSparseMed(mp_spawn.Process):
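
Note: the storage change above is the core of this release. Instead of writing one background image per input frame, only the small stack of median images is stored as an internal basin, together with an index array mapping every frame to its median image. A toy numpy sketch of the reconstruction this implies (shapes are illustrative):

    import numpy as np

    # e.g. 5 median images covering 12 frames of 4x6 px
    rng = np.random.default_rng(0)
    bg_images = rng.integers(0, 255, size=(5, 4, 6), dtype=np.uint8)
    bg_idx = np.array([0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4])

    # A reader resolves the mapped basin by fancy indexing:
    # frame ii gets the background image bg_images[bg_idx[ii]].
    image_bg = bg_images[bg_idx]
    assert image_bg.shape == (12, 4, 6)

    # With offset correction, the effective background of frame ii
    # is then image_bg[ii] + bg_off[ii], one scalar offset per frame.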
dcnum/logic/ctrl.py CHANGED
@@ -34,6 +34,7 @@ from ..write import (
 from .job import DCNumPipelineJob
 from .json_encoder import ExtendedJSONEncoder
 
+
 # Force using "spawn" method for multiprocessing, because we are using
 # queues and threads and would end up with race conditions otherwise.
 mp_spawn = mp.get_context("spawn")
@@ -402,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
                           features=orig_feats,
                           mapping=None)
 
+        # Handle basin data according to the user's request
+        self.state = "plumbing"
+        self.task_enforce_basin_strategy()
+
+        self.state = "cleanup"
+
         with HDF5Writer(self.path_temp_out) as hw:
             # pipeline metadata
             hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -461,11 +468,7 @@ class DCNumJobRunner(threading.Thread):
 
         # copy metadata/logs/tables from original file
         with h5py.File(self.job["path_in"]) as h5_src:
-            copy_metadata(h5_src=h5_src,
-                          h5_dst=hw.h5,
-                          # Don't copy basins, we would have to index-map
-                          # them first.
-                          copy_basins=False)
+            copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
         if redo_seg:
             # Store the correct measurement identifier. This is used to
             # identify this file as a correct basin in subsequent pipeline
@@ -489,12 +492,6 @@ class DCNumJobRunner(threading.Thread):
             mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
             hw.h5.attrs["experiment:run identifier"] = mid_new
 
-        # Handle basin data according to the user's request
-        self.state = "plumbing"
-        self.task_enforce_basin_strategy()
-
-        self.state = "cleanup"
-
         trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
         self.logger.info(f"Run duration: {str(trun)}")
         self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -546,24 +543,19 @@ class DCNumJobRunner(threading.Thread):
         """
         self._progress_bn = 0
         t0 = time.perf_counter()
-        # We need to make sure that the features are correctly attributed
-        # from the input files. E.g. if the input file already has
-        # background images, but we recompute the background images, then
-        # we have to use the data from the recomputed background file.
-        # We achieve this by keeping a specific order and only copying those
-        # features that we don't already have in the output file.
-        feats_raw = [
-            # 1. background data from the temporary input image
-            #    (this must come before draw [sic!])
-            [self.dtin.h5, ["image_bg", "bg_off"], "critical"],
-            # 2. frame-based scalar features from the raw input file
-            #    (e.g. "temp" or "frame")
-            [self.draw.h5, self.draw.features_scalar_frame, "optional"],
-            # 3. image features from the input file
-            [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-        ]
-        with h5py.File(self.path_temp_out, "a") as hout:
-            hw = HDF5Writer(hout)
+        # We have these points to consider:
+        # - We must use the `basinmap` feature to map from the original
+        #   file to the output file.
+        # - We must copy "bg_off" and "image_bg" to the output file.
+        # - For the "drain" basin strategy, we also have to copy all the
+        #   other features.
+        # - If "image_bg" is defined as an internal basin in the input
+        #   file, we have to convert the mapping and store a corresponding
+        #   internal basin in the output file.
+
+        # Determine the basinmap feature
+        with HDF5Writer(self.path_temp_out) as hw:
+            hout = hw.h5
             # First, we have to determine the basin mapping from input to
             # output. This information is stored by the QueueCollectorThread
             # in the "basinmap0" feature, ready to be used by us.
@@ -576,21 +568,22 @@ class DCNumJobRunner(threading.Thread):
                 # mapping of the input file was set to slice(1, 100), then the
                 # first image would not be there, and we would have
                 # [1, 1, 1, ...].
-                idx_um = hout["events/index_unmapped"]
+                idx_um = hout["events/index_unmapped"][:]
 
                 # If we want to convert this to an actual basinmap feature,
                 # then we have to convert those indices to indices that map
                 # to the original input HDF5 file.
                 raw_im = self.draw.index_mapping
                 if raw_im is None:
-                    self.logger.info("Input file mapped with basinmap0")
                     # Create a hard link to save time and space
                     hout["events/basinmap0"] = hout["events/index_unmapped"]
-                    basinmap = idx_um
+                    basinmap0 = idx_um
                 else:
-                    basinmap = get_mapping_indices(raw_im)[idx_um]
+                    self.logger.info("Converting input mapping")
+                    basinmap0 = get_mapping_indices(raw_im)[idx_um]
                 # Store the mapped basin data in the output file.
-                hw.store_feature_chunk("basinmap0", basinmap)
+                hw.store_feature_chunk("basinmap0", basinmap0)
+                self.logger.info("Input mapped to output with basinmap0")
                 # We don't need them anymore.
                 del hout["events/index_unmapped"]
@@ -598,19 +591,72 @@ class DCNumJobRunner(threading.Thread):
                 # is the size of the raw dataset and the latter is its mapped
                 # size!
                 size_raw = self.draw.h5.attrs["experiment:event count"]
-                if (len(basinmap) == size_raw
-                        and np.all(basinmap == np.arange(size_raw))):
+                if (len(basinmap0) == size_raw
+                        and np.all(basinmap0 == np.arange(size_raw))):
                     # This means that the images in the input overlap perfectly
                     # with the images in the output, i.e. a "copy" segmenter
                     # was used or something is very reproducible.
                     # We set basinmap to None to be more efficient.
-                    basinmap = None
+                    basinmap0 = None
 
             else:
                 # The input is identical to the output, because we are using
                 # the same pipeline identifier.
-                basinmap = None
-
+                basinmap0 = None
+
+            # List of features we have to copy from input to output.
+            # We need to make sure that the features are correctly attributed
+            # from the input files. E.g. if the input file already has
+            # background images, but we recompute the background images, then
+            # we have to use the data from the recomputed background file.
+            # We achieve this by keeping a specific order and only copying
+            # those features that we don't already have in the output file.
+            feats_raw = [
+                # background data from the temporary input image
+                [self.dtin.h5, ["bg_off"], "critical"],
+                [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+                [self.draw.h5, ["image", "bg_off"], "optional"],
+            ]
+
+            # Store image_bg as an internal basin, if defined in input
+            for idx in range(len(self.dtin.basins)):
+                bn_dict = self.dtin.basins[idx]
+                if (bn_dict["type"] == "internal"
+                        and "image_bg" in bn_dict["features"]):
+                    self.logger.info(
+                        "Copying internal basin background images")
+                    bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                    assert "image_bg" in bn_feats
+                    # Load all images into memory (should only be ~600)
+                    bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                    # Get the original internal mapping for these images
+                    # Note that `basinmap0` always refers to indices in the
+                    # original raw input file, and not to indices in an
+                    # optional mapped input file (using `index_mapping`).
+                    # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                    # instead of `self.dtin["basinmap0"]`
+                    basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                    # Now we have to convert the indices in `basinmap_in`
+                    # to indices in the output file.
+                    basinmap1 = basinmap_in[basinmap0]
+                    # Store the internal mapping in the output file
+                    hw.store_basin(name=bn_dict["name"],
+                                   description=bn_dict["description"],
+                                   mapping=basinmap1,
+                                   internal_data={"image_bg": bg_images1}
+                                   )
+                    break
+            else:
+                self.logger.info("Background images must be copied")
+                # There is no internal image_bg feature, probably because
+                # the user did not use the sparsemed background correction.
+                # In this case, we simply add "image_bg" to the `feats_raw`.
+                feats_raw += [
+                    [self.dtin.h5, ["image_bg"], "critical"],
+                    [self.draw.h5, ["image_bg"], "optional"],
+                ]
+
+            # Copy the features required in the output file.
             for hin, feats, importance in feats_raw:
                 # Only consider features that are available in the input
                 # and that are not already in the output.
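
Note: the crucial step in the hunk above is composing two index mappings with a single fancy-indexing operation: `basinmap_in` maps raw-input frames to internal background images, and `basinmap0` maps output events to raw-input frames, so `basinmap_in[basinmap0]` maps output events directly to background images. A worked toy example:

    import numpy as np

    # raw input frame -> index of its internal background image
    basinmap_in = np.array([0, 0, 0, 1, 1, 2])
    # output event -> raw input frame (events found in frames 1, 3, 4, 5)
    basinmap0 = np.array([1, 3, 4, 5])

    # output event -> background image, composed in one step
    basinmap1 = basinmap_in[basinmap0]
    assert basinmap1.tolist() == [0, 1, 1, 2]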
@@ -625,7 +671,7 @@
                     copy_features(h5_src=hin,
                                   h5_dst=hout,
                                   features=feats,
-                                  mapping=basinmap)
+                                  mapping=basinmap0)
                 else:
                     # TAP: Create basins for the "optional" features in the
                     # output file. Note that the "critical" features never
@@ -633,11 +679,17 @@
                     self.logger.debug(f"Creating basin for {feats}")
                     # Relative and absolute paths.
                     pin = pathlib.Path(hin.filename).resolve()
+                    paths = [pin]
                     pout = pathlib.Path(hout.filename).resolve().parent
-                    paths = [pin, os.path.relpath(pin, pout)]
+                    try:
+                        paths.append(os.path.relpath(pin, pout))
+                    except ValueError:
+                        # This means it is impossible to compute a relative
+                        # path (e.g. different drive letter on Windows).
+                        pass
                     hw.store_basin(name="dcnum basin",
                                    features=feats,
-                                   mapping=basinmap,
+                                   mapping=basinmap0,
                                    paths=paths,
                                    description=f"Created with dcnum {version}",
                                    )
@@ -730,7 +782,6 @@
 
         # Start the data collection thread
         thr_coll = QueueCollectorThread(
-            data=self.dtin,
             event_queue=fe_kwargs["event_queue"],
             writer_dq=writer_dq,
             feat_nevents=fe_kwargs["feat_nevents"],
dcnum/meta/ppid.py CHANGED
@@ -7,10 +7,11 @@ import pathlib
 from typing import Dict, List, Protocol
 import warnings
 
+import numpy as np
 
 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "10"
+DCNUM_PPID_GENERATION = "11"
 
 
 class ClassWithPPIDCapabilities(Protocol):
@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
             path = pathlib.Path(val)
             if path.exists():
                 val = path.name
-        if isinstance(val, bool):
+        if isinstance(val, (bool, np.bool_)):
             val = int(val)  # do not print e.g. "True"
-        elif isinstance(val, float):
+        elif isinstance(val, (float, np.floating)):
             if val == int(val):
                 val = int(val)  # omit the ".0" at the end
         concat_strings.append(f"{abr}={val}")
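
Note: the numpy scalar cases matter because values read from HDF5 attributes or numpy arrays arrive as `np.bool_`/`np.floating` rather than plain `bool`/`float`; without the extra checks a pipeline identifier would render as e.g. "o=True" instead of "o=1". A standalone illustration of the normalization branch (mirroring the type checks in the hunk; the helper name is made up):

    import numpy as np

    def format_ppid_value(val):
        # sketch of the value normalization in kwargs_to_ppid
        if isinstance(val, (bool, np.bool_)):
            val = int(val)      # do not print e.g. "True"
        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)  # omit the ".0" at the end
        return str(val)

    assert format_ppid_value(np.True_) == "1"
    assert format_ppid_value(np.float64(200.0)) == "200"
    assert format_ppid_value(0.25) == "0.25"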
dcnum/read/__init__.py CHANGED
@@ -1,5 +1,6 @@
 # flake8: noqa: F401
 from .cache import md5sum
 from .const import PROTECTED_FEATURES
+from .detect_flicker import detect_flickering
 from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
 from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py CHANGED
@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
     def __getitem__(self, index):
         if isinstance(index, (slice, list, np.ndarray)):
             if isinstance(index, slice):
-                indices = np.arange(index.start or 0,
-                                    index.stop or len(self),
-                                    index.step)
+                indices = np.arange(
+                    index.start or 0,
+                    min(index.stop, len(self)) if index.stop else len(self),
+                    index.step)
             else:
                 indices = index
             array_out = np.empty((len(indices),) + self.image_shape,
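
Note: the cache fix above clamps the slice stop to the data size before building the index array; previously `np.arange` could emit indices past the end of the cache when a caller requested more frames than exist. A minimal before/after illustration:

    import numpy as np

    n = 10              # pretend the cache holds 10 images
    sl = slice(0, 15)   # caller requests 15

    # old behavior: indices 10..14 point past the end of the data
    old = np.arange(sl.start or 0, sl.stop or n, sl.step)
    assert old.max() == 14

    # new behavior: the stop value is clamped to the data size
    new = np.arange(sl.start or 0,
                    min(sl.stop, n) if sl.stop else n,
                    sl.step)
    assert new.max() == n - 1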
dcnum/read/detect_flicker.py ADDED
@@ -0,0 +1,44 @@
+import numpy as np
+
+from .hdf5_data import HDF5Data
+
+
+def detect_flickering(image_data: np.ndarray | HDF5Data,
+                      roi_height: int = 10,
+                      brightness_threshold: float = 2.5,
+                      count_threshold: int = 5,
+                      max_frames: int = 1000):
+    """Determine whether an image series experiences flickering
+
+    Flickering is an unwelcome phenomenon due to a faulty data
+    acquisition device. For instance, if there is random voltage noise in
+    the electronics managing the LED power, then the brightness of the
+    LED will vary randomly when the noise signal overlaps with the flash
+    triggering signal.
+
+    If flickering is detected, you should use the "sparsemed" background
+    computation with `offset_correction` set to True.
+
+    Parameters
+    ----------
+    image_data:
+        sliceable object (e.g. numpy array or HDF5Data) containing
+        image data
+    roi_height: int
+        height of the ROI in pixels for which to search for flickering;
+        the entire width of the image is used
+    brightness_threshold: float
+        brightness difference between an individual ROI median and the
+        median of all ROI medians leading to a positive flickering event
+    count_threshold: int
+        minimum number of flickering events that would lead to a positive
+        flickering decision
+    max_frames: int
+        maximum number of frames to include in the flickering analysis
+    """
+    # slice event axis first in case we have an HDF5Data instance
+    roi_data = image_data[:max_frames][:, :roi_height, :]
+    roi_median = np.median(roi_data, axis=(1, 2))
+    roi_offset = roi_median - np.median(roi_median)
+    flickering_events = np.sum(np.abs(roi_offset) >= abs(brightness_threshold))
+    return flickering_events >= count_threshold
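
Note: a hedged usage sketch for the new `detect_flickering` helper, wired to the sparsemed `offset_correction` flag as the docstring recommends (synthetic data; with real recordings one would pass an `HDF5Data` instance or an image array):

    import numpy as np
    from dcnum.read import detect_flickering

    rng = np.random.default_rng(42)
    # synthetic stack: 200 frames of 80x250 px around brightness 150 ...
    frames = rng.normal(150, 1, size=(200, 80, 250))
    # ... with a +10 brightness jump on every 10th frame (flicker)
    frames[::10] += 10

    if detect_flickering(frames):
        # per the docstring: use the "sparsemed" background computation
        # with offset_correction=True
        print("Flickering detected, enable offset correction")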