dcnum 0.23.1-py3-none-any.whl → 0.25.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

dcnum/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.23.1'
- __version_tuple__ = version_tuple = (0, 23, 1)
+ __version__ = version = '0.25.1'
+ __version_tuple__ = version_tuple = (0, 25, 1)
dcnum/feat/event_extractor_manager_thread.py CHANGED
@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
          # If the writer_dq starts filling up, then this could lead to
          # an oom-kill signal. Stall for the writer to prevent this.
          if (ldq := len(self.writer_dq)) > 1000:
-             time.sleep(1)
-             ldq2 = len(self.writer_dq)
-             stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
-             time.sleep(stall_time)
+             stalled_sec = 0.
+             for ii in range(60):
+                 if len(self.writer_dq) > 200:
+                     time.sleep(.5)
+                     stalled_sec += .5
              self.logger.warning(
-                 f"Stalled {stall_time + 1:.1f}s for slow writer "
+                 f"Stalled {stalled_sec:.1f}s due to slow writer "
                  f"({ldq} chunks queued)")
 
          unavailable_slots = 0
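The old code estimated a single stall time from the drain rate; the new code polls the queue up to 60 times, sleeping 0.5 s whenever more than 200 chunks remain queued, which caps the total stall at 30 s. A minimal standalone sketch of this backpressure pattern (the names `high_water`, `low_water`, `max_polls` are illustrative, not dcnum API):

    import collections
    import time

    def stall_for_writer(writer_dq, high_water=1000, low_water=200,
                         max_polls=60, poll_interval=0.5):
        """Sleep in small increments while a slow consumer drains the queue."""
        stalled_sec = 0.0
        if len(writer_dq) > high_water:
            for _ in range(max_polls):
                # Only sleep while the queue is still above the low-water mark
                if len(writer_dq) > low_water:
                    time.sleep(poll_interval)
                    stalled_sec += poll_interval
        return stalled_sec

    # An already-drained queue returns immediately with 0.0
    print(stall_for_writer(collections.deque()))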
dcnum/feat/feat_background/base.py CHANGED
@@ -1,8 +1,10 @@
  import abc
  import functools
  import inspect
+ import logging
  import multiprocessing as mp
  import pathlib
+ import time
 
  import h5py
 
@@ -41,8 +43,11 @@ class Background(abc.ABC):
          kwargs:
              Additional keyword arguments passed to the subclass.
          """
+         self.logger = logging.getLogger(
+             f"dcnum.feat.feat_background.{self.__class__.__name__}")
          # proper conversion to Path objects
          output_path = pathlib.Path(output_path)
+         self.output_path = output_path
          if isinstance(input_data, str):
              input_data = pathlib.Path(input_data)
          # kwargs checks
@@ -188,20 +193,30 @@ class Background(abc.ABC):
          return self.image_proc.value
 
      def process(self):
+         """Perform the background computation
+
+         This irreversibly removes/overrides any "image_bg" and
+         "bg_off" features defined in the output file `self.h5out`.
+         """
+         t0 = time.perf_counter()
          # Delete any old background data
-         for key in ["image_bg", "bg_off"]:
-             if key in self.h5out["events"]:
-                 del self.h5out["events"][key]
+         for ds_key in ["image_bg", "bg_off"]:
+             for grp_key in ["events", "basin_events"]:
+                 if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                     del self.h5out[grp_key][ds_key]
          # Perform the actual background computation
          self.process_approach()
          bg_ppid = self.get_ppid()
          # Store pipeline information in the image_bg/bg_off feature
-         for key in ["image_bg", "bg_off"]:
-             if key in self.h5out["events"]:
-                 self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
-                     bg_ppid
-                 self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
-                     ppid.DCNUM_PPID_GENERATION
+         for ds_key in ["image_bg", "bg_off"]:
+             for grp_key in ["events", "basin_events"]:
+                 if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                     self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                         "dcnum ppid background"] = bg_ppid
+                     self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                         "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+         self.logger.info(
+             f"Background computation time: {time.perf_counter()-t0:.1f}s")
 
      @abc.abstractmethod
      def process_approach(self):
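`process()` now looks for the background features in both the "events" group and the new "basin_events" group. The membership-check pattern is plain h5py; a self-contained sketch with an in-memory file and made-up shapes (the attribute value is an illustrative placeholder, not a real pipeline identifier):

    import h5py
    import numpy as np

    # In-memory HDF5 file so the sketch leaves no file behind
    with h5py.File("sketch.h5", "w", driver="core", backing_store=False) as h5:
        h5.require_group("events").create_dataset(
            "image_bg", data=np.zeros((5, 80, 320), dtype=np.uint8))
        # Remove stale features from every group that may hold them
        for ds_key in ["image_bg", "bg_off"]:
            for grp_key in ["events", "basin_events"]:
                if grp_key in h5 and ds_key in h5[grp_key]:
                    del h5[grp_key][ds_key]
        # ... recompute, then tag the new dataset with pipeline metadata
        ds = h5["events"].create_dataset(
            "image_bg", data=np.zeros((5, 80, 320), dtype=np.uint8))
        ds.attrs["dcnum ppid background"] = "illustrative-ppid-string"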
dcnum/feat/feat_background/bg_sparse_median.py CHANGED
@@ -1,4 +1,3 @@
- import logging
  import queue
  import time
 
@@ -9,14 +8,13 @@ from ...read import HDF5Data
 
  from .base import mp_spawn, Background
 
- logger = logging.getLogger(__name__)
-
 
  class BackgroundSparseMed(Background):
      def __init__(self, input_data, output_path, kernel_size=200,
                   split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                   offset_correction=True,
-                  compress=True, num_cpus=None):
+                  compress=True,
+                  num_cpus=None):
          """Sparse median background correction with cleansing
 
          In contrast to the rolling median background correction,
@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
          offset_correction: bool
              The sparse median background correction produces one median
              image for multiple input frames (BTW this also leads to very
-             efficient data storage with HDF5 data compression filters). In
+             efficient data storage with internal HDF5 basins). In
              case the input frames are subject to frame-by-frame brightness
              variations (e.g. flickering of the illumination source), it
              is useful to have an offset value per frame that can then be
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
          num_cpus: int
              Number of CPUs to use for median computation. Defaults to
              `multiprocessing.cpu_count()`.
+
+         .. versionchanged:: 0.23.5
+
+             The background image data are stored as an internal
+             mapped basin to reduce the output file size.
          """
          super(BackgroundSparseMed, self).__init__(
              input_data=input_data,
@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
          )
 
          if kernel_size > len(self.input_data):
-             logger.warning(
+             self.logger.warning(
                  f"The kernel size {kernel_size} is too large for input data"
                  f"size {len(self.input_data)}. Setting it to input data size!")
              kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
          else:
              # compute time using frame rate (approximate)
              dur = self.image_count / fr * 1.5
-             logger.info(f"Approximating duration: {dur/60:.1f}min")
+             self.logger.info(
+                 f"Approximating duration: {dur/60:.1f}min")
              self.time = np.linspace(0, dur, self.image_count,
                                      endpoint=True)
          if self.time is None:
              # No HDF5 file or no information therein; Make an educated guess.
              dur = self.image_count / 3600 * 1.5
-             logger.info(f"Guessing duration: {dur/60:.1f}min")
+             self.logger.info(f"Guessing duration: {dur/60:.1f}min")
              self.time = np.linspace(0, dur, self.image_count,
                                      endpoint=True)
 
@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
          offset_correction: bool
              The sparse median background correction produces one median
              image for multiple input frames (BTW this also leads to very
-             efficient data storage with HDF5 data compression filters). In
+             efficient data storage with internal HDF5 basins). In
              case the input frames are subject to frame-by-frame brightness
              variations (e.g. flickering of the illumination source), it
              is useful to have an offset value per frame that can then be
@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
              thresh = np.quantile(ref, self.frac_cleansing)
              used = ref <= thresh
              frac_remove = np.sum(~used) / used.size
-             logger.warning(
+             self.logger.warning(
                  f"{frac_remove_user:.1%} of the background images would "
                  f"be removed with the current settings, so we enforce "
                  f"`frac_cleansing`. To avoid this warning, try decreasing "
                  f"`thresh_cleansing` or `frac_cleansing`. The new "
                  f"threshold is {thresh_fact / thresh}.")
 
-         logger.info(f"Cleansed {frac_remove:.2%}")
+         self.logger.info(f"Cleansed {frac_remove:.2%}")
          step_times = self.step_times[used]
          bg_images = self.bg_images[used]
      else:
-         logger.info("Background series cleansing disabled")
+         self.logger.info("Background series cleansing disabled")
          step_times = self.step_times
          bg_images = self.bg_images
 
@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
          idx1 = None
          for ii in range(len(step_times)):
              t1 = step_times[ii]
-             idx1 = np.argmin(np.abs(self.time - t1 + self.split_time/2))
+             idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
              bg_idx[idx0:idx1] = ii
              idx0 = idx1
          if idx1 is not None:
              # Fill up remainder of index array with last entry
              bg_idx[idx1:] = ii
 
-         self.image_proc.value = 1
-
-         # Write background data
-         pos = 0
-         step = 1000
-         while pos < self.image_count:
-             stop = min(pos + step, self.image_count)
-             cur_slice = slice(pos, stop)
-             cur_bg_data = bg_images[bg_idx[cur_slice]]
-             self.writer.store_feature_chunk("image_bg", cur_bg_data)
-             if self.offset_correction:
+         # Store the background images as an internal mapped basin
+         self.writer.store_basin(
+             name="background images",
+             description=f"Pipeline identifier: {self.get_ppid()}",
+             mapping=bg_idx,
+             internal_data={"image_bg": bg_images}
+         )
+
+         # store the offset correction, if applicable
+         if self.offset_correction:
+             self.logger.info("Computing offset correction")
+             # compute the mean at the top of all background images
+             sh, sw = self.input_data.shape[1:]
+             roi_full = (slice(None), slice(0, 20), slice(0, sw))
+             bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+             pos = 0
+             step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                   feat_dtype=np.uint8)[0]
+             bg_off = np.zeros(self.image_count, dtype=float)
+             # For every chunk in the input image data, compute that
+             # value as well and store the resulting offset value.
+             # TODO: Could this be parallelized, or are we limited in reading?
+             while pos < self.image_count:
+                 stop = min(pos + step, self.image_count)
                  # Record background offset correction "bg_off". We take a
                  # slice of 20px from the top of the image (there are normally
                  # no events here, only the channel walls are visible).
-                 sh, sw = self.input_data.shape[1:]
-                 roi_full = (slice(None), slice(0, 20), slice(0, sw))
+                 cur_slice = slice(pos, stop)
+                 # mean background brightness
+                 val_bg = bg_data_mean[bg_idx[cur_slice]]
+                 # mean image brightness
                  roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-                 val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                  val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                  # background image = image_bg + bg_off
-                 self.writer.store_feature_chunk("bg_off", val_dat - val_bg)
-                 pos += step
+                 bg_off[cur_slice] = val_dat - val_bg
+                 # set progress
+                 self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                 pos = stop
+             # finally, store the background offset feature
+             self.writer.store_feature_chunk("bg_off", bg_off)
+
+         self.image_proc.value = 1
 
      def process_second(self,
                         ii: int,
@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):
 
          self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
 
-         self.image_proc.value = idx_stop / self.image_count
+         self.image_proc.value = idx_stop / (
+             # with offset correction, everything is slower
+             self.image_count * (1 + self.offset_correction))
 
 
  class WorkerSparseMed(mp_spawn.Process):
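The core saving of the internal mapped basin: instead of one background image per frame, only one image per median window is stored, together with a per-frame index array (the `mapping` passed to `store_basin`). A toy illustration with made-up shapes:

    import numpy as np

    # Suppose the sparse median produced 3 background images for 10 frames.
    bg_images = np.random.randint(0, 255, (3, 80, 320), dtype=np.uint8)
    bg_idx = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 2])  # frame -> median image

    # Reading frame 7's background is a single small lookup:
    image_bg_frame_7 = bg_images[bg_idx[7]]

    # Expanding everything (what a reader would do lazily) is fancy indexing:
    image_bg_all = bg_images[bg_idx]
    assert image_bg_all.shape == (10, 80, 320)

Only the (3, 80, 320) stack and the 10-entry index array hit the disk, instead of a full (10, 80, 320) per-frame stack.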
dcnum/logic/ctrl.py CHANGED
@@ -1,6 +1,7 @@
  import collections
  import datetime
  import hashlib
+ import importlib
  import json
  import logging
  from logging.handlers import QueueListener
@@ -33,6 +34,7 @@ from ..write import (
  from .job import DCNumPipelineJob
  from .json_encoder import ExtendedJSONEncoder
 
+
  # Force using "spawn" method for multiprocessing, because we are using
  # queues and threads and would end up with race conditions otherwise.
  mp_spawn = mp.get_context("spawn")
@@ -401,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
                        features=orig_feats,
                        mapping=None)
 
+         # Handle basin data according to the user's request
+         self.state = "plumbing"
+         self.task_enforce_basin_strategy()
+
+         self.state = "cleanup"
+
          with HDF5Writer(self.path_temp_out) as hw:
              # pipeline metadata
              hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -430,6 +438,16 @@ class DCNumJobRunner(threading.Thread):
                  "build": ", ".join(platform.python_build()),
                  "implementation":
                      platform.python_implementation(),
+                 "libraries": get_library_versions_dict([
+                     "cv2",
+                     "h5py",
+                     "mahotas",
+                     "numba",
+                     "numpy",
+                     "scipy",
+                     "skimage",
+                     "torch",
+                 ]),
                  "version": platform.python_version(),
              },
              "system": {
@@ -450,11 +468,7 @@ class DCNumJobRunner(threading.Thread):
 
          # copy metadata/logs/tables from original file
          with h5py.File(self.job["path_in"]) as h5_src:
-             copy_metadata(h5_src=h5_src,
-                           h5_dst=hw.h5,
-                           # Don't copy basins, we would have to index-map
-                           # them first.
-                           copy_basins=False)
+             copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
          if redo_seg:
              # Store the correct measurement identifier. This is used to
              # identify this file as a correct basin in subsequent pipeline
@@ -478,12 +492,6 @@ class DCNumJobRunner(threading.Thread):
              mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
              hw.h5.attrs["experiment:run identifier"] = mid_new
 
-         # Handle basin data according to the user's request
-         self.state = "plumbing"
-         self.task_enforce_basin_strategy()
-
-         self.state = "cleanup"
-
          trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
          self.logger.info(f"Run duration: {str(trun)}")
          self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -535,24 +543,19 @@ class DCNumJobRunner(threading.Thread):
          """
          self._progress_bn = 0
          t0 = time.perf_counter()
-         # We need to make sure that the features are correctly attributed
-         # from the input files. E.g. if the input file already has
-         # background images, but we recompute the background images, then
-         # we have to use the data from the recomputed background file.
-         # We achieve this by keeping a specific order and only copying those
-         # features that we don't already have in the output file.
-         feats_raw = [
-             # 1. background data from the temporary input image
-             #    (this must come before draw [sic!])
-             [self.dtin.h5, ["image_bg", "bg_off"], "critical"],
-             # 2. frame-based scalar features from the raw input file
-             #    (e.g. "temp" or "frame")
-             [self.draw.h5, self.draw.features_scalar_frame, "optional"],
-             # 3. image features from the input file
-             [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-         ]
-         with h5py.File(self.path_temp_out, "a") as hout:
-             hw = HDF5Writer(hout)
+         # We have these points to consider:
+         # - We must use the `basinmap` feature to map from the original
+         #   file to the output file.
+         # - We must copy "bg_off" and "image_bg" to the output file.
+         # - For the "drain" basin strategy, we also have to copy all the
+         #   other features.
+         # - If "image_bg" is defined as an internal basin in the input
+         #   file, we have to convert the mapping and store a corresponding
+         #   internal basin in the output file.
+
+         # Determine the basinmap feature
+         with HDF5Writer(self.path_temp_out) as hw:
+             hout = hw.h5
              # First, we have to determine the basin mapping from input to
              # output. This information is stored by the QueueCollectorThread
              # in the "basinmap0" feature, ready to be used by us.
@@ -565,21 +568,22 @@ class DCNumJobRunner(threading.Thread):
              # mapping of the input file was set to slice(1, 100), then the
              # first image would not be there, and we would have
              # [1, 1, 1, ...].
-             idx_um = hout["events/index_unmapped"]
+             idx_um = hout["events/index_unmapped"][:]
 
              # If we want to convert this to an actual basinmap feature,
              # then we have to convert those indices to indices that map
              # to the original input HDF5 file.
              raw_im = self.draw.index_mapping
              if raw_im is None:
-                 self.logger.info("Input file mapped with basinmap0")
                  # Create a hard link to save time and space
                  hout["events/basinmap0"] = hout["events/index_unmapped"]
-                 basinmap = idx_um
+                 basinmap0 = idx_um
              else:
-                 basinmap = get_mapping_indices(raw_im)[idx_um]
+                 self.logger.info("Converting input mapping")
+                 basinmap0 = get_mapping_indices(raw_im)[idx_um]
              # Store the mapped basin data in the output file.
-             hw.store_feature_chunk("basinmap0", basinmap)
+             hw.store_feature_chunk("basinmap0", basinmap0)
+             self.logger.info("Input mapped to output with basinmap0")
              # We don't need them anymore.
              del hout["events/index_unmapped"]
 
@@ -587,19 +591,72 @@ class DCNumJobRunner(threading.Thread):
              # is the size of the raw dataset and the latter is its mapped
              # size!
              size_raw = self.draw.h5.attrs["experiment:event count"]
-             if (len(basinmap) == size_raw
-                     and np.all(basinmap == np.arange(size_raw))):
+             if (len(basinmap0) == size_raw
+                     and np.all(basinmap0 == np.arange(size_raw))):
                  # This means that the images in the input overlap perfectly
                  # with the images in the output, i.e. a "copy" segmenter
                  # was used or something is very reproducible.
                  # We set basinmap to None to be more efficient.
-                 basinmap = None
+                 basinmap0 = None
 
          else:
              # The input is identical to the output, because we are using
              # the same pipeline identifier.
-             basinmap = None
-
+             basinmap0 = None
+
+         # List of features we have to copy from input to output.
+         # We need to make sure that the features are correctly attributed
+         # from the input files. E.g. if the input file already has
+         # background images, but we recompute the background images, then
+         # we have to use the data from the recomputed background file.
+         # We achieve this by keeping a specific order and only copying
+         # those features that we don't already have in the output file.
+         feats_raw = [
+             # background data from the temporary input image
+             [self.dtin.h5, ["bg_off"], "critical"],
+             [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+             [self.draw.h5, ["image", "bg_off"], "optional"],
+         ]
+
+         # Store image_bg as an internal basin, if defined in input
+         for idx in range(len(self.dtin.basins)):
+             bn_dict = self.dtin.basins[idx]
+             if (bn_dict["type"] == "internal"
+                     and "image_bg" in bn_dict["features"]):
+                 self.logger.info(
+                     "Copying internal basin background images")
+                 bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                 assert "image_bg" in bn_feats
+                 # Load all images into memory (should only be ~600)
+                 bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                 # Get the original internal mapping for these images.
+                 # Note that `basinmap0` always refers to indices in the
+                 # original raw input file, and not to indices in an
+                 # optional mapped input file (using `index_mapping`).
+                 # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                 # instead of `self.dtin["basinmap0"]`.
+                 basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                 # Now we have to convert the indices in `basinmap_in`
+                 # to indices in the output file.
+                 basinmap1 = basinmap_in[basinmap0]
+                 # Store the internal mapping in the output file
+                 hw.store_basin(name=bn_dict["name"],
+                                description=bn_dict["description"],
+                                mapping=basinmap1,
+                                internal_data={"image_bg": bg_images1}
+                                )
+                 break
+         else:
+             self.logger.info("Background images must be copied")
+             # There is no internal image_bg feature, probably because
+             # the user did not use the sparsemed background correction.
+             # In this case, we simply add "image_bg" to the `feats_raw`.
+             feats_raw += [
+                 [self.dtin.h5, ["image_bg"], "critical"],
+                 [self.draw.h5, ["image_bg"], "optional"],
+             ]
+
+         # Copy the features required in the output file.
          for hin, feats, importance in feats_raw:
              # Only consider features that are available in the input
              # and that are not already in the output.
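The key step above is composing two index mappings with NumPy fancy indexing: `basinmap_in` maps raw input frames to internal basin images, `basinmap0` maps output events to raw input frames, so `basinmap_in[basinmap0]` maps output events directly to basin images. With made-up toy arrays:

    import numpy as np

    # output event -> index in the raw input file
    basinmap0 = np.array([0, 0, 2, 3])
    # raw input frame -> index into "basin_events/image_bg"
    basinmap_in = np.array([0, 0, 1, 1, 2])
    # composition: output event -> internal basin image
    basinmap1 = basinmap_in[basinmap0]
    print(basinmap1)  # [0 0 1 1]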
@@ -614,7 +671,7 @@ class DCNumJobRunner(threading.Thread):
                  copy_features(h5_src=hin,
                                h5_dst=hout,
                                features=feats,
-                               mapping=basinmap)
+                               mapping=basinmap0)
              else:
                  # TAP: Create basins for the "optional" features in the
                  # output file. Note that the "critical" features never
@@ -622,11 +679,17 @@ class DCNumJobRunner(threading.Thread):
                  self.logger.debug(f"Creating basin for {feats}")
                  # Relative and absolute paths.
                  pin = pathlib.Path(hin.filename).resolve()
+                 paths = [pin]
                  pout = pathlib.Path(hout.filename).resolve().parent
-                 paths = [pin, os.path.relpath(pin, pout)]
+                 try:
+                     paths.append(os.path.relpath(pin, pout))
+                 except ValueError:
+                     # This means it is impossible to compute a relative
+                     # path (e.g. different drive letter on Windows).
+                     pass
                  hw.store_basin(name="dcnum basin",
                                 features=feats,
-                                mapping=basinmap,
+                                mapping=basinmap0,
                                 paths=paths,
                                 description=f"Created with dcnum {version}",
                                 )
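`os.path.relpath` raises `ValueError` when no relative path exists, which on Windows happens for paths on different drives; the `try/except` above then stores only the absolute path. For example:

    import os

    print(os.path.relpath("/data/in.rtdc", "/results"))  # ../data/in.rtdc
    # On Windows, different drive letters have no relative representation:
    # os.path.relpath("C:/data/in.rtdc", "D:/results")  -> ValueError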
@@ -719,7 +782,6 @@ class DCNumJobRunner(threading.Thread):
 
          # Start the data collection thread
          thr_coll = QueueCollectorThread(
-             data=self.dtin,
              event_queue=fe_kwargs["event_queue"],
              writer_dq=writer_dq,
              feat_nevents=fe_kwargs["feat_nevents"],
@@ -780,6 +842,19 @@ class DCNumJobRunner(threading.Thread):
          self.logger.info("Finished segmentation and feature extraction")
 
 
+ def get_library_versions_dict(library_name_list):
+     version_dict = {}
+     for library_name in library_name_list:
+         try:
+             lib = importlib.import_module(library_name)
+         except BaseException:
+             version = None
+         else:
+             version = lib.__version__
+         version_dict[library_name] = version
+     return version_dict
+
+
  def join_thread_helper(thr, timeout, retries, logger, name):
      for _ in range(retries):
          thr.join(timeout=timeout)
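The new helper swallows any import failure (`BaseException` also catches e.g. a broken optional dependency that raises at import time) and records `None` instead. Hypothetical usage; the version string shown is illustrative:

    >>> get_library_versions_dict(["numpy", "no_such_library"])
    {'numpy': '1.26.4', 'no_such_library': None}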
dcnum/logic/job.py CHANGED
@@ -182,3 +182,25 @@ class DCNumPipelineJob:
          if len(ret) == 1:
              ret = ret[0]
          return ret
+
+     def validate(self):
+         """Make sure the pipeline will run given the job kwargs
+
+         Returns
+         -------
+         True:
+             for testing convenience
+
+         Raises
+         ------
+         dcnum.segm.SegmenterNotApplicableError:
+             the segmenter is incompatible with the input path
+         """
+         # Check segmenter applicability
+         seg_cls = get_available_segmenters()[self.kwargs["segmenter_code"]]
+         with HDF5Data(self.kwargs["path_in"]) as hd:
+             seg_cls.validate_applicability(
+                 segmenter_kwargs=self.kwargs["segmenter_kwargs"],
+                 logs=hd.logs,
+                 meta=hd.meta)
+         return True
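A hypothetical usage sketch, assuming `DCNumPipelineJob` accepts the `path_in` and `segmenter_code` keyword arguments that `validate` reads from `self.kwargs` (the file name is made up):

    from dcnum.logic import DCNumPipelineJob
    from dcnum.segm import SegmenterNotApplicableError

    job = DCNumPipelineJob(path_in="measurement.rtdc",
                           segmenter_code="thresh")
    try:
        job.validate()  # returns True if the segmenter applies
    except SegmenterNotApplicableError as exc:
        print(f"Job will not run: {exc}")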
dcnum/meta/ppid.py CHANGED
@@ -7,10 +7,11 @@ import pathlib
  from typing import Dict, List, Protocol
  import warnings
 
+ import numpy as np
 
  #: Increment this string if there are breaking changes that make
  #: previous pipelines unreproducible.
- DCNUM_PPID_GENERATION = "10"
+ DCNUM_PPID_GENERATION = "11"
 
 
  class ClassWithPPIDCapabilities(Protocol):
@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
          path = pathlib.Path(val)
          if path.exists():
              val = path.name
-         if isinstance(val, bool):
+         if isinstance(val, (bool, np.bool_)):
              val = int(val)  # do not print e.g. "True"
-         elif isinstance(val, float):
+         elif isinstance(val, (float, np.floating)):
              if val == int(val):
                  val = int(val)  # omit the ".0" at the end
          concat_strings.append(f"{abr}={val}")
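Why the NumPy types matter: `np.bool_` is not an instance of `bool`, and `np.float32` is not an instance of `float` (`np.float64` happens to subclass `float`), so before this change such values were rendered verbatim, e.g. as "True", in the pipeline identifier. A quick demonstration of the normalization:

    import numpy as np

    for val in [True, np.True_, np.float32(3.0), 0.5]:
        if isinstance(val, (bool, np.bool_)):
            val = int(val)          # do not print e.g. "True"
        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)      # omit the ".0" at the end
        print(f"k={val}")
    # prints: k=1, k=1, k=3, k=0.5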
dcnum/read/__init__.py CHANGED
@@ -1,5 +1,6 @@
  # flake8: noqa: F401
  from .cache import md5sum
  from .const import PROTECTED_FEATURES
+ from .detect_flicker import detect_flickering
  from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
  from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py CHANGED
@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
      def __getitem__(self, index):
          if isinstance(index, (slice, list, np.ndarray)):
              if isinstance(index, slice):
-                 indices = np.arange(index.start or 0,
-                                     index.stop or len(self),
-                                     index.step)
+                 indices = np.arange(
+                     index.start or 0,
+                     min(index.stop, len(self)) if index.stop else len(self),
+                     index.step)
              else:
                  indices = index
              array_out = np.empty((len(indices),) + self.image_shape,
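The fix clamps the slice stop to the cache length, so an over-long slice no longer produces out-of-range indices. A standalone illustration of the new index computation:

    import numpy as np

    length = 5              # pretend len(cache) == 5
    index = slice(0, 100)   # caller asks for more items than exist
    indices = np.arange(index.start or 0,
                        min(index.stop, length) if index.stop else length,
                        index.step)
    print(indices)  # [0 1 2 3 4]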